{'type': 'flops', 'content': [{'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1582003754624}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1898404492032}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1898404492032}, {'type': 'perplexity', 'in_batch_dim': [8, 144], 'batch_size': 8, 'flops': 2847606704256}, {'type': 'perplexity', 'in_batch_dim': [8, 64], 'batch_size': 8, 'flops': 1265603017216}, {'type': 'perplexity', 'in_batch_dim': [8, 112], 'batch_size': 8, 'flops': 2214805229440}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1898404492032}, {'type': 'perplexity', 'in_batch_dim': [8, 112], 'batch_size': 8, 'flops': 2214805229440}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1898404492032}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1898404492032}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1898404492032}, {'type': 'perplexity', 'in_batch_dim': [8, 112], 'batch_size': 8, 'flops': 2214805229440}, {'type': 'perplexity', 'in_batch_dim': [8, 112], 'batch_size': 8, 'flops': 2214805229440}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1582003754624}, {'type': 'perplexity', 'in_batch_dim': [8, 144], 'batch_size': 8, 'flops': 2847606704256}, {'type': 'perplexity', 'in_batch_dim': [8, 112], 'batch_size': 8, 'flops': 2214805229440}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1582003754624}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1898404492032}, {'type': 'perplexity', 'in_batch_dim': [8, 64], 'batch_size': 8, 'flops': 1265603017216}, {'type': 'perplexity', 'in_batch_dim': [8, 64], 'batch_size': 8, 'flops': 1265603017216}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1898404492032}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1582003754624}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1582003754624}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1582003754624}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1582003754624}, {'type': 'perplexity', 'in_batch_dim': [8, 64], 'batch_size': 8, 'flops': 1265603017216}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1898404492032}, {'type': 'perplexity', 'in_batch_dim': [8, 64], 'batch_size': 8, 'flops': 1265603017216}, {'type': 'perplexity', 'in_batch_dim': [8, 64], 'batch_size': 8, 'flops': 1265603017216}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1582003754624}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1582003754624}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1582003754624}, {'type': 'perplexity', 'in_batch_dim': [8, 112], 'batch_size': 8, 'flops': 2214805229440}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1582003754624}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1582003754624}, {'type': 'perplexity', 'in_batch_dim': [8, 160], 'batch_size': 8, 'flops': 3164007441664}, {'type': 'perplexity', 'in_batch_dim': [8, 64], 'batch_size': 8, 'flops': 1265603017216}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1898404492032}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1898404492032}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1898404492032}, {'type': 'perplexity', 'in_batch_dim': [8, 64], 'batch_size': 8, 'flops': 1265603017216}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1582003754624}, {'type': 'perplexity', 'in_batch_dim': [8, 64], 'batch_size': 8, 'flops': 1265603017216}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1582003754624}, {'type': 'perplexity', 'in_batch_dim': [8, 64], 'batch_size': 8, 'flops': 1265603017216}, {'type': 'perplexity', 'in_batch_dim': [8, 112], 'batch_size': 8, 'flops': 2214805229440}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1582003754624}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1898404492032}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1898404492032}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1898404492032}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1898404492032}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1898404492032}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1898404492032}, {'type': 'perplexity', 'in_batch_dim': [8, 64], 'batch_size': 8, 'flops': 1265603017216}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1898404492032}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1582003754624}, {'type': 'perplexity', 'in_batch_dim': [8, 64], 'batch_size': 8, 'flops': 1265603017216}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1898404492032}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1898404492032}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1898404492032}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1898404492032}, {'type': 'perplexity', 'in_batch_dim': [8, 128], 'batch_size': 8, 'flops': 2531205966848}, {'type': 'perplexity', 'in_batch_dim': [8, 112], 'batch_size': 8, 'flops': 2214805229440}, {'type': 'perplexity', 'in_batch_dim': [8, 64], 'batch_size': 8, 'flops': 1265603017216}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1582003754624}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1898404492032}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1582003754624}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1898404492032}, {'type': 'perplexity', 'in_batch_dim': [8, 64], 'batch_size': 8, 'flops': 1265603017216}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1898404492032}, {'type': 'perplexity', 'in_batch_dim': [8, 112], 'batch_size': 8, 'flops': 2214805229440}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1898404492032}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1582003754624}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1582003754624}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1582003754624}, {'type': 'perplexity', 'in_batch_dim': [8, 64], 'batch_size': 8, 'flops': 1265603017216}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1582003754624}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1898404492032}, {'type': 'perplexity', 'in_batch_dim': [8, 64], 'batch_size': 8, 'flops': 1265603017216}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1582003754624}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1898404492032}, {'type': 'perplexity', 'in_batch_dim': [8, 64], 'batch_size': 8, 'flops': 1265603017216}, {'type': 'perplexity', 'in_batch_dim': [8, 112], 'batch_size': 8, 'flops': 2214805229440}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1582003754624}, {'type': 'perplexity', 'in_batch_dim': [8, 144], 'batch_size': 8, 'flops': 2847606704256}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1582003754624}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1582003754624}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1582003754624}, {'type': 'perplexity', 'in_batch_dim': [8, 64], 'batch_size': 8, 'flops': 1265603017216}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1898404492032}, {'type': 'perplexity', 'in_batch_dim': [8, 112], 'batch_size': 8, 'flops': 2214805229440}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1898404492032}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1582003754624}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1582003754624}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1582003754624}, {'type': 'perplexity', 'in_batch_dim': [8, 112], 'batch_size': 8, 'flops': 2214805229440}, {'type': 'perplexity', 'in_batch_dim': [8, 64], 'batch_size': 8, 'flops': 1265603017216}, {'type': 'perplexity', 'in_batch_dim': [8, 112], 'batch_size': 8, 'flops': 2214805229440}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1582003754624}, {'type': 'perplexity', 'in_batch_dim': [8, 64], 'batch_size': 8, 'flops': 1265603017216}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1582003754624}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1582003754624}, {'type': 'perplexity', 'in_batch_dim': [8, 64], 'batch_size': 8, 'flops': 1265603017216}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1582003754624}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1582003754624}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1582003754624}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1898404492032}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1582003754624}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1582003754624}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1898404492032}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1898404492032}, {'type': 'perplexity', 'in_batch_dim': [8, 112], 'batch_size': 8, 'flops': 2214805229440}, {'type': 'perplexity', 'in_batch_dim': [8, 64], 'batch_size': 8, 'flops': 1265603017216}, {'type': 'perplexity', 'in_batch_dim': [8, 112], 'batch_size': 8, 'flops': 2214805229440}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1898404492032}, {'type': 'perplexity', 'in_batch_dim': [8, 112], 'batch_size': 8, 'flops': 2214805229440}, {'type': 'perplexity', 'in_batch_dim': [8, 128], 'batch_size': 8, 'flops': 2531205966848}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1582003754624}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1898404492032}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1898404492032}, {'type': 'perplexity', 'in_batch_dim': [8, 192], 'batch_size': 8, 'flops': 3796808916480}, {'type': 'perplexity', 'in_batch_dim': [8, 64], 'batch_size': 8, 'flops': 1265603017216}, {'type': 'perplexity', 'in_batch_dim': [8, 144], 'batch_size': 8, 'flops': 2847606704256}, {'type': 'perplexity', 'in_batch_dim': [8, 64], 'batch_size': 8, 'flops': 1265603017216}, {'type': 'perplexity', 'in_batch_dim': [8, 144], 'batch_size': 8, 'flops': 2847606704256}, {'type': 'perplexity', 'in_batch_dim': [8, 64], 'batch_size': 8, 'flops': 1265603017216}, {'type': 'perplexity', 'in_batch_dim': [8, 64], 'batch_size': 8, 'flops': 1265603017216}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1898404492032}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1898404492032}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1582003754624}, {'type': 'perplexity', 'in_batch_dim': [8, 128], 'batch_size': 8, 'flops': 2531205966848}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1898404492032}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1898404492032}, {'type': 'perplexity', 'in_batch_dim': [8, 112], 'batch_size': 8, 'flops': 2214805229440}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1582003754624}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1582003754624}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1898404492032}, {'type': 'perplexity', 'in_batch_dim': [8, 64], 'batch_size': 8, 'flops': 1265603017216}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1898404492032}, {'type': 'perplexity', 'in_batch_dim': [8, 112], 'batch_size': 8, 'flops': 2214805229440}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1898404492032}, {'type': 'perplexity', 'in_batch_dim': [8, 64], 'batch_size': 8, 'flops': 1265603017216}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1582003754624}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1898404492032}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1898404492032}, {'type': 'perplexity', 'in_batch_dim': [8, 64], 'batch_size': 8, 'flops': 1265603017216}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1898404492032}, {'type': 'perplexity', 'in_batch_dim': [8, 64], 'batch_size': 8, 'flops': 1265603017216}, {'type': 'perplexity', 'in_batch_dim': [8, 112], 'batch_size': 8, 'flops': 2214805229440}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1582003754624}, {'type': 'perplexity', 'in_batch_dim': [8, 128], 'batch_size': 8, 'flops': 2531205966848}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1582003754624}, {'type': 'perplexity', 'in_batch_dim': [8, 112], 'batch_size': 8, 'flops': 2214805229440}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1898404492032}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1582003754624}, {'type': 'perplexity', 'in_batch_dim': [8, 112], 'batch_size': 8, 'flops': 2214805229440}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1582003754624}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1898404492032}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1898404492032}, {'type': 'perplexity', 'in_batch_dim': [8, 128], 'batch_size': 8, 'flops': 2531205966848}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1898404492032}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1898404492032}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1582003754624}, {'type': 'perplexity', 'in_batch_dim': [8, 112], 'batch_size': 8, 'flops': 2214805229440}, {'type': 'perplexity', 'in_batch_dim': [8, 128], 'batch_size': 8, 'flops': 2531205966848}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1898404492032}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1582003754624}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1582003754624}, {'type': 'perplexity', 'in_batch_dim': [8, 128], 'batch_size': 8, 'flops': 2531205966848}, {'type': 'perplexity', 'in_batch_dim': [8, 112], 'batch_size': 8, 'flops': 2214805229440}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1582003754624}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1582003754624}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1898404492032}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1582003754624}, {'type': 'perplexity', 'in_batch_dim': [8, 64], 'batch_size': 8, 'flops': 1265603017216}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1582003754624}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1582003754624}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1582003754624}, {'type': 'perplexity', 'in_batch_dim': [8, 112], 'batch_size': 8, 'flops': 2214805229440}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1582003754624}, {'type': 'perplexity', 'in_batch_dim': [8, 64], 'batch_size': 8, 'flops': 1265603017216}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1582003754624}, {'type': 'perplexity', 'in_batch_dim': [8, 128], 'batch_size': 8, 'flops': 2531205966848}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1898404492032}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1898404492032}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1582003754624}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1898404492032}, {'type': 'perplexity', 'in_batch_dim': [8, 64], 'batch_size': 8, 'flops': 1265603017216}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1898404492032}, {'type': 'perplexity', 'in_batch_dim': [8, 112], 'batch_size': 8, 'flops': 2214805229440}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1898404492032}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1582003754624}, {'type': 'perplexity', 'in_batch_dim': [8, 112], 'batch_size': 8, 'flops': 2214805229440}, {'type': 'perplexity', 'in_batch_dim': [8, 112], 'batch_size': 8, 'flops': 2214805229440}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1898404492032}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1898404492032}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1898404492032}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1898404492032}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1582003754624}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1582003754624}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1582003754624}, {'type': 'perplexity', 'in_batch_dim': [8, 112], 'batch_size': 8, 'flops': 2214805229440}, {'type': 'perplexity', 'in_batch_dim': [8, 64], 'batch_size': 8, 'flops': 1265603017216}, {'type': 'perplexity', 'in_batch_dim': [8, 144], 'batch_size': 8, 'flops': 2847606704256}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1898404492032}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1898404492032}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1898404492032}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1898404492032}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1898404492032}, {'type': 'perplexity', 'in_batch_dim': [8, 112], 'batch_size': 8, 'flops': 2214805229440}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1582003754624}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1582003754624}, {'type': 'perplexity', 'in_batch_dim': [8, 64], 'batch_size': 8, 'flops': 1265603017216}, {'type': 'perplexity', 'in_batch_dim': [8, 112], 'batch_size': 8, 'flops': 2214805229440}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1582003754624}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1898404492032}, {'type': 'perplexity', 'in_batch_dim': [8, 64], 'batch_size': 8, 'flops': 1265603017216}, {'type': 'perplexity', 'in_batch_dim': [8, 112], 'batch_size': 8, 'flops': 2214805229440}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1898404492032}, {'type': 'perplexity', 'in_batch_dim': [8, 64], 'batch_size': 8, 'flops': 1265603017216}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1582003754624}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1582003754624}, {'type': 'perplexity', 'in_batch_dim': [8, 64], 'batch_size': 8, 'flops': 1265603017216}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1898404492032}, {'type': 'perplexity', 'in_batch_dim': [8, 64], 'batch_size': 8, 'flops': 1265603017216}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1582003754624}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1898404492032}, {'type': 'perplexity', 'in_batch_dim': [8, 112], 'batch_size': 8, 'flops': 2214805229440}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1582003754624}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1898404492032}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1582003754624}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1898404492032}, {'type': 'perplexity', 'in_batch_dim': [8, 112], 'batch_size': 8, 'flops': 2214805229440}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1898404492032}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1898404492032}, {'type': 'perplexity', 'in_batch_dim': [8, 64], 'batch_size': 8, 'flops': 1265603017216}, {'type': 'perplexity', 'in_batch_dim': [8, 64], 'batch_size': 8, 'flops': 1265603017216}, {'type': 'perplexity', 'in_batch_dim': [8, 128], 'batch_size': 8, 'flops': 2531205966848}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1582003754624}, {'type': 'perplexity', 'in_batch_dim': [8, 64], 'batch_size': 8, 'flops': 1265603017216}, {'type': 'perplexity', 'in_batch_dim': [8, 112], 'batch_size': 8, 'flops': 2214805229440}, {'type': 'perplexity', 'in_batch_dim': [8, 112], 'batch_size': 8, 'flops': 2214805229440}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1898404492032}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1582003754624}, {'type': 'perplexity', 'in_batch_dim': [8, 128], 'batch_size': 8, 'flops': 2531205966848}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1898404492032}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1582003754624}, {'type': 'perplexity', 'in_batch_dim': [8, 112], 'batch_size': 8, 'flops': 2214805229440}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1898404492032}, {'type': 'perplexity', 'in_batch_dim': [8, 64], 'batch_size': 8, 'flops': 1265603017216}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1582003754624}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1898404492032}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1898404492032}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1582003754624}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1582003754624}, {'type': 'perplexity', 'in_batch_dim': [8, 64], 'batch_size': 8, 'flops': 1265603017216}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1898404492032}, {'type': 'perplexity', 'in_batch_dim': [8, 112], 'batch_size': 8, 'flops': 2214805229440}, {'type': 'perplexity', 'in_batch_dim': [8, 64], 'batch_size': 8, 'flops': 1265603017216}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1898404492032}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1582003754624}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1582003754624}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1898404492032}, {'type': 'perplexity', 'in_batch_dim': [8, 112], 'batch_size': 8, 'flops': 2214805229440}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1898404492032}, {'type': 'perplexity', 'in_batch_dim': [8, 112], 'batch_size': 8, 'flops': 2214805229440}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1582003754624}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1582003754624}, {'type': 'perplexity', 'in_batch_dim': [8, 64], 'batch_size': 8, 'flops': 1265603017216}, {'type': 'perplexity', 'in_batch_dim': [8, 112], 'batch_size': 8, 'flops': 2214805229440}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1582003754624}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1898404492032}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1582003754624}, {'type': 'perplexity', 'in_batch_dim': [8, 112], 'batch_size': 8, 'flops': 2214805229440}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1898404492032}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1898404492032}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1582003754624}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1898404492032}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1582003754624}, {'type': 'perplexity', 'in_batch_dim': [8, 112], 'batch_size': 8, 'flops': 2214805229440}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1582003754624}, {'type': 'perplexity', 'in_batch_dim': [8, 64], 'batch_size': 8, 'flops': 1265603017216}, {'type': 'perplexity', 'in_batch_dim': [8, 64], 'batch_size': 8, 'flops': 1265603017216}, {'type': 'perplexity', 'in_batch_dim': [8, 48], 'batch_size': 8, 'flops': 949202279808}, {'type': 'perplexity', 'in_batch_dim': [8, 112], 'batch_size': 8, 'flops': 2214805229440}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1582003754624}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1898404492032}, {'type': 'perplexity', 'in_batch_dim': [8, 128], 'batch_size': 8, 'flops': 2531205966848}, {'type': 'perplexity', 'in_batch_dim': [8, 112], 'batch_size': 8, 'flops': 2214805229440}, {'type': 'perplexity', 'in_batch_dim': [8, 128], 'batch_size': 8, 'flops': 2531205966848}, {'type': 'perplexity', 'in_batch_dim': [8, 112], 'batch_size': 8, 'flops': 2214805229440}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1582003754624}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1898404492032}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1582003754624}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1582003754624}, {'type': 'perplexity', 'in_batch_dim': [8, 112], 'batch_size': 8, 'flops': 2214805229440}, {'type': 'perplexity', 'in_batch_dim': [8, 112], 'batch_size': 8, 'flops': 2214805229440}, {'type': 'perplexity', 'in_batch_dim': [8, 64], 'batch_size': 8, 'flops': 1265603017216}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1898404492032}, {'type': 'perplexity', 'in_batch_dim': [8, 112], 'batch_size': 8, 'flops': 2214805229440}, {'type': 'perplexity', 'in_batch_dim': [8, 64], 'batch_size': 8, 'flops': 1265603017216}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1898404492032}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1582003754624}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1898404492032}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1898404492032}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1898404492032}, {'type': 'perplexity', 'in_batch_dim': [8, 48], 'batch_size': 8, 'flops': 949202279808}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1898404492032}, {'type': 'perplexity', 'in_batch_dim': [8, 112], 'batch_size': 8, 'flops': 2214805229440}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1582003754624}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1898404492032}, {'type': 'perplexity', 'in_batch_dim': [8, 64], 'batch_size': 8, 'flops': 1265603017216}, {'type': 'perplexity', 'in_batch_dim': [8, 112], 'batch_size': 8, 'flops': 2214805229440}, {'type': 'perplexity', 'in_batch_dim': [8, 64], 'batch_size': 8, 'flops': 1265603017216}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1582003754624}, {'type': 'perplexity', 'in_batch_dim': [8, 64], 'batch_size': 8, 'flops': 1265603017216}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1582003754624}, {'type': 'perplexity', 'in_batch_dim': [8, 112], 'batch_size': 8, 'flops': 2214805229440}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1582003754624}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1582003754624}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1582003754624}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1582003754624}, {'type': 'perplexity', 'in_batch_dim': [8, 112], 'batch_size': 8, 'flops': 2214805229440}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1582003754624}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1898404492032}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1898404492032}, {'type': 'perplexity', 'in_batch_dim': [8, 112], 'batch_size': 8, 'flops': 2214805229440}, {'type': 'perplexity', 'in_batch_dim': [8, 144], 'batch_size': 8, 'flops': 2847606704256}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1582003754624}, {'type': 'perplexity', 'in_batch_dim': [8, 64], 'batch_size': 8, 'flops': 1265603017216}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1898404492032}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1898404492032}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1582003754624}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1898404492032}, {'type': 'perplexity', 'in_batch_dim': [8, 128], 'batch_size': 8, 'flops': 2531205966848}, {'type': 'perplexity', 'in_batch_dim': [8, 112], 'batch_size': 8, 'flops': 2214805229440}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1898404492032}, {'type': 'perplexity', 'in_batch_dim': [8, 64], 'batch_size': 8, 'flops': 1265603017216}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1898404492032}, {'type': 'perplexity', 'in_batch_dim': [8, 160], 'batch_size': 8, 'flops': 3164007441664}, {'type': 'perplexity', 'in_batch_dim': [8, 64], 'batch_size': 8, 'flops': 1265603017216}, {'type': 'perplexity', 'in_batch_dim': [8, 112], 'batch_size': 8, 'flops': 2214805229440}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1898404492032}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1898404492032}, {'type': 'perplexity', 'in_batch_dim': [8, 144], 'batch_size': 8, 'flops': 2847606704256}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1582003754624}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1898404492032}, {'type': 'perplexity', 'in_batch_dim': [8, 64], 'batch_size': 8, 'flops': 1265603017216}, {'type': 'perplexity', 'in_batch_dim': [8, 64], 'batch_size': 8, 'flops': 1265603017216}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1898404492032}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1898404492032}, {'type': 'perplexity', 'in_batch_dim': [8, 112], 'batch_size': 8, 'flops': 2214805229440}, {'type': 'perplexity', 'in_batch_dim': [8, 112], 'batch_size': 8, 'flops': 2214805229440}, {'type': 'perplexity', 'in_batch_dim': [8, 128], 'batch_size': 8, 'flops': 2531205966848}, {'type': 'perplexity', 'in_batch_dim': [8, 112], 'batch_size': 8, 'flops': 2214805229440}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1582003754624}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1582003754624}, {'type': 'perplexity', 'in_batch_dim': [8, 112], 'batch_size': 8, 'flops': 2214805229440}, {'type': 'perplexity', 'in_batch_dim': [8, 128], 'batch_size': 8, 'flops': 2531205966848}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1582003754624}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1582003754624}, {'type': 'perplexity', 'in_batch_dim': [8, 64], 'batch_size': 8, 'flops': 1265603017216}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1898404492032}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1582003754624}, {'type': 'perplexity', 'in_batch_dim': [8, 112], 'batch_size': 8, 'flops': 2214805229440}, {'type': 'perplexity', 'in_batch_dim': [8, 64], 'batch_size': 8, 'flops': 1265603017216}, {'type': 'perplexity', 'in_batch_dim': [8, 64], 'batch_size': 8, 'flops': 1265603017216}, {'type': 'perplexity', 'in_batch_dim': [8, 128], 'batch_size': 8, 'flops': 2531205966848}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1898404492032}, {'type': 'perplexity', 'in_batch_dim': [8, 112], 'batch_size': 8, 'flops': 2214805229440}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1582003754624}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1582003754624}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1582003754624}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1582003754624}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1582003754624}, {'type': 'perplexity', 'in_batch_dim': [8, 112], 'batch_size': 8, 'flops': 2214805229440}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1898404492032}, {'type': 'perplexity', 'in_batch_dim': [8, 112], 'batch_size': 8, 'flops': 2214805229440}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1898404492032}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1582003754624}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1582003754624}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1898404492032}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1898404492032}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1582003754624}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1582003754624}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1582003754624}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1582003754624}, {'type': 'perplexity', 'in_batch_dim': [8, 48], 'batch_size': 8, 'flops': 949202279808}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1582003754624}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1898404492032}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1582003754624}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1898404492032}, {'type': 'perplexity', 'in_batch_dim': [8, 112], 'batch_size': 8, 'flops': 2214805229440}, {'type': 'perplexity', 'in_batch_dim': [8, 128], 'batch_size': 8, 'flops': 2531205966848}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1898404492032}, {'type': 'perplexity', 'in_batch_dim': [8, 64], 'batch_size': 8, 'flops': 1265603017216}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1898404492032}, {'type': 'perplexity', 'in_batch_dim': [8, 64], 'batch_size': 8, 'flops': 1265603017216}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1582003754624}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1582003754624}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1582003754624}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1582003754624}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1898404492032}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1582003754624}, {'type': 'perplexity', 'in_batch_dim': [8, 112], 'batch_size': 8, 'flops': 2214805229440}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1898404492032}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1582003754624}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1898404492032}, {'type': 'perplexity', 'in_batch_dim': [8, 64], 'batch_size': 8, 'flops': 1265603017216}, {'type': 'perplexity', 'in_batch_dim': [8, 64], 'batch_size': 8, 'flops': 1265603017216}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1898404492032}, {'type': 'perplexity', 'in_batch_dim': [8, 64], 'batch_size': 8, 'flops': 1265603017216}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1582003754624}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1582003754624}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1582003754624}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1582003754624}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1582003754624}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1582003754624}, {'type': 'perplexity', 'in_batch_dim': [8, 64], 'batch_size': 8, 'flops': 1265603017216}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1582003754624}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1582003754624}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1898404492032}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1898404492032}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1898404492032}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1898404492032}, {'type': 'perplexity', 'in_batch_dim': [8, 128], 'batch_size': 8, 'flops': 2531205966848}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1582003754624}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1898404492032}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1898404492032}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1898404492032}, {'type': 'perplexity', 'in_batch_dim': [8, 64], 'batch_size': 8, 'flops': 1265603017216}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1898404492032}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1898404492032}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1582003754624}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1898404492032}, {'type': 'perplexity', 'in_batch_dim': [8, 112], 'batch_size': 8, 'flops': 2214805229440}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1898404492032}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1582003754624}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1898404492032}, {'type': 'perplexity', 'in_batch_dim': [8, 64], 'batch_size': 8, 'flops': 1265603017216}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1898404492032}, {'type': 'perplexity', 'in_batch_dim': [8, 112], 'batch_size': 8, 'flops': 2214805229440}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1898404492032}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1898404492032}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1582003754624}, {'type': 'perplexity', 'in_batch_dim': [8, 64], 'batch_size': 8, 'flops': 1265603017216}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1582003754624}, {'type': 'perplexity', 'in_batch_dim': [8, 128], 'batch_size': 8, 'flops': 2531205966848}, {'type': 'perplexity', 'in_batch_dim': [8, 64], 'batch_size': 8, 'flops': 1265603017216}, {'type': 'perplexity', 'in_batch_dim': [8, 128], 'batch_size': 8, 'flops': 2531205966848}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1898404492032}, {'type': 'perplexity', 'in_batch_dim': [8, 112], 'batch_size': 8, 'flops': 2214805229440}, {'type': 'perplexity', 'in_batch_dim': [8, 112], 'batch_size': 8, 'flops': 2214805229440}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1898404492032}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1898404492032}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1582003754624}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1582003754624}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1582003754624}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1898404492032}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1582003754624}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1582003754624}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1898404492032}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1582003754624}, {'type': 'perplexity', 'in_batch_dim': [8, 64], 'batch_size': 8, 'flops': 1265603017216}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1898404492032}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1898404492032}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1898404492032}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1898404492032}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1582003754624}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1898404492032}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1582003754624}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1582003754624}, {'type': 'perplexity', 'in_batch_dim': [8, 112], 'batch_size': 8, 'flops': 2214805229440}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1582003754624}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1898404492032}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1582003754624}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1582003754624}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1582003754624}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1898404492032}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1582003754624}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1898404492032}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1582003754624}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1582003754624}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1898404492032}, {'type': 'perplexity', 'in_batch_dim': [8, 112], 'batch_size': 8, 'flops': 2214805229440}, {'type': 'perplexity', 'in_batch_dim': [8, 128], 'batch_size': 8, 'flops': 2531205966848}, {'type': 'perplexity', 'in_batch_dim': [8, 112], 'batch_size': 8, 'flops': 2214805229440}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1582003754624}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1898404492032}, {'type': 'perplexity', 'in_batch_dim': [8, 64], 'batch_size': 8, 'flops': 1265603017216}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1898404492032}, {'type': 'perplexity', 'in_batch_dim': [8, 64], 'batch_size': 8, 'flops': 1265603017216}, {'type': 'perplexity', 'in_batch_dim': [8, 144], 'batch_size': 8, 'flops': 2847606704256}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1582003754624}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1898404492032}, {'type': 'perplexity', 'in_batch_dim': [8, 112], 'batch_size': 8, 'flops': 2214805229440}, {'type': 'perplexity', 'in_batch_dim': [8, 144], 'batch_size': 8, 'flops': 2847606704256}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1582003754624}, {'type': 'perplexity', 'in_batch_dim': [8, 112], 'batch_size': 8, 'flops': 2214805229440}, {'type': 'perplexity', 'in_batch_dim': [8, 64], 'batch_size': 8, 'flops': 1265603017216}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1582003754624}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1898404492032}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1898404492032}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1582003754624}, {'type': 'perplexity', 'in_batch_dim': [8, 112], 'batch_size': 8, 'flops': 2214805229440}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1582003754624}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1582003754624}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1582003754624}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1582003754624}, {'type': 'perplexity', 'in_batch_dim': [8, 112], 'batch_size': 8, 'flops': 2214805229440}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1898404492032}, {'type': 'perplexity', 'in_batch_dim': [8, 128], 'batch_size': 8, 'flops': 2531205966848}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1582003754624}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1582003754624}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1898404492032}, {'type': 'perplexity', 'in_batch_dim': [8, 64], 'batch_size': 8, 'flops': 1265603017216}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1898404492032}, {'type': 'perplexity', 'in_batch_dim': [8, 64], 'batch_size': 8, 'flops': 1265603017216}, {'type': 'perplexity', 'in_batch_dim': [8, 112], 'batch_size': 8, 'flops': 2214805229440}, {'type': 'perplexity', 'in_batch_dim': [8, 112], 'batch_size': 8, 'flops': 2214805229440}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1582003754624}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1898404492032}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1582003754624}, {'type': 'perplexity', 'in_batch_dim': [8, 64], 'batch_size': 8, 'flops': 1265603017216}, {'type': 'perplexity', 'in_batch_dim': [8, 64], 'batch_size': 8, 'flops': 1265603017216}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1898404492032}, {'type': 'perplexity', 'in_batch_dim': [8, 64], 'batch_size': 8, 'flops': 1265603017216}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1898404492032}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1898404492032}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1582003754624}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1898404492032}, {'type': 'perplexity', 'in_batch_dim': [8, 144], 'batch_size': 8, 'flops': 2847606704256}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1898404492032}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1582003754624}, {'type': 'perplexity', 'in_batch_dim': [8, 128], 'batch_size': 8, 'flops': 2531205966848}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1898404492032}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1898404492032}, {'type': 'perplexity', 'in_batch_dim': [8, 112], 'batch_size': 8, 'flops': 2214805229440}, {'type': 'perplexity', 'in_batch_dim': [8, 112], 'batch_size': 8, 'flops': 2214805229440}, {'type': 'perplexity', 'in_batch_dim': [8, 64], 'batch_size': 8, 'flops': 1265603017216}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1582003754624}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1898404492032}, {'type': 'perplexity', 'in_batch_dim': [8, 128], 'batch_size': 8, 'flops': 2531205966848}, {'type': 'perplexity', 'in_batch_dim': [8, 144], 'batch_size': 8, 'flops': 2847606704256}, {'type': 'perplexity', 'in_batch_dim': [8, 112], 'batch_size': 8, 'flops': 2214805229440}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1582003754624}, {'type': 'perplexity', 'in_batch_dim': [8, 64], 'batch_size': 8, 'flops': 1265603017216}, {'type': 'perplexity', 'in_batch_dim': [8, 64], 'batch_size': 8, 'flops': 1265603017216}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1898404492032}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1582003754624}, {'type': 'perplexity', 'in_batch_dim': [8, 64], 'batch_size': 8, 'flops': 1265603017216}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1898404492032}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1582003754624}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1898404492032}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1898404492032}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1582003754624}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1582003754624}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1582003754624}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1582003754624}, {'type': 'perplexity', 'in_batch_dim': [3, 48], 'batch_size': 8, 'flops': 949202279808}], 'timestamp': '2025-09-10 02:26:13.402137', 'step': 0, 'epoch': 0} {'type': 'pplx', 'content': 21944.183071258598, 'timestamp': '2025-09-10 02:26:13.405535', 'step': 0, 'epoch': 0} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 3322488817984}, 'timestamp': '2025-09-10 02:26:13.477123', 'step': 0, 'epoch': 1} {'type': 'loss', 'content': 1.0041145086288452, 'timestamp': '2025-09-10 02:26:13.479712', 'step': 1, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 3322488817984}, 'timestamp': '2025-09-10 02:26:13.511099', 'step': 1, 'epoch': 1} {'type': 'loss', 'content': 1.0089284181594849, 'timestamp': '2025-09-10 02:26:13.512982', 'step': 2, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 2847885091968}, 'timestamp': '2025-09-10 02:26:13.555218', 'step': 2, 'epoch': 1} {'type': 'loss', 'content': 0.9882441759109497, 'timestamp': '2025-09-10 02:26:13.557271', 'step': 3, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 128], 'flops': 3797092544000}, 'timestamp': '2025-09-10 02:26:13.594013', 'step': 3, 'epoch': 1} {'type': 'loss', 'content': 0.9745813608169556, 'timestamp': '2025-09-10 02:26:13.657931', 'step': 4, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 128], 'flops': 3797092544000}, 'timestamp': '2025-09-10 02:26:13.690990', 'step': 4, 'epoch': 1} {'type': 'loss', 'content': 0.3493274748325348, 'timestamp': '2025-09-10 02:26:13.692866', 'step': 5, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 3322488817984}, 'timestamp': '2025-09-10 02:26:13.723078', 'step': 5, 'epoch': 1} {'type': 'loss', 'content': 0.2965392768383026, 'timestamp': '2025-09-10 02:26:13.725347', 'step': 6, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 3322488817984}, 'timestamp': '2025-09-10 02:26:13.755462', 'step': 6, 'epoch': 1} {'type': 'loss', 'content': 0.3332878053188324, 'timestamp': '2025-09-10 02:26:13.758089', 'step': 7, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 3322488817984}, 'timestamp': '2025-09-10 02:26:13.788577', 'step': 7, 'epoch': 1} {'type': 'loss', 'content': 0.3120311200618744, 'timestamp': '2025-09-10 02:26:13.812828', 'step': 8, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 3322488817984}, 'timestamp': '2025-09-10 02:26:13.842670', 'step': 8, 'epoch': 1} {'type': 'loss', 'content': 0.19788001477718353, 'timestamp': '2025-09-10 02:26:13.845336', 'step': 9, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 2847885091968}, 'timestamp': '2025-09-10 02:26:13.875842', 'step': 9, 'epoch': 1} {'type': 'loss', 'content': 0.24480819702148438, 'timestamp': '2025-09-10 02:26:13.878071', 'step': 10, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 2847885091968}, 'timestamp': '2025-09-10 02:26:13.910592', 'step': 10, 'epoch': 1} {'type': 'loss', 'content': 0.1303098499774933, 'timestamp': '2025-09-10 02:26:13.912946', 'step': 11, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 3322488817984}, 'timestamp': '2025-09-10 02:26:13.942880', 'step': 11, 'epoch': 1} {'type': 'loss', 'content': 0.2674413323402405, 'timestamp': '2025-09-10 02:26:13.966741', 'step': 12, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 3322488817984}, 'timestamp': '2025-09-10 02:26:13.997868', 'step': 12, 'epoch': 1} {'type': 'loss', 'content': 0.1668558418750763, 'timestamp': '2025-09-10 02:26:13.999910', 'step': 13, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 128], 'flops': 3797092544000}, 'timestamp': '2025-09-10 02:26:14.029554', 'step': 13, 'epoch': 1} {'type': 'loss', 'content': 0.17541316151618958, 'timestamp': '2025-09-10 02:26:14.031575', 'step': 14, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 3322488817984}, 'timestamp': '2025-09-10 02:26:14.060992', 'step': 14, 'epoch': 1} {'type': 'loss', 'content': 0.189327672123909, 'timestamp': '2025-09-10 02:26:14.062879', 'step': 15, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 128], 'flops': 3797092544000}, 'timestamp': '2025-09-10 02:26:14.092825', 'step': 15, 'epoch': 1} {'type': 'loss', 'content': 0.2906024158000946, 'timestamp': '2025-09-10 02:26:14.116289', 'step': 16, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 2847885091968}, 'timestamp': '2025-09-10 02:26:14.145792', 'step': 16, 'epoch': 1} {'type': 'loss', 'content': 0.16668717563152313, 'timestamp': '2025-09-10 02:26:14.147714', 'step': 17, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 2847885091968}, 'timestamp': '2025-09-10 02:26:14.177075', 'step': 17, 'epoch': 1} {'type': 'loss', 'content': 0.2425771951675415, 'timestamp': '2025-09-10 02:26:14.179063', 'step': 18, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 144], 'flops': 4271696270016}, 'timestamp': '2025-09-10 02:26:14.226318', 'step': 18, 'epoch': 1} {'type': 'loss', 'content': 0.30201563239097595, 'timestamp': '2025-09-10 02:26:14.228302', 'step': 19, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 2847885091968}, 'timestamp': '2025-09-10 02:26:14.257553', 'step': 19, 'epoch': 1} {'type': 'loss', 'content': 0.1461743712425232, 'timestamp': '2025-09-10 02:26:14.280943', 'step': 20, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 2847885091968}, 'timestamp': '2025-09-10 02:26:14.310618', 'step': 20, 'epoch': 1} {'type': 'loss', 'content': 0.3025420904159546, 'timestamp': '2025-09-10 02:26:14.313256', 'step': 21, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 128], 'flops': 3797092544000}, 'timestamp': '2025-09-10 02:26:14.343669', 'step': 21, 'epoch': 1} {'type': 'loss', 'content': 0.2744583785533905, 'timestamp': '2025-09-10 02:26:14.345745', 'step': 22, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 128], 'flops': 3797092544000}, 'timestamp': '2025-09-10 02:26:14.376877', 'step': 22, 'epoch': 1} {'type': 'loss', 'content': 0.1275661438703537, 'timestamp': '2025-09-10 02:26:14.379441', 'step': 23, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 2847885091968}, 'timestamp': '2025-09-10 02:26:14.410041', 'step': 23, 'epoch': 1} {'type': 'loss', 'content': 0.30871880054473877, 'timestamp': '2025-09-10 02:26:14.433584', 'step': 24, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 3322488817984}, 'timestamp': '2025-09-10 02:26:14.464391', 'step': 24, 'epoch': 1} {'type': 'loss', 'content': 0.21316787600517273, 'timestamp': '2025-09-10 02:26:14.466596', 'step': 25, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 160], 'flops': 4746299996032}, 'timestamp': '2025-09-10 02:26:14.505840', 'step': 25, 'epoch': 1} {'type': 'loss', 'content': 0.20950211584568024, 'timestamp': '2025-09-10 02:26:14.508283', 'step': 26, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 3322488817984}, 'timestamp': '2025-09-10 02:26:14.538792', 'step': 26, 'epoch': 1} {'type': 'loss', 'content': 0.37757372856140137, 'timestamp': '2025-09-10 02:26:14.540753', 'step': 27, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 128], 'flops': 3797092544000}, 'timestamp': '2025-09-10 02:26:14.570404', 'step': 27, 'epoch': 1} {'type': 'loss', 'content': 0.26793429255485535, 'timestamp': '2025-09-10 02:26:14.593841', 'step': 28, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 128], 'flops': 3797092544000}, 'timestamp': '2025-09-10 02:26:14.623781', 'step': 28, 'epoch': 1} {'type': 'loss', 'content': 0.17208173871040344, 'timestamp': '2025-09-10 02:26:14.626092', 'step': 29, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 2847885091968}, 'timestamp': '2025-09-10 02:26:14.657899', 'step': 29, 'epoch': 1} {'type': 'loss', 'content': 0.11617887020111084, 'timestamp': '2025-09-10 02:26:14.659922', 'step': 30, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 3322488817984}, 'timestamp': '2025-09-10 02:26:14.689712', 'step': 30, 'epoch': 1} {'type': 'loss', 'content': 0.215238556265831, 'timestamp': '2025-09-10 02:26:14.691575', 'step': 31, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 128], 'flops': 3797092544000}, 'timestamp': '2025-09-10 02:26:14.720581', 'step': 31, 'epoch': 1} {'type': 'loss', 'content': 0.16789598762989044, 'timestamp': '2025-09-10 02:26:14.743893', 'step': 32, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 3322488817984}, 'timestamp': '2025-09-10 02:26:14.774689', 'step': 32, 'epoch': 1} {'type': 'loss', 'content': 0.24503272771835327, 'timestamp': '2025-09-10 02:26:14.776660', 'step': 33, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 3322488817984}, 'timestamp': '2025-09-10 02:26:14.806314', 'step': 33, 'epoch': 1} {'type': 'loss', 'content': 0.24919863045215607, 'timestamp': '2025-09-10 02:26:14.808265', 'step': 34, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 128], 'flops': 3797092544000}, 'timestamp': '2025-09-10 02:26:14.838120', 'step': 34, 'epoch': 1} {'type': 'loss', 'content': 0.33478420972824097, 'timestamp': '2025-09-10 02:26:14.840181', 'step': 35, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 128], 'flops': 3797092544000}, 'timestamp': '2025-09-10 02:26:14.870037', 'step': 35, 'epoch': 1} {'type': 'loss', 'content': 0.18602339923381805, 'timestamp': '2025-09-10 02:26:14.893513', 'step': 36, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 3322488817984}, 'timestamp': '2025-09-10 02:26:14.923224', 'step': 36, 'epoch': 1} {'type': 'loss', 'content': 0.32776907086372375, 'timestamp': '2025-09-10 02:26:14.925341', 'step': 37, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 2847885091968}, 'timestamp': '2025-09-10 02:26:14.955486', 'step': 37, 'epoch': 1} {'type': 'loss', 'content': 0.19521194696426392, 'timestamp': '2025-09-10 02:26:14.957787', 'step': 38, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 2847885091968}, 'timestamp': '2025-09-10 02:26:14.987954', 'step': 38, 'epoch': 1} {'type': 'loss', 'content': 0.30945780873298645, 'timestamp': '2025-09-10 02:26:14.995090', 'step': 39, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 3322488817984}, 'timestamp': '2025-09-10 02:26:15.027238', 'step': 39, 'epoch': 1} {'type': 'loss', 'content': 0.2715054154396057, 'timestamp': '2025-09-10 02:26:15.050918', 'step': 40, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 128], 'flops': 3797092544000}, 'timestamp': '2025-09-10 02:26:15.081940', 'step': 40, 'epoch': 1} {'type': 'loss', 'content': 0.1634029895067215, 'timestamp': '2025-09-10 02:26:15.084161', 'step': 41, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 3322488817984}, 'timestamp': '2025-09-10 02:26:15.114700', 'step': 41, 'epoch': 1} {'type': 'loss', 'content': 0.18897323310375214, 'timestamp': '2025-09-10 02:26:15.116657', 'step': 42, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 3322488817984}, 'timestamp': '2025-09-10 02:26:15.146804', 'step': 42, 'epoch': 1} {'type': 'loss', 'content': 0.1733444482088089, 'timestamp': '2025-09-10 02:26:15.148836', 'step': 43, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 3322488817984}, 'timestamp': '2025-09-10 02:26:15.178368', 'step': 43, 'epoch': 1} {'type': 'loss', 'content': 0.26167240738868713, 'timestamp': '2025-09-10 02:26:15.201572', 'step': 44, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 3322488817984}, 'timestamp': '2025-09-10 02:26:15.232569', 'step': 44, 'epoch': 1} {'type': 'loss', 'content': 0.24059762060642242, 'timestamp': '2025-09-10 02:26:15.234560', 'step': 45, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 2847885091968}, 'timestamp': '2025-09-10 02:26:15.264051', 'step': 45, 'epoch': 1} {'type': 'loss', 'content': 0.25486910343170166, 'timestamp': '2025-09-10 02:26:15.265996', 'step': 46, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 144], 'flops': 4271696270016}, 'timestamp': '2025-09-10 02:26:15.295537', 'step': 46, 'epoch': 1} {'type': 'loss', 'content': 0.20476314425468445, 'timestamp': '2025-09-10 02:26:15.297786', 'step': 47, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 3322488817984}, 'timestamp': '2025-09-10 02:26:15.328380', 'step': 47, 'epoch': 1} {'type': 'loss', 'content': 0.31738483905792236, 'timestamp': '2025-09-10 02:26:15.351790', 'step': 48, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 128], 'flops': 3797092544000}, 'timestamp': '2025-09-10 02:26:15.381672', 'step': 48, 'epoch': 1} {'type': 'loss', 'content': 0.18251438438892365, 'timestamp': '2025-09-10 02:26:15.383667', 'step': 49, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 3322488817984}, 'timestamp': '2025-09-10 02:26:15.416658', 'step': 49, 'epoch': 1} {'type': 'loss', 'content': 0.19198308885097504, 'timestamp': '2025-09-10 02:26:15.418682', 'step': 50, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 2847885091968}, 'timestamp': '2025-09-10 02:26:15.447661', 'step': 50, 'epoch': 1} {'type': 'loss', 'content': 0.24287214875221252, 'timestamp': '2025-09-10 02:26:15.449645', 'step': 51, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 3322488817984}, 'timestamp': '2025-09-10 02:26:15.479076', 'step': 51, 'epoch': 1} {'type': 'loss', 'content': 0.33463913202285767, 'timestamp': '2025-09-10 02:26:15.502443', 'step': 52, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 3322488817984}, 'timestamp': '2025-09-10 02:26:15.532878', 'step': 52, 'epoch': 1} {'type': 'loss', 'content': 0.19788430631160736, 'timestamp': '2025-09-10 02:26:15.534981', 'step': 53, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 144], 'flops': 4271696270016}, 'timestamp': '2025-09-10 02:26:15.564906', 'step': 53, 'epoch': 1} {'type': 'loss', 'content': 0.20979510247707367, 'timestamp': '2025-09-10 02:26:15.567018', 'step': 54, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 144], 'flops': 4271696270016}, 'timestamp': '2025-09-10 02:26:15.596783', 'step': 54, 'epoch': 1} {'type': 'loss', 'content': 0.24060118198394775, 'timestamp': '2025-09-10 02:26:15.599267', 'step': 55, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 80], 'flops': 2373281365952}, 'timestamp': '2025-09-10 02:26:15.644625', 'step': 55, 'epoch': 1} {'type': 'loss', 'content': 0.10719262063503265, 'timestamp': '2025-09-10 02:26:15.667975', 'step': 56, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 128], 'flops': 3797092544000}, 'timestamp': '2025-09-10 02:26:15.698234', 'step': 56, 'epoch': 1} {'type': 'loss', 'content': 0.17499929666519165, 'timestamp': '2025-09-10 02:26:15.700340', 'step': 57, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 2847885091968}, 'timestamp': '2025-09-10 02:26:15.729723', 'step': 57, 'epoch': 1} {'type': 'loss', 'content': 0.20688991248607635, 'timestamp': '2025-09-10 02:26:15.731881', 'step': 58, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 3322488817984}, 'timestamp': '2025-09-10 02:26:15.761433', 'step': 58, 'epoch': 1} {'type': 'loss', 'content': 0.22268588840961456, 'timestamp': '2025-09-10 02:26:15.763342', 'step': 59, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 160], 'flops': 4746299996032}, 'timestamp': '2025-09-10 02:26:15.794330', 'step': 59, 'epoch': 1} {'type': 'loss', 'content': 0.17318099737167358, 'timestamp': '2025-09-10 02:26:15.817740', 'step': 60, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 128], 'flops': 3797092544000}, 'timestamp': '2025-09-10 02:26:15.847669', 'step': 60, 'epoch': 1} {'type': 'loss', 'content': 0.19815072417259216, 'timestamp': '2025-09-10 02:26:15.849557', 'step': 61, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 128], 'flops': 3797092544000}, 'timestamp': '2025-09-10 02:26:15.879397', 'step': 61, 'epoch': 1} {'type': 'loss', 'content': 0.14437247812747955, 'timestamp': '2025-09-10 02:26:15.881369', 'step': 62, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 3322488817984}, 'timestamp': '2025-09-10 02:26:15.910855', 'step': 62, 'epoch': 1} {'type': 'loss', 'content': 0.22565339505672455, 'timestamp': '2025-09-10 02:26:15.913182', 'step': 63, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 2847885091968}, 'timestamp': '2025-09-10 02:26:15.942802', 'step': 63, 'epoch': 1} {'type': 'loss', 'content': 0.24928919970989227, 'timestamp': '2025-09-10 02:26:15.966009', 'step': 64, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 3322488817984}, 'timestamp': '2025-09-10 02:26:15.996853', 'step': 64, 'epoch': 1} {'type': 'loss', 'content': 0.13086716830730438, 'timestamp': '2025-09-10 02:26:15.999544', 'step': 65, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 80], 'flops': 2373281365952}, 'timestamp': '2025-09-10 02:26:16.030641', 'step': 65, 'epoch': 1} {'type': 'loss', 'content': 0.255426287651062, 'timestamp': '2025-09-10 02:26:16.032734', 'step': 66, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 144], 'flops': 4271696270016}, 'timestamp': '2025-09-10 02:26:16.064791', 'step': 66, 'epoch': 1} {'type': 'loss', 'content': 0.28368523716926575, 'timestamp': '2025-09-10 02:26:16.067015', 'step': 67, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 2847885091968}, 'timestamp': '2025-09-10 02:26:16.098312', 'step': 67, 'epoch': 1} {'type': 'loss', 'content': 0.1985204666852951, 'timestamp': '2025-09-10 02:26:16.121855', 'step': 68, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 128], 'flops': 3797092544000}, 'timestamp': '2025-09-10 02:26:16.152422', 'step': 68, 'epoch': 1} {'type': 'loss', 'content': 0.23835501074790955, 'timestamp': '2025-09-10 02:26:16.154563', 'step': 69, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 2847885091968}, 'timestamp': '2025-09-10 02:26:16.184001', 'step': 69, 'epoch': 1} {'type': 'loss', 'content': 0.25446033477783203, 'timestamp': '2025-09-10 02:26:16.186027', 'step': 70, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 2847885091968}, 'timestamp': '2025-09-10 02:26:16.215486', 'step': 70, 'epoch': 1} {'type': 'loss', 'content': 0.1633237898349762, 'timestamp': '2025-09-10 02:26:16.217544', 'step': 71, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 2847885091968}, 'timestamp': '2025-09-10 02:26:16.248787', 'step': 71, 'epoch': 1} {'type': 'loss', 'content': 0.2020808607339859, 'timestamp': '2025-09-10 02:26:16.272279', 'step': 72, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 3322488817984}, 'timestamp': '2025-09-10 02:26:16.303938', 'step': 72, 'epoch': 1} {'type': 'loss', 'content': 0.19922861456871033, 'timestamp': '2025-09-10 02:26:16.306334', 'step': 73, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 2847885091968}, 'timestamp': '2025-09-10 02:26:16.338122', 'step': 73, 'epoch': 1} {'type': 'loss', 'content': 0.20208601653575897, 'timestamp': '2025-09-10 02:26:16.340625', 'step': 74, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 128], 'flops': 3797092544000}, 'timestamp': '2025-09-10 02:26:16.373692', 'step': 74, 'epoch': 1} {'type': 'loss', 'content': 0.18548063933849335, 'timestamp': '2025-09-10 02:26:16.376431', 'step': 75, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 3322488817984}, 'timestamp': '2025-09-10 02:26:16.407667', 'step': 75, 'epoch': 1} {'type': 'loss', 'content': 0.3292427957057953, 'timestamp': '2025-09-10 02:26:16.430965', 'step': 76, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 128], 'flops': 3797092544000}, 'timestamp': '2025-09-10 02:26:16.460966', 'step': 76, 'epoch': 1} {'type': 'loss', 'content': 0.16806355118751526, 'timestamp': '2025-09-10 02:26:16.463271', 'step': 77, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 2847885091968}, 'timestamp': '2025-09-10 02:26:16.493149', 'step': 77, 'epoch': 1} {'type': 'loss', 'content': 0.2102643996477127, 'timestamp': '2025-09-10 02:26:16.495521', 'step': 78, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 2847885091968}, 'timestamp': '2025-09-10 02:26:16.525294', 'step': 78, 'epoch': 1} {'type': 'loss', 'content': 0.16511382162570953, 'timestamp': '2025-09-10 02:26:16.527123', 'step': 79, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 2847885091968}, 'timestamp': '2025-09-10 02:26:16.557867', 'step': 79, 'epoch': 1} {'type': 'loss', 'content': 0.27007368206977844, 'timestamp': '2025-09-10 02:26:16.581356', 'step': 80, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 80], 'flops': 2373281365952}, 'timestamp': '2025-09-10 02:26:16.611087', 'step': 80, 'epoch': 1} {'type': 'loss', 'content': 0.24237994849681854, 'timestamp': '2025-09-10 02:26:16.613179', 'step': 81, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 2847885091968}, 'timestamp': '2025-09-10 02:26:16.644596', 'step': 81, 'epoch': 1} {'type': 'loss', 'content': 0.19028526544570923, 'timestamp': '2025-09-10 02:26:16.646673', 'step': 82, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 128], 'flops': 3797092544000}, 'timestamp': '2025-09-10 02:26:16.676370', 'step': 82, 'epoch': 1} {'type': 'loss', 'content': 0.13748791813850403, 'timestamp': '2025-09-10 02:26:16.679602', 'step': 83, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 3322488817984}, 'timestamp': '2025-09-10 02:26:16.709703', 'step': 83, 'epoch': 1} {'type': 'loss', 'content': 0.23162393271923065, 'timestamp': '2025-09-10 02:26:16.733423', 'step': 84, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 2847885091968}, 'timestamp': '2025-09-10 02:26:16.764457', 'step': 84, 'epoch': 1} {'type': 'loss', 'content': 0.19261108338832855, 'timestamp': '2025-09-10 02:26:16.766572', 'step': 85, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 128], 'flops': 3797092544000}, 'timestamp': '2025-09-10 02:26:16.795898', 'step': 85, 'epoch': 1} {'type': 'loss', 'content': 0.19612541794776917, 'timestamp': '2025-09-10 02:26:16.797693', 'step': 86, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 3322488817984}, 'timestamp': '2025-09-10 02:26:16.827777', 'step': 86, 'epoch': 1} {'type': 'loss', 'content': 0.21474352478981018, 'timestamp': '2025-09-10 02:26:16.830342', 'step': 87, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 128], 'flops': 3797092544000}, 'timestamp': '2025-09-10 02:26:16.861556', 'step': 87, 'epoch': 1} {'type': 'loss', 'content': 0.2230081409215927, 'timestamp': '2025-09-10 02:26:16.884943', 'step': 88, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 3322488817984}, 'timestamp': '2025-09-10 02:26:16.916047', 'step': 88, 'epoch': 1} {'type': 'loss', 'content': 0.14357171952724457, 'timestamp': '2025-09-10 02:26:16.917989', 'step': 89, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 3322488817984}, 'timestamp': '2025-09-10 02:26:16.947522', 'step': 89, 'epoch': 1} {'type': 'loss', 'content': 0.24565821886062622, 'timestamp': '2025-09-10 02:26:16.949513', 'step': 90, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 3322488817984}, 'timestamp': '2025-09-10 02:26:16.979202', 'step': 90, 'epoch': 1} {'type': 'loss', 'content': 0.18305043876171112, 'timestamp': '2025-09-10 02:26:16.982754', 'step': 91, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 3322488817984}, 'timestamp': '2025-09-10 02:26:17.012809', 'step': 91, 'epoch': 1} {'type': 'loss', 'content': 0.15728530287742615, 'timestamp': '2025-09-10 02:26:17.036095', 'step': 92, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 3322488817984}, 'timestamp': '2025-09-10 02:26:17.066811', 'step': 92, 'epoch': 1} {'type': 'loss', 'content': 0.24677817523479462, 'timestamp': '2025-09-10 02:26:17.069008', 'step': 93, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 160], 'flops': 4746299996032}, 'timestamp': '2025-09-10 02:26:17.099474', 'step': 93, 'epoch': 1} {'type': 'loss', 'content': 0.21889077126979828, 'timestamp': '2025-09-10 02:26:17.102231', 'step': 94, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 3322488817984}, 'timestamp': '2025-09-10 02:26:17.134010', 'step': 94, 'epoch': 1} {'type': 'loss', 'content': 0.16838012635707855, 'timestamp': '2025-09-10 02:26:17.136797', 'step': 95, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 128], 'flops': 3797092544000}, 'timestamp': '2025-09-10 02:26:17.168615', 'step': 95, 'epoch': 1} {'type': 'loss', 'content': 0.2201562523841858, 'timestamp': '2025-09-10 02:26:17.193064', 'step': 96, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 2847885091968}, 'timestamp': '2025-09-10 02:26:17.240717', 'step': 96, 'epoch': 1} {'type': 'loss', 'content': 0.2032143771648407, 'timestamp': '2025-09-10 02:26:17.246389', 'step': 97, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 2847885091968}, 'timestamp': '2025-09-10 02:26:17.281475', 'step': 97, 'epoch': 1} {'type': 'loss', 'content': 0.225465789437294, 'timestamp': '2025-09-10 02:26:17.284231', 'step': 98, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 128], 'flops': 3797092544000}, 'timestamp': '2025-09-10 02:26:17.320013', 'step': 98, 'epoch': 1} {'type': 'loss', 'content': 0.15875202417373657, 'timestamp': '2025-09-10 02:26:17.323285', 'step': 99, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 2847885091968}, 'timestamp': '2025-09-10 02:26:17.357796', 'step': 99, 'epoch': 1} {'type': 'loss', 'content': 0.24574606120586395, 'timestamp': '2025-09-10 02:26:17.381104', 'step': 100, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 128], 'flops': 3797092544000}, 'timestamp': '2025-09-10 02:26:17.414015', 'step': 100, 'epoch': 1} {'type': 'loss', 'content': 0.24176718294620514, 'timestamp': '2025-09-10 02:26:17.416100', 'step': 101, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 3322488817984}, 'timestamp': '2025-09-10 02:26:17.445843', 'step': 101, 'epoch': 1} {'type': 'loss', 'content': 0.24520106613636017, 'timestamp': '2025-09-10 02:26:17.448009', 'step': 102, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 128], 'flops': 3797092544000}, 'timestamp': '2025-09-10 02:26:17.478936', 'step': 102, 'epoch': 1} {'type': 'loss', 'content': 0.3294447064399719, 'timestamp': '2025-09-10 02:26:17.481192', 'step': 103, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 2847885091968}, 'timestamp': '2025-09-10 02:26:17.513413', 'step': 103, 'epoch': 1} {'type': 'loss', 'content': 0.21040098369121552, 'timestamp': '2025-09-10 02:26:17.536834', 'step': 104, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 2847885091968}, 'timestamp': '2025-09-10 02:26:17.567464', 'step': 104, 'epoch': 1} {'type': 'loss', 'content': 0.18595004081726074, 'timestamp': '2025-09-10 02:26:17.569653', 'step': 105, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 2847885091968}, 'timestamp': '2025-09-10 02:26:17.599796', 'step': 105, 'epoch': 1} {'type': 'loss', 'content': 0.24555101990699768, 'timestamp': '2025-09-10 02:26:17.601868', 'step': 106, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 128], 'flops': 3797092544000}, 'timestamp': '2025-09-10 02:26:17.632623', 'step': 106, 'epoch': 1} {'type': 'loss', 'content': 0.34271031618118286, 'timestamp': '2025-09-10 02:26:17.634920', 'step': 107, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 3322488817984}, 'timestamp': '2025-09-10 02:26:17.665458', 'step': 107, 'epoch': 1} {'type': 'loss', 'content': 0.3291020393371582, 'timestamp': '2025-09-10 02:26:17.689767', 'step': 108, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 160], 'flops': 4746299996032}, 'timestamp': '2025-09-10 02:26:17.720936', 'step': 108, 'epoch': 1} {'type': 'loss', 'content': 0.19459791481494904, 'timestamp': '2025-09-10 02:26:17.722969', 'step': 109, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 2847885091968}, 'timestamp': '2025-09-10 02:26:17.752095', 'step': 109, 'epoch': 1} {'type': 'loss', 'content': 0.2752498388290405, 'timestamp': '2025-09-10 02:26:17.754400', 'step': 110, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 2847885091968}, 'timestamp': '2025-09-10 02:26:17.784476', 'step': 110, 'epoch': 1} {'type': 'loss', 'content': 0.15056289732456207, 'timestamp': '2025-09-10 02:26:17.786400', 'step': 111, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 3322488817984}, 'timestamp': '2025-09-10 02:26:17.817218', 'step': 111, 'epoch': 1} {'type': 'loss', 'content': 0.16587314009666443, 'timestamp': '2025-09-10 02:26:17.840873', 'step': 112, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 2847885091968}, 'timestamp': '2025-09-10 02:26:17.870505', 'step': 112, 'epoch': 1} {'type': 'loss', 'content': 0.20316939055919647, 'timestamp': '2025-09-10 02:26:17.872636', 'step': 113, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 2847885091968}, 'timestamp': '2025-09-10 02:26:17.902047', 'step': 113, 'epoch': 1} {'type': 'loss', 'content': 0.34254470467567444, 'timestamp': '2025-09-10 02:26:17.903843', 'step': 114, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 80], 'flops': 2373281365952}, 'timestamp': '2025-09-10 02:26:17.932735', 'step': 114, 'epoch': 1} {'type': 'loss', 'content': 0.17819203436374664, 'timestamp': '2025-09-10 02:26:17.935708', 'step': 115, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 3322488817984}, 'timestamp': '2025-09-10 02:26:17.967127', 'step': 115, 'epoch': 1} {'type': 'loss', 'content': 0.2669474184513092, 'timestamp': '2025-09-10 02:26:17.990924', 'step': 116, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 3322488817984}, 'timestamp': '2025-09-10 02:26:18.025690', 'step': 116, 'epoch': 1} {'type': 'loss', 'content': 0.19552886486053467, 'timestamp': '2025-09-10 02:26:18.027879', 'step': 117, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 3322488817984}, 'timestamp': '2025-09-10 02:26:18.057341', 'step': 117, 'epoch': 1} {'type': 'loss', 'content': 0.2173379808664322, 'timestamp': '2025-09-10 02:26:18.059529', 'step': 118, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 144], 'flops': 4271696270016}, 'timestamp': '2025-09-10 02:26:18.090176', 'step': 118, 'epoch': 1} {'type': 'loss', 'content': 0.2227412313222885, 'timestamp': '2025-09-10 02:26:18.092327', 'step': 119, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 2847885091968}, 'timestamp': '2025-09-10 02:26:18.123399', 'step': 119, 'epoch': 1} {'type': 'loss', 'content': 0.2685222923755646, 'timestamp': '2025-09-10 02:26:18.149523', 'step': 120, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 3322488817984}, 'timestamp': '2025-09-10 02:26:18.187142', 'step': 120, 'epoch': 1} {'type': 'loss', 'content': 0.23775790631771088, 'timestamp': '2025-09-10 02:26:18.189710', 'step': 121, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 160], 'flops': 4746299996032}, 'timestamp': '2025-09-10 02:26:18.229549', 'step': 121, 'epoch': 1} {'type': 'loss', 'content': 0.18192678689956665, 'timestamp': '2025-09-10 02:26:18.231931', 'step': 122, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 80], 'flops': 2373281365952}, 'timestamp': '2025-09-10 02:26:18.262299', 'step': 122, 'epoch': 1} {'type': 'loss', 'content': 0.28761279582977295, 'timestamp': '2025-09-10 02:26:18.264237', 'step': 123, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 2847885091968}, 'timestamp': '2025-09-10 02:26:18.294028', 'step': 123, 'epoch': 1} {'type': 'loss', 'content': 0.1848805844783783, 'timestamp': '2025-09-10 02:26:18.317726', 'step': 124, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 128], 'flops': 3797092544000}, 'timestamp': '2025-09-10 02:26:18.347817', 'step': 124, 'epoch': 1} {'type': 'loss', 'content': 0.21559256315231323, 'timestamp': '2025-09-10 02:26:18.349779', 'step': 125, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 128], 'flops': 3797092544000}, 'timestamp': '2025-09-10 02:26:18.379231', 'step': 125, 'epoch': 1} {'type': 'loss', 'content': 0.17116665840148926, 'timestamp': '2025-09-10 02:26:18.381977', 'step': 126, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 128], 'flops': 3797092544000}, 'timestamp': '2025-09-10 02:26:18.412244', 'step': 126, 'epoch': 1} {'type': 'loss', 'content': 0.2069222778081894, 'timestamp': '2025-09-10 02:26:18.414346', 'step': 127, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 3322488817984}, 'timestamp': '2025-09-10 02:26:18.444060', 'step': 127, 'epoch': 1} {'type': 'loss', 'content': 0.37566855549812317, 'timestamp': '2025-09-10 02:26:18.467377', 'step': 128, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 2847885091968}, 'timestamp': '2025-09-10 02:26:18.497405', 'step': 128, 'epoch': 1} {'type': 'loss', 'content': 0.21063590049743652, 'timestamp': '2025-09-10 02:26:18.499455', 'step': 129, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 3322488817984}, 'timestamp': '2025-09-10 02:26:18.530341', 'step': 129, 'epoch': 1} {'type': 'loss', 'content': 0.19428588449954987, 'timestamp': '2025-09-10 02:26:18.532526', 'step': 130, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 128], 'flops': 3797092544000}, 'timestamp': '2025-09-10 02:26:18.565033', 'step': 130, 'epoch': 1} {'type': 'loss', 'content': 0.140965074300766, 'timestamp': '2025-09-10 02:26:18.569758', 'step': 131, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 128], 'flops': 3797092544000}, 'timestamp': '2025-09-10 02:26:18.609794', 'step': 131, 'epoch': 1} {'type': 'loss', 'content': 0.19761106371879578, 'timestamp': '2025-09-10 02:26:18.633620', 'step': 132, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 128], 'flops': 3797092544000}, 'timestamp': '2025-09-10 02:26:18.665135', 'step': 132, 'epoch': 1} {'type': 'loss', 'content': 0.25934702157974243, 'timestamp': '2025-09-10 02:26:18.667308', 'step': 133, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 3322488817984}, 'timestamp': '2025-09-10 02:26:18.697428', 'step': 133, 'epoch': 1} {'type': 'loss', 'content': 0.3000790774822235, 'timestamp': '2025-09-10 02:26:18.700090', 'step': 134, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 3322488817984}, 'timestamp': '2025-09-10 02:26:18.731974', 'step': 134, 'epoch': 1} {'type': 'loss', 'content': 0.16638822853565216, 'timestamp': '2025-09-10 02:26:18.734245', 'step': 135, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 2847885091968}, 'timestamp': '2025-09-10 02:26:18.764738', 'step': 135, 'epoch': 1} {'type': 'loss', 'content': 0.19231677055358887, 'timestamp': '2025-09-10 02:26:18.788433', 'step': 136, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 128], 'flops': 3797092544000}, 'timestamp': '2025-09-10 02:26:18.819129', 'step': 136, 'epoch': 1} {'type': 'loss', 'content': 0.28206491470336914, 'timestamp': '2025-09-10 02:26:18.821260', 'step': 137, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 3322488817984}, 'timestamp': '2025-09-10 02:26:18.851447', 'step': 137, 'epoch': 1} {'type': 'loss', 'content': 0.21552744507789612, 'timestamp': '2025-09-10 02:26:18.853808', 'step': 138, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 3322488817984}, 'timestamp': '2025-09-10 02:26:18.885226', 'step': 138, 'epoch': 1} {'type': 'loss', 'content': 0.18361131846904755, 'timestamp': '2025-09-10 02:26:18.891100', 'step': 139, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 80], 'flops': 2373281365952}, 'timestamp': '2025-09-10 02:26:18.932641', 'step': 139, 'epoch': 1} {'type': 'loss', 'content': 0.30604901909828186, 'timestamp': '2025-09-10 02:26:18.956372', 'step': 140, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 3322488817984}, 'timestamp': '2025-09-10 02:26:18.987095', 'step': 140, 'epoch': 1} {'type': 'loss', 'content': 0.22201967239379883, 'timestamp': '2025-09-10 02:26:18.990185', 'step': 141, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 2847885091968}, 'timestamp': '2025-09-10 02:26:19.020421', 'step': 141, 'epoch': 1} {'type': 'loss', 'content': 0.1896260678768158, 'timestamp': '2025-09-10 02:26:19.022738', 'step': 142, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 3322488817984}, 'timestamp': '2025-09-10 02:26:19.052914', 'step': 142, 'epoch': 1} {'type': 'loss', 'content': 0.21943815052509308, 'timestamp': '2025-09-10 02:26:19.055081', 'step': 143, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 128], 'flops': 3797092544000}, 'timestamp': '2025-09-10 02:26:19.084546', 'step': 143, 'epoch': 1} {'type': 'loss', 'content': 0.24899066984653473, 'timestamp': '2025-09-10 02:26:19.107896', 'step': 144, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 3322488817984}, 'timestamp': '2025-09-10 02:26:19.140618', 'step': 144, 'epoch': 1} {'type': 'loss', 'content': 0.16974489390850067, 'timestamp': '2025-09-10 02:26:19.143290', 'step': 145, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 2847885091968}, 'timestamp': '2025-09-10 02:26:19.173206', 'step': 145, 'epoch': 1} {'type': 'loss', 'content': 0.27881965041160583, 'timestamp': '2025-09-10 02:26:19.175513', 'step': 146, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 128], 'flops': 3797092544000}, 'timestamp': '2025-09-10 02:26:19.205825', 'step': 146, 'epoch': 1} {'type': 'loss', 'content': 0.24236071109771729, 'timestamp': '2025-09-10 02:26:19.207779', 'step': 147, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 128], 'flops': 3797092544000}, 'timestamp': '2025-09-10 02:26:19.238854', 'step': 147, 'epoch': 1} {'type': 'loss', 'content': 0.19718052446842194, 'timestamp': '2025-09-10 02:26:19.262593', 'step': 148, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 160], 'flops': 4746299996032}, 'timestamp': '2025-09-10 02:26:19.293426', 'step': 148, 'epoch': 1} {'type': 'loss', 'content': 0.12167474627494812, 'timestamp': '2025-09-10 02:26:19.296091', 'step': 149, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 3322488817984}, 'timestamp': '2025-09-10 02:26:19.326183', 'step': 149, 'epoch': 1} {'type': 'loss', 'content': 0.21193942427635193, 'timestamp': '2025-09-10 02:26:19.328670', 'step': 150, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 80], 'flops': 2373281365952}, 'timestamp': '2025-09-10 02:26:19.358328', 'step': 150, 'epoch': 1} {'type': 'loss', 'content': 0.15381145477294922, 'timestamp': '2025-09-10 02:26:19.360334', 'step': 151, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 80], 'flops': 2373281365952}, 'timestamp': '2025-09-10 02:26:19.395375', 'step': 151, 'epoch': 1} {'type': 'loss', 'content': 0.17159388959407806, 'timestamp': '2025-09-10 02:26:19.419352', 'step': 152, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 2847885091968}, 'timestamp': '2025-09-10 02:26:19.449206', 'step': 152, 'epoch': 1} {'type': 'loss', 'content': 0.23981545865535736, 'timestamp': '2025-09-10 02:26:19.451182', 'step': 153, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 3322488817984}, 'timestamp': '2025-09-10 02:26:19.481608', 'step': 153, 'epoch': 1} {'type': 'loss', 'content': 0.24314486980438232, 'timestamp': '2025-09-10 02:26:19.484019', 'step': 154, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 176], 'flops': 5220903722048}, 'timestamp': '2025-09-10 02:26:19.527638', 'step': 154, 'epoch': 1} {'type': 'loss', 'content': 0.3339710235595703, 'timestamp': '2025-09-10 02:26:19.530056', 'step': 155, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 128], 'flops': 3797092544000}, 'timestamp': '2025-09-10 02:26:19.560364', 'step': 155, 'epoch': 1} {'type': 'loss', 'content': 0.2415395975112915, 'timestamp': '2025-09-10 02:26:19.583961', 'step': 156, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 2847885091968}, 'timestamp': '2025-09-10 02:26:19.613649', 'step': 156, 'epoch': 1} {'type': 'loss', 'content': 0.16225922107696533, 'timestamp': '2025-09-10 02:26:19.617847', 'step': 157, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 128], 'flops': 3797092544000}, 'timestamp': '2025-09-10 02:26:19.655427', 'step': 157, 'epoch': 1} {'type': 'loss', 'content': 0.2273770570755005, 'timestamp': '2025-09-10 02:26:19.657697', 'step': 158, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 128], 'flops': 3797092544000}, 'timestamp': '2025-09-10 02:26:19.686958', 'step': 158, 'epoch': 1} {'type': 'loss', 'content': 0.1701814830303192, 'timestamp': '2025-09-10 02:26:19.689170', 'step': 159, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 128], 'flops': 3797092544000}, 'timestamp': '2025-09-10 02:26:19.719003', 'step': 159, 'epoch': 1} {'type': 'loss', 'content': 0.20179417729377747, 'timestamp': '2025-09-10 02:26:19.742848', 'step': 160, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 80], 'flops': 2373281365952}, 'timestamp': '2025-09-10 02:26:19.773102', 'step': 160, 'epoch': 1} {'type': 'loss', 'content': 0.22500517964363098, 'timestamp': '2025-09-10 02:26:19.775087', 'step': 161, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 3322488817984}, 'timestamp': '2025-09-10 02:26:19.805919', 'step': 161, 'epoch': 1} {'type': 'loss', 'content': 0.3135033845901489, 'timestamp': '2025-09-10 02:26:19.809484', 'step': 162, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 2847885091968}, 'timestamp': '2025-09-10 02:26:19.841291', 'step': 162, 'epoch': 1} {'type': 'loss', 'content': 0.20204143226146698, 'timestamp': '2025-09-10 02:26:19.843403', 'step': 163, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 3322488817984}, 'timestamp': '2025-09-10 02:26:19.872610', 'step': 163, 'epoch': 1} {'type': 'loss', 'content': 0.17563995718955994, 'timestamp': '2025-09-10 02:26:19.896022', 'step': 164, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 3322488817984}, 'timestamp': '2025-09-10 02:26:19.926453', 'step': 164, 'epoch': 1} {'type': 'loss', 'content': 0.280823290348053, 'timestamp': '2025-09-10 02:26:19.928652', 'step': 165, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 80], 'flops': 2373281365952}, 'timestamp': '2025-09-10 02:26:19.958316', 'step': 165, 'epoch': 1} {'type': 'loss', 'content': 0.16679945588111877, 'timestamp': '2025-09-10 02:26:19.960558', 'step': 166, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 2847885091968}, 'timestamp': '2025-09-10 02:26:19.990724', 'step': 166, 'epoch': 1} {'type': 'loss', 'content': 0.12045962363481522, 'timestamp': '2025-09-10 02:26:19.992988', 'step': 167, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 128], 'flops': 3797092544000}, 'timestamp': '2025-09-10 02:26:20.024450', 'step': 167, 'epoch': 1} {'type': 'loss', 'content': 0.19912703335285187, 'timestamp': '2025-09-10 02:26:20.048387', 'step': 168, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 128], 'flops': 3797092544000}, 'timestamp': '2025-09-10 02:26:20.079600', 'step': 168, 'epoch': 1} {'type': 'loss', 'content': 0.152073472738266, 'timestamp': '2025-09-10 02:26:20.082486', 'step': 169, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 2847885091968}, 'timestamp': '2025-09-10 02:26:20.113308', 'step': 169, 'epoch': 1} {'type': 'loss', 'content': 0.18474099040031433, 'timestamp': '2025-09-10 02:26:20.115701', 'step': 170, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 3322488817984}, 'timestamp': '2025-09-10 02:26:20.150433', 'step': 170, 'epoch': 1} {'type': 'loss', 'content': 0.15118487179279327, 'timestamp': '2025-09-10 02:26:20.152688', 'step': 171, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 3322488817984}, 'timestamp': '2025-09-10 02:26:20.184813', 'step': 171, 'epoch': 1} {'type': 'loss', 'content': 0.28830915689468384, 'timestamp': '2025-09-10 02:26:20.208525', 'step': 172, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 3322488817984}, 'timestamp': '2025-09-10 02:26:20.238434', 'step': 172, 'epoch': 1} {'type': 'loss', 'content': 0.26896020770072937, 'timestamp': '2025-09-10 02:26:20.240546', 'step': 173, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 3322488817984}, 'timestamp': '2025-09-10 02:26:20.270722', 'step': 173, 'epoch': 1} {'type': 'loss', 'content': 0.22481508553028107, 'timestamp': '2025-09-10 02:26:20.272760', 'step': 174, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 128], 'flops': 3797092544000}, 'timestamp': '2025-09-10 02:26:20.302143', 'step': 174, 'epoch': 1} {'type': 'loss', 'content': 0.2774266004562378, 'timestamp': '2025-09-10 02:26:20.304192', 'step': 175, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 128], 'flops': 3797092544000}, 'timestamp': '2025-09-10 02:26:20.333109', 'step': 175, 'epoch': 1} {'type': 'loss', 'content': 0.25407466292381287, 'timestamp': '2025-09-10 02:26:20.357027', 'step': 176, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 2847885091968}, 'timestamp': '2025-09-10 02:26:20.387642', 'step': 176, 'epoch': 1} {'type': 'loss', 'content': 0.2649693191051483, 'timestamp': '2025-09-10 02:26:20.390150', 'step': 177, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 3322488817984}, 'timestamp': '2025-09-10 02:26:20.420422', 'step': 177, 'epoch': 1} {'type': 'loss', 'content': 0.16709397733211517, 'timestamp': '2025-09-10 02:26:20.422552', 'step': 178, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 128], 'flops': 3797092544000}, 'timestamp': '2025-09-10 02:26:20.454892', 'step': 178, 'epoch': 1} {'type': 'loss', 'content': 0.23275126516819, 'timestamp': '2025-09-10 02:26:20.457114', 'step': 179, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 3322488817984}, 'timestamp': '2025-09-10 02:26:20.486733', 'step': 179, 'epoch': 1} {'type': 'loss', 'content': 0.18757420778274536, 'timestamp': '2025-09-10 02:26:20.510137', 'step': 180, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 160], 'flops': 4746299996032}, 'timestamp': '2025-09-10 02:26:20.540629', 'step': 180, 'epoch': 1} {'type': 'loss', 'content': 0.248407244682312, 'timestamp': '2025-09-10 02:26:20.542693', 'step': 181, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 128], 'flops': 3797092544000}, 'timestamp': '2025-09-10 02:26:20.572460', 'step': 181, 'epoch': 1} {'type': 'loss', 'content': 0.17919503152370453, 'timestamp': '2025-09-10 02:26:20.574527', 'step': 182, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 128], 'flops': 3797092544000}, 'timestamp': '2025-09-10 02:26:20.604684', 'step': 182, 'epoch': 1} {'type': 'loss', 'content': 0.21111759543418884, 'timestamp': '2025-09-10 02:26:20.606892', 'step': 183, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 128], 'flops': 3797092544000}, 'timestamp': '2025-09-10 02:26:20.636476', 'step': 183, 'epoch': 1} {'type': 'loss', 'content': 0.16983148455619812, 'timestamp': '2025-09-10 02:26:20.659773', 'step': 184, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 160], 'flops': 4746299996032}, 'timestamp': '2025-09-10 02:26:20.690330', 'step': 184, 'epoch': 1} {'type': 'loss', 'content': 0.20060941576957703, 'timestamp': '2025-09-10 02:26:20.692430', 'step': 185, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 128], 'flops': 3797092544000}, 'timestamp': '2025-09-10 02:26:20.722316', 'step': 185, 'epoch': 1} {'type': 'loss', 'content': 0.1439829021692276, 'timestamp': '2025-09-10 02:26:20.724500', 'step': 186, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 2847885091968}, 'timestamp': '2025-09-10 02:26:20.754053', 'step': 186, 'epoch': 1} {'type': 'loss', 'content': 0.17443795502185822, 'timestamp': '2025-09-10 02:26:20.756245', 'step': 187, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 2847885091968}, 'timestamp': '2025-09-10 02:26:20.785814', 'step': 187, 'epoch': 1} {'type': 'loss', 'content': 0.20807617902755737, 'timestamp': '2025-09-10 02:26:20.809468', 'step': 188, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 176], 'flops': 5220903722048}, 'timestamp': '2025-09-10 02:26:20.840733', 'step': 188, 'epoch': 1} {'type': 'loss', 'content': 0.2532186806201935, 'timestamp': '2025-09-10 02:26:20.842771', 'step': 189, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 144], 'flops': 4271696270016}, 'timestamp': '2025-09-10 02:26:20.873620', 'step': 189, 'epoch': 1} {'type': 'loss', 'content': 0.33667102456092834, 'timestamp': '2025-09-10 02:26:20.875946', 'step': 190, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 2847885091968}, 'timestamp': '2025-09-10 02:26:20.906235', 'step': 190, 'epoch': 1} {'type': 'loss', 'content': 0.15169508755207062, 'timestamp': '2025-09-10 02:26:20.908101', 'step': 191, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 128], 'flops': 3797092544000}, 'timestamp': '2025-09-10 02:26:20.937827', 'step': 191, 'epoch': 1} {'type': 'loss', 'content': 0.34519922733306885, 'timestamp': '2025-09-10 02:26:20.961691', 'step': 192, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 2847885091968}, 'timestamp': '2025-09-10 02:26:20.992379', 'step': 192, 'epoch': 1} {'type': 'loss', 'content': 0.2258731722831726, 'timestamp': '2025-09-10 02:26:20.994342', 'step': 193, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 3322488817984}, 'timestamp': '2025-09-10 02:26:21.023704', 'step': 193, 'epoch': 1} {'type': 'loss', 'content': 0.24251434206962585, 'timestamp': '2025-09-10 02:26:21.025787', 'step': 194, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 144], 'flops': 4271696270016}, 'timestamp': '2025-09-10 02:26:21.055763', 'step': 194, 'epoch': 1} {'type': 'loss', 'content': 0.1910441666841507, 'timestamp': '2025-09-10 02:26:21.058147', 'step': 195, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 128], 'flops': 3797092544000}, 'timestamp': '2025-09-10 02:26:21.088143', 'step': 195, 'epoch': 1} {'type': 'loss', 'content': 0.21858064830303192, 'timestamp': '2025-09-10 02:26:21.111647', 'step': 196, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 128], 'flops': 3797092544000}, 'timestamp': '2025-09-10 02:26:21.141387', 'step': 196, 'epoch': 1} {'type': 'loss', 'content': 0.2029941827058792, 'timestamp': '2025-09-10 02:26:21.147092', 'step': 197, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 2847885091968}, 'timestamp': '2025-09-10 02:26:21.179793', 'step': 197, 'epoch': 1} {'type': 'loss', 'content': 0.3370617926120758, 'timestamp': '2025-09-10 02:26:21.183397', 'step': 198, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 128], 'flops': 3797092544000}, 'timestamp': '2025-09-10 02:26:21.219147', 'step': 198, 'epoch': 1} {'type': 'loss', 'content': 0.13152135908603668, 'timestamp': '2025-09-10 02:26:21.222686', 'step': 199, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 144], 'flops': 4271696270016}, 'timestamp': '2025-09-10 02:26:21.260360', 'step': 199, 'epoch': 1} {'type': 'loss', 'content': 0.12928996980190277, 'timestamp': '2025-09-10 02:26:21.285537', 'step': 200, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 3322488817984}, 'timestamp': '2025-09-10 02:26:21.319745', 'step': 200, 'epoch': 1} {'type': 'loss', 'content': 0.3490965664386749, 'timestamp': '2025-09-10 02:26:21.324085', 'step': 201, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 3322488817984}, 'timestamp': '2025-09-10 02:26:21.375778', 'step': 201, 'epoch': 1} {'type': 'loss', 'content': 0.18509255349636078, 'timestamp': '2025-09-10 02:26:21.381965', 'step': 202, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 144], 'flops': 4271696270016}, 'timestamp': '2025-09-10 02:26:21.422648', 'step': 202, 'epoch': 1} {'type': 'loss', 'content': 0.13907979428768158, 'timestamp': '2025-09-10 02:26:21.424875', 'step': 203, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 128], 'flops': 3797092544000}, 'timestamp': '2025-09-10 02:26:21.455167', 'step': 203, 'epoch': 1} {'type': 'loss', 'content': 0.24933171272277832, 'timestamp': '2025-09-10 02:26:21.478943', 'step': 204, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 2847885091968}, 'timestamp': '2025-09-10 02:26:21.508924', 'step': 204, 'epoch': 1} {'type': 'loss', 'content': 0.32087594270706177, 'timestamp': '2025-09-10 02:26:21.511286', 'step': 205, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 2847885091968}, 'timestamp': '2025-09-10 02:26:21.541377', 'step': 205, 'epoch': 1} {'type': 'loss', 'content': 0.16205428540706635, 'timestamp': '2025-09-10 02:26:21.544726', 'step': 206, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 3322488817984}, 'timestamp': '2025-09-10 02:26:21.578326', 'step': 206, 'epoch': 1} {'type': 'loss', 'content': 0.18394774198532104, 'timestamp': '2025-09-10 02:26:21.583320', 'step': 207, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 2847885091968}, 'timestamp': '2025-09-10 02:26:21.623280', 'step': 207, 'epoch': 1} {'type': 'loss', 'content': 0.16968917846679688, 'timestamp': '2025-09-10 02:26:21.646720', 'step': 208, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 2847885091968}, 'timestamp': '2025-09-10 02:26:21.677071', 'step': 208, 'epoch': 1} {'type': 'loss', 'content': 0.20059515535831451, 'timestamp': '2025-09-10 02:26:21.679390', 'step': 209, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 2847885091968}, 'timestamp': '2025-09-10 02:26:21.708875', 'step': 209, 'epoch': 1} {'type': 'loss', 'content': 0.3372598886489868, 'timestamp': '2025-09-10 02:26:21.710882', 'step': 210, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 2847885091968}, 'timestamp': '2025-09-10 02:26:21.740118', 'step': 210, 'epoch': 1} {'type': 'loss', 'content': 0.16715380549430847, 'timestamp': '2025-09-10 02:26:21.742412', 'step': 211, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 128], 'flops': 3797092544000}, 'timestamp': '2025-09-10 02:26:21.774028', 'step': 211, 'epoch': 1} {'type': 'loss', 'content': 0.2568330764770508, 'timestamp': '2025-09-10 02:26:21.797271', 'step': 212, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 128], 'flops': 3797092544000}, 'timestamp': '2025-09-10 02:26:21.827778', 'step': 212, 'epoch': 1} {'type': 'loss', 'content': 0.1596459150314331, 'timestamp': '2025-09-10 02:26:21.829805', 'step': 213, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 128], 'flops': 3797092544000}, 'timestamp': '2025-09-10 02:26:21.858876', 'step': 213, 'epoch': 1} {'type': 'loss', 'content': 0.2836128771305084, 'timestamp': '2025-09-10 02:26:21.861012', 'step': 214, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 3322488817984}, 'timestamp': '2025-09-10 02:26:21.891563', 'step': 214, 'epoch': 1} {'type': 'loss', 'content': 0.17017711699008942, 'timestamp': '2025-09-10 02:26:21.893571', 'step': 215, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 128], 'flops': 3797092544000}, 'timestamp': '2025-09-10 02:26:21.923579', 'step': 215, 'epoch': 1} {'type': 'loss', 'content': 0.32772567868232727, 'timestamp': '2025-09-10 02:26:21.946605', 'step': 216, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 128], 'flops': 3797092544000}, 'timestamp': '2025-09-10 02:26:21.978057', 'step': 216, 'epoch': 1} {'type': 'loss', 'content': 0.21089929342269897, 'timestamp': '2025-09-10 02:26:21.980115', 'step': 217, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 128], 'flops': 3797092544000}, 'timestamp': '2025-09-10 02:26:22.009607', 'step': 217, 'epoch': 1} {'type': 'loss', 'content': 0.20602577924728394, 'timestamp': '2025-09-10 02:26:22.015738', 'step': 218, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 3322488817984}, 'timestamp': '2025-09-10 02:26:22.053708', 'step': 218, 'epoch': 1} {'type': 'loss', 'content': 0.3226429522037506, 'timestamp': '2025-09-10 02:26:22.055869', 'step': 219, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 3322488817984}, 'timestamp': '2025-09-10 02:26:22.086536', 'step': 219, 'epoch': 1} {'type': 'loss', 'content': 0.25470170378685, 'timestamp': '2025-09-10 02:26:22.110640', 'step': 220, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 3322488817984}, 'timestamp': '2025-09-10 02:26:22.140667', 'step': 220, 'epoch': 1} {'type': 'loss', 'content': 0.20088858902454376, 'timestamp': '2025-09-10 02:26:22.142980', 'step': 221, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 128], 'flops': 3797092544000}, 'timestamp': '2025-09-10 02:26:22.172128', 'step': 221, 'epoch': 1} {'type': 'loss', 'content': 0.12274497747421265, 'timestamp': '2025-09-10 02:26:22.174310', 'step': 222, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 3322488817984}, 'timestamp': '2025-09-10 02:26:22.203808', 'step': 222, 'epoch': 1} {'type': 'loss', 'content': 0.2259596586227417, 'timestamp': '2025-09-10 02:26:22.206193', 'step': 223, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 128], 'flops': 3797092544000}, 'timestamp': '2025-09-10 02:26:22.235693', 'step': 223, 'epoch': 1} {'type': 'loss', 'content': 0.17728003859519958, 'timestamp': '2025-09-10 02:26:22.259038', 'step': 224, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 3322488817984}, 'timestamp': '2025-09-10 02:26:22.289620', 'step': 224, 'epoch': 1} {'type': 'loss', 'content': 0.17697788774967194, 'timestamp': '2025-09-10 02:26:22.291701', 'step': 225, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 3322488817984}, 'timestamp': '2025-09-10 02:26:22.321756', 'step': 225, 'epoch': 1} {'type': 'loss', 'content': 0.2413497418165207, 'timestamp': '2025-09-10 02:26:22.324127', 'step': 226, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 128], 'flops': 3797092544000}, 'timestamp': '2025-09-10 02:26:22.354032', 'step': 226, 'epoch': 1} {'type': 'loss', 'content': 0.21851374208927155, 'timestamp': '2025-09-10 02:26:22.356258', 'step': 227, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 2847885091968}, 'timestamp': '2025-09-10 02:26:22.390886', 'step': 227, 'epoch': 1} {'type': 'loss', 'content': 0.20439614355564117, 'timestamp': '2025-09-10 02:26:22.414182', 'step': 228, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 3322488817984}, 'timestamp': '2025-09-10 02:26:22.445442', 'step': 228, 'epoch': 1} {'type': 'loss', 'content': 0.19486619532108307, 'timestamp': '2025-09-10 02:26:22.447913', 'step': 229, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 2847885091968}, 'timestamp': '2025-09-10 02:26:22.479283', 'step': 229, 'epoch': 1} {'type': 'loss', 'content': 0.2583639919757843, 'timestamp': '2025-09-10 02:26:22.481472', 'step': 230, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 3322488817984}, 'timestamp': '2025-09-10 02:26:22.510741', 'step': 230, 'epoch': 1} {'type': 'loss', 'content': 0.1737431287765503, 'timestamp': '2025-09-10 02:26:22.513183', 'step': 231, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 3322488817984}, 'timestamp': '2025-09-10 02:26:22.542283', 'step': 231, 'epoch': 1} {'type': 'loss', 'content': 0.2917768359184265, 'timestamp': '2025-09-10 02:26:22.566049', 'step': 232, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 128], 'flops': 3797092544000}, 'timestamp': '2025-09-10 02:26:22.596276', 'step': 232, 'epoch': 1} {'type': 'loss', 'content': 0.23541848361492157, 'timestamp': '2025-09-10 02:26:22.598259', 'step': 233, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 2847885091968}, 'timestamp': '2025-09-10 02:26:22.627622', 'step': 233, 'epoch': 1} {'type': 'loss', 'content': 0.19065305590629578, 'timestamp': '2025-09-10 02:26:22.629665', 'step': 234, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 80], 'flops': 2373281365952}, 'timestamp': '2025-09-10 02:26:22.659620', 'step': 234, 'epoch': 1} {'type': 'loss', 'content': 0.24618296325206757, 'timestamp': '2025-09-10 02:26:22.662912', 'step': 235, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 80], 'flops': 2373281365952}, 'timestamp': '2025-09-10 02:26:22.692329', 'step': 235, 'epoch': 1} {'type': 'loss', 'content': 0.14170175790786743, 'timestamp': '2025-09-10 02:26:22.715655', 'step': 236, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 3322488817984}, 'timestamp': '2025-09-10 02:26:22.746155', 'step': 236, 'epoch': 1} {'type': 'loss', 'content': 0.26187747716903687, 'timestamp': '2025-09-10 02:26:22.748459', 'step': 237, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 3322488817984}, 'timestamp': '2025-09-10 02:26:22.778019', 'step': 237, 'epoch': 1} {'type': 'loss', 'content': 0.2732100486755371, 'timestamp': '2025-09-10 02:26:22.780310', 'step': 238, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 3322488817984}, 'timestamp': '2025-09-10 02:26:22.810734', 'step': 238, 'epoch': 1} {'type': 'loss', 'content': 0.17143066227436066, 'timestamp': '2025-09-10 02:26:22.812800', 'step': 239, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 144], 'flops': 4271696270016}, 'timestamp': '2025-09-10 02:26:22.843409', 'step': 239, 'epoch': 1} {'type': 'loss', 'content': 0.2018471509218216, 'timestamp': '2025-09-10 02:26:22.869783', 'step': 240, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 3322488817984}, 'timestamp': '2025-09-10 02:26:22.911939', 'step': 240, 'epoch': 1} {'type': 'loss', 'content': 0.22281987965106964, 'timestamp': '2025-09-10 02:26:22.914127', 'step': 241, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 2847885091968}, 'timestamp': '2025-09-10 02:26:22.943714', 'step': 241, 'epoch': 1} {'type': 'loss', 'content': 0.21339084208011627, 'timestamp': '2025-09-10 02:26:22.946082', 'step': 242, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 3322488817984}, 'timestamp': '2025-09-10 02:26:22.975547', 'step': 242, 'epoch': 1} {'type': 'loss', 'content': 0.364390105009079, 'timestamp': '2025-09-10 02:26:22.977560', 'step': 243, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 128], 'flops': 3797092544000}, 'timestamp': '2025-09-10 02:26:23.007981', 'step': 243, 'epoch': 1} {'type': 'loss', 'content': 0.20781080424785614, 'timestamp': '2025-09-10 02:26:23.031370', 'step': 244, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 3322488817984}, 'timestamp': '2025-09-10 02:26:23.061061', 'step': 244, 'epoch': 1} {'type': 'loss', 'content': 0.29141172766685486, 'timestamp': '2025-09-10 02:26:23.064476', 'step': 245, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 2847885091968}, 'timestamp': '2025-09-10 02:26:23.094395', 'step': 245, 'epoch': 1} {'type': 'loss', 'content': 0.18956966698169708, 'timestamp': '2025-09-10 02:26:23.096528', 'step': 246, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 2847885091968}, 'timestamp': '2025-09-10 02:26:23.126297', 'step': 246, 'epoch': 1} {'type': 'loss', 'content': 0.1490556299686432, 'timestamp': '2025-09-10 02:26:23.129956', 'step': 247, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 2847885091968}, 'timestamp': '2025-09-10 02:26:23.159537', 'step': 247, 'epoch': 1} {'type': 'loss', 'content': 0.11135300248861313, 'timestamp': '2025-09-10 02:26:23.182940', 'step': 248, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 144], 'flops': 4271696270016}, 'timestamp': '2025-09-10 02:26:23.213595', 'step': 248, 'epoch': 1} {'type': 'loss', 'content': 0.24562565982341766, 'timestamp': '2025-09-10 02:26:23.215658', 'step': 249, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 128], 'flops': 3797092544000}, 'timestamp': '2025-09-10 02:26:23.246076', 'step': 249, 'epoch': 1} {'type': 'loss', 'content': 0.19887681305408478, 'timestamp': '2025-09-10 02:26:23.248331', 'step': 250, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 3322488817984}, 'timestamp': '2025-09-10 02:26:23.277843', 'step': 250, 'epoch': 1} {'type': 'loss', 'content': 0.20318703353405, 'timestamp': '2025-09-10 02:26:23.279816', 'step': 251, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 2847885091968}, 'timestamp': '2025-09-10 02:26:23.309901', 'step': 251, 'epoch': 1} {'type': 'loss', 'content': 0.19447478652000427, 'timestamp': '2025-09-10 02:26:23.333161', 'step': 252, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 2847885091968}, 'timestamp': '2025-09-10 02:26:23.368423', 'step': 252, 'epoch': 1} {'type': 'loss', 'content': 0.2355303317308426, 'timestamp': '2025-09-10 02:26:23.370486', 'step': 253, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 3322488817984}, 'timestamp': '2025-09-10 02:26:23.400526', 'step': 253, 'epoch': 1} {'type': 'loss', 'content': 0.18671846389770508, 'timestamp': '2025-09-10 02:26:23.402617', 'step': 254, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 2847885091968}, 'timestamp': '2025-09-10 02:26:23.433477', 'step': 254, 'epoch': 1} {'type': 'loss', 'content': 0.12775367498397827, 'timestamp': '2025-09-10 02:26:23.436086', 'step': 255, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 3322488817984}, 'timestamp': '2025-09-10 02:26:23.466092', 'step': 255, 'epoch': 1} {'type': 'loss', 'content': 0.1312408298254013, 'timestamp': '2025-09-10 02:26:23.489587', 'step': 256, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 128], 'flops': 3797092544000}, 'timestamp': '2025-09-10 02:26:23.519654', 'step': 256, 'epoch': 1} {'type': 'loss', 'content': 0.2507902681827545, 'timestamp': '2025-09-10 02:26:23.522208', 'step': 257, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 3322488817984}, 'timestamp': '2025-09-10 02:26:23.552782', 'step': 257, 'epoch': 1} {'type': 'loss', 'content': 0.14818339049816132, 'timestamp': '2025-09-10 02:26:23.555138', 'step': 258, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 2847885091968}, 'timestamp': '2025-09-10 02:26:23.585485', 'step': 258, 'epoch': 1} {'type': 'loss', 'content': 0.12236791104078293, 'timestamp': '2025-09-10 02:26:23.587790', 'step': 259, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 3322488817984}, 'timestamp': '2025-09-10 02:26:23.617838', 'step': 259, 'epoch': 1} {'type': 'loss', 'content': 0.24882087111473083, 'timestamp': '2025-09-10 02:26:23.644300', 'step': 260, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 3322488817984}, 'timestamp': '2025-09-10 02:26:23.691372', 'step': 260, 'epoch': 1} {'type': 'loss', 'content': 0.1655874103307724, 'timestamp': '2025-09-10 02:26:23.693491', 'step': 261, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 2847885091968}, 'timestamp': '2025-09-10 02:26:23.723296', 'step': 261, 'epoch': 1} {'type': 'loss', 'content': 0.13035346567630768, 'timestamp': '2025-09-10 02:26:23.725437', 'step': 262, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 3322488817984}, 'timestamp': '2025-09-10 02:26:23.755196', 'step': 262, 'epoch': 1} {'type': 'loss', 'content': 0.21158424019813538, 'timestamp': '2025-09-10 02:26:23.757390', 'step': 263, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 208], 'flops': 6170111174080}, 'timestamp': '2025-09-10 02:26:23.807995', 'step': 263, 'epoch': 1} {'type': 'loss', 'content': 0.2457447648048401, 'timestamp': '2025-09-10 02:26:23.832744', 'step': 264, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 3322488817984}, 'timestamp': '2025-09-10 02:26:23.863036', 'step': 264, 'epoch': 1} {'type': 'loss', 'content': 0.22308111190795898, 'timestamp': '2025-09-10 02:26:23.865332', 'step': 265, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 2847885091968}, 'timestamp': '2025-09-10 02:26:23.895151', 'step': 265, 'epoch': 1} {'type': 'loss', 'content': 0.13898232579231262, 'timestamp': '2025-09-10 02:26:23.897372', 'step': 266, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 3322488817984}, 'timestamp': '2025-09-10 02:26:23.927991', 'step': 266, 'epoch': 1} {'type': 'loss', 'content': 0.13786619901657104, 'timestamp': '2025-09-10 02:26:23.930095', 'step': 267, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 128], 'flops': 3797092544000}, 'timestamp': '2025-09-10 02:26:23.959683', 'step': 267, 'epoch': 1} {'type': 'loss', 'content': 0.3035302460193634, 'timestamp': '2025-09-10 02:26:23.983155', 'step': 268, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 3322488817984}, 'timestamp': '2025-09-10 02:26:24.014144', 'step': 268, 'epoch': 1} {'type': 'loss', 'content': 0.1869874745607376, 'timestamp': '2025-09-10 02:26:24.016226', 'step': 269, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 128], 'flops': 3797092544000}, 'timestamp': '2025-09-10 02:26:24.052131', 'step': 269, 'epoch': 1} {'type': 'loss', 'content': 0.14548633992671967, 'timestamp': '2025-09-10 02:26:24.054336', 'step': 270, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 144], 'flops': 4271696270016}, 'timestamp': '2025-09-10 02:26:24.085912', 'step': 270, 'epoch': 1} {'type': 'loss', 'content': 0.2606220841407776, 'timestamp': '2025-09-10 02:26:24.089568', 'step': 271, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 2847885091968}, 'timestamp': '2025-09-10 02:26:24.123516', 'step': 271, 'epoch': 1} {'type': 'loss', 'content': 0.23223401606082916, 'timestamp': '2025-09-10 02:26:24.147267', 'step': 272, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 128], 'flops': 3797092544000}, 'timestamp': '2025-09-10 02:26:24.179113', 'step': 272, 'epoch': 1} {'type': 'loss', 'content': 0.15168261528015137, 'timestamp': '2025-09-10 02:26:24.182750', 'step': 273, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 2847885091968}, 'timestamp': '2025-09-10 02:26:24.215665', 'step': 273, 'epoch': 1} {'type': 'loss', 'content': 0.20623846352100372, 'timestamp': '2025-09-10 02:26:24.218724', 'step': 274, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 128], 'flops': 3797092544000}, 'timestamp': '2025-09-10 02:26:24.248931', 'step': 274, 'epoch': 1} {'type': 'loss', 'content': 0.195373073220253, 'timestamp': '2025-09-10 02:26:24.251077', 'step': 275, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 3322488817984}, 'timestamp': '2025-09-10 02:26:24.280853', 'step': 275, 'epoch': 1} {'type': 'loss', 'content': 0.1557455062866211, 'timestamp': '2025-09-10 02:26:24.304399', 'step': 276, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 2847885091968}, 'timestamp': '2025-09-10 02:26:24.333898', 'step': 276, 'epoch': 1} {'type': 'loss', 'content': 0.140010803937912, 'timestamp': '2025-09-10 02:26:24.336088', 'step': 277, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 3322488817984}, 'timestamp': '2025-09-10 02:26:24.367065', 'step': 277, 'epoch': 1} {'type': 'loss', 'content': 0.25765693187713623, 'timestamp': '2025-09-10 02:26:24.369190', 'step': 278, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 3322488817984}, 'timestamp': '2025-09-10 02:26:24.398839', 'step': 278, 'epoch': 1} {'type': 'loss', 'content': 0.1820368617773056, 'timestamp': '2025-09-10 02:26:24.405752', 'step': 279, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 2847885091968}, 'timestamp': '2025-09-10 02:26:24.449131', 'step': 279, 'epoch': 1} {'type': 'loss', 'content': 0.2232631891965866, 'timestamp': '2025-09-10 02:26:24.476217', 'step': 280, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 3322488817984}, 'timestamp': '2025-09-10 02:26:24.508399', 'step': 280, 'epoch': 1} {'type': 'loss', 'content': 0.1745985448360443, 'timestamp': '2025-09-10 02:26:24.510797', 'step': 281, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 2847885091968}, 'timestamp': '2025-09-10 02:26:24.541484', 'step': 281, 'epoch': 1} {'type': 'loss', 'content': 0.1919642686843872, 'timestamp': '2025-09-10 02:26:24.543776', 'step': 282, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 80], 'flops': 2373281365952}, 'timestamp': '2025-09-10 02:26:24.575681', 'step': 282, 'epoch': 1} {'type': 'loss', 'content': 0.18936391174793243, 'timestamp': '2025-09-10 02:26:24.577729', 'step': 283, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 80], 'flops': 2373281365952}, 'timestamp': '2025-09-10 02:26:24.608362', 'step': 283, 'epoch': 1} {'type': 'loss', 'content': 0.16052024066448212, 'timestamp': '2025-09-10 02:26:24.632077', 'step': 284, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 80], 'flops': 2373281365952}, 'timestamp': '2025-09-10 02:26:24.667540', 'step': 284, 'epoch': 1} {'type': 'loss', 'content': 0.2232537865638733, 'timestamp': '2025-09-10 02:26:24.670128', 'step': 285, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 3322488817984}, 'timestamp': '2025-09-10 02:26:24.700156', 'step': 285, 'epoch': 1} {'type': 'loss', 'content': 0.2825932204723358, 'timestamp': '2025-09-10 02:26:24.702180', 'step': 286, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 2847885091968}, 'timestamp': '2025-09-10 02:26:24.732445', 'step': 286, 'epoch': 1} {'type': 'loss', 'content': 0.3204474449157715, 'timestamp': '2025-09-10 02:26:24.734919', 'step': 287, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 2847885091968}, 'timestamp': '2025-09-10 02:26:24.764910', 'step': 287, 'epoch': 1} {'type': 'loss', 'content': 0.32290515303611755, 'timestamp': '2025-09-10 02:26:24.788349', 'step': 288, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 128], 'flops': 3797092544000}, 'timestamp': '2025-09-10 02:26:24.820378', 'step': 288, 'epoch': 1} {'type': 'loss', 'content': 0.2768358886241913, 'timestamp': '2025-09-10 02:26:24.822948', 'step': 289, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 80], 'flops': 2373281365952}, 'timestamp': '2025-09-10 02:26:24.852735', 'step': 289, 'epoch': 1} {'type': 'loss', 'content': 0.14827941358089447, 'timestamp': '2025-09-10 02:26:24.854801', 'step': 290, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 80], 'flops': 2373281365952}, 'timestamp': '2025-09-10 02:26:24.884750', 'step': 290, 'epoch': 1} {'type': 'loss', 'content': 0.16156160831451416, 'timestamp': '2025-09-10 02:26:24.888344', 'step': 291, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 2847885091968}, 'timestamp': '2025-09-10 02:26:24.917723', 'step': 291, 'epoch': 1} {'type': 'loss', 'content': 0.2822439670562744, 'timestamp': '2025-09-10 02:26:24.941460', 'step': 292, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 160], 'flops': 4746299996032}, 'timestamp': '2025-09-10 02:26:24.972760', 'step': 292, 'epoch': 1} {'type': 'loss', 'content': 0.33290356397628784, 'timestamp': '2025-09-10 02:26:24.974978', 'step': 293, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 3322488817984}, 'timestamp': '2025-09-10 02:26:25.005727', 'step': 293, 'epoch': 1} {'type': 'loss', 'content': 0.19853727519512177, 'timestamp': '2025-09-10 02:26:25.007842', 'step': 294, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 2847885091968}, 'timestamp': '2025-09-10 02:26:25.038024', 'step': 294, 'epoch': 1} {'type': 'loss', 'content': 0.18907047808170319, 'timestamp': '2025-09-10 02:26:25.040073', 'step': 295, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 3322488817984}, 'timestamp': '2025-09-10 02:26:25.070176', 'step': 295, 'epoch': 1} {'type': 'loss', 'content': 0.17895770072937012, 'timestamp': '2025-09-10 02:26:25.093519', 'step': 296, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 144], 'flops': 4271696270016}, 'timestamp': '2025-09-10 02:26:25.126159', 'step': 296, 'epoch': 1} {'type': 'loss', 'content': 0.1630888432264328, 'timestamp': '2025-09-10 02:26:25.128648', 'step': 297, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 3322488817984}, 'timestamp': '2025-09-10 02:26:25.160675', 'step': 297, 'epoch': 1} {'type': 'loss', 'content': 0.13295038044452667, 'timestamp': '2025-09-10 02:26:25.163735', 'step': 298, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 128], 'flops': 3797092544000}, 'timestamp': '2025-09-10 02:26:25.194693', 'step': 298, 'epoch': 1} {'type': 'loss', 'content': 0.23560839891433716, 'timestamp': '2025-09-10 02:26:25.198037', 'step': 299, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 3322488817984}, 'timestamp': '2025-09-10 02:26:25.228705', 'step': 299, 'epoch': 1} {'type': 'loss', 'content': 0.21554821729660034, 'timestamp': '2025-09-10 02:26:25.252427', 'step': 300, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 3322488817984}, 'timestamp': '2025-09-10 02:26:25.282672', 'step': 300, 'epoch': 1} {'type': 'loss', 'content': 0.28620943427085876, 'timestamp': '2025-09-10 02:26:25.284850', 'step': 301, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 144], 'flops': 4271696270016}, 'timestamp': '2025-09-10 02:26:25.316440', 'step': 301, 'epoch': 1} {'type': 'loss', 'content': 0.2278497815132141, 'timestamp': '2025-09-10 02:26:25.318639', 'step': 302, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 2847885091968}, 'timestamp': '2025-09-10 02:26:25.348631', 'step': 302, 'epoch': 1} {'type': 'loss', 'content': 0.2150653600692749, 'timestamp': '2025-09-10 02:26:25.351008', 'step': 303, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 3322488817984}, 'timestamp': '2025-09-10 02:26:25.380652', 'step': 303, 'epoch': 1} {'type': 'loss', 'content': 0.18853908777236938, 'timestamp': '2025-09-10 02:26:25.405625', 'step': 304, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 432], 'flops': 12814563338304}, 'timestamp': '2025-09-10 02:26:25.458219', 'step': 304, 'epoch': 1} {'type': 'loss', 'content': 0.21903416514396667, 'timestamp': '2025-09-10 02:26:25.469102', 'step': 305, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 2847885091968}, 'timestamp': '2025-09-10 02:26:25.499324', 'step': 305, 'epoch': 1} {'type': 'loss', 'content': 0.28536197543144226, 'timestamp': '2025-09-10 02:26:25.501823', 'step': 306, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 3322488817984}, 'timestamp': '2025-09-10 02:26:25.532458', 'step': 306, 'epoch': 1} {'type': 'loss', 'content': 0.23193179070949554, 'timestamp': '2025-09-10 02:26:25.534567', 'step': 307, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 3322488817984}, 'timestamp': '2025-09-10 02:26:25.564028', 'step': 307, 'epoch': 1} {'type': 'loss', 'content': 0.16698184609413147, 'timestamp': '2025-09-10 02:26:25.587751', 'step': 308, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 2847885091968}, 'timestamp': '2025-09-10 02:26:25.619307', 'step': 308, 'epoch': 1} {'type': 'loss', 'content': 0.1627616435289383, 'timestamp': '2025-09-10 02:26:25.621363', 'step': 309, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 80], 'flops': 2373281365952}, 'timestamp': '2025-09-10 02:26:25.651020', 'step': 309, 'epoch': 1} {'type': 'loss', 'content': 0.21317768096923828, 'timestamp': '2025-09-10 02:26:25.654428', 'step': 310, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 2847885091968}, 'timestamp': '2025-09-10 02:26:25.684293', 'step': 310, 'epoch': 1} {'type': 'loss', 'content': 0.20717905461788177, 'timestamp': '2025-09-10 02:26:25.686294', 'step': 311, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 128], 'flops': 3797092544000}, 'timestamp': '2025-09-10 02:26:25.715936', 'step': 311, 'epoch': 1} {'type': 'loss', 'content': 0.1684478372335434, 'timestamp': '2025-09-10 02:26:25.739525', 'step': 312, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 2847885091968}, 'timestamp': '2025-09-10 02:26:25.769914', 'step': 312, 'epoch': 1} {'type': 'loss', 'content': 0.12965776026248932, 'timestamp': '2025-09-10 02:26:25.772750', 'step': 313, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 2847885091968}, 'timestamp': '2025-09-10 02:26:25.803548', 'step': 313, 'epoch': 1} {'type': 'loss', 'content': 0.20487073063850403, 'timestamp': '2025-09-10 02:26:25.805537', 'step': 314, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 3322488817984}, 'timestamp': '2025-09-10 02:26:25.835517', 'step': 314, 'epoch': 1} {'type': 'loss', 'content': 0.26161614060401917, 'timestamp': '2025-09-10 02:26:25.837789', 'step': 315, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 3322488817984}, 'timestamp': '2025-09-10 02:26:25.869394', 'step': 315, 'epoch': 1} {'type': 'loss', 'content': 0.15071530640125275, 'timestamp': '2025-09-10 02:26:25.892934', 'step': 316, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 128], 'flops': 3797092544000}, 'timestamp': '2025-09-10 02:26:25.924402', 'step': 316, 'epoch': 1} {'type': 'loss', 'content': 0.1792200207710266, 'timestamp': '2025-09-10 02:26:25.926882', 'step': 317, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 128], 'flops': 3797092544000}, 'timestamp': '2025-09-10 02:26:25.956869', 'step': 317, 'epoch': 1} {'type': 'loss', 'content': 0.1345180869102478, 'timestamp': '2025-09-10 02:26:25.959020', 'step': 318, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 2847885091968}, 'timestamp': '2025-09-10 02:26:25.988457', 'step': 318, 'epoch': 1} {'type': 'loss', 'content': 0.20162905752658844, 'timestamp': '2025-09-10 02:26:25.990350', 'step': 319, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 2847885091968}, 'timestamp': '2025-09-10 02:26:26.020621', 'step': 319, 'epoch': 1} {'type': 'loss', 'content': 0.24488438665866852, 'timestamp': '2025-09-10 02:26:26.044158', 'step': 320, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 128], 'flops': 3797092544000}, 'timestamp': '2025-09-10 02:26:26.075772', 'step': 320, 'epoch': 1} {'type': 'loss', 'content': 0.2193966507911682, 'timestamp': '2025-09-10 02:26:26.077671', 'step': 321, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 2847885091968}, 'timestamp': '2025-09-10 02:26:26.107489', 'step': 321, 'epoch': 1} {'type': 'loss', 'content': 0.1777680665254593, 'timestamp': '2025-09-10 02:26:26.109596', 'step': 322, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 3322488817984}, 'timestamp': '2025-09-10 02:26:26.139761', 'step': 322, 'epoch': 1} {'type': 'loss', 'content': 0.19177433848381042, 'timestamp': '2025-09-10 02:26:26.141952', 'step': 323, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 2847885091968}, 'timestamp': '2025-09-10 02:26:26.171741', 'step': 323, 'epoch': 1} {'type': 'loss', 'content': 0.16658906638622284, 'timestamp': '2025-09-10 02:26:26.196130', 'step': 324, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 160], 'flops': 4746299996032}, 'timestamp': '2025-09-10 02:26:26.228339', 'step': 324, 'epoch': 1} {'type': 'loss', 'content': 0.15765736997127533, 'timestamp': '2025-09-10 02:26:26.230409', 'step': 325, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 3322488817984}, 'timestamp': '2025-09-10 02:26:26.260431', 'step': 325, 'epoch': 1} {'type': 'loss', 'content': 0.26467055082321167, 'timestamp': '2025-09-10 02:26:26.262395', 'step': 326, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 3322488817984}, 'timestamp': '2025-09-10 02:26:26.292183', 'step': 326, 'epoch': 1} {'type': 'loss', 'content': 0.11764457076787949, 'timestamp': '2025-09-10 02:26:26.294301', 'step': 327, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 3322488817984}, 'timestamp': '2025-09-10 02:26:26.323448', 'step': 327, 'epoch': 1} {'type': 'loss', 'content': 0.23024652898311615, 'timestamp': '2025-09-10 02:26:26.356505', 'step': 328, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 2847885091968}, 'timestamp': '2025-09-10 02:26:26.386148', 'step': 328, 'epoch': 1} {'type': 'loss', 'content': 0.25801798701286316, 'timestamp': '2025-09-10 02:26:26.388563', 'step': 329, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 144], 'flops': 4271696270016}, 'timestamp': '2025-09-10 02:26:26.418296', 'step': 329, 'epoch': 1} {'type': 'loss', 'content': 0.14527951180934906, 'timestamp': '2025-09-10 02:26:26.420751', 'step': 330, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 144], 'flops': 4271696270016}, 'timestamp': '2025-09-10 02:26:26.450384', 'step': 330, 'epoch': 1} {'type': 'loss', 'content': 0.1894197165966034, 'timestamp': '2025-09-10 02:26:26.452736', 'step': 331, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 128], 'flops': 3797092544000}, 'timestamp': '2025-09-10 02:26:26.482890', 'step': 331, 'epoch': 1} {'type': 'loss', 'content': 0.24834388494491577, 'timestamp': '2025-09-10 02:26:26.506370', 'step': 332, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 128], 'flops': 3797092544000}, 'timestamp': '2025-09-10 02:26:26.536751', 'step': 332, 'epoch': 1} {'type': 'loss', 'content': 0.14457954466342926, 'timestamp': '2025-09-10 02:26:26.538943', 'step': 333, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 3322488817984}, 'timestamp': '2025-09-10 02:26:26.568659', 'step': 333, 'epoch': 1} {'type': 'loss', 'content': 0.26786065101623535, 'timestamp': '2025-09-10 02:26:26.570651', 'step': 334, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 128], 'flops': 3797092544000}, 'timestamp': '2025-09-10 02:26:26.600858', 'step': 334, 'epoch': 1} {'type': 'loss', 'content': 0.12119848281145096, 'timestamp': '2025-09-10 02:26:26.602804', 'step': 335, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 2847885091968}, 'timestamp': '2025-09-10 02:26:26.633753', 'step': 335, 'epoch': 1} {'type': 'loss', 'content': 0.19506192207336426, 'timestamp': '2025-09-10 02:26:26.657080', 'step': 336, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 3322488817984}, 'timestamp': '2025-09-10 02:26:26.687999', 'step': 336, 'epoch': 1} {'type': 'loss', 'content': 0.20575746893882751, 'timestamp': '2025-09-10 02:26:26.690146', 'step': 337, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 144], 'flops': 4271696270016}, 'timestamp': '2025-09-10 02:26:26.719660', 'step': 337, 'epoch': 1} {'type': 'loss', 'content': 0.16558802127838135, 'timestamp': '2025-09-10 02:26:26.721904', 'step': 338, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 144], 'flops': 4271696270016}, 'timestamp': '2025-09-10 02:26:26.751627', 'step': 338, 'epoch': 1} {'type': 'loss', 'content': 0.29077035188674927, 'timestamp': '2025-09-10 02:26:26.753761', 'step': 339, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 3322488817984}, 'timestamp': '2025-09-10 02:26:26.783771', 'step': 339, 'epoch': 1} {'type': 'loss', 'content': 0.25873520970344543, 'timestamp': '2025-09-10 02:26:26.806980', 'step': 340, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 2847885091968}, 'timestamp': '2025-09-10 02:26:26.838314', 'step': 340, 'epoch': 1} {'type': 'loss', 'content': 0.21700462698936462, 'timestamp': '2025-09-10 02:26:26.840422', 'step': 341, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 2847885091968}, 'timestamp': '2025-09-10 02:26:26.869708', 'step': 341, 'epoch': 1} {'type': 'loss', 'content': 0.16638903319835663, 'timestamp': '2025-09-10 02:26:26.872334', 'step': 342, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 3322488817984}, 'timestamp': '2025-09-10 02:26:26.903716', 'step': 342, 'epoch': 1} {'type': 'loss', 'content': 0.17999599874019623, 'timestamp': '2025-09-10 02:26:26.905853', 'step': 343, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 3322488817984}, 'timestamp': '2025-09-10 02:26:26.936728', 'step': 343, 'epoch': 1} {'type': 'loss', 'content': 0.14978356659412384, 'timestamp': '2025-09-10 02:26:26.960078', 'step': 344, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 144], 'flops': 4271696270016}, 'timestamp': '2025-09-10 02:26:26.990134', 'step': 344, 'epoch': 1} {'type': 'loss', 'content': 0.2812604606151581, 'timestamp': '2025-09-10 02:26:26.992222', 'step': 345, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 128], 'flops': 3797092544000}, 'timestamp': '2025-09-10 02:26:27.022418', 'step': 345, 'epoch': 1} {'type': 'loss', 'content': 0.22984924912452698, 'timestamp': '2025-09-10 02:26:27.024455', 'step': 346, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 128], 'flops': 3797092544000}, 'timestamp': '2025-09-10 02:26:27.062241', 'step': 346, 'epoch': 1} {'type': 'loss', 'content': 0.26981472969055176, 'timestamp': '2025-09-10 02:26:27.064415', 'step': 347, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 3322488817984}, 'timestamp': '2025-09-10 02:26:27.094103', 'step': 347, 'epoch': 1} {'type': 'loss', 'content': 0.20131412148475647, 'timestamp': '2025-09-10 02:26:27.117381', 'step': 348, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 128], 'flops': 3797092544000}, 'timestamp': '2025-09-10 02:26:27.147230', 'step': 348, 'epoch': 1} {'type': 'loss', 'content': 0.18974615633487701, 'timestamp': '2025-09-10 02:26:27.149471', 'step': 349, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 2847885091968}, 'timestamp': '2025-09-10 02:26:27.181729', 'step': 349, 'epoch': 1} {'type': 'loss', 'content': 0.1608552783727646, 'timestamp': '2025-09-10 02:26:27.183786', 'step': 350, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 2847885091968}, 'timestamp': '2025-09-10 02:26:27.214137', 'step': 350, 'epoch': 1} {'type': 'loss', 'content': 0.1761218011379242, 'timestamp': '2025-09-10 02:26:27.217144', 'step': 351, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 128], 'flops': 3797092544000}, 'timestamp': '2025-09-10 02:26:27.246267', 'step': 351, 'epoch': 1} {'type': 'loss', 'content': 0.19086380302906036, 'timestamp': '2025-09-10 02:26:27.269616', 'step': 352, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 144], 'flops': 4271696270016}, 'timestamp': '2025-09-10 02:26:27.300443', 'step': 352, 'epoch': 1} {'type': 'loss', 'content': 0.12670958042144775, 'timestamp': '2025-09-10 02:26:27.302547', 'step': 353, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 3322488817984}, 'timestamp': '2025-09-10 02:26:27.332402', 'step': 353, 'epoch': 1} {'type': 'loss', 'content': 0.11147134751081467, 'timestamp': '2025-09-10 02:26:27.334443', 'step': 354, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 2847885091968}, 'timestamp': '2025-09-10 02:26:27.364382', 'step': 354, 'epoch': 1} {'type': 'loss', 'content': 0.20178595185279846, 'timestamp': '2025-09-10 02:26:27.366525', 'step': 355, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 128], 'flops': 3797092544000}, 'timestamp': '2025-09-10 02:26:27.396307', 'step': 355, 'epoch': 1} {'type': 'loss', 'content': 0.3553580343723297, 'timestamp': '2025-09-10 02:26:27.419682', 'step': 356, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 144], 'flops': 4271696270016}, 'timestamp': '2025-09-10 02:26:27.450074', 'step': 356, 'epoch': 1} {'type': 'loss', 'content': 0.2081502228975296, 'timestamp': '2025-09-10 02:26:27.452224', 'step': 357, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 128], 'flops': 3797092544000}, 'timestamp': '2025-09-10 02:26:27.481722', 'step': 357, 'epoch': 1} {'type': 'loss', 'content': 0.08591876178979874, 'timestamp': '2025-09-10 02:26:27.483637', 'step': 358, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 3322488817984}, 'timestamp': '2025-09-10 02:26:27.513045', 'step': 358, 'epoch': 1} {'type': 'loss', 'content': 0.21553350985050201, 'timestamp': '2025-09-10 02:26:27.515194', 'step': 359, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 2847885091968}, 'timestamp': '2025-09-10 02:26:27.545301', 'step': 359, 'epoch': 1} {'type': 'loss', 'content': 0.21763543784618378, 'timestamp': '2025-09-10 02:26:27.568818', 'step': 360, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 3322488817984}, 'timestamp': '2025-09-10 02:26:27.598409', 'step': 360, 'epoch': 1} {'type': 'loss', 'content': 0.15122853219509125, 'timestamp': '2025-09-10 02:26:27.600547', 'step': 361, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 2847885091968}, 'timestamp': '2025-09-10 02:26:27.630302', 'step': 361, 'epoch': 1} {'type': 'loss', 'content': 0.19311000406742096, 'timestamp': '2025-09-10 02:26:27.632435', 'step': 362, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 128], 'flops': 3797092544000}, 'timestamp': '2025-09-10 02:26:27.661757', 'step': 362, 'epoch': 1} {'type': 'loss', 'content': 0.1400170624256134, 'timestamp': '2025-09-10 02:26:27.663885', 'step': 363, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 3322488817984}, 'timestamp': '2025-09-10 02:26:27.693212', 'step': 363, 'epoch': 1} {'type': 'loss', 'content': 0.19075247645378113, 'timestamp': '2025-09-10 02:26:27.716619', 'step': 364, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 160], 'flops': 4746299996032}, 'timestamp': '2025-09-10 02:26:27.748171', 'step': 364, 'epoch': 1} {'type': 'loss', 'content': 0.2752550542354584, 'timestamp': '2025-09-10 02:26:27.750254', 'step': 365, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 2847885091968}, 'timestamp': '2025-09-10 02:26:27.779456', 'step': 365, 'epoch': 1} {'type': 'loss', 'content': 0.1754954755306244, 'timestamp': '2025-09-10 02:26:27.781565', 'step': 366, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 144], 'flops': 4271696270016}, 'timestamp': '2025-09-10 02:26:27.811590', 'step': 366, 'epoch': 1} {'type': 'loss', 'content': 0.3149440288543701, 'timestamp': '2025-09-10 02:26:27.813940', 'step': 367, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 2847885091968}, 'timestamp': '2025-09-10 02:26:27.843679', 'step': 367, 'epoch': 1} {'type': 'loss', 'content': 0.14874134957790375, 'timestamp': '2025-09-10 02:26:27.867151', 'step': 368, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 3322488817984}, 'timestamp': '2025-09-10 02:26:27.896941', 'step': 368, 'epoch': 1} {'type': 'loss', 'content': 0.2511821687221527, 'timestamp': '2025-09-10 02:26:27.898977', 'step': 369, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 2847885091968}, 'timestamp': '2025-09-10 02:26:27.928810', 'step': 369, 'epoch': 1} {'type': 'loss', 'content': 0.12325068563222885, 'timestamp': '2025-09-10 02:26:27.930907', 'step': 370, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 3322488817984}, 'timestamp': '2025-09-10 02:26:27.960760', 'step': 370, 'epoch': 1} {'type': 'loss', 'content': 0.21812470257282257, 'timestamp': '2025-09-10 02:26:27.962877', 'step': 371, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 3322488817984}, 'timestamp': '2025-09-10 02:26:27.993493', 'step': 371, 'epoch': 1} {'type': 'loss', 'content': 0.2625507116317749, 'timestamp': '2025-09-10 02:26:28.016609', 'step': 372, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 176], 'flops': 5220903722048}, 'timestamp': '2025-09-10 02:26:28.048120', 'step': 372, 'epoch': 1} {'type': 'loss', 'content': 0.14573469758033752, 'timestamp': '2025-09-10 02:26:28.050239', 'step': 373, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 2847885091968}, 'timestamp': '2025-09-10 02:26:28.079758', 'step': 373, 'epoch': 1} {'type': 'loss', 'content': 0.18832029402256012, 'timestamp': '2025-09-10 02:26:28.081796', 'step': 374, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 2847885091968}, 'timestamp': '2025-09-10 02:26:28.111313', 'step': 374, 'epoch': 1} {'type': 'loss', 'content': 0.1515161544084549, 'timestamp': '2025-09-10 02:26:28.113539', 'step': 375, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 2847885091968}, 'timestamp': '2025-09-10 02:26:28.143696', 'step': 375, 'epoch': 1} {'type': 'loss', 'content': 0.13678795099258423, 'timestamp': '2025-09-10 02:26:28.168436', 'step': 376, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 3322488817984}, 'timestamp': '2025-09-10 02:26:28.198652', 'step': 376, 'epoch': 1} {'type': 'loss', 'content': 0.20304417610168457, 'timestamp': '2025-09-10 02:26:28.200909', 'step': 377, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 3322488817984}, 'timestamp': '2025-09-10 02:26:28.230515', 'step': 377, 'epoch': 1} {'type': 'loss', 'content': 0.16645322740077972, 'timestamp': '2025-09-10 02:26:28.232472', 'step': 378, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 3322488817984}, 'timestamp': '2025-09-10 02:26:28.262698', 'step': 378, 'epoch': 1} {'type': 'loss', 'content': 0.19539229571819305, 'timestamp': '2025-09-10 02:26:28.264778', 'step': 379, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 128], 'flops': 3797092544000}, 'timestamp': '2025-09-10 02:26:28.294995', 'step': 379, 'epoch': 1} {'type': 'loss', 'content': 0.2851144075393677, 'timestamp': '2025-09-10 02:26:28.318277', 'step': 380, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 2847885091968}, 'timestamp': '2025-09-10 02:26:28.348697', 'step': 380, 'epoch': 1} {'type': 'loss', 'content': 0.2393040806055069, 'timestamp': '2025-09-10 02:26:28.350900', 'step': 381, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 3322488817984}, 'timestamp': '2025-09-10 02:26:28.380460', 'step': 381, 'epoch': 1} {'type': 'loss', 'content': 0.32038915157318115, 'timestamp': '2025-09-10 02:26:28.382490', 'step': 382, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 128], 'flops': 3797092544000}, 'timestamp': '2025-09-10 02:26:28.412696', 'step': 382, 'epoch': 1} {'type': 'loss', 'content': 0.14398911595344543, 'timestamp': '2025-09-10 02:26:28.415469', 'step': 383, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 2847885091968}, 'timestamp': '2025-09-10 02:26:28.445441', 'step': 383, 'epoch': 1} {'type': 'loss', 'content': 0.23627880215644836, 'timestamp': '2025-09-10 02:26:28.468792', 'step': 384, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 128], 'flops': 3797092544000}, 'timestamp': '2025-09-10 02:26:28.498632', 'step': 384, 'epoch': 1} {'type': 'loss', 'content': 0.20305223762989044, 'timestamp': '2025-09-10 02:26:28.500773', 'step': 385, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 3322488817984}, 'timestamp': '2025-09-10 02:26:28.530535', 'step': 385, 'epoch': 1} {'type': 'loss', 'content': 0.17835384607315063, 'timestamp': '2025-09-10 02:26:28.533869', 'step': 386, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 2847885091968}, 'timestamp': '2025-09-10 02:26:28.563600', 'step': 386, 'epoch': 1} {'type': 'loss', 'content': 0.1275981217622757, 'timestamp': '2025-09-10 02:26:28.566108', 'step': 387, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 160], 'flops': 4746299996032}, 'timestamp': '2025-09-10 02:26:28.596491', 'step': 387, 'epoch': 1} {'type': 'loss', 'content': 0.22920745611190796, 'timestamp': '2025-09-10 02:26:28.619962', 'step': 388, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 2847885091968}, 'timestamp': '2025-09-10 02:26:28.649720', 'step': 388, 'epoch': 1} {'type': 'loss', 'content': 0.17840339243412018, 'timestamp': '2025-09-10 02:26:28.651908', 'step': 389, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 80], 'flops': 2373281365952}, 'timestamp': '2025-09-10 02:26:28.681271', 'step': 389, 'epoch': 1} {'type': 'loss', 'content': 0.19645752012729645, 'timestamp': '2025-09-10 02:26:28.684259', 'step': 390, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 2847885091968}, 'timestamp': '2025-09-10 02:26:28.713756', 'step': 390, 'epoch': 1} {'type': 'loss', 'content': 0.2247713953256607, 'timestamp': '2025-09-10 02:26:28.716512', 'step': 391, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 144], 'flops': 4271696270016}, 'timestamp': '2025-09-10 02:26:28.745657', 'step': 391, 'epoch': 1} {'type': 'loss', 'content': 0.2669857144355774, 'timestamp': '2025-09-10 02:26:28.769128', 'step': 392, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 3322488817984}, 'timestamp': '2025-09-10 02:26:28.798563', 'step': 392, 'epoch': 1} {'type': 'loss', 'content': 0.17774125933647156, 'timestamp': '2025-09-10 02:26:28.800843', 'step': 393, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 2847885091968}, 'timestamp': '2025-09-10 02:26:28.830753', 'step': 393, 'epoch': 1} {'type': 'loss', 'content': 0.14913859963417053, 'timestamp': '2025-09-10 02:26:28.832816', 'step': 394, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 144], 'flops': 4271696270016}, 'timestamp': '2025-09-10 02:26:28.864357', 'step': 394, 'epoch': 1} {'type': 'loss', 'content': 0.29838958382606506, 'timestamp': '2025-09-10 02:26:28.866435', 'step': 395, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 3322488817984}, 'timestamp': '2025-09-10 02:26:28.895565', 'step': 395, 'epoch': 1} {'type': 'loss', 'content': 0.11734305322170258, 'timestamp': '2025-09-10 02:26:28.919544', 'step': 396, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 128], 'flops': 3797092544000}, 'timestamp': '2025-09-10 02:26:28.949564', 'step': 396, 'epoch': 1} {'type': 'loss', 'content': 0.15990670025348663, 'timestamp': '2025-09-10 02:26:28.951871', 'step': 397, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 2847885091968}, 'timestamp': '2025-09-10 02:26:28.981342', 'step': 397, 'epoch': 1} {'type': 'loss', 'content': 0.26202186942100525, 'timestamp': '2025-09-10 02:26:28.983548', 'step': 398, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 3322488817984}, 'timestamp': '2025-09-10 02:26:29.013455', 'step': 398, 'epoch': 1} {'type': 'loss', 'content': 0.22694924473762512, 'timestamp': '2025-09-10 02:26:29.015460', 'step': 399, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 128], 'flops': 3797092544000}, 'timestamp': '2025-09-10 02:26:29.044549', 'step': 399, 'epoch': 1} {'type': 'loss', 'content': 0.14953027665615082, 'timestamp': '2025-09-10 02:26:29.068019', 'step': 400, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 80], 'flops': 2373281365952}, 'timestamp': '2025-09-10 02:26:29.098091', 'step': 400, 'epoch': 1} {'type': 'loss', 'content': 0.24018824100494385, 'timestamp': '2025-09-10 02:26:29.100077', 'step': 401, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 2847885091968}, 'timestamp': '2025-09-10 02:26:29.130301', 'step': 401, 'epoch': 1} {'type': 'loss', 'content': 0.19578300416469574, 'timestamp': '2025-09-10 02:26:29.132527', 'step': 402, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 144], 'flops': 4271696270016}, 'timestamp': '2025-09-10 02:26:29.162704', 'step': 402, 'epoch': 1} {'type': 'loss', 'content': 0.16862553358078003, 'timestamp': '2025-09-10 02:26:29.164839', 'step': 403, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 3322488817984}, 'timestamp': '2025-09-10 02:26:29.194087', 'step': 403, 'epoch': 1} {'type': 'loss', 'content': 0.14715558290481567, 'timestamp': '2025-09-10 02:26:29.217580', 'step': 404, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 3322488817984}, 'timestamp': '2025-09-10 02:26:29.248489', 'step': 404, 'epoch': 1} {'type': 'loss', 'content': 0.19244690239429474, 'timestamp': '2025-09-10 02:26:29.250767', 'step': 405, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 144], 'flops': 4271696270016}, 'timestamp': '2025-09-10 02:26:29.280418', 'step': 405, 'epoch': 1} {'type': 'loss', 'content': 0.1995958536863327, 'timestamp': '2025-09-10 02:26:29.282790', 'step': 406, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 3322488817984}, 'timestamp': '2025-09-10 02:26:29.311889', 'step': 406, 'epoch': 1} {'type': 'loss', 'content': 0.1411338597536087, 'timestamp': '2025-09-10 02:26:29.313772', 'step': 407, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 80], 'flops': 2373281365952}, 'timestamp': '2025-09-10 02:26:29.342961', 'step': 407, 'epoch': 1} {'type': 'loss', 'content': 0.19895781576633453, 'timestamp': '2025-09-10 02:26:29.366626', 'step': 408, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 80], 'flops': 2373281365952}, 'timestamp': '2025-09-10 02:26:29.396420', 'step': 408, 'epoch': 1} {'type': 'loss', 'content': 0.14970771968364716, 'timestamp': '2025-09-10 02:26:29.398749', 'step': 409, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 3322488817984}, 'timestamp': '2025-09-10 02:26:29.428779', 'step': 409, 'epoch': 1} {'type': 'loss', 'content': 0.2165336012840271, 'timestamp': '2025-09-10 02:26:29.431313', 'step': 410, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 3322488817984}, 'timestamp': '2025-09-10 02:26:29.461284', 'step': 410, 'epoch': 1} {'type': 'loss', 'content': 0.2490255981683731, 'timestamp': '2025-09-10 02:26:29.463690', 'step': 411, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 144], 'flops': 4271696270016}, 'timestamp': '2025-09-10 02:26:29.493788', 'step': 411, 'epoch': 1} {'type': 'loss', 'content': 0.201808899641037, 'timestamp': '2025-09-10 02:26:29.517130', 'step': 412, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 3322488817984}, 'timestamp': '2025-09-10 02:26:29.546921', 'step': 412, 'epoch': 1} {'type': 'loss', 'content': 0.20187966525554657, 'timestamp': '2025-09-10 02:26:29.549077', 'step': 413, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 3322488817984}, 'timestamp': '2025-09-10 02:26:29.578702', 'step': 413, 'epoch': 1} {'type': 'loss', 'content': 0.21969738602638245, 'timestamp': '2025-09-10 02:26:29.580852', 'step': 414, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 128], 'flops': 3797092544000}, 'timestamp': '2025-09-10 02:26:29.611084', 'step': 414, 'epoch': 1} {'type': 'loss', 'content': 0.1686900407075882, 'timestamp': '2025-09-10 02:26:29.613081', 'step': 415, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 2847885091968}, 'timestamp': '2025-09-10 02:26:29.642636', 'step': 415, 'epoch': 1} {'type': 'loss', 'content': 0.16510352492332458, 'timestamp': '2025-09-10 02:26:29.666019', 'step': 416, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 128], 'flops': 3797092544000}, 'timestamp': '2025-09-10 02:26:29.696417', 'step': 416, 'epoch': 1} {'type': 'loss', 'content': 0.11307186633348465, 'timestamp': '2025-09-10 02:26:29.698916', 'step': 417, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 128], 'flops': 3797092544000}, 'timestamp': '2025-09-10 02:26:29.728432', 'step': 417, 'epoch': 1} {'type': 'loss', 'content': 0.20364387333393097, 'timestamp': '2025-09-10 02:26:29.730429', 'step': 418, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 144], 'flops': 4271696270016}, 'timestamp': '2025-09-10 02:26:29.760264', 'step': 418, 'epoch': 1} {'type': 'loss', 'content': 0.1173519715666771, 'timestamp': '2025-09-10 02:26:29.762632', 'step': 419, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 128], 'flops': 3797092544000}, 'timestamp': '2025-09-10 02:26:29.792947', 'step': 419, 'epoch': 1} {'type': 'loss', 'content': 0.1508024036884308, 'timestamp': '2025-09-10 02:26:29.816294', 'step': 420, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 2847885091968}, 'timestamp': '2025-09-10 02:26:29.846763', 'step': 420, 'epoch': 1} {'type': 'loss', 'content': 0.17167113721370697, 'timestamp': '2025-09-10 02:26:29.850169', 'step': 421, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 3322488817984}, 'timestamp': '2025-09-10 02:26:29.880007', 'step': 421, 'epoch': 1} {'type': 'loss', 'content': 0.2088523954153061, 'timestamp': '2025-09-10 02:26:29.882072', 'step': 422, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 2847885091968}, 'timestamp': '2025-09-10 02:26:29.911303', 'step': 422, 'epoch': 1} {'type': 'loss', 'content': 0.23534804582595825, 'timestamp': '2025-09-10 02:26:29.913313', 'step': 423, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 2847885091968}, 'timestamp': '2025-09-10 02:26:29.943108', 'step': 423, 'epoch': 1} {'type': 'loss', 'content': 0.16658782958984375, 'timestamp': '2025-09-10 02:26:29.966420', 'step': 424, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 3322488817984}, 'timestamp': '2025-09-10 02:26:29.996735', 'step': 424, 'epoch': 1} {'type': 'loss', 'content': 0.15003570914268494, 'timestamp': '2025-09-10 02:26:29.999036', 'step': 425, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 2847885091968}, 'timestamp': '2025-09-10 02:26:30.028625', 'step': 425, 'epoch': 1} {'type': 'loss', 'content': 0.20153912901878357, 'timestamp': '2025-09-10 02:26:30.030744', 'step': 426, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 2847885091968}, 'timestamp': '2025-09-10 02:26:30.060391', 'step': 426, 'epoch': 1} {'type': 'loss', 'content': 0.34248480200767517, 'timestamp': '2025-09-10 02:26:30.062841', 'step': 427, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 144], 'flops': 4271696270016}, 'timestamp': '2025-09-10 02:26:30.092690', 'step': 427, 'epoch': 1} {'type': 'loss', 'content': 0.22822824120521545, 'timestamp': '2025-09-10 02:26:30.116219', 'step': 428, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 3322488817984}, 'timestamp': '2025-09-10 02:26:30.146371', 'step': 428, 'epoch': 1} {'type': 'loss', 'content': 0.16963163018226624, 'timestamp': '2025-09-10 02:26:30.148389', 'step': 429, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 128], 'flops': 3797092544000}, 'timestamp': '2025-09-10 02:26:30.179200', 'step': 429, 'epoch': 1} {'type': 'loss', 'content': 0.12356647104024887, 'timestamp': '2025-09-10 02:26:30.181269', 'step': 430, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 160], 'flops': 4746299996032}, 'timestamp': '2025-09-10 02:26:30.211025', 'step': 430, 'epoch': 1} {'type': 'loss', 'content': 0.2139376699924469, 'timestamp': '2025-09-10 02:26:30.213575', 'step': 431, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 128], 'flops': 3797092544000}, 'timestamp': '2025-09-10 02:26:30.243051', 'step': 431, 'epoch': 1} {'type': 'loss', 'content': 0.15862798690795898, 'timestamp': '2025-09-10 02:26:30.267368', 'step': 432, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 3322488817984}, 'timestamp': '2025-09-10 02:26:30.297494', 'step': 432, 'epoch': 1} {'type': 'loss', 'content': 0.13517449796199799, 'timestamp': '2025-09-10 02:26:30.299906', 'step': 433, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 3322488817984}, 'timestamp': '2025-09-10 02:26:30.329658', 'step': 433, 'epoch': 1} {'type': 'loss', 'content': 0.1689438670873642, 'timestamp': '2025-09-10 02:26:30.332000', 'step': 434, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 128], 'flops': 3797092544000}, 'timestamp': '2025-09-10 02:26:30.362576', 'step': 434, 'epoch': 1} {'type': 'loss', 'content': 0.3202431797981262, 'timestamp': '2025-09-10 02:26:30.365727', 'step': 435, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 2847885091968}, 'timestamp': '2025-09-10 02:26:30.395985', 'step': 435, 'epoch': 1} {'type': 'loss', 'content': 0.112936832010746, 'timestamp': '2025-09-10 02:26:30.419792', 'step': 436, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 2847885091968}, 'timestamp': '2025-09-10 02:26:30.458279', 'step': 436, 'epoch': 1} {'type': 'loss', 'content': 0.23049511015415192, 'timestamp': '2025-09-10 02:26:30.463720', 'step': 437, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 2847885091968}, 'timestamp': '2025-09-10 02:26:30.497369', 'step': 437, 'epoch': 1} {'type': 'loss', 'content': 0.23448914289474487, 'timestamp': '2025-09-10 02:26:30.499299', 'step': 438, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 144], 'flops': 4271696270016}, 'timestamp': '2025-09-10 02:26:30.528995', 'step': 438, 'epoch': 1} {'type': 'loss', 'content': 0.13086941838264465, 'timestamp': '2025-09-10 02:26:30.531296', 'step': 439, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 3322488817984}, 'timestamp': '2025-09-10 02:26:30.560738', 'step': 439, 'epoch': 1} {'type': 'loss', 'content': 0.2437754124403, 'timestamp': '2025-09-10 02:26:30.583892', 'step': 440, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 3322488817984}, 'timestamp': '2025-09-10 02:26:30.613727', 'step': 440, 'epoch': 1} {'type': 'loss', 'content': 0.17263725399971008, 'timestamp': '2025-09-10 02:26:30.615807', 'step': 441, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 128], 'flops': 3797092544000}, 'timestamp': '2025-09-10 02:26:30.645455', 'step': 441, 'epoch': 1} {'type': 'loss', 'content': 0.1774987280368805, 'timestamp': '2025-09-10 02:26:30.647488', 'step': 442, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 128], 'flops': 3797092544000}, 'timestamp': '2025-09-10 02:26:30.677244', 'step': 442, 'epoch': 1} {'type': 'loss', 'content': 0.17908906936645508, 'timestamp': '2025-09-10 02:26:30.679505', 'step': 443, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 128], 'flops': 3797092544000}, 'timestamp': '2025-09-10 02:26:30.709015', 'step': 443, 'epoch': 1} {'type': 'loss', 'content': 0.20576727390289307, 'timestamp': '2025-09-10 02:26:30.732090', 'step': 444, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 128], 'flops': 3797092544000}, 'timestamp': '2025-09-10 02:26:30.762250', 'step': 444, 'epoch': 1} {'type': 'loss', 'content': 0.27009111642837524, 'timestamp': '2025-09-10 02:26:30.764585', 'step': 445, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 128], 'flops': 3797092544000}, 'timestamp': '2025-09-10 02:26:30.794140', 'step': 445, 'epoch': 1} {'type': 'loss', 'content': 0.24207252264022827, 'timestamp': '2025-09-10 02:26:30.796221', 'step': 446, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 80], 'flops': 2373281365952}, 'timestamp': '2025-09-10 02:26:30.826387', 'step': 446, 'epoch': 1} {'type': 'loss', 'content': 0.24729079008102417, 'timestamp': '2025-09-10 02:26:30.828456', 'step': 447, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 160], 'flops': 4746299996032}, 'timestamp': '2025-09-10 02:26:30.858071', 'step': 447, 'epoch': 1} {'type': 'loss', 'content': 0.18153667449951172, 'timestamp': '2025-09-10 02:26:30.881459', 'step': 448, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 3322488817984}, 'timestamp': '2025-09-10 02:26:30.911417', 'step': 448, 'epoch': 1} {'type': 'loss', 'content': 0.2540031969547272, 'timestamp': '2025-09-10 02:26:30.913563', 'step': 449, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 3322488817984}, 'timestamp': '2025-09-10 02:26:30.943383', 'step': 449, 'epoch': 1} {'type': 'loss', 'content': 0.16451996564865112, 'timestamp': '2025-09-10 02:26:30.945409', 'step': 450, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 3322488817984}, 'timestamp': '2025-09-10 02:26:30.974955', 'step': 450, 'epoch': 1} {'type': 'loss', 'content': 0.17150156199932098, 'timestamp': '2025-09-10 02:26:30.977006', 'step': 451, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 144], 'flops': 4271696270016}, 'timestamp': '2025-09-10 02:26:31.006754', 'step': 451, 'epoch': 1} {'type': 'loss', 'content': 0.21590951085090637, 'timestamp': '2025-09-10 02:26:31.030116', 'step': 452, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 2847885091968}, 'timestamp': '2025-09-10 02:26:31.060085', 'step': 452, 'epoch': 1} {'type': 'loss', 'content': 0.22891800105571747, 'timestamp': '2025-09-10 02:26:31.062623', 'step': 453, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 160], 'flops': 4746299996032}, 'timestamp': '2025-09-10 02:26:31.094822', 'step': 453, 'epoch': 1} {'type': 'loss', 'content': 0.19015896320343018, 'timestamp': '2025-09-10 02:26:31.097293', 'step': 454, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 144], 'flops': 4271696270016}, 'timestamp': '2025-09-10 02:26:31.126693', 'step': 454, 'epoch': 1} {'type': 'loss', 'content': 0.2349662482738495, 'timestamp': '2025-09-10 02:26:31.131430', 'step': 455, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 2847885091968}, 'timestamp': '2025-09-10 02:26:31.161668', 'step': 455, 'epoch': 1} {'type': 'loss', 'content': 0.24021224677562714, 'timestamp': '2025-09-10 02:26:31.185158', 'step': 456, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 160], 'flops': 4746299996032}, 'timestamp': '2025-09-10 02:26:31.215409', 'step': 456, 'epoch': 1} {'type': 'loss', 'content': 0.23902937769889832, 'timestamp': '2025-09-10 02:26:31.217565', 'step': 457, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 2847885091968}, 'timestamp': '2025-09-10 02:26:31.247129', 'step': 457, 'epoch': 1} {'type': 'loss', 'content': 0.18361900746822357, 'timestamp': '2025-09-10 02:26:31.249098', 'step': 458, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 128], 'flops': 3797092544000}, 'timestamp': '2025-09-10 02:26:31.278675', 'step': 458, 'epoch': 1} {'type': 'loss', 'content': 0.24632906913757324, 'timestamp': '2025-09-10 02:26:31.280772', 'step': 459, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 128], 'flops': 3797092544000}, 'timestamp': '2025-09-10 02:26:31.310734', 'step': 459, 'epoch': 1} {'type': 'loss', 'content': 0.23790393769741058, 'timestamp': '2025-09-10 02:26:31.334022', 'step': 460, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 128], 'flops': 3797092544000}, 'timestamp': '2025-09-10 02:26:31.364586', 'step': 460, 'epoch': 1} {'type': 'loss', 'content': 0.22995267808437347, 'timestamp': '2025-09-10 02:26:31.368098', 'step': 461, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 3322488817984}, 'timestamp': '2025-09-10 02:26:31.399413', 'step': 461, 'epoch': 1} {'type': 'loss', 'content': 0.23533430695533752, 'timestamp': '2025-09-10 02:26:31.401418', 'step': 462, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 2847885091968}, 'timestamp': '2025-09-10 02:26:31.431657', 'step': 462, 'epoch': 1} {'type': 'loss', 'content': 0.17498360574245453, 'timestamp': '2025-09-10 02:26:31.434528', 'step': 463, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 3322488817984}, 'timestamp': '2025-09-10 02:26:31.464643', 'step': 463, 'epoch': 1} {'type': 'loss', 'content': 0.2460913211107254, 'timestamp': '2025-09-10 02:26:31.488154', 'step': 464, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 144], 'flops': 4271696270016}, 'timestamp': '2025-09-10 02:26:31.517988', 'step': 464, 'epoch': 1} {'type': 'loss', 'content': 0.14423434436321259, 'timestamp': '2025-09-10 02:26:31.520772', 'step': 465, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 3322488817984}, 'timestamp': '2025-09-10 02:26:31.552417', 'step': 465, 'epoch': 1} {'type': 'loss', 'content': 0.21251823008060455, 'timestamp': '2025-09-10 02:26:31.554842', 'step': 466, 'epoch': 1} {'type': 'flops', 'content': [{'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1582003754624}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1898404492032}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1898404492032}, {'type': 'perplexity', 'in_batch_dim': [8, 144], 'batch_size': 8, 'flops': 2847606704256}, {'type': 'perplexity', 'in_batch_dim': [8, 64], 'batch_size': 8, 'flops': 1265603017216}, {'type': 'perplexity', 'in_batch_dim': [8, 112], 'batch_size': 8, 'flops': 2214805229440}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1898404492032}, {'type': 'perplexity', 'in_batch_dim': [8, 112], 'batch_size': 8, 'flops': 2214805229440}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1898404492032}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1898404492032}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1898404492032}, {'type': 'perplexity', 'in_batch_dim': [8, 112], 'batch_size': 8, 'flops': 2214805229440}, {'type': 'perplexity', 'in_batch_dim': [8, 112], 'batch_size': 8, 'flops': 2214805229440}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1582003754624}, {'type': 'perplexity', 'in_batch_dim': [8, 144], 'batch_size': 8, 'flops': 2847606704256}, {'type': 'perplexity', 'in_batch_dim': [8, 112], 'batch_size': 8, 'flops': 2214805229440}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1582003754624}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1898404492032}, {'type': 'perplexity', 'in_batch_dim': [8, 64], 'batch_size': 8, 'flops': 1265603017216}, {'type': 'perplexity', 'in_batch_dim': [8, 64], 'batch_size': 8, 'flops': 1265603017216}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1898404492032}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1582003754624}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1582003754624}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1582003754624}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1582003754624}, {'type': 'perplexity', 'in_batch_dim': [8, 64], 'batch_size': 8, 'flops': 1265603017216}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1898404492032}, {'type': 'perplexity', 'in_batch_dim': [8, 64], 'batch_size': 8, 'flops': 1265603017216}, {'type': 'perplexity', 'in_batch_dim': [8, 64], 'batch_size': 8, 'flops': 1265603017216}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1582003754624}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1582003754624}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1582003754624}, {'type': 'perplexity', 'in_batch_dim': [8, 112], 'batch_size': 8, 'flops': 2214805229440}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1582003754624}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1582003754624}, {'type': 'perplexity', 'in_batch_dim': [8, 160], 'batch_size': 8, 'flops': 3164007441664}, {'type': 'perplexity', 'in_batch_dim': [8, 64], 'batch_size': 8, 'flops': 1265603017216}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1898404492032}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1898404492032}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1898404492032}, {'type': 'perplexity', 'in_batch_dim': [8, 64], 'batch_size': 8, 'flops': 1265603017216}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1582003754624}, {'type': 'perplexity', 'in_batch_dim': [8, 64], 'batch_size': 8, 'flops': 1265603017216}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1582003754624}, {'type': 'perplexity', 'in_batch_dim': [8, 64], 'batch_size': 8, 'flops': 1265603017216}, {'type': 'perplexity', 'in_batch_dim': [8, 112], 'batch_size': 8, 'flops': 2214805229440}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1582003754624}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1898404492032}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1898404492032}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1898404492032}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1898404492032}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1898404492032}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1898404492032}, {'type': 'perplexity', 'in_batch_dim': [8, 64], 'batch_size': 8, 'flops': 1265603017216}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1898404492032}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1582003754624}, {'type': 'perplexity', 'in_batch_dim': [8, 64], 'batch_size': 8, 'flops': 1265603017216}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1898404492032}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1898404492032}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1898404492032}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1898404492032}, {'type': 'perplexity', 'in_batch_dim': [8, 128], 'batch_size': 8, 'flops': 2531205966848}, {'type': 'perplexity', 'in_batch_dim': [8, 112], 'batch_size': 8, 'flops': 2214805229440}, {'type': 'perplexity', 'in_batch_dim': [8, 64], 'batch_size': 8, 'flops': 1265603017216}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1582003754624}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1898404492032}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1582003754624}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1898404492032}, {'type': 'perplexity', 'in_batch_dim': [8, 64], 'batch_size': 8, 'flops': 1265603017216}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1898404492032}, {'type': 'perplexity', 'in_batch_dim': [8, 112], 'batch_size': 8, 'flops': 2214805229440}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1898404492032}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1582003754624}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1582003754624}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1582003754624}, {'type': 'perplexity', 'in_batch_dim': [8, 64], 'batch_size': 8, 'flops': 1265603017216}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1582003754624}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1898404492032}, {'type': 'perplexity', 'in_batch_dim': [8, 64], 'batch_size': 8, 'flops': 1265603017216}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1582003754624}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1898404492032}, {'type': 'perplexity', 'in_batch_dim': [8, 64], 'batch_size': 8, 'flops': 1265603017216}, {'type': 'perplexity', 'in_batch_dim': [8, 112], 'batch_size': 8, 'flops': 2214805229440}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1582003754624}, {'type': 'perplexity', 'in_batch_dim': [8, 144], 'batch_size': 8, 'flops': 2847606704256}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1582003754624}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1582003754624}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1582003754624}, {'type': 'perplexity', 'in_batch_dim': [8, 64], 'batch_size': 8, 'flops': 1265603017216}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1898404492032}, {'type': 'perplexity', 'in_batch_dim': [8, 112], 'batch_size': 8, 'flops': 2214805229440}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1898404492032}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1582003754624}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1582003754624}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1582003754624}, {'type': 'perplexity', 'in_batch_dim': [8, 112], 'batch_size': 8, 'flops': 2214805229440}, {'type': 'perplexity', 'in_batch_dim': [8, 64], 'batch_size': 8, 'flops': 1265603017216}, {'type': 'perplexity', 'in_batch_dim': [8, 112], 'batch_size': 8, 'flops': 2214805229440}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1582003754624}, {'type': 'perplexity', 'in_batch_dim': [8, 64], 'batch_size': 8, 'flops': 1265603017216}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1582003754624}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1582003754624}, {'type': 'perplexity', 'in_batch_dim': [8, 64], 'batch_size': 8, 'flops': 1265603017216}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1582003754624}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1582003754624}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1582003754624}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1898404492032}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1582003754624}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1582003754624}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1898404492032}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1898404492032}, {'type': 'perplexity', 'in_batch_dim': [8, 112], 'batch_size': 8, 'flops': 2214805229440}, {'type': 'perplexity', 'in_batch_dim': [8, 64], 'batch_size': 8, 'flops': 1265603017216}, {'type': 'perplexity', 'in_batch_dim': [8, 112], 'batch_size': 8, 'flops': 2214805229440}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1898404492032}, {'type': 'perplexity', 'in_batch_dim': [8, 112], 'batch_size': 8, 'flops': 2214805229440}, {'type': 'perplexity', 'in_batch_dim': [8, 128], 'batch_size': 8, 'flops': 2531205966848}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1582003754624}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1898404492032}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1898404492032}, {'type': 'perplexity', 'in_batch_dim': [8, 192], 'batch_size': 8, 'flops': 3796808916480}, {'type': 'perplexity', 'in_batch_dim': [8, 64], 'batch_size': 8, 'flops': 1265603017216}, {'type': 'perplexity', 'in_batch_dim': [8, 144], 'batch_size': 8, 'flops': 2847606704256}, {'type': 'perplexity', 'in_batch_dim': [8, 64], 'batch_size': 8, 'flops': 1265603017216}, {'type': 'perplexity', 'in_batch_dim': [8, 144], 'batch_size': 8, 'flops': 2847606704256}, {'type': 'perplexity', 'in_batch_dim': [8, 64], 'batch_size': 8, 'flops': 1265603017216}, {'type': 'perplexity', 'in_batch_dim': [8, 64], 'batch_size': 8, 'flops': 1265603017216}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1898404492032}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1898404492032}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1582003754624}, {'type': 'perplexity', 'in_batch_dim': [8, 128], 'batch_size': 8, 'flops': 2531205966848}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1898404492032}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1898404492032}, {'type': 'perplexity', 'in_batch_dim': [8, 112], 'batch_size': 8, 'flops': 2214805229440}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1582003754624}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1582003754624}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1898404492032}, {'type': 'perplexity', 'in_batch_dim': [8, 64], 'batch_size': 8, 'flops': 1265603017216}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1898404492032}, {'type': 'perplexity', 'in_batch_dim': [8, 112], 'batch_size': 8, 'flops': 2214805229440}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1898404492032}, {'type': 'perplexity', 'in_batch_dim': [8, 64], 'batch_size': 8, 'flops': 1265603017216}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1582003754624}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1898404492032}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1898404492032}, {'type': 'perplexity', 'in_batch_dim': [8, 64], 'batch_size': 8, 'flops': 1265603017216}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1898404492032}, {'type': 'perplexity', 'in_batch_dim': [8, 64], 'batch_size': 8, 'flops': 1265603017216}, {'type': 'perplexity', 'in_batch_dim': [8, 112], 'batch_size': 8, 'flops': 2214805229440}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1582003754624}, {'type': 'perplexity', 'in_batch_dim': [8, 128], 'batch_size': 8, 'flops': 2531205966848}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1582003754624}, {'type': 'perplexity', 'in_batch_dim': [8, 112], 'batch_size': 8, 'flops': 2214805229440}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1898404492032}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1582003754624}, {'type': 'perplexity', 'in_batch_dim': [8, 112], 'batch_size': 8, 'flops': 2214805229440}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1582003754624}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1898404492032}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1898404492032}, {'type': 'perplexity', 'in_batch_dim': [8, 128], 'batch_size': 8, 'flops': 2531205966848}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1898404492032}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1898404492032}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1582003754624}, {'type': 'perplexity', 'in_batch_dim': [8, 112], 'batch_size': 8, 'flops': 2214805229440}, {'type': 'perplexity', 'in_batch_dim': [8, 128], 'batch_size': 8, 'flops': 2531205966848}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1898404492032}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1582003754624}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1582003754624}, {'type': 'perplexity', 'in_batch_dim': [8, 128], 'batch_size': 8, 'flops': 2531205966848}, {'type': 'perplexity', 'in_batch_dim': [8, 112], 'batch_size': 8, 'flops': 2214805229440}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1582003754624}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1582003754624}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1898404492032}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1582003754624}, {'type': 'perplexity', 'in_batch_dim': [8, 64], 'batch_size': 8, 'flops': 1265603017216}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1582003754624}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1582003754624}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1582003754624}, {'type': 'perplexity', 'in_batch_dim': [8, 112], 'batch_size': 8, 'flops': 2214805229440}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1582003754624}, {'type': 'perplexity', 'in_batch_dim': [8, 64], 'batch_size': 8, 'flops': 1265603017216}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1582003754624}, {'type': 'perplexity', 'in_batch_dim': [8, 128], 'batch_size': 8, 'flops': 2531205966848}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1898404492032}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1898404492032}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1582003754624}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1898404492032}, {'type': 'perplexity', 'in_batch_dim': [8, 64], 'batch_size': 8, 'flops': 1265603017216}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1898404492032}, {'type': 'perplexity', 'in_batch_dim': [8, 112], 'batch_size': 8, 'flops': 2214805229440}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1898404492032}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1582003754624}, {'type': 'perplexity', 'in_batch_dim': [8, 112], 'batch_size': 8, 'flops': 2214805229440}, {'type': 'perplexity', 'in_batch_dim': [8, 112], 'batch_size': 8, 'flops': 2214805229440}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1898404492032}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1898404492032}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1898404492032}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1898404492032}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1582003754624}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1582003754624}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1582003754624}, {'type': 'perplexity', 'in_batch_dim': [8, 112], 'batch_size': 8, 'flops': 2214805229440}, {'type': 'perplexity', 'in_batch_dim': [8, 64], 'batch_size': 8, 'flops': 1265603017216}, {'type': 'perplexity', 'in_batch_dim': [8, 144], 'batch_size': 8, 'flops': 2847606704256}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1898404492032}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1898404492032}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1898404492032}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1898404492032}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1898404492032}, {'type': 'perplexity', 'in_batch_dim': [8, 112], 'batch_size': 8, 'flops': 2214805229440}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1582003754624}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1582003754624}, {'type': 'perplexity', 'in_batch_dim': [8, 64], 'batch_size': 8, 'flops': 1265603017216}, {'type': 'perplexity', 'in_batch_dim': [8, 112], 'batch_size': 8, 'flops': 2214805229440}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1582003754624}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1898404492032}, {'type': 'perplexity', 'in_batch_dim': [8, 64], 'batch_size': 8, 'flops': 1265603017216}, {'type': 'perplexity', 'in_batch_dim': [8, 112], 'batch_size': 8, 'flops': 2214805229440}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1898404492032}, {'type': 'perplexity', 'in_batch_dim': [8, 64], 'batch_size': 8, 'flops': 1265603017216}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1582003754624}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1582003754624}, {'type': 'perplexity', 'in_batch_dim': [8, 64], 'batch_size': 8, 'flops': 1265603017216}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1898404492032}, {'type': 'perplexity', 'in_batch_dim': [8, 64], 'batch_size': 8, 'flops': 1265603017216}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1582003754624}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1898404492032}, {'type': 'perplexity', 'in_batch_dim': [8, 112], 'batch_size': 8, 'flops': 2214805229440}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1582003754624}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1898404492032}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1582003754624}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1898404492032}, {'type': 'perplexity', 'in_batch_dim': [8, 112], 'batch_size': 8, 'flops': 2214805229440}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1898404492032}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1898404492032}, {'type': 'perplexity', 'in_batch_dim': [8, 64], 'batch_size': 8, 'flops': 1265603017216}, {'type': 'perplexity', 'in_batch_dim': [8, 64], 'batch_size': 8, 'flops': 1265603017216}, {'type': 'perplexity', 'in_batch_dim': [8, 128], 'batch_size': 8, 'flops': 2531205966848}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1582003754624}, {'type': 'perplexity', 'in_batch_dim': [8, 64], 'batch_size': 8, 'flops': 1265603017216}, {'type': 'perplexity', 'in_batch_dim': [8, 112], 'batch_size': 8, 'flops': 2214805229440}, {'type': 'perplexity', 'in_batch_dim': [8, 112], 'batch_size': 8, 'flops': 2214805229440}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1898404492032}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1582003754624}, {'type': 'perplexity', 'in_batch_dim': [8, 128], 'batch_size': 8, 'flops': 2531205966848}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1898404492032}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1582003754624}, {'type': 'perplexity', 'in_batch_dim': [8, 112], 'batch_size': 8, 'flops': 2214805229440}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1898404492032}, {'type': 'perplexity', 'in_batch_dim': [8, 64], 'batch_size': 8, 'flops': 1265603017216}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1582003754624}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1898404492032}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1898404492032}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1582003754624}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1582003754624}, {'type': 'perplexity', 'in_batch_dim': [8, 64], 'batch_size': 8, 'flops': 1265603017216}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1898404492032}, {'type': 'perplexity', 'in_batch_dim': [8, 112], 'batch_size': 8, 'flops': 2214805229440}, {'type': 'perplexity', 'in_batch_dim': [8, 64], 'batch_size': 8, 'flops': 1265603017216}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1898404492032}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1582003754624}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1582003754624}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1898404492032}, {'type': 'perplexity', 'in_batch_dim': [8, 112], 'batch_size': 8, 'flops': 2214805229440}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1898404492032}, {'type': 'perplexity', 'in_batch_dim': [8, 112], 'batch_size': 8, 'flops': 2214805229440}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1582003754624}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1582003754624}, {'type': 'perplexity', 'in_batch_dim': [8, 64], 'batch_size': 8, 'flops': 1265603017216}, {'type': 'perplexity', 'in_batch_dim': [8, 112], 'batch_size': 8, 'flops': 2214805229440}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1582003754624}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1898404492032}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1582003754624}, {'type': 'perplexity', 'in_batch_dim': [8, 112], 'batch_size': 8, 'flops': 2214805229440}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1898404492032}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1898404492032}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1582003754624}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1898404492032}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1582003754624}, {'type': 'perplexity', 'in_batch_dim': [8, 112], 'batch_size': 8, 'flops': 2214805229440}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1582003754624}, {'type': 'perplexity', 'in_batch_dim': [8, 64], 'batch_size': 8, 'flops': 1265603017216}, {'type': 'perplexity', 'in_batch_dim': [8, 64], 'batch_size': 8, 'flops': 1265603017216}, {'type': 'perplexity', 'in_batch_dim': [8, 48], 'batch_size': 8, 'flops': 949202279808}, {'type': 'perplexity', 'in_batch_dim': [8, 112], 'batch_size': 8, 'flops': 2214805229440}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1582003754624}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1898404492032}, {'type': 'perplexity', 'in_batch_dim': [8, 128], 'batch_size': 8, 'flops': 2531205966848}, {'type': 'perplexity', 'in_batch_dim': [8, 112], 'batch_size': 8, 'flops': 2214805229440}, {'type': 'perplexity', 'in_batch_dim': [8, 128], 'batch_size': 8, 'flops': 2531205966848}, {'type': 'perplexity', 'in_batch_dim': [8, 112], 'batch_size': 8, 'flops': 2214805229440}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1582003754624}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1898404492032}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1582003754624}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1582003754624}, {'type': 'perplexity', 'in_batch_dim': [8, 112], 'batch_size': 8, 'flops': 2214805229440}, {'type': 'perplexity', 'in_batch_dim': [8, 112], 'batch_size': 8, 'flops': 2214805229440}, {'type': 'perplexity', 'in_batch_dim': [8, 64], 'batch_size': 8, 'flops': 1265603017216}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1898404492032}, {'type': 'perplexity', 'in_batch_dim': [8, 112], 'batch_size': 8, 'flops': 2214805229440}, {'type': 'perplexity', 'in_batch_dim': [8, 64], 'batch_size': 8, 'flops': 1265603017216}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1898404492032}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1582003754624}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1898404492032}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1898404492032}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1898404492032}, {'type': 'perplexity', 'in_batch_dim': [8, 48], 'batch_size': 8, 'flops': 949202279808}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1898404492032}, {'type': 'perplexity', 'in_batch_dim': [8, 112], 'batch_size': 8, 'flops': 2214805229440}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1582003754624}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1898404492032}, {'type': 'perplexity', 'in_batch_dim': [8, 64], 'batch_size': 8, 'flops': 1265603017216}, {'type': 'perplexity', 'in_batch_dim': [8, 112], 'batch_size': 8, 'flops': 2214805229440}, {'type': 'perplexity', 'in_batch_dim': [8, 64], 'batch_size': 8, 'flops': 1265603017216}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1582003754624}, {'type': 'perplexity', 'in_batch_dim': [8, 64], 'batch_size': 8, 'flops': 1265603017216}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1582003754624}, {'type': 'perplexity', 'in_batch_dim': [8, 112], 'batch_size': 8, 'flops': 2214805229440}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1582003754624}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1582003754624}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1582003754624}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1582003754624}, {'type': 'perplexity', 'in_batch_dim': [8, 112], 'batch_size': 8, 'flops': 2214805229440}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1582003754624}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1898404492032}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1898404492032}, {'type': 'perplexity', 'in_batch_dim': [8, 112], 'batch_size': 8, 'flops': 2214805229440}, {'type': 'perplexity', 'in_batch_dim': [8, 144], 'batch_size': 8, 'flops': 2847606704256}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1582003754624}, {'type': 'perplexity', 'in_batch_dim': [8, 64], 'batch_size': 8, 'flops': 1265603017216}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1898404492032}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1898404492032}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1582003754624}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1898404492032}, {'type': 'perplexity', 'in_batch_dim': [8, 128], 'batch_size': 8, 'flops': 2531205966848}, {'type': 'perplexity', 'in_batch_dim': [8, 112], 'batch_size': 8, 'flops': 2214805229440}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1898404492032}, {'type': 'perplexity', 'in_batch_dim': [8, 64], 'batch_size': 8, 'flops': 1265603017216}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1898404492032}, {'type': 'perplexity', 'in_batch_dim': [8, 160], 'batch_size': 8, 'flops': 3164007441664}, {'type': 'perplexity', 'in_batch_dim': [8, 64], 'batch_size': 8, 'flops': 1265603017216}, {'type': 'perplexity', 'in_batch_dim': [8, 112], 'batch_size': 8, 'flops': 2214805229440}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1898404492032}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1898404492032}, {'type': 'perplexity', 'in_batch_dim': [8, 144], 'batch_size': 8, 'flops': 2847606704256}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1582003754624}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1898404492032}, {'type': 'perplexity', 'in_batch_dim': [8, 64], 'batch_size': 8, 'flops': 1265603017216}, {'type': 'perplexity', 'in_batch_dim': [8, 64], 'batch_size': 8, 'flops': 1265603017216}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1898404492032}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1898404492032}, {'type': 'perplexity', 'in_batch_dim': [8, 112], 'batch_size': 8, 'flops': 2214805229440}, {'type': 'perplexity', 'in_batch_dim': [8, 112], 'batch_size': 8, 'flops': 2214805229440}, {'type': 'perplexity', 'in_batch_dim': [8, 128], 'batch_size': 8, 'flops': 2531205966848}, {'type': 'perplexity', 'in_batch_dim': [8, 112], 'batch_size': 8, 'flops': 2214805229440}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1582003754624}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1582003754624}, {'type': 'perplexity', 'in_batch_dim': [8, 112], 'batch_size': 8, 'flops': 2214805229440}, {'type': 'perplexity', 'in_batch_dim': [8, 128], 'batch_size': 8, 'flops': 2531205966848}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1582003754624}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1582003754624}, {'type': 'perplexity', 'in_batch_dim': [8, 64], 'batch_size': 8, 'flops': 1265603017216}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1898404492032}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1582003754624}, {'type': 'perplexity', 'in_batch_dim': [8, 112], 'batch_size': 8, 'flops': 2214805229440}, {'type': 'perplexity', 'in_batch_dim': [8, 64], 'batch_size': 8, 'flops': 1265603017216}, {'type': 'perplexity', 'in_batch_dim': [8, 64], 'batch_size': 8, 'flops': 1265603017216}, {'type': 'perplexity', 'in_batch_dim': [8, 128], 'batch_size': 8, 'flops': 2531205966848}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1898404492032}, {'type': 'perplexity', 'in_batch_dim': [8, 112], 'batch_size': 8, 'flops': 2214805229440}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1582003754624}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1582003754624}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1582003754624}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1582003754624}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1582003754624}, {'type': 'perplexity', 'in_batch_dim': [8, 112], 'batch_size': 8, 'flops': 2214805229440}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1898404492032}, {'type': 'perplexity', 'in_batch_dim': [8, 112], 'batch_size': 8, 'flops': 2214805229440}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1898404492032}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1582003754624}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1582003754624}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1898404492032}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1898404492032}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1582003754624}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1582003754624}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1582003754624}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1582003754624}, {'type': 'perplexity', 'in_batch_dim': [8, 48], 'batch_size': 8, 'flops': 949202279808}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1582003754624}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1898404492032}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1582003754624}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1898404492032}, {'type': 'perplexity', 'in_batch_dim': [8, 112], 'batch_size': 8, 'flops': 2214805229440}, {'type': 'perplexity', 'in_batch_dim': [8, 128], 'batch_size': 8, 'flops': 2531205966848}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1898404492032}, {'type': 'perplexity', 'in_batch_dim': [8, 64], 'batch_size': 8, 'flops': 1265603017216}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1898404492032}, {'type': 'perplexity', 'in_batch_dim': [8, 64], 'batch_size': 8, 'flops': 1265603017216}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1582003754624}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1582003754624}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1582003754624}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1582003754624}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1898404492032}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1582003754624}, {'type': 'perplexity', 'in_batch_dim': [8, 112], 'batch_size': 8, 'flops': 2214805229440}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1898404492032}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1582003754624}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1898404492032}, {'type': 'perplexity', 'in_batch_dim': [8, 64], 'batch_size': 8, 'flops': 1265603017216}, {'type': 'perplexity', 'in_batch_dim': [8, 64], 'batch_size': 8, 'flops': 1265603017216}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1898404492032}, {'type': 'perplexity', 'in_batch_dim': [8, 64], 'batch_size': 8, 'flops': 1265603017216}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1582003754624}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1582003754624}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1582003754624}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1582003754624}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1582003754624}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1582003754624}, {'type': 'perplexity', 'in_batch_dim': [8, 64], 'batch_size': 8, 'flops': 1265603017216}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1582003754624}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1582003754624}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1898404492032}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1898404492032}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1898404492032}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1898404492032}, {'type': 'perplexity', 'in_batch_dim': [8, 128], 'batch_size': 8, 'flops': 2531205966848}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1582003754624}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1898404492032}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1898404492032}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1898404492032}, {'type': 'perplexity', 'in_batch_dim': [8, 64], 'batch_size': 8, 'flops': 1265603017216}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1898404492032}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1898404492032}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1582003754624}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1898404492032}, {'type': 'perplexity', 'in_batch_dim': [8, 112], 'batch_size': 8, 'flops': 2214805229440}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1898404492032}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1582003754624}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1898404492032}, {'type': 'perplexity', 'in_batch_dim': [8, 64], 'batch_size': 8, 'flops': 1265603017216}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1898404492032}, {'type': 'perplexity', 'in_batch_dim': [8, 112], 'batch_size': 8, 'flops': 2214805229440}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1898404492032}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1898404492032}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1582003754624}, {'type': 'perplexity', 'in_batch_dim': [8, 64], 'batch_size': 8, 'flops': 1265603017216}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1582003754624}, {'type': 'perplexity', 'in_batch_dim': [8, 128], 'batch_size': 8, 'flops': 2531205966848}, {'type': 'perplexity', 'in_batch_dim': [8, 64], 'batch_size': 8, 'flops': 1265603017216}, {'type': 'perplexity', 'in_batch_dim': [8, 128], 'batch_size': 8, 'flops': 2531205966848}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1898404492032}, {'type': 'perplexity', 'in_batch_dim': [8, 112], 'batch_size': 8, 'flops': 2214805229440}, {'type': 'perplexity', 'in_batch_dim': [8, 112], 'batch_size': 8, 'flops': 2214805229440}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1898404492032}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1898404492032}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1582003754624}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1582003754624}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1582003754624}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1898404492032}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1582003754624}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1582003754624}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1898404492032}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1582003754624}, {'type': 'perplexity', 'in_batch_dim': [8, 64], 'batch_size': 8, 'flops': 1265603017216}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1898404492032}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1898404492032}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1898404492032}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1898404492032}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1582003754624}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1898404492032}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1582003754624}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1582003754624}, {'type': 'perplexity', 'in_batch_dim': [8, 112], 'batch_size': 8, 'flops': 2214805229440}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1582003754624}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1898404492032}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1582003754624}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1582003754624}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1582003754624}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1898404492032}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1582003754624}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1898404492032}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1582003754624}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1582003754624}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1898404492032}, {'type': 'perplexity', 'in_batch_dim': [8, 112], 'batch_size': 8, 'flops': 2214805229440}, {'type': 'perplexity', 'in_batch_dim': [8, 128], 'batch_size': 8, 'flops': 2531205966848}, {'type': 'perplexity', 'in_batch_dim': [8, 112], 'batch_size': 8, 'flops': 2214805229440}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1582003754624}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1898404492032}, {'type': 'perplexity', 'in_batch_dim': [8, 64], 'batch_size': 8, 'flops': 1265603017216}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1898404492032}, {'type': 'perplexity', 'in_batch_dim': [8, 64], 'batch_size': 8, 'flops': 1265603017216}, {'type': 'perplexity', 'in_batch_dim': [8, 144], 'batch_size': 8, 'flops': 2847606704256}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1582003754624}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1898404492032}, {'type': 'perplexity', 'in_batch_dim': [8, 112], 'batch_size': 8, 'flops': 2214805229440}, {'type': 'perplexity', 'in_batch_dim': [8, 144], 'batch_size': 8, 'flops': 2847606704256}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1582003754624}, {'type': 'perplexity', 'in_batch_dim': [8, 112], 'batch_size': 8, 'flops': 2214805229440}, {'type': 'perplexity', 'in_batch_dim': [8, 64], 'batch_size': 8, 'flops': 1265603017216}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1582003754624}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1898404492032}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1898404492032}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1582003754624}, {'type': 'perplexity', 'in_batch_dim': [8, 112], 'batch_size': 8, 'flops': 2214805229440}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1582003754624}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1582003754624}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1582003754624}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1582003754624}, {'type': 'perplexity', 'in_batch_dim': [8, 112], 'batch_size': 8, 'flops': 2214805229440}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1898404492032}, {'type': 'perplexity', 'in_batch_dim': [8, 128], 'batch_size': 8, 'flops': 2531205966848}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1582003754624}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1582003754624}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1898404492032}, {'type': 'perplexity', 'in_batch_dim': [8, 64], 'batch_size': 8, 'flops': 1265603017216}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1898404492032}, {'type': 'perplexity', 'in_batch_dim': [8, 64], 'batch_size': 8, 'flops': 1265603017216}, {'type': 'perplexity', 'in_batch_dim': [8, 112], 'batch_size': 8, 'flops': 2214805229440}, {'type': 'perplexity', 'in_batch_dim': [8, 112], 'batch_size': 8, 'flops': 2214805229440}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1582003754624}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1898404492032}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1582003754624}, {'type': 'perplexity', 'in_batch_dim': [8, 64], 'batch_size': 8, 'flops': 1265603017216}, {'type': 'perplexity', 'in_batch_dim': [8, 64], 'batch_size': 8, 'flops': 1265603017216}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1898404492032}, {'type': 'perplexity', 'in_batch_dim': [8, 64], 'batch_size': 8, 'flops': 1265603017216}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1898404492032}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1898404492032}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1582003754624}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1898404492032}, {'type': 'perplexity', 'in_batch_dim': [8, 144], 'batch_size': 8, 'flops': 2847606704256}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1898404492032}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1582003754624}, {'type': 'perplexity', 'in_batch_dim': [8, 128], 'batch_size': 8, 'flops': 2531205966848}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1898404492032}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1898404492032}, {'type': 'perplexity', 'in_batch_dim': [8, 112], 'batch_size': 8, 'flops': 2214805229440}, {'type': 'perplexity', 'in_batch_dim': [8, 112], 'batch_size': 8, 'flops': 2214805229440}, {'type': 'perplexity', 'in_batch_dim': [8, 64], 'batch_size': 8, 'flops': 1265603017216}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1582003754624}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1898404492032}, {'type': 'perplexity', 'in_batch_dim': [8, 128], 'batch_size': 8, 'flops': 2531205966848}, {'type': 'perplexity', 'in_batch_dim': [8, 144], 'batch_size': 8, 'flops': 2847606704256}, {'type': 'perplexity', 'in_batch_dim': [8, 112], 'batch_size': 8, 'flops': 2214805229440}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1582003754624}, {'type': 'perplexity', 'in_batch_dim': [8, 64], 'batch_size': 8, 'flops': 1265603017216}, {'type': 'perplexity', 'in_batch_dim': [8, 64], 'batch_size': 8, 'flops': 1265603017216}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1898404492032}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1582003754624}, {'type': 'perplexity', 'in_batch_dim': [8, 64], 'batch_size': 8, 'flops': 1265603017216}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1898404492032}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1582003754624}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1898404492032}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1898404492032}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1582003754624}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1582003754624}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1582003754624}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1582003754624}, {'type': 'perplexity', 'in_batch_dim': [3, 48], 'batch_size': 8, 'flops': 949202279808}], 'timestamp': '2025-09-10 02:26:39.226599', 'step': 466, 'epoch': 1} {'type': 'pplx', 'content': 8285.386049784818, 'timestamp': '2025-09-10 02:26:39.229401', 'step': 466, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 3322488817984}, 'timestamp': '2025-09-10 02:26:39.258006', 'step': 466, 'epoch': 1} {'type': 'loss', 'content': 0.21789969503879547, 'timestamp': '2025-09-10 02:26:39.260074', 'step': 467, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 3322488817984}, 'timestamp': '2025-09-10 02:26:39.289652', 'step': 467, 'epoch': 1} {'type': 'loss', 'content': 0.18985117971897125, 'timestamp': '2025-09-10 02:26:39.313475', 'step': 468, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 3322488817984}, 'timestamp': '2025-09-10 02:26:39.343938', 'step': 468, 'epoch': 1} {'type': 'loss', 'content': 0.20956969261169434, 'timestamp': '2025-09-10 02:26:39.346123', 'step': 469, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 3322488817984}, 'timestamp': '2025-09-10 02:26:39.375649', 'step': 469, 'epoch': 1} {'type': 'loss', 'content': 0.10831085592508316, 'timestamp': '2025-09-10 02:26:39.377935', 'step': 470, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 2847885091968}, 'timestamp': '2025-09-10 02:26:39.408231', 'step': 470, 'epoch': 1} {'type': 'loss', 'content': 0.23873305320739746, 'timestamp': '2025-09-10 02:26:39.410302', 'step': 471, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 3322488817984}, 'timestamp': '2025-09-10 02:26:39.439718', 'step': 471, 'epoch': 1} {'type': 'loss', 'content': 0.2383088320493698, 'timestamp': '2025-09-10 02:26:39.463241', 'step': 472, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 128], 'flops': 3797092544000}, 'timestamp': '2025-09-10 02:26:39.493288', 'step': 472, 'epoch': 1} {'type': 'loss', 'content': 0.1780882477760315, 'timestamp': '2025-09-10 02:26:39.495574', 'step': 473, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 2847885091968}, 'timestamp': '2025-09-10 02:26:39.525464', 'step': 473, 'epoch': 1} {'type': 'loss', 'content': 0.09804089367389679, 'timestamp': '2025-09-10 02:26:39.527395', 'step': 474, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 128], 'flops': 3797092544000}, 'timestamp': '2025-09-10 02:26:39.556730', 'step': 474, 'epoch': 1} {'type': 'loss', 'content': 0.2947378158569336, 'timestamp': '2025-09-10 02:26:39.558855', 'step': 475, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 3322488817984}, 'timestamp': '2025-09-10 02:26:39.588036', 'step': 475, 'epoch': 1} {'type': 'loss', 'content': 0.16451431810855865, 'timestamp': '2025-09-10 02:26:39.611642', 'step': 476, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 3322488817984}, 'timestamp': '2025-09-10 02:26:39.641663', 'step': 476, 'epoch': 1} {'type': 'loss', 'content': 0.1566561460494995, 'timestamp': '2025-09-10 02:26:39.643911', 'step': 477, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 2847885091968}, 'timestamp': '2025-09-10 02:26:39.673647', 'step': 477, 'epoch': 1} {'type': 'loss', 'content': 0.12515752017498016, 'timestamp': '2025-09-10 02:26:39.677360', 'step': 478, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 160], 'flops': 4746299996032}, 'timestamp': '2025-09-10 02:26:39.707788', 'step': 478, 'epoch': 1} {'type': 'loss', 'content': 0.1659012734889984, 'timestamp': '2025-09-10 02:26:39.710722', 'step': 479, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 2847885091968}, 'timestamp': '2025-09-10 02:26:39.743812', 'step': 479, 'epoch': 1} {'type': 'loss', 'content': 0.31495729088783264, 'timestamp': '2025-09-10 02:26:39.767695', 'step': 480, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 176], 'flops': 5220903722048}, 'timestamp': '2025-09-10 02:26:39.797873', 'step': 480, 'epoch': 1} {'type': 'loss', 'content': 0.21109186112880707, 'timestamp': '2025-09-10 02:26:39.800086', 'step': 481, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 128], 'flops': 3797092544000}, 'timestamp': '2025-09-10 02:26:39.829808', 'step': 481, 'epoch': 1} {'type': 'loss', 'content': 0.17922447621822357, 'timestamp': '2025-09-10 02:26:39.831977', 'step': 482, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 80], 'flops': 2373281365952}, 'timestamp': '2025-09-10 02:26:39.860770', 'step': 482, 'epoch': 1} {'type': 'loss', 'content': 0.1546357125043869, 'timestamp': '2025-09-10 02:26:39.862994', 'step': 483, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 2847885091968}, 'timestamp': '2025-09-10 02:26:39.894767', 'step': 483, 'epoch': 1} {'type': 'loss', 'content': 0.2516425848007202, 'timestamp': '2025-09-10 02:26:39.918232', 'step': 484, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 3322488817984}, 'timestamp': '2025-09-10 02:26:39.948353', 'step': 484, 'epoch': 1} {'type': 'loss', 'content': 0.16915875673294067, 'timestamp': '2025-09-10 02:26:39.950562', 'step': 485, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 128], 'flops': 3797092544000}, 'timestamp': '2025-09-10 02:26:39.979967', 'step': 485, 'epoch': 1} {'type': 'loss', 'content': 0.16039830446243286, 'timestamp': '2025-09-10 02:26:39.982142', 'step': 486, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 144], 'flops': 4271696270016}, 'timestamp': '2025-09-10 02:26:40.012938', 'step': 486, 'epoch': 1} {'type': 'loss', 'content': 0.1164388507604599, 'timestamp': '2025-09-10 02:26:40.015463', 'step': 487, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 128], 'flops': 3797092544000}, 'timestamp': '2025-09-10 02:26:40.046144', 'step': 487, 'epoch': 1} {'type': 'loss', 'content': 0.25828102231025696, 'timestamp': '2025-09-10 02:26:40.069846', 'step': 488, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 2847885091968}, 'timestamp': '2025-09-10 02:26:40.111791', 'step': 488, 'epoch': 1} {'type': 'loss', 'content': 0.27442434430122375, 'timestamp': '2025-09-10 02:26:40.113972', 'step': 489, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 128], 'flops': 3797092544000}, 'timestamp': '2025-09-10 02:26:40.144027', 'step': 489, 'epoch': 1} {'type': 'loss', 'content': 0.160625159740448, 'timestamp': '2025-09-10 02:26:40.146188', 'step': 490, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 3322488817984}, 'timestamp': '2025-09-10 02:26:40.175950', 'step': 490, 'epoch': 1} {'type': 'loss', 'content': 0.20947061479091644, 'timestamp': '2025-09-10 02:26:40.178166', 'step': 491, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 128], 'flops': 3797092544000}, 'timestamp': '2025-09-10 02:26:40.211359', 'step': 491, 'epoch': 1} {'type': 'loss', 'content': 0.21243222057819366, 'timestamp': '2025-09-10 02:26:40.234880', 'step': 492, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 128], 'flops': 3797092544000}, 'timestamp': '2025-09-10 02:26:40.264522', 'step': 492, 'epoch': 1} {'type': 'loss', 'content': 0.12376873940229416, 'timestamp': '2025-09-10 02:26:40.266511', 'step': 493, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 2847885091968}, 'timestamp': '2025-09-10 02:26:40.296460', 'step': 493, 'epoch': 1} {'type': 'loss', 'content': 0.2298360913991928, 'timestamp': '2025-09-10 02:26:40.299397', 'step': 494, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 128], 'flops': 3797092544000}, 'timestamp': '2025-09-10 02:26:40.329346', 'step': 494, 'epoch': 1} {'type': 'loss', 'content': 0.2395910620689392, 'timestamp': '2025-09-10 02:26:40.331370', 'step': 495, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 80], 'flops': 2373281365952}, 'timestamp': '2025-09-10 02:26:40.360299', 'step': 495, 'epoch': 1} {'type': 'loss', 'content': 0.2521478235721588, 'timestamp': '2025-09-10 02:26:40.383702', 'step': 496, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 160], 'flops': 4746299996032}, 'timestamp': '2025-09-10 02:26:40.415417', 'step': 496, 'epoch': 1} {'type': 'loss', 'content': 0.26956653594970703, 'timestamp': '2025-09-10 02:26:40.418066', 'step': 497, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 2847885091968}, 'timestamp': '2025-09-10 02:26:40.449144', 'step': 497, 'epoch': 1} {'type': 'loss', 'content': 0.21183940768241882, 'timestamp': '2025-09-10 02:26:40.451145', 'step': 498, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 128], 'flops': 3797092544000}, 'timestamp': '2025-09-10 02:26:40.482514', 'step': 498, 'epoch': 1} {'type': 'loss', 'content': 0.21030393242835999, 'timestamp': '2025-09-10 02:26:40.485195', 'step': 499, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 3322488817984}, 'timestamp': '2025-09-10 02:26:40.521482', 'step': 499, 'epoch': 1} {'type': 'loss', 'content': 0.14671112596988678, 'timestamp': '2025-09-10 02:26:40.546381', 'step': 500, 'epoch': 1} {'type': 'info', 'content': 'Checkpoint saved at step 500', 'timestamp': '2025-09-10 02:26:45.063126', 'step': 500, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 144], 'flops': 4271696270016}, 'timestamp': '2025-09-10 02:26:45.108419', 'step': 500, 'epoch': 1} {'type': 'loss', 'content': 0.09123323857784271, 'timestamp': '2025-09-10 02:26:45.110647', 'step': 501, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 2847885091968}, 'timestamp': '2025-09-10 02:26:45.141185', 'step': 501, 'epoch': 1} {'type': 'loss', 'content': 0.22537919878959656, 'timestamp': '2025-09-10 02:26:45.143468', 'step': 502, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 3322488817984}, 'timestamp': '2025-09-10 02:26:45.174749', 'step': 502, 'epoch': 1} {'type': 'loss', 'content': 0.14926058053970337, 'timestamp': '2025-09-10 02:26:45.176964', 'step': 503, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 128], 'flops': 3797092544000}, 'timestamp': '2025-09-10 02:26:45.207585', 'step': 503, 'epoch': 1} {'type': 'loss', 'content': 0.143510639667511, 'timestamp': '2025-09-10 02:26:45.231133', 'step': 504, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 144], 'flops': 4271696270016}, 'timestamp': '2025-09-10 02:26:45.261321', 'step': 504, 'epoch': 1} {'type': 'loss', 'content': 0.17173181474208832, 'timestamp': '2025-09-10 02:26:45.263453', 'step': 505, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 128], 'flops': 3797092544000}, 'timestamp': '2025-09-10 02:26:45.293206', 'step': 505, 'epoch': 1} {'type': 'loss', 'content': 0.19408217072486877, 'timestamp': '2025-09-10 02:26:45.295274', 'step': 506, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 2847885091968}, 'timestamp': '2025-09-10 02:26:45.325856', 'step': 506, 'epoch': 1} {'type': 'loss', 'content': 0.2586282193660736, 'timestamp': '2025-09-10 02:26:45.327999', 'step': 507, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 2847885091968}, 'timestamp': '2025-09-10 02:26:45.357563', 'step': 507, 'epoch': 1} {'type': 'loss', 'content': 0.12636011838912964, 'timestamp': '2025-09-10 02:26:45.381034', 'step': 508, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 3322488817984}, 'timestamp': '2025-09-10 02:26:45.410799', 'step': 508, 'epoch': 1} {'type': 'loss', 'content': 0.17612631618976593, 'timestamp': '2025-09-10 02:26:45.412973', 'step': 509, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 2847885091968}, 'timestamp': '2025-09-10 02:26:45.442774', 'step': 509, 'epoch': 1} {'type': 'loss', 'content': 0.15372334420681, 'timestamp': '2025-09-10 02:26:45.444853', 'step': 510, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 80], 'flops': 2373281365952}, 'timestamp': '2025-09-10 02:26:45.475687', 'step': 510, 'epoch': 1} {'type': 'loss', 'content': 0.11923858523368835, 'timestamp': '2025-09-10 02:26:45.478256', 'step': 511, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 2847885091968}, 'timestamp': '2025-09-10 02:26:45.508644', 'step': 511, 'epoch': 1} {'type': 'loss', 'content': 0.14580504596233368, 'timestamp': '2025-09-10 02:26:45.532032', 'step': 512, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 2847885091968}, 'timestamp': '2025-09-10 02:26:45.561808', 'step': 512, 'epoch': 1} {'type': 'loss', 'content': 0.28129521012306213, 'timestamp': '2025-09-10 02:26:45.564083', 'step': 513, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 128], 'flops': 3797092544000}, 'timestamp': '2025-09-10 02:26:45.593438', 'step': 513, 'epoch': 1} {'type': 'loss', 'content': 0.17176483571529388, 'timestamp': '2025-09-10 02:26:45.595556', 'step': 514, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 128], 'flops': 3797092544000}, 'timestamp': '2025-09-10 02:26:45.625277', 'step': 514, 'epoch': 1} {'type': 'loss', 'content': 0.21341493725776672, 'timestamp': '2025-09-10 02:26:45.627596', 'step': 515, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 128], 'flops': 3797092544000}, 'timestamp': '2025-09-10 02:26:45.658384', 'step': 515, 'epoch': 1} {'type': 'loss', 'content': 0.20658157765865326, 'timestamp': '2025-09-10 02:26:45.682305', 'step': 516, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 2847885091968}, 'timestamp': '2025-09-10 02:26:45.712739', 'step': 516, 'epoch': 1} {'type': 'loss', 'content': 0.21653172373771667, 'timestamp': '2025-09-10 02:26:45.715130', 'step': 517, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 2847885091968}, 'timestamp': '2025-09-10 02:26:45.745039', 'step': 517, 'epoch': 1} {'type': 'loss', 'content': 0.13120469450950623, 'timestamp': '2025-09-10 02:26:45.747208', 'step': 518, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 3322488817984}, 'timestamp': '2025-09-10 02:26:45.778259', 'step': 518, 'epoch': 1} {'type': 'loss', 'content': 0.21220752596855164, 'timestamp': '2025-09-10 02:26:45.780453', 'step': 519, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 144], 'flops': 4271696270016}, 'timestamp': '2025-09-10 02:26:45.810723', 'step': 519, 'epoch': 1} {'type': 'loss', 'content': 0.170271635055542, 'timestamp': '2025-09-10 02:26:45.834200', 'step': 520, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 80], 'flops': 2373281365952}, 'timestamp': '2025-09-10 02:26:45.865584', 'step': 520, 'epoch': 1} {'type': 'loss', 'content': 0.13747504353523254, 'timestamp': '2025-09-10 02:26:45.867735', 'step': 521, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 3322488817984}, 'timestamp': '2025-09-10 02:26:45.897132', 'step': 521, 'epoch': 1} {'type': 'loss', 'content': 0.2968696653842926, 'timestamp': '2025-09-10 02:26:45.899608', 'step': 522, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 3322488817984}, 'timestamp': '2025-09-10 02:26:45.929609', 'step': 522, 'epoch': 1} {'type': 'loss', 'content': 0.19515149295330048, 'timestamp': '2025-09-10 02:26:45.931636', 'step': 523, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 2847885091968}, 'timestamp': '2025-09-10 02:26:45.960749', 'step': 523, 'epoch': 1} {'type': 'loss', 'content': 0.19037818908691406, 'timestamp': '2025-09-10 02:26:45.984200', 'step': 524, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 2847885091968}, 'timestamp': '2025-09-10 02:26:46.015881', 'step': 524, 'epoch': 1} {'type': 'loss', 'content': 0.09834728389978409, 'timestamp': '2025-09-10 02:26:46.018321', 'step': 525, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 144], 'flops': 4271696270016}, 'timestamp': '2025-09-10 02:26:46.048584', 'step': 525, 'epoch': 1} {'type': 'loss', 'content': 0.26511141657829285, 'timestamp': '2025-09-10 02:26:46.050843', 'step': 526, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 2847885091968}, 'timestamp': '2025-09-10 02:26:46.082665', 'step': 526, 'epoch': 1} {'type': 'loss', 'content': 0.17937737703323364, 'timestamp': '2025-09-10 02:26:46.084842', 'step': 527, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 176], 'flops': 5220903722048}, 'timestamp': '2025-09-10 02:26:46.114912', 'step': 527, 'epoch': 1} {'type': 'loss', 'content': 0.17409728467464447, 'timestamp': '2025-09-10 02:26:46.139892', 'step': 528, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 128], 'flops': 3797092544000}, 'timestamp': '2025-09-10 02:26:46.169994', 'step': 528, 'epoch': 1} {'type': 'loss', 'content': 0.15233351290225983, 'timestamp': '2025-09-10 02:26:46.171964', 'step': 529, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 3322488817984}, 'timestamp': '2025-09-10 02:26:46.202133', 'step': 529, 'epoch': 1} {'type': 'loss', 'content': 0.20413744449615479, 'timestamp': '2025-09-10 02:26:46.204253', 'step': 530, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 128], 'flops': 3797092544000}, 'timestamp': '2025-09-10 02:26:46.235177', 'step': 530, 'epoch': 1} {'type': 'loss', 'content': 0.14858295023441315, 'timestamp': '2025-09-10 02:26:46.237512', 'step': 531, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 2847885091968}, 'timestamp': '2025-09-10 02:26:46.267898', 'step': 531, 'epoch': 1} {'type': 'loss', 'content': 0.14652812480926514, 'timestamp': '2025-09-10 02:26:46.291485', 'step': 532, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 192], 'flops': 5695507448064}, 'timestamp': '2025-09-10 02:26:46.325461', 'step': 532, 'epoch': 1} {'type': 'loss', 'content': 0.1494779735803604, 'timestamp': '2025-09-10 02:26:46.327679', 'step': 533, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 176], 'flops': 5220903722048}, 'timestamp': '2025-09-10 02:26:46.357860', 'step': 533, 'epoch': 1} {'type': 'loss', 'content': 0.21824191510677338, 'timestamp': '2025-09-10 02:26:46.362161', 'step': 534, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 3322488817984}, 'timestamp': '2025-09-10 02:26:46.391882', 'step': 534, 'epoch': 1} {'type': 'loss', 'content': 0.37331804633140564, 'timestamp': '2025-09-10 02:26:46.393908', 'step': 535, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 144], 'flops': 4271696270016}, 'timestamp': '2025-09-10 02:26:46.425320', 'step': 535, 'epoch': 1} {'type': 'loss', 'content': 0.17577452957630157, 'timestamp': '2025-09-10 02:26:46.448758', 'step': 536, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 3322488817984}, 'timestamp': '2025-09-10 02:26:46.479327', 'step': 536, 'epoch': 1} {'type': 'loss', 'content': 0.12470124661922455, 'timestamp': '2025-09-10 02:26:46.481519', 'step': 537, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 3322488817984}, 'timestamp': '2025-09-10 02:26:46.512036', 'step': 537, 'epoch': 1} {'type': 'loss', 'content': 0.10117419064044952, 'timestamp': '2025-09-10 02:26:46.514407', 'step': 538, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 3322488817984}, 'timestamp': '2025-09-10 02:26:46.543589', 'step': 538, 'epoch': 1} {'type': 'loss', 'content': 0.22586579620838165, 'timestamp': '2025-09-10 02:26:46.545909', 'step': 539, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 2847885091968}, 'timestamp': '2025-09-10 02:26:46.575252', 'step': 539, 'epoch': 1} {'type': 'loss', 'content': 0.20569051802158356, 'timestamp': '2025-09-10 02:26:46.598763', 'step': 540, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 3322488817984}, 'timestamp': '2025-09-10 02:26:46.631204', 'step': 540, 'epoch': 1} {'type': 'loss', 'content': 0.19569402933120728, 'timestamp': '2025-09-10 02:26:46.634234', 'step': 541, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 3322488817984}, 'timestamp': '2025-09-10 02:26:46.665225', 'step': 541, 'epoch': 1} {'type': 'loss', 'content': 0.17936472594738007, 'timestamp': '2025-09-10 02:26:46.668348', 'step': 542, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 3322488817984}, 'timestamp': '2025-09-10 02:26:46.700175', 'step': 542, 'epoch': 1} {'type': 'loss', 'content': 0.13319192826747894, 'timestamp': '2025-09-10 02:26:46.702898', 'step': 543, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 128], 'flops': 3797092544000}, 'timestamp': '2025-09-10 02:26:46.734568', 'step': 543, 'epoch': 1} {'type': 'loss', 'content': 0.2359578013420105, 'timestamp': '2025-09-10 02:26:46.758873', 'step': 544, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 2847885091968}, 'timestamp': '2025-09-10 02:26:46.792362', 'step': 544, 'epoch': 1} {'type': 'loss', 'content': 0.18206225335597992, 'timestamp': '2025-09-10 02:26:46.794378', 'step': 545, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 3322488817984}, 'timestamp': '2025-09-10 02:26:46.823362', 'step': 545, 'epoch': 1} {'type': 'loss', 'content': 0.2262539565563202, 'timestamp': '2025-09-10 02:26:46.825544', 'step': 546, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 2847885091968}, 'timestamp': '2025-09-10 02:26:46.855058', 'step': 546, 'epoch': 1} {'type': 'loss', 'content': 0.17064252495765686, 'timestamp': '2025-09-10 02:26:46.857557', 'step': 547, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 144], 'flops': 4271696270016}, 'timestamp': '2025-09-10 02:26:46.887400', 'step': 547, 'epoch': 1} {'type': 'loss', 'content': 0.2685624659061432, 'timestamp': '2025-09-10 02:26:46.910959', 'step': 548, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 2847885091968}, 'timestamp': '2025-09-10 02:26:46.940726', 'step': 548, 'epoch': 1} {'type': 'loss', 'content': 0.30143871903419495, 'timestamp': '2025-09-10 02:26:46.943255', 'step': 549, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 3322488817984}, 'timestamp': '2025-09-10 02:26:46.973634', 'step': 549, 'epoch': 1} {'type': 'loss', 'content': 0.10619562119245529, 'timestamp': '2025-09-10 02:26:46.975761', 'step': 550, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 3322488817984}, 'timestamp': '2025-09-10 02:26:47.005597', 'step': 550, 'epoch': 1} {'type': 'loss', 'content': 0.19407957792282104, 'timestamp': '2025-09-10 02:26:47.008089', 'step': 551, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 3322488817984}, 'timestamp': '2025-09-10 02:26:47.037967', 'step': 551, 'epoch': 1} {'type': 'loss', 'content': 0.22718043625354767, 'timestamp': '2025-09-10 02:26:47.061381', 'step': 552, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 2847885091968}, 'timestamp': '2025-09-10 02:26:47.091218', 'step': 552, 'epoch': 1} {'type': 'loss', 'content': 0.18845325708389282, 'timestamp': '2025-09-10 02:26:47.093433', 'step': 553, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 2847885091968}, 'timestamp': '2025-09-10 02:26:47.135343', 'step': 553, 'epoch': 1} {'type': 'loss', 'content': 0.206496924161911, 'timestamp': '2025-09-10 02:26:47.137815', 'step': 554, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 3322488817984}, 'timestamp': '2025-09-10 02:26:47.167863', 'step': 554, 'epoch': 1} {'type': 'loss', 'content': 0.15751835703849792, 'timestamp': '2025-09-10 02:26:47.171383', 'step': 555, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 3322488817984}, 'timestamp': '2025-09-10 02:26:47.200399', 'step': 555, 'epoch': 1} {'type': 'loss', 'content': 0.14849357306957245, 'timestamp': '2025-09-10 02:26:47.224166', 'step': 556, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 3322488817984}, 'timestamp': '2025-09-10 02:26:47.253963', 'step': 556, 'epoch': 1} {'type': 'loss', 'content': 0.1388053596019745, 'timestamp': '2025-09-10 02:26:47.256121', 'step': 557, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 128], 'flops': 3797092544000}, 'timestamp': '2025-09-10 02:26:47.286035', 'step': 557, 'epoch': 1} {'type': 'loss', 'content': 0.10068879276514053, 'timestamp': '2025-09-10 02:26:47.288183', 'step': 558, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 3322488817984}, 'timestamp': '2025-09-10 02:26:47.317964', 'step': 558, 'epoch': 1} {'type': 'loss', 'content': 0.17956870794296265, 'timestamp': '2025-09-10 02:26:47.320533', 'step': 559, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 128], 'flops': 3797092544000}, 'timestamp': '2025-09-10 02:26:47.350534', 'step': 559, 'epoch': 1} {'type': 'loss', 'content': 0.2852088510990143, 'timestamp': '2025-09-10 02:26:47.375283', 'step': 560, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 3322488817984}, 'timestamp': '2025-09-10 02:26:47.406381', 'step': 560, 'epoch': 1} {'type': 'loss', 'content': 0.17341476678848267, 'timestamp': '2025-09-10 02:26:47.409103', 'step': 561, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 2847885091968}, 'timestamp': '2025-09-10 02:26:47.438766', 'step': 561, 'epoch': 1} {'type': 'loss', 'content': 0.12332557886838913, 'timestamp': '2025-09-10 02:26:47.441069', 'step': 562, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 144], 'flops': 4271696270016}, 'timestamp': '2025-09-10 02:26:47.471479', 'step': 562, 'epoch': 1} {'type': 'loss', 'content': 0.18090513348579407, 'timestamp': '2025-09-10 02:26:47.475095', 'step': 563, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 2847885091968}, 'timestamp': '2025-09-10 02:26:47.505318', 'step': 563, 'epoch': 1} {'type': 'loss', 'content': 0.2220364362001419, 'timestamp': '2025-09-10 02:26:47.528841', 'step': 564, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 192], 'flops': 5695507448064}, 'timestamp': '2025-09-10 02:26:47.558923', 'step': 564, 'epoch': 1} {'type': 'loss', 'content': 0.2828448414802551, 'timestamp': '2025-09-10 02:26:47.561125', 'step': 565, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 3322488817984}, 'timestamp': '2025-09-10 02:26:47.590573', 'step': 565, 'epoch': 1} {'type': 'loss', 'content': 0.22237089276313782, 'timestamp': '2025-09-10 02:26:47.592906', 'step': 566, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 2847885091968}, 'timestamp': '2025-09-10 02:26:47.623121', 'step': 566, 'epoch': 1} {'type': 'loss', 'content': 0.30480316281318665, 'timestamp': '2025-09-10 02:26:47.625454', 'step': 567, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 128], 'flops': 3797092544000}, 'timestamp': '2025-09-10 02:26:47.654819', 'step': 567, 'epoch': 1} {'type': 'loss', 'content': 0.16304291784763336, 'timestamp': '2025-09-10 02:26:47.678341', 'step': 568, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 3322488817984}, 'timestamp': '2025-09-10 02:26:47.708381', 'step': 568, 'epoch': 1} {'type': 'loss', 'content': 0.24908941984176636, 'timestamp': '2025-09-10 02:26:47.710470', 'step': 569, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 3322488817984}, 'timestamp': '2025-09-10 02:26:47.739542', 'step': 569, 'epoch': 1} {'type': 'loss', 'content': 0.15222007036209106, 'timestamp': '2025-09-10 02:26:47.741982', 'step': 570, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 2847885091968}, 'timestamp': '2025-09-10 02:26:47.774177', 'step': 570, 'epoch': 1} {'type': 'loss', 'content': 0.18767978250980377, 'timestamp': '2025-09-10 02:26:47.776441', 'step': 571, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 3322488817984}, 'timestamp': '2025-09-10 02:26:47.807901', 'step': 571, 'epoch': 1} {'type': 'loss', 'content': 0.17705324292182922, 'timestamp': '2025-09-10 02:26:47.831599', 'step': 572, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 3322488817984}, 'timestamp': '2025-09-10 02:26:47.862217', 'step': 572, 'epoch': 1} {'type': 'loss', 'content': 0.1443585306406021, 'timestamp': '2025-09-10 02:26:47.864241', 'step': 573, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 144], 'flops': 4271696270016}, 'timestamp': '2025-09-10 02:26:47.893659', 'step': 573, 'epoch': 1} {'type': 'loss', 'content': 0.3216700255870819, 'timestamp': '2025-09-10 02:26:47.896106', 'step': 574, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 3322488817984}, 'timestamp': '2025-09-10 02:26:47.925862', 'step': 574, 'epoch': 1} {'type': 'loss', 'content': 0.13906122744083405, 'timestamp': '2025-09-10 02:26:47.928134', 'step': 575, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 128], 'flops': 3797092544000}, 'timestamp': '2025-09-10 02:26:47.958890', 'step': 575, 'epoch': 1} {'type': 'loss', 'content': 0.18564023077487946, 'timestamp': '2025-09-10 02:26:47.982541', 'step': 576, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 2847885091968}, 'timestamp': '2025-09-10 02:26:48.015357', 'step': 576, 'epoch': 1} {'type': 'loss', 'content': 0.09291606396436691, 'timestamp': '2025-09-10 02:26:48.017492', 'step': 577, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 128], 'flops': 3797092544000}, 'timestamp': '2025-09-10 02:26:48.046477', 'step': 577, 'epoch': 1} {'type': 'loss', 'content': 0.24297012388706207, 'timestamp': '2025-09-10 02:26:48.049594', 'step': 578, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 3322488817984}, 'timestamp': '2025-09-10 02:26:48.079032', 'step': 578, 'epoch': 1} {'type': 'loss', 'content': 0.2786838710308075, 'timestamp': '2025-09-10 02:26:48.081159', 'step': 579, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 144], 'flops': 4271696270016}, 'timestamp': '2025-09-10 02:26:48.113259', 'step': 579, 'epoch': 1} {'type': 'loss', 'content': 0.17170767486095428, 'timestamp': '2025-09-10 02:26:48.136792', 'step': 580, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 128], 'flops': 3797092544000}, 'timestamp': '2025-09-10 02:26:48.166797', 'step': 580, 'epoch': 1} {'type': 'loss', 'content': 0.1510721892118454, 'timestamp': '2025-09-10 02:26:48.168860', 'step': 581, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 128], 'flops': 3797092544000}, 'timestamp': '2025-09-10 02:26:48.198193', 'step': 581, 'epoch': 1} {'type': 'loss', 'content': 0.1155354231595993, 'timestamp': '2025-09-10 02:26:48.200385', 'step': 582, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 3322488817984}, 'timestamp': '2025-09-10 02:26:48.230248', 'step': 582, 'epoch': 1} {'type': 'loss', 'content': 0.19168202579021454, 'timestamp': '2025-09-10 02:26:48.232492', 'step': 583, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 3322488817984}, 'timestamp': '2025-09-10 02:26:48.261860', 'step': 583, 'epoch': 1} {'type': 'loss', 'content': 0.19942472875118256, 'timestamp': '2025-09-10 02:26:48.285906', 'step': 584, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 144], 'flops': 4271696270016}, 'timestamp': '2025-09-10 02:26:48.316618', 'step': 584, 'epoch': 1} {'type': 'loss', 'content': 0.16382449865341187, 'timestamp': '2025-09-10 02:26:48.318953', 'step': 585, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 224], 'flops': 6644714900096}, 'timestamp': '2025-09-10 02:26:48.354118', 'step': 585, 'epoch': 1} {'type': 'loss', 'content': 0.12068495154380798, 'timestamp': '2025-09-10 02:26:48.358374', 'step': 586, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 128], 'flops': 3797092544000}, 'timestamp': '2025-09-10 02:26:48.388904', 'step': 586, 'epoch': 1} {'type': 'loss', 'content': 0.15214012563228607, 'timestamp': '2025-09-10 02:26:48.391302', 'step': 587, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 2847885091968}, 'timestamp': '2025-09-10 02:26:48.420974', 'step': 587, 'epoch': 1} {'type': 'loss', 'content': 0.18279217183589935, 'timestamp': '2025-09-10 02:26:48.444706', 'step': 588, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 2847885091968}, 'timestamp': '2025-09-10 02:26:48.475499', 'step': 588, 'epoch': 1} {'type': 'loss', 'content': 0.25366270542144775, 'timestamp': '2025-09-10 02:26:48.477934', 'step': 589, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 3322488817984}, 'timestamp': '2025-09-10 02:26:48.508425', 'step': 589, 'epoch': 1} {'type': 'loss', 'content': 0.19052636623382568, 'timestamp': '2025-09-10 02:26:48.510946', 'step': 590, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 160], 'flops': 4746299996032}, 'timestamp': '2025-09-10 02:26:48.541147', 'step': 590, 'epoch': 1} {'type': 'loss', 'content': 0.18524131178855896, 'timestamp': '2025-09-10 02:26:48.543818', 'step': 591, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 224], 'flops': 6644714900096}, 'timestamp': '2025-09-10 02:26:48.574142', 'step': 591, 'epoch': 1} {'type': 'loss', 'content': 0.18339228630065918, 'timestamp': '2025-09-10 02:26:48.602184', 'step': 592, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 2847885091968}, 'timestamp': '2025-09-10 02:26:48.632571', 'step': 592, 'epoch': 1} {'type': 'loss', 'content': 0.21379461884498596, 'timestamp': '2025-09-10 02:26:48.634814', 'step': 593, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 128], 'flops': 3797092544000}, 'timestamp': '2025-09-10 02:26:48.664480', 'step': 593, 'epoch': 1} {'type': 'loss', 'content': 0.13851991295814514, 'timestamp': '2025-09-10 02:26:48.666309', 'step': 594, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 3322488817984}, 'timestamp': '2025-09-10 02:26:48.695628', 'step': 594, 'epoch': 1} {'type': 'loss', 'content': 0.15541723370552063, 'timestamp': '2025-09-10 02:26:48.697790', 'step': 595, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 3322488817984}, 'timestamp': '2025-09-10 02:26:48.728457', 'step': 595, 'epoch': 1} {'type': 'loss', 'content': 0.35395827889442444, 'timestamp': '2025-09-10 02:26:48.751859', 'step': 596, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 144], 'flops': 4271696270016}, 'timestamp': '2025-09-10 02:26:48.782622', 'step': 596, 'epoch': 1} {'type': 'loss', 'content': 0.189134418964386, 'timestamp': '2025-09-10 02:26:48.784840', 'step': 597, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 2847885091968}, 'timestamp': '2025-09-10 02:26:48.817006', 'step': 597, 'epoch': 1} {'type': 'loss', 'content': 0.21694150567054749, 'timestamp': '2025-09-10 02:26:48.819125', 'step': 598, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 128], 'flops': 3797092544000}, 'timestamp': '2025-09-10 02:26:48.848989', 'step': 598, 'epoch': 1} {'type': 'loss', 'content': 0.20923423767089844, 'timestamp': '2025-09-10 02:26:48.851234', 'step': 599, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 160], 'flops': 4746299996032}, 'timestamp': '2025-09-10 02:26:48.881149', 'step': 599, 'epoch': 1} {'type': 'loss', 'content': 0.21421249210834503, 'timestamp': '2025-09-10 02:26:48.904737', 'step': 600, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 3322488817984}, 'timestamp': '2025-09-10 02:26:48.934815', 'step': 600, 'epoch': 1} {'type': 'loss', 'content': 0.1583680957555771, 'timestamp': '2025-09-10 02:26:48.937324', 'step': 601, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 144], 'flops': 4271696270016}, 'timestamp': '2025-09-10 02:26:48.967291', 'step': 601, 'epoch': 1} {'type': 'loss', 'content': 0.28939199447631836, 'timestamp': '2025-09-10 02:26:48.969561', 'step': 602, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 2847885091968}, 'timestamp': '2025-09-10 02:26:48.999527', 'step': 602, 'epoch': 1} {'type': 'loss', 'content': 0.23841868340969086, 'timestamp': '2025-09-10 02:26:49.001759', 'step': 603, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 3322488817984}, 'timestamp': '2025-09-10 02:26:49.032280', 'step': 603, 'epoch': 1} {'type': 'loss', 'content': 0.18944135308265686, 'timestamp': '2025-09-10 02:26:49.055852', 'step': 604, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 144], 'flops': 4271696270016}, 'timestamp': '2025-09-10 02:26:49.087322', 'step': 604, 'epoch': 1} {'type': 'loss', 'content': 0.15415504574775696, 'timestamp': '2025-09-10 02:26:49.089469', 'step': 605, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 3322488817984}, 'timestamp': '2025-09-10 02:26:49.120716', 'step': 605, 'epoch': 1} {'type': 'loss', 'content': 0.15797346830368042, 'timestamp': '2025-09-10 02:26:49.122990', 'step': 606, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 3322488817984}, 'timestamp': '2025-09-10 02:26:49.152561', 'step': 606, 'epoch': 1} {'type': 'loss', 'content': 0.09060920774936676, 'timestamp': '2025-09-10 02:26:49.154979', 'step': 607, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 2847885091968}, 'timestamp': '2025-09-10 02:26:49.185253', 'step': 607, 'epoch': 1} {'type': 'loss', 'content': 0.16283516585826874, 'timestamp': '2025-09-10 02:26:49.208711', 'step': 608, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 2847885091968}, 'timestamp': '2025-09-10 02:26:49.238702', 'step': 608, 'epoch': 1} {'type': 'loss', 'content': 0.08753850311040878, 'timestamp': '2025-09-10 02:26:49.240869', 'step': 609, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 128], 'flops': 3797092544000}, 'timestamp': '2025-09-10 02:26:49.270714', 'step': 609, 'epoch': 1} {'type': 'loss', 'content': 0.2181827276945114, 'timestamp': '2025-09-10 02:26:49.272530', 'step': 610, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 128], 'flops': 3797092544000}, 'timestamp': '2025-09-10 02:26:49.301774', 'step': 610, 'epoch': 1} {'type': 'loss', 'content': 0.23353548347949982, 'timestamp': '2025-09-10 02:26:49.304111', 'step': 611, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 128], 'flops': 3797092544000}, 'timestamp': '2025-09-10 02:26:49.334600', 'step': 611, 'epoch': 1} {'type': 'loss', 'content': 0.232558935880661, 'timestamp': '2025-09-10 02:26:49.358209', 'step': 612, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 3322488817984}, 'timestamp': '2025-09-10 02:26:49.387997', 'step': 612, 'epoch': 1} {'type': 'loss', 'content': 0.1458314061164856, 'timestamp': '2025-09-10 02:26:49.390150', 'step': 613, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 3322488817984}, 'timestamp': '2025-09-10 02:26:49.419594', 'step': 613, 'epoch': 1} {'type': 'loss', 'content': 0.20372697710990906, 'timestamp': '2025-09-10 02:26:49.421708', 'step': 614, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 128], 'flops': 3797092544000}, 'timestamp': '2025-09-10 02:26:49.451407', 'step': 614, 'epoch': 1} {'type': 'loss', 'content': 0.22725072503089905, 'timestamp': '2025-09-10 02:26:49.453513', 'step': 615, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 3322488817984}, 'timestamp': '2025-09-10 02:26:49.483396', 'step': 615, 'epoch': 1} {'type': 'loss', 'content': 0.17057138681411743, 'timestamp': '2025-09-10 02:26:49.506885', 'step': 616, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 128], 'flops': 3797092544000}, 'timestamp': '2025-09-10 02:26:49.537133', 'step': 616, 'epoch': 1} {'type': 'loss', 'content': 0.21790994703769684, 'timestamp': '2025-09-10 02:26:49.539511', 'step': 617, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 2847885091968}, 'timestamp': '2025-09-10 02:26:49.569608', 'step': 617, 'epoch': 1} {'type': 'loss', 'content': 0.2512209713459015, 'timestamp': '2025-09-10 02:26:49.571662', 'step': 618, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 128], 'flops': 3797092544000}, 'timestamp': '2025-09-10 02:26:49.601145', 'step': 618, 'epoch': 1} {'type': 'loss', 'content': 0.13947686553001404, 'timestamp': '2025-09-10 02:26:49.603606', 'step': 619, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 80], 'flops': 2373281365952}, 'timestamp': '2025-09-10 02:26:49.635130', 'step': 619, 'epoch': 1} {'type': 'loss', 'content': 0.2655167877674103, 'timestamp': '2025-09-10 02:26:49.658776', 'step': 620, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 128], 'flops': 3797092544000}, 'timestamp': '2025-09-10 02:26:49.688466', 'step': 620, 'epoch': 1} {'type': 'loss', 'content': 0.22189851105213165, 'timestamp': '2025-09-10 02:26:49.690529', 'step': 621, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 144], 'flops': 4271696270016}, 'timestamp': '2025-09-10 02:26:49.720381', 'step': 621, 'epoch': 1} {'type': 'loss', 'content': 0.25130099058151245, 'timestamp': '2025-09-10 02:26:49.722952', 'step': 622, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 3322488817984}, 'timestamp': '2025-09-10 02:26:49.752337', 'step': 622, 'epoch': 1} {'type': 'loss', 'content': 0.2659033536911011, 'timestamp': '2025-09-10 02:26:49.755014', 'step': 623, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 2847885091968}, 'timestamp': '2025-09-10 02:26:49.785130', 'step': 623, 'epoch': 1} {'type': 'loss', 'content': 0.20353558659553528, 'timestamp': '2025-09-10 02:26:49.808676', 'step': 624, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 3322488817984}, 'timestamp': '2025-09-10 02:26:49.838448', 'step': 624, 'epoch': 1} {'type': 'loss', 'content': 0.1526300013065338, 'timestamp': '2025-09-10 02:26:49.840562', 'step': 625, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 2847885091968}, 'timestamp': '2025-09-10 02:26:49.870213', 'step': 625, 'epoch': 1} {'type': 'loss', 'content': 0.20887330174446106, 'timestamp': '2025-09-10 02:26:49.872608', 'step': 626, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 3322488817984}, 'timestamp': '2025-09-10 02:26:49.902332', 'step': 626, 'epoch': 1} {'type': 'loss', 'content': 0.2792467474937439, 'timestamp': '2025-09-10 02:26:49.905900', 'step': 627, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 80], 'flops': 2373281365952}, 'timestamp': '2025-09-10 02:26:49.935620', 'step': 627, 'epoch': 1} {'type': 'loss', 'content': 0.15706422924995422, 'timestamp': '2025-09-10 02:26:49.959080', 'step': 628, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 128], 'flops': 3797092544000}, 'timestamp': '2025-09-10 02:26:49.989171', 'step': 628, 'epoch': 1} {'type': 'loss', 'content': 0.25047487020492554, 'timestamp': '2025-09-10 02:26:49.991189', 'step': 629, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 128], 'flops': 3797092544000}, 'timestamp': '2025-09-10 02:26:50.020768', 'step': 629, 'epoch': 1} {'type': 'loss', 'content': 0.15459829568862915, 'timestamp': '2025-09-10 02:26:50.023791', 'step': 630, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 3322488817984}, 'timestamp': '2025-09-10 02:26:50.055224', 'step': 630, 'epoch': 1} {'type': 'loss', 'content': 0.16907261312007904, 'timestamp': '2025-09-10 02:26:50.057458', 'step': 631, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 3322488817984}, 'timestamp': '2025-09-10 02:26:50.088818', 'step': 631, 'epoch': 1} {'type': 'loss', 'content': 0.1984090358018875, 'timestamp': '2025-09-10 02:26:50.114720', 'step': 632, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 128], 'flops': 3797092544000}, 'timestamp': '2025-09-10 02:26:50.150206', 'step': 632, 'epoch': 1} {'type': 'loss', 'content': 0.14670933783054352, 'timestamp': '2025-09-10 02:26:50.152087', 'step': 633, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 128], 'flops': 3797092544000}, 'timestamp': '2025-09-10 02:26:50.181932', 'step': 633, 'epoch': 1} {'type': 'loss', 'content': 0.21897760033607483, 'timestamp': '2025-09-10 02:26:50.184106', 'step': 634, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 144], 'flops': 4271696270016}, 'timestamp': '2025-09-10 02:26:50.213906', 'step': 634, 'epoch': 1} {'type': 'loss', 'content': 0.1435650885105133, 'timestamp': '2025-09-10 02:26:50.216340', 'step': 635, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 2847885091968}, 'timestamp': '2025-09-10 02:26:50.246946', 'step': 635, 'epoch': 1} {'type': 'loss', 'content': 0.1915169507265091, 'timestamp': '2025-09-10 02:26:50.270404', 'step': 636, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 128], 'flops': 3797092544000}, 'timestamp': '2025-09-10 02:26:50.300235', 'step': 636, 'epoch': 1} {'type': 'loss', 'content': 0.18235903978347778, 'timestamp': '2025-09-10 02:26:50.303494', 'step': 637, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 2847885091968}, 'timestamp': '2025-09-10 02:26:50.337259', 'step': 637, 'epoch': 1} {'type': 'loss', 'content': 0.22840572893619537, 'timestamp': '2025-09-10 02:26:50.339412', 'step': 638, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 3322488817984}, 'timestamp': '2025-09-10 02:26:50.369296', 'step': 638, 'epoch': 1} {'type': 'loss', 'content': 0.18998222053050995, 'timestamp': '2025-09-10 02:26:50.371419', 'step': 639, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 144], 'flops': 4271696270016}, 'timestamp': '2025-09-10 02:26:50.401942', 'step': 639, 'epoch': 1} {'type': 'loss', 'content': 0.1690913289785385, 'timestamp': '2025-09-10 02:26:50.425354', 'step': 640, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 3322488817984}, 'timestamp': '2025-09-10 02:26:50.456419', 'step': 640, 'epoch': 1} {'type': 'loss', 'content': 0.1918250322341919, 'timestamp': '2025-09-10 02:26:50.458575', 'step': 641, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 3322488817984}, 'timestamp': '2025-09-10 02:26:50.488803', 'step': 641, 'epoch': 1} {'type': 'loss', 'content': 0.18135251104831696, 'timestamp': '2025-09-10 02:26:50.491017', 'step': 642, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 3322488817984}, 'timestamp': '2025-09-10 02:26:50.520178', 'step': 642, 'epoch': 1} {'type': 'loss', 'content': 0.23814553022384644, 'timestamp': '2025-09-10 02:26:50.522331', 'step': 643, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 3322488817984}, 'timestamp': '2025-09-10 02:26:50.552013', 'step': 643, 'epoch': 1} {'type': 'loss', 'content': 0.1802496463060379, 'timestamp': '2025-09-10 02:26:50.575641', 'step': 644, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 128], 'flops': 3797092544000}, 'timestamp': '2025-09-10 02:26:50.605464', 'step': 644, 'epoch': 1} {'type': 'loss', 'content': 0.209016814827919, 'timestamp': '2025-09-10 02:26:50.607667', 'step': 645, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 3322488817984}, 'timestamp': '2025-09-10 02:26:50.640678', 'step': 645, 'epoch': 1} {'type': 'loss', 'content': 0.186257466673851, 'timestamp': '2025-09-10 02:26:50.642752', 'step': 646, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 128], 'flops': 3797092544000}, 'timestamp': '2025-09-10 02:26:50.672291', 'step': 646, 'epoch': 1} {'type': 'loss', 'content': 0.20962508022785187, 'timestamp': '2025-09-10 02:26:50.674627', 'step': 647, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 128], 'flops': 3797092544000}, 'timestamp': '2025-09-10 02:26:50.704618', 'step': 647, 'epoch': 1} {'type': 'loss', 'content': 0.2541416883468628, 'timestamp': '2025-09-10 02:26:50.734723', 'step': 648, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 3322488817984}, 'timestamp': '2025-09-10 02:26:50.765122', 'step': 648, 'epoch': 1} {'type': 'loss', 'content': 0.1983148604631424, 'timestamp': '2025-09-10 02:26:50.767257', 'step': 649, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 128], 'flops': 3797092544000}, 'timestamp': '2025-09-10 02:26:50.796504', 'step': 649, 'epoch': 1} {'type': 'loss', 'content': 0.16631513833999634, 'timestamp': '2025-09-10 02:26:50.798770', 'step': 650, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 160], 'flops': 4746299996032}, 'timestamp': '2025-09-10 02:26:50.829420', 'step': 650, 'epoch': 1} {'type': 'loss', 'content': 0.3156251609325409, 'timestamp': '2025-09-10 02:26:50.832087', 'step': 651, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 2847885091968}, 'timestamp': '2025-09-10 02:26:50.862136', 'step': 651, 'epoch': 1} {'type': 'loss', 'content': 0.09992559254169464, 'timestamp': '2025-09-10 02:26:50.885571', 'step': 652, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 144], 'flops': 4271696270016}, 'timestamp': '2025-09-10 02:26:50.915746', 'step': 652, 'epoch': 1} {'type': 'loss', 'content': 0.1951369196176529, 'timestamp': '2025-09-10 02:26:50.919518', 'step': 653, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 3322488817984}, 'timestamp': '2025-09-10 02:26:50.950203', 'step': 653, 'epoch': 1} {'type': 'loss', 'content': 0.20907698571681976, 'timestamp': '2025-09-10 02:26:50.952407', 'step': 654, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 144], 'flops': 4271696270016}, 'timestamp': '2025-09-10 02:26:50.982701', 'step': 654, 'epoch': 1} {'type': 'loss', 'content': 0.21392863988876343, 'timestamp': '2025-09-10 02:26:50.985080', 'step': 655, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 3322488817984}, 'timestamp': '2025-09-10 02:26:51.015417', 'step': 655, 'epoch': 1} {'type': 'loss', 'content': 0.10358292609453201, 'timestamp': '2025-09-10 02:26:51.039100', 'step': 656, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 2847885091968}, 'timestamp': '2025-09-10 02:26:51.070636', 'step': 656, 'epoch': 1} {'type': 'loss', 'content': 0.14631962776184082, 'timestamp': '2025-09-10 02:26:51.072812', 'step': 657, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 3322488817984}, 'timestamp': '2025-09-10 02:26:51.102218', 'step': 657, 'epoch': 1} {'type': 'loss', 'content': 0.1521897315979004, 'timestamp': '2025-09-10 02:26:51.104416', 'step': 658, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 2847885091968}, 'timestamp': '2025-09-10 02:26:51.138837', 'step': 658, 'epoch': 1} {'type': 'loss', 'content': 0.20061607658863068, 'timestamp': '2025-09-10 02:26:51.141697', 'step': 659, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 3322488817984}, 'timestamp': '2025-09-10 02:26:51.172208', 'step': 659, 'epoch': 1} {'type': 'loss', 'content': 0.14848832786083221, 'timestamp': '2025-09-10 02:26:51.195741', 'step': 660, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 3322488817984}, 'timestamp': '2025-09-10 02:26:51.226940', 'step': 660, 'epoch': 1} {'type': 'loss', 'content': 0.18043597042560577, 'timestamp': '2025-09-10 02:26:51.229080', 'step': 661, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 3322488817984}, 'timestamp': '2025-09-10 02:26:51.259127', 'step': 661, 'epoch': 1} {'type': 'loss', 'content': 0.2373562902212143, 'timestamp': '2025-09-10 02:26:51.261199', 'step': 662, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 3322488817984}, 'timestamp': '2025-09-10 02:26:51.291027', 'step': 662, 'epoch': 1} {'type': 'loss', 'content': 0.2869243621826172, 'timestamp': '2025-09-10 02:26:51.293166', 'step': 663, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 3322488817984}, 'timestamp': '2025-09-10 02:26:51.322706', 'step': 663, 'epoch': 1} {'type': 'loss', 'content': 0.21813443303108215, 'timestamp': '2025-09-10 02:26:51.347456', 'step': 664, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 3322488817984}, 'timestamp': '2025-09-10 02:26:51.377375', 'step': 664, 'epoch': 1} {'type': 'loss', 'content': 0.23601984977722168, 'timestamp': '2025-09-10 02:26:51.379442', 'step': 665, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 128], 'flops': 3797092544000}, 'timestamp': '2025-09-10 02:26:51.410138', 'step': 665, 'epoch': 1} {'type': 'loss', 'content': 0.1351936161518097, 'timestamp': '2025-09-10 02:26:51.412283', 'step': 666, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 144], 'flops': 4271696270016}, 'timestamp': '2025-09-10 02:26:51.442545', 'step': 666, 'epoch': 1} {'type': 'loss', 'content': 0.2317437082529068, 'timestamp': '2025-09-10 02:26:51.444941', 'step': 667, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 2847885091968}, 'timestamp': '2025-09-10 02:26:51.474584', 'step': 667, 'epoch': 1} {'type': 'loss', 'content': 0.2018151581287384, 'timestamp': '2025-09-10 02:26:51.498009', 'step': 668, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 128], 'flops': 3797092544000}, 'timestamp': '2025-09-10 02:26:51.530180', 'step': 668, 'epoch': 1} {'type': 'loss', 'content': 0.3936104476451874, 'timestamp': '2025-09-10 02:26:51.532545', 'step': 669, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 176], 'flops': 5220903722048}, 'timestamp': '2025-09-10 02:26:51.562955', 'step': 669, 'epoch': 1} {'type': 'loss', 'content': 0.2463226616382599, 'timestamp': '2025-09-10 02:26:51.567169', 'step': 670, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 3322488817984}, 'timestamp': '2025-09-10 02:26:51.596999', 'step': 670, 'epoch': 1} {'type': 'loss', 'content': 0.1762087345123291, 'timestamp': '2025-09-10 02:26:51.599190', 'step': 671, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 128], 'flops': 3797092544000}, 'timestamp': '2025-09-10 02:26:51.628869', 'step': 671, 'epoch': 1} {'type': 'loss', 'content': 0.18536998331546783, 'timestamp': '2025-09-10 02:26:51.652186', 'step': 672, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 2847885091968}, 'timestamp': '2025-09-10 02:26:51.682221', 'step': 672, 'epoch': 1} {'type': 'loss', 'content': 0.1837751865386963, 'timestamp': '2025-09-10 02:26:51.692420', 'step': 673, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 3322488817984}, 'timestamp': '2025-09-10 02:26:51.722277', 'step': 673, 'epoch': 1} {'type': 'loss', 'content': 0.192656010389328, 'timestamp': '2025-09-10 02:26:51.724366', 'step': 674, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 3322488817984}, 'timestamp': '2025-09-10 02:26:51.754611', 'step': 674, 'epoch': 1} {'type': 'loss', 'content': 0.17249467968940735, 'timestamp': '2025-09-10 02:26:51.756670', 'step': 675, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 80], 'flops': 2373281365952}, 'timestamp': '2025-09-10 02:26:51.787963', 'step': 675, 'epoch': 1} {'type': 'loss', 'content': 0.18137086927890778, 'timestamp': '2025-09-10 02:26:51.811592', 'step': 676, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 3322488817984}, 'timestamp': '2025-09-10 02:26:51.842413', 'step': 676, 'epoch': 1} {'type': 'loss', 'content': 0.14917661249637604, 'timestamp': '2025-09-10 02:26:51.844500', 'step': 677, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 3322488817984}, 'timestamp': '2025-09-10 02:26:51.873701', 'step': 677, 'epoch': 1} {'type': 'loss', 'content': 0.33624202013015747, 'timestamp': '2025-09-10 02:26:51.875761', 'step': 678, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 128], 'flops': 3797092544000}, 'timestamp': '2025-09-10 02:26:51.905157', 'step': 678, 'epoch': 1} {'type': 'loss', 'content': 0.1758219450712204, 'timestamp': '2025-09-10 02:26:51.907554', 'step': 679, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 144], 'flops': 4271696270016}, 'timestamp': '2025-09-10 02:26:51.936694', 'step': 679, 'epoch': 1} {'type': 'loss', 'content': 0.1978168934583664, 'timestamp': '2025-09-10 02:26:51.960810', 'step': 680, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 2847885091968}, 'timestamp': '2025-09-10 02:26:51.990480', 'step': 680, 'epoch': 1} {'type': 'loss', 'content': 0.17985767126083374, 'timestamp': '2025-09-10 02:26:51.992400', 'step': 681, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 128], 'flops': 3797092544000}, 'timestamp': '2025-09-10 02:26:52.022251', 'step': 681, 'epoch': 1} {'type': 'loss', 'content': 0.1871323436498642, 'timestamp': '2025-09-10 02:26:52.024238', 'step': 682, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 2847885091968}, 'timestamp': '2025-09-10 02:26:52.053530', 'step': 682, 'epoch': 1} {'type': 'loss', 'content': 0.18568331003189087, 'timestamp': '2025-09-10 02:26:52.055642', 'step': 683, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 2847885091968}, 'timestamp': '2025-09-10 02:26:52.085140', 'step': 683, 'epoch': 1} {'type': 'loss', 'content': 0.15015123784542084, 'timestamp': '2025-09-10 02:26:52.108712', 'step': 684, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 2847885091968}, 'timestamp': '2025-09-10 02:26:52.145491', 'step': 684, 'epoch': 1} {'type': 'loss', 'content': 0.20665156841278076, 'timestamp': '2025-09-10 02:26:52.147670', 'step': 685, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 3322488817984}, 'timestamp': '2025-09-10 02:26:52.177465', 'step': 685, 'epoch': 1} {'type': 'loss', 'content': 0.21158382296562195, 'timestamp': '2025-09-10 02:26:52.179296', 'step': 686, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 2847885091968}, 'timestamp': '2025-09-10 02:26:52.208603', 'step': 686, 'epoch': 1} {'type': 'loss', 'content': 0.1981191188097, 'timestamp': '2025-09-10 02:26:52.210882', 'step': 687, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 128], 'flops': 3797092544000}, 'timestamp': '2025-09-10 02:26:52.241032', 'step': 687, 'epoch': 1} {'type': 'loss', 'content': 0.14495831727981567, 'timestamp': '2025-09-10 02:26:52.264418', 'step': 688, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 2847885091968}, 'timestamp': '2025-09-10 02:26:52.294142', 'step': 688, 'epoch': 1} {'type': 'loss', 'content': 0.2015880048274994, 'timestamp': '2025-09-10 02:26:52.296282', 'step': 689, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 144], 'flops': 4271696270016}, 'timestamp': '2025-09-10 02:26:52.326444', 'step': 689, 'epoch': 1} {'type': 'loss', 'content': 0.24840915203094482, 'timestamp': '2025-09-10 02:26:52.328760', 'step': 690, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 3322488817984}, 'timestamp': '2025-09-10 02:26:52.359191', 'step': 690, 'epoch': 1} {'type': 'loss', 'content': 0.17380574345588684, 'timestamp': '2025-09-10 02:26:52.361187', 'step': 691, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 2847885091968}, 'timestamp': '2025-09-10 02:26:52.390960', 'step': 691, 'epoch': 1} {'type': 'loss', 'content': 0.20522017776966095, 'timestamp': '2025-09-10 02:26:52.414221', 'step': 692, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 3322488817984}, 'timestamp': '2025-09-10 02:26:52.444495', 'step': 692, 'epoch': 1} {'type': 'loss', 'content': 0.21826717257499695, 'timestamp': '2025-09-10 02:26:52.446757', 'step': 693, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 3322488817984}, 'timestamp': '2025-09-10 02:26:52.477022', 'step': 693, 'epoch': 1} {'type': 'loss', 'content': 0.27542147040367126, 'timestamp': '2025-09-10 02:26:52.479421', 'step': 694, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 128], 'flops': 3797092544000}, 'timestamp': '2025-09-10 02:26:52.508973', 'step': 694, 'epoch': 1} {'type': 'loss', 'content': 0.25876471400260925, 'timestamp': '2025-09-10 02:26:52.511202', 'step': 695, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 128], 'flops': 3797092544000}, 'timestamp': '2025-09-10 02:26:52.541675', 'step': 695, 'epoch': 1} {'type': 'loss', 'content': 0.16058672964572906, 'timestamp': '2025-09-10 02:26:52.564974', 'step': 696, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 3322488817984}, 'timestamp': '2025-09-10 02:26:52.594640', 'step': 696, 'epoch': 1} {'type': 'loss', 'content': 0.22646936774253845, 'timestamp': '2025-09-10 02:26:52.596908', 'step': 697, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 144], 'flops': 4271696270016}, 'timestamp': '2025-09-10 02:26:52.626642', 'step': 697, 'epoch': 1} {'type': 'loss', 'content': 0.19743241369724274, 'timestamp': '2025-09-10 02:26:52.628948', 'step': 698, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 3322488817984}, 'timestamp': '2025-09-10 02:26:52.658562', 'step': 698, 'epoch': 1} {'type': 'loss', 'content': 0.2285546064376831, 'timestamp': '2025-09-10 02:26:52.660892', 'step': 699, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 3322488817984}, 'timestamp': '2025-09-10 02:26:52.690754', 'step': 699, 'epoch': 1} {'type': 'loss', 'content': 0.09617214649915695, 'timestamp': '2025-09-10 02:26:52.714259', 'step': 700, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 2847885091968}, 'timestamp': '2025-09-10 02:26:52.745369', 'step': 700, 'epoch': 1} {'type': 'loss', 'content': 0.2325233668088913, 'timestamp': '2025-09-10 02:26:52.747592', 'step': 701, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 128], 'flops': 3797092544000}, 'timestamp': '2025-09-10 02:26:52.778523', 'step': 701, 'epoch': 1} {'type': 'loss', 'content': 0.13064564764499664, 'timestamp': '2025-09-10 02:26:52.780614', 'step': 702, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 3322488817984}, 'timestamp': '2025-09-10 02:26:52.809881', 'step': 702, 'epoch': 1} {'type': 'loss', 'content': 0.20808744430541992, 'timestamp': '2025-09-10 02:26:52.812334', 'step': 703, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 128], 'flops': 3797092544000}, 'timestamp': '2025-09-10 02:26:52.846492', 'step': 703, 'epoch': 1} {'type': 'loss', 'content': 0.18774104118347168, 'timestamp': '2025-09-10 02:26:52.870020', 'step': 704, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 128], 'flops': 3797092544000}, 'timestamp': '2025-09-10 02:26:52.901376', 'step': 704, 'epoch': 1} {'type': 'loss', 'content': 0.22362518310546875, 'timestamp': '2025-09-10 02:26:52.903524', 'step': 705, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 3322488817984}, 'timestamp': '2025-09-10 02:26:52.933546', 'step': 705, 'epoch': 1} {'type': 'loss', 'content': 0.24247485399246216, 'timestamp': '2025-09-10 02:26:52.935565', 'step': 706, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 144], 'flops': 4271696270016}, 'timestamp': '2025-09-10 02:26:52.965625', 'step': 706, 'epoch': 1} {'type': 'loss', 'content': 0.26832494139671326, 'timestamp': '2025-09-10 02:26:52.968104', 'step': 707, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 3322488817984}, 'timestamp': '2025-09-10 02:26:52.998338', 'step': 707, 'epoch': 1} {'type': 'loss', 'content': 0.18312236666679382, 'timestamp': '2025-09-10 02:26:53.022926', 'step': 708, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 128], 'flops': 3797092544000}, 'timestamp': '2025-09-10 02:26:53.053269', 'step': 708, 'epoch': 1} {'type': 'loss', 'content': 0.14598476886749268, 'timestamp': '2025-09-10 02:26:53.055310', 'step': 709, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 3322488817984}, 'timestamp': '2025-09-10 02:26:53.084899', 'step': 709, 'epoch': 1} {'type': 'loss', 'content': 0.18242228031158447, 'timestamp': '2025-09-10 02:26:53.089284', 'step': 710, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 3322488817984}, 'timestamp': '2025-09-10 02:26:53.122466', 'step': 710, 'epoch': 1} {'type': 'loss', 'content': 0.20250526070594788, 'timestamp': '2025-09-10 02:26:53.125723', 'step': 711, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 2847885091968}, 'timestamp': '2025-09-10 02:26:53.155896', 'step': 711, 'epoch': 1} {'type': 'loss', 'content': 0.14770765602588654, 'timestamp': '2025-09-10 02:26:53.179470', 'step': 712, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 144], 'flops': 4271696270016}, 'timestamp': '2025-09-10 02:26:53.210058', 'step': 712, 'epoch': 1} {'type': 'loss', 'content': 0.21282263100147247, 'timestamp': '2025-09-10 02:26:53.212173', 'step': 713, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 3322488817984}, 'timestamp': '2025-09-10 02:26:53.242200', 'step': 713, 'epoch': 1} {'type': 'loss', 'content': 0.18212532997131348, 'timestamp': '2025-09-10 02:26:53.244273', 'step': 714, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 3322488817984}, 'timestamp': '2025-09-10 02:26:53.275018', 'step': 714, 'epoch': 1} {'type': 'loss', 'content': 0.16662634909152985, 'timestamp': '2025-09-10 02:26:53.276882', 'step': 715, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 128], 'flops': 3797092544000}, 'timestamp': '2025-09-10 02:26:53.306015', 'step': 715, 'epoch': 1} {'type': 'loss', 'content': 0.19359922409057617, 'timestamp': '2025-09-10 02:26:53.329667', 'step': 716, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 80], 'flops': 2373281365952}, 'timestamp': '2025-09-10 02:26:53.359288', 'step': 716, 'epoch': 1} {'type': 'loss', 'content': 0.13761185109615326, 'timestamp': '2025-09-10 02:26:53.361325', 'step': 717, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 3322488817984}, 'timestamp': '2025-09-10 02:26:53.391965', 'step': 717, 'epoch': 1} {'type': 'loss', 'content': 0.23751239478588104, 'timestamp': '2025-09-10 02:26:53.393804', 'step': 718, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 2847885091968}, 'timestamp': '2025-09-10 02:26:53.423936', 'step': 718, 'epoch': 1} {'type': 'loss', 'content': 0.22884871065616608, 'timestamp': '2025-09-10 02:26:53.426078', 'step': 719, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 2847885091968}, 'timestamp': '2025-09-10 02:26:53.455792', 'step': 719, 'epoch': 1} {'type': 'loss', 'content': 0.1361256092786789, 'timestamp': '2025-09-10 02:26:53.479386', 'step': 720, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 3322488817984}, 'timestamp': '2025-09-10 02:26:53.509436', 'step': 720, 'epoch': 1} {'type': 'loss', 'content': 0.12134286761283875, 'timestamp': '2025-09-10 02:26:53.511678', 'step': 721, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 2847885091968}, 'timestamp': '2025-09-10 02:26:53.542945', 'step': 721, 'epoch': 1} {'type': 'loss', 'content': 0.19267185032367706, 'timestamp': '2025-09-10 02:26:53.545374', 'step': 722, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 2847885091968}, 'timestamp': '2025-09-10 02:26:53.576343', 'step': 722, 'epoch': 1} {'type': 'loss', 'content': 0.2148524820804596, 'timestamp': '2025-09-10 02:26:53.578389', 'step': 723, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 80], 'flops': 2373281365952}, 'timestamp': '2025-09-10 02:26:53.607210', 'step': 723, 'epoch': 1} {'type': 'loss', 'content': 0.06777649372816086, 'timestamp': '2025-09-10 02:26:53.630662', 'step': 724, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 3322488817984}, 'timestamp': '2025-09-10 02:26:53.660741', 'step': 724, 'epoch': 1} {'type': 'loss', 'content': 0.21703962981700897, 'timestamp': '2025-09-10 02:26:53.662797', 'step': 725, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 3322488817984}, 'timestamp': '2025-09-10 02:26:53.692594', 'step': 725, 'epoch': 1} {'type': 'loss', 'content': 0.22260451316833496, 'timestamp': '2025-09-10 02:26:53.694704', 'step': 726, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 3322488817984}, 'timestamp': '2025-09-10 02:26:53.723840', 'step': 726, 'epoch': 1} {'type': 'loss', 'content': 0.14212967455387115, 'timestamp': '2025-09-10 02:26:53.726146', 'step': 727, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 3322488817984}, 'timestamp': '2025-09-10 02:26:53.754943', 'step': 727, 'epoch': 1} {'type': 'loss', 'content': 0.18293437361717224, 'timestamp': '2025-09-10 02:26:53.778928', 'step': 728, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 2847885091968}, 'timestamp': '2025-09-10 02:26:53.809053', 'step': 728, 'epoch': 1} {'type': 'loss', 'content': 0.18185651302337646, 'timestamp': '2025-09-10 02:26:53.811192', 'step': 729, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 128], 'flops': 3797092544000}, 'timestamp': '2025-09-10 02:26:53.840399', 'step': 729, 'epoch': 1} {'type': 'loss', 'content': 0.17347624897956848, 'timestamp': '2025-09-10 02:26:53.842541', 'step': 730, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 2847885091968}, 'timestamp': '2025-09-10 02:26:53.872646', 'step': 730, 'epoch': 1} {'type': 'loss', 'content': 0.22977697849273682, 'timestamp': '2025-09-10 02:26:53.874991', 'step': 731, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 128], 'flops': 3797092544000}, 'timestamp': '2025-09-10 02:26:53.904152', 'step': 731, 'epoch': 1} {'type': 'loss', 'content': 0.14995497465133667, 'timestamp': '2025-09-10 02:26:53.927586', 'step': 732, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 2847885091968}, 'timestamp': '2025-09-10 02:26:53.957224', 'step': 732, 'epoch': 1} {'type': 'loss', 'content': 0.21651461720466614, 'timestamp': '2025-09-10 02:26:53.959775', 'step': 733, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 128], 'flops': 3797092544000}, 'timestamp': '2025-09-10 02:26:53.989608', 'step': 733, 'epoch': 1} {'type': 'loss', 'content': 0.2254635989665985, 'timestamp': '2025-09-10 02:26:53.991729', 'step': 734, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 2847885091968}, 'timestamp': '2025-09-10 02:26:54.022291', 'step': 734, 'epoch': 1} {'type': 'loss', 'content': 0.29766732454299927, 'timestamp': '2025-09-10 02:26:54.024495', 'step': 735, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 80], 'flops': 2373281365952}, 'timestamp': '2025-09-10 02:26:54.054434', 'step': 735, 'epoch': 1} {'type': 'loss', 'content': 0.20096828043460846, 'timestamp': '2025-09-10 02:26:54.077897', 'step': 736, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 80], 'flops': 2373281365952}, 'timestamp': '2025-09-10 02:26:54.108027', 'step': 736, 'epoch': 1} {'type': 'loss', 'content': 0.17951609194278717, 'timestamp': '2025-09-10 02:26:54.110468', 'step': 737, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 2847885091968}, 'timestamp': '2025-09-10 02:26:54.140291', 'step': 737, 'epoch': 1} {'type': 'loss', 'content': 0.14275473356246948, 'timestamp': '2025-09-10 02:26:54.142646', 'step': 738, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 128], 'flops': 3797092544000}, 'timestamp': '2025-09-10 02:26:54.172190', 'step': 738, 'epoch': 1} {'type': 'loss', 'content': 0.15967075526714325, 'timestamp': '2025-09-10 02:26:54.174672', 'step': 739, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 3322488817984}, 'timestamp': '2025-09-10 02:26:54.204477', 'step': 739, 'epoch': 1} {'type': 'loss', 'content': 0.11513850092887878, 'timestamp': '2025-09-10 02:26:54.227989', 'step': 740, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 2847885091968}, 'timestamp': '2025-09-10 02:26:54.259594', 'step': 740, 'epoch': 1} {'type': 'loss', 'content': 0.16804909706115723, 'timestamp': '2025-09-10 02:26:54.262908', 'step': 741, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 2847885091968}, 'timestamp': '2025-09-10 02:26:54.293158', 'step': 741, 'epoch': 1} {'type': 'loss', 'content': 0.1689005196094513, 'timestamp': '2025-09-10 02:26:54.295530', 'step': 742, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 3322488817984}, 'timestamp': '2025-09-10 02:26:54.325144', 'step': 742, 'epoch': 1} {'type': 'loss', 'content': 0.24260519444942474, 'timestamp': '2025-09-10 02:26:54.327469', 'step': 743, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 128], 'flops': 3797092544000}, 'timestamp': '2025-09-10 02:26:54.356842', 'step': 743, 'epoch': 1} {'type': 'loss', 'content': 0.1933165341615677, 'timestamp': '2025-09-10 02:26:54.380286', 'step': 744, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 3322488817984}, 'timestamp': '2025-09-10 02:26:54.411298', 'step': 744, 'epoch': 1} {'type': 'loss', 'content': 0.1511453092098236, 'timestamp': '2025-09-10 02:26:54.413654', 'step': 745, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 2847885091968}, 'timestamp': '2025-09-10 02:26:54.443623', 'step': 745, 'epoch': 1} {'type': 'loss', 'content': 0.20300355553627014, 'timestamp': '2025-09-10 02:26:54.445874', 'step': 746, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 144], 'flops': 4271696270016}, 'timestamp': '2025-09-10 02:26:54.475701', 'step': 746, 'epoch': 1} {'type': 'loss', 'content': 0.20696046948432922, 'timestamp': '2025-09-10 02:26:54.477873', 'step': 747, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 3322488817984}, 'timestamp': '2025-09-10 02:26:54.507187', 'step': 747, 'epoch': 1} {'type': 'loss', 'content': 0.2277160882949829, 'timestamp': '2025-09-10 02:26:54.531331', 'step': 748, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 160], 'flops': 4746299996032}, 'timestamp': '2025-09-10 02:26:54.561465', 'step': 748, 'epoch': 1} {'type': 'loss', 'content': 0.21629750728607178, 'timestamp': '2025-09-10 02:26:54.563471', 'step': 749, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 2847885091968}, 'timestamp': '2025-09-10 02:26:54.595306', 'step': 749, 'epoch': 1} {'type': 'loss', 'content': 0.256852388381958, 'timestamp': '2025-09-10 02:26:54.597256', 'step': 750, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 3322488817984}, 'timestamp': '2025-09-10 02:26:54.626837', 'step': 750, 'epoch': 1} {'type': 'loss', 'content': 0.24643434584140778, 'timestamp': '2025-09-10 02:26:54.629126', 'step': 751, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 3322488817984}, 'timestamp': '2025-09-10 02:26:54.659701', 'step': 751, 'epoch': 1} {'type': 'loss', 'content': 0.26888421177864075, 'timestamp': '2025-09-10 02:26:54.683332', 'step': 752, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 2847885091968}, 'timestamp': '2025-09-10 02:26:54.713400', 'step': 752, 'epoch': 1} {'type': 'loss', 'content': 0.1881176233291626, 'timestamp': '2025-09-10 02:26:54.715801', 'step': 753, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 2847885091968}, 'timestamp': '2025-09-10 02:26:54.745965', 'step': 753, 'epoch': 1} {'type': 'loss', 'content': 0.23180894553661346, 'timestamp': '2025-09-10 02:26:54.748108', 'step': 754, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 128], 'flops': 3797092544000}, 'timestamp': '2025-09-10 02:26:54.778285', 'step': 754, 'epoch': 1} {'type': 'loss', 'content': 0.17726479470729828, 'timestamp': '2025-09-10 02:26:54.780315', 'step': 755, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 128], 'flops': 3797092544000}, 'timestamp': '2025-09-10 02:26:54.814399', 'step': 755, 'epoch': 1} {'type': 'loss', 'content': 0.1537967026233673, 'timestamp': '2025-09-10 02:26:54.837907', 'step': 756, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 128], 'flops': 3797092544000}, 'timestamp': '2025-09-10 02:26:54.868116', 'step': 756, 'epoch': 1} {'type': 'loss', 'content': 0.16898025572299957, 'timestamp': '2025-09-10 02:26:54.871097', 'step': 757, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 3322488817984}, 'timestamp': '2025-09-10 02:26:54.900783', 'step': 757, 'epoch': 1} {'type': 'loss', 'content': 0.3422839343547821, 'timestamp': '2025-09-10 02:26:54.902931', 'step': 758, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 2847885091968}, 'timestamp': '2025-09-10 02:26:54.933749', 'step': 758, 'epoch': 1} {'type': 'loss', 'content': 0.13719093799591064, 'timestamp': '2025-09-10 02:26:54.935973', 'step': 759, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 3322488817984}, 'timestamp': '2025-09-10 02:26:54.965592', 'step': 759, 'epoch': 1} {'type': 'loss', 'content': 0.1422863006591797, 'timestamp': '2025-09-10 02:26:54.989181', 'step': 760, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 3322488817984}, 'timestamp': '2025-09-10 02:26:55.019462', 'step': 760, 'epoch': 1} {'type': 'loss', 'content': 0.14215238392353058, 'timestamp': '2025-09-10 02:26:55.021865', 'step': 761, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 3322488817984}, 'timestamp': '2025-09-10 02:26:55.051365', 'step': 761, 'epoch': 1} {'type': 'loss', 'content': 0.14729757606983185, 'timestamp': '2025-09-10 02:26:55.053518', 'step': 762, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 2847885091968}, 'timestamp': '2025-09-10 02:26:55.083409', 'step': 762, 'epoch': 1} {'type': 'loss', 'content': 0.3027743995189667, 'timestamp': '2025-09-10 02:26:55.086102', 'step': 763, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 3322488817984}, 'timestamp': '2025-09-10 02:26:55.115911', 'step': 763, 'epoch': 1} {'type': 'loss', 'content': 0.16596467792987823, 'timestamp': '2025-09-10 02:26:55.139425', 'step': 764, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 128], 'flops': 3797092544000}, 'timestamp': '2025-09-10 02:26:55.171418', 'step': 764, 'epoch': 1} {'type': 'loss', 'content': 0.30975061655044556, 'timestamp': '2025-09-10 02:26:55.175823', 'step': 765, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 3322488817984}, 'timestamp': '2025-09-10 02:26:55.207930', 'step': 765, 'epoch': 1} {'type': 'loss', 'content': 0.2571072578430176, 'timestamp': '2025-09-10 02:26:55.210717', 'step': 766, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 80], 'flops': 2373281365952}, 'timestamp': '2025-09-10 02:26:55.241723', 'step': 766, 'epoch': 1} {'type': 'loss', 'content': 0.12191256135702133, 'timestamp': '2025-09-10 02:26:55.243772', 'step': 767, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 3322488817984}, 'timestamp': '2025-09-10 02:26:55.273401', 'step': 767, 'epoch': 1} {'type': 'loss', 'content': 0.19675396382808685, 'timestamp': '2025-09-10 02:26:55.298089', 'step': 768, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 2847885091968}, 'timestamp': '2025-09-10 02:26:55.328067', 'step': 768, 'epoch': 1} {'type': 'loss', 'content': 0.2211351990699768, 'timestamp': '2025-09-10 02:26:55.329978', 'step': 769, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 3322488817984}, 'timestamp': '2025-09-10 02:26:55.359339', 'step': 769, 'epoch': 1} {'type': 'loss', 'content': 0.1666213572025299, 'timestamp': '2025-09-10 02:26:55.361284', 'step': 770, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 128], 'flops': 3797092544000}, 'timestamp': '2025-09-10 02:26:55.390617', 'step': 770, 'epoch': 1} {'type': 'loss', 'content': 0.14613281190395355, 'timestamp': '2025-09-10 02:26:55.392666', 'step': 771, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 80], 'flops': 2373281365952}, 'timestamp': '2025-09-10 02:26:55.422269', 'step': 771, 'epoch': 1} {'type': 'loss', 'content': 0.08312154561281204, 'timestamp': '2025-09-10 02:26:55.445761', 'step': 772, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 3322488817984}, 'timestamp': '2025-09-10 02:26:55.475779', 'step': 772, 'epoch': 1} {'type': 'loss', 'content': 0.1791703999042511, 'timestamp': '2025-09-10 02:26:55.479462', 'step': 773, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 128], 'flops': 3797092544000}, 'timestamp': '2025-09-10 02:26:55.508623', 'step': 773, 'epoch': 1} {'type': 'loss', 'content': 0.21079131960868835, 'timestamp': '2025-09-10 02:26:55.510756', 'step': 774, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 144], 'flops': 4271696270016}, 'timestamp': '2025-09-10 02:26:55.541673', 'step': 774, 'epoch': 1} {'type': 'loss', 'content': 0.11371973156929016, 'timestamp': '2025-09-10 02:26:55.543764', 'step': 775, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 2847885091968}, 'timestamp': '2025-09-10 02:26:55.573212', 'step': 775, 'epoch': 1} {'type': 'loss', 'content': 0.26080748438835144, 'timestamp': '2025-09-10 02:26:55.596685', 'step': 776, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 2847885091968}, 'timestamp': '2025-09-10 02:26:55.626556', 'step': 776, 'epoch': 1} {'type': 'loss', 'content': 0.2279275357723236, 'timestamp': '2025-09-10 02:26:55.631257', 'step': 777, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 80], 'flops': 2373281365952}, 'timestamp': '2025-09-10 02:26:55.665098', 'step': 777, 'epoch': 1} {'type': 'loss', 'content': 0.09631168842315674, 'timestamp': '2025-09-10 02:26:55.667373', 'step': 778, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 2847885091968}, 'timestamp': '2025-09-10 02:26:55.696637', 'step': 778, 'epoch': 1} {'type': 'loss', 'content': 0.2433023452758789, 'timestamp': '2025-09-10 02:26:55.698640', 'step': 779, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 3322488817984}, 'timestamp': '2025-09-10 02:26:55.728500', 'step': 779, 'epoch': 1} {'type': 'loss', 'content': 0.24530725181102753, 'timestamp': '2025-09-10 02:26:55.752078', 'step': 780, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 128], 'flops': 3797092544000}, 'timestamp': '2025-09-10 02:26:55.781364', 'step': 780, 'epoch': 1} {'type': 'loss', 'content': 0.12724512815475464, 'timestamp': '2025-09-10 02:26:55.783481', 'step': 781, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 144], 'flops': 4271696270016}, 'timestamp': '2025-09-10 02:26:55.814195', 'step': 781, 'epoch': 1} {'type': 'loss', 'content': 0.15810485184192657, 'timestamp': '2025-09-10 02:26:55.816430', 'step': 782, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 3322488817984}, 'timestamp': '2025-09-10 02:26:55.846044', 'step': 782, 'epoch': 1} {'type': 'loss', 'content': 0.24089738726615906, 'timestamp': '2025-09-10 02:26:55.848201', 'step': 783, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 160], 'flops': 4746299996032}, 'timestamp': '2025-09-10 02:26:55.877779', 'step': 783, 'epoch': 1} {'type': 'loss', 'content': 0.22769711911678314, 'timestamp': '2025-09-10 02:26:55.901385', 'step': 784, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 2847885091968}, 'timestamp': '2025-09-10 02:26:55.930873', 'step': 784, 'epoch': 1} {'type': 'loss', 'content': 0.22253282368183136, 'timestamp': '2025-09-10 02:26:55.932806', 'step': 785, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 2847885091968}, 'timestamp': '2025-09-10 02:26:55.962202', 'step': 785, 'epoch': 1} {'type': 'loss', 'content': 0.2916627824306488, 'timestamp': '2025-09-10 02:26:55.964464', 'step': 786, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 128], 'flops': 3797092544000}, 'timestamp': '2025-09-10 02:26:55.994657', 'step': 786, 'epoch': 1} {'type': 'loss', 'content': 0.23193955421447754, 'timestamp': '2025-09-10 02:26:55.996812', 'step': 787, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 128], 'flops': 3797092544000}, 'timestamp': '2025-09-10 02:26:56.026317', 'step': 787, 'epoch': 1} {'type': 'loss', 'content': 0.13159850239753723, 'timestamp': '2025-09-10 02:26:56.049678', 'step': 788, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 2847885091968}, 'timestamp': '2025-09-10 02:26:56.079886', 'step': 788, 'epoch': 1} {'type': 'loss', 'content': 0.16240312159061432, 'timestamp': '2025-09-10 02:26:56.082262', 'step': 789, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 144], 'flops': 4271696270016}, 'timestamp': '2025-09-10 02:26:56.113009', 'step': 789, 'epoch': 1} {'type': 'loss', 'content': 0.2954603433609009, 'timestamp': '2025-09-10 02:26:56.115267', 'step': 790, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 3322488817984}, 'timestamp': '2025-09-10 02:26:56.144492', 'step': 790, 'epoch': 1} {'type': 'loss', 'content': 0.20961761474609375, 'timestamp': '2025-09-10 02:26:56.146637', 'step': 791, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 2847885091968}, 'timestamp': '2025-09-10 02:26:56.175866', 'step': 791, 'epoch': 1} {'type': 'loss', 'content': 0.17919056117534637, 'timestamp': '2025-09-10 02:26:56.199062', 'step': 792, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 2847885091968}, 'timestamp': '2025-09-10 02:26:56.228162', 'step': 792, 'epoch': 1} {'type': 'loss', 'content': 0.15065741539001465, 'timestamp': '2025-09-10 02:26:56.230337', 'step': 793, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 3322488817984}, 'timestamp': '2025-09-10 02:26:56.259229', 'step': 793, 'epoch': 1} {'type': 'loss', 'content': 0.28756964206695557, 'timestamp': '2025-09-10 02:26:56.261274', 'step': 794, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 128], 'flops': 3797092544000}, 'timestamp': '2025-09-10 02:26:56.290565', 'step': 794, 'epoch': 1} {'type': 'loss', 'content': 0.15093427896499634, 'timestamp': '2025-09-10 02:26:56.293219', 'step': 795, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 2847885091968}, 'timestamp': '2025-09-10 02:26:56.323043', 'step': 795, 'epoch': 1} {'type': 'loss', 'content': 0.15825946629047394, 'timestamp': '2025-09-10 02:26:56.346453', 'step': 796, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 3322488817984}, 'timestamp': '2025-09-10 02:26:56.376606', 'step': 796, 'epoch': 1} {'type': 'loss', 'content': 0.09280471503734589, 'timestamp': '2025-09-10 02:26:56.378646', 'step': 797, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 3322488817984}, 'timestamp': '2025-09-10 02:26:56.408292', 'step': 797, 'epoch': 1} {'type': 'loss', 'content': 0.11719126254320145, 'timestamp': '2025-09-10 02:26:56.410350', 'step': 798, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 128], 'flops': 3797092544000}, 'timestamp': '2025-09-10 02:26:56.441959', 'step': 798, 'epoch': 1} {'type': 'loss', 'content': 0.23517319560050964, 'timestamp': '2025-09-10 02:26:56.443817', 'step': 799, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 2847885091968}, 'timestamp': '2025-09-10 02:26:56.473283', 'step': 799, 'epoch': 1} {'type': 'loss', 'content': 0.2749243378639221, 'timestamp': '2025-09-10 02:26:56.496627', 'step': 800, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 2847885091968}, 'timestamp': '2025-09-10 02:26:56.527260', 'step': 800, 'epoch': 1} {'type': 'loss', 'content': 0.20595234632492065, 'timestamp': '2025-09-10 02:26:56.529343', 'step': 801, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 128], 'flops': 3797092544000}, 'timestamp': '2025-09-10 02:26:56.558587', 'step': 801, 'epoch': 1} {'type': 'loss', 'content': 0.13552574813365936, 'timestamp': '2025-09-10 02:26:56.560531', 'step': 802, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 3322488817984}, 'timestamp': '2025-09-10 02:26:56.590642', 'step': 802, 'epoch': 1} {'type': 'loss', 'content': 0.22972221672534943, 'timestamp': '2025-09-10 02:26:56.592648', 'step': 803, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 128], 'flops': 3797092544000}, 'timestamp': '2025-09-10 02:26:56.622758', 'step': 803, 'epoch': 1} {'type': 'loss', 'content': 0.18037596344947815, 'timestamp': '2025-09-10 02:26:56.646609', 'step': 804, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 2847885091968}, 'timestamp': '2025-09-10 02:26:56.676468', 'step': 804, 'epoch': 1} {'type': 'loss', 'content': 0.13049328327178955, 'timestamp': '2025-09-10 02:26:56.681043', 'step': 805, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 2847885091968}, 'timestamp': '2025-09-10 02:26:56.711154', 'step': 805, 'epoch': 1} {'type': 'loss', 'content': 0.21392066776752472, 'timestamp': '2025-09-10 02:26:56.713608', 'step': 806, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 144], 'flops': 4271696270016}, 'timestamp': '2025-09-10 02:26:56.744354', 'step': 806, 'epoch': 1} {'type': 'loss', 'content': 0.2857343852519989, 'timestamp': '2025-09-10 02:26:56.746724', 'step': 807, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 144], 'flops': 4271696270016}, 'timestamp': '2025-09-10 02:26:56.776237', 'step': 807, 'epoch': 1} {'type': 'loss', 'content': 0.20072652399539948, 'timestamp': '2025-09-10 02:26:56.800752', 'step': 808, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 2847885091968}, 'timestamp': '2025-09-10 02:26:56.831018', 'step': 808, 'epoch': 1} {'type': 'loss', 'content': 0.2193874567747116, 'timestamp': '2025-09-10 02:26:56.833405', 'step': 809, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 2847885091968}, 'timestamp': '2025-09-10 02:26:56.863743', 'step': 809, 'epoch': 1} {'type': 'loss', 'content': 0.1921696960926056, 'timestamp': '2025-09-10 02:26:56.865910', 'step': 810, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 2847885091968}, 'timestamp': '2025-09-10 02:26:56.895656', 'step': 810, 'epoch': 1} {'type': 'loss', 'content': 0.23686274886131287, 'timestamp': '2025-09-10 02:26:56.898321', 'step': 811, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 128], 'flops': 3797092544000}, 'timestamp': '2025-09-10 02:26:56.927916', 'step': 811, 'epoch': 1} {'type': 'loss', 'content': 0.1671248972415924, 'timestamp': '2025-09-10 02:26:56.951415', 'step': 812, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 3322488817984}, 'timestamp': '2025-09-10 02:26:56.981360', 'step': 812, 'epoch': 1} {'type': 'loss', 'content': 0.1400599628686905, 'timestamp': '2025-09-10 02:26:56.983399', 'step': 813, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 2847885091968}, 'timestamp': '2025-09-10 02:26:57.012784', 'step': 813, 'epoch': 1} {'type': 'loss', 'content': 0.18347413837909698, 'timestamp': '2025-09-10 02:26:57.014976', 'step': 814, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 176], 'flops': 5220903722048}, 'timestamp': '2025-09-10 02:26:57.045437', 'step': 814, 'epoch': 1} {'type': 'loss', 'content': 0.1858665645122528, 'timestamp': '2025-09-10 02:26:57.049733', 'step': 815, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 2847885091968}, 'timestamp': '2025-09-10 02:26:57.080034', 'step': 815, 'epoch': 1} {'type': 'loss', 'content': 0.18041573464870453, 'timestamp': '2025-09-10 02:26:57.103244', 'step': 816, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 128], 'flops': 3797092544000}, 'timestamp': '2025-09-10 02:26:57.132940', 'step': 816, 'epoch': 1} {'type': 'loss', 'content': 0.19355086982250214, 'timestamp': '2025-09-10 02:26:57.140664', 'step': 817, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 3322488817984}, 'timestamp': '2025-09-10 02:26:57.180748', 'step': 817, 'epoch': 1} {'type': 'loss', 'content': 0.19216802716255188, 'timestamp': '2025-09-10 02:26:57.182785', 'step': 818, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 128], 'flops': 3797092544000}, 'timestamp': '2025-09-10 02:26:57.211945', 'step': 818, 'epoch': 1} {'type': 'loss', 'content': 0.1823323369026184, 'timestamp': '2025-09-10 02:26:57.214097', 'step': 819, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 2847885091968}, 'timestamp': '2025-09-10 02:26:57.243940', 'step': 819, 'epoch': 1} {'type': 'loss', 'content': 0.18595251441001892, 'timestamp': '2025-09-10 02:26:57.267885', 'step': 820, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 144], 'flops': 4271696270016}, 'timestamp': '2025-09-10 02:26:57.297324', 'step': 820, 'epoch': 1} {'type': 'loss', 'content': 0.27187803387641907, 'timestamp': '2025-09-10 02:26:57.300768', 'step': 821, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 2847885091968}, 'timestamp': '2025-09-10 02:26:57.330372', 'step': 821, 'epoch': 1} {'type': 'loss', 'content': 0.1672859936952591, 'timestamp': '2025-09-10 02:26:57.332513', 'step': 822, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 3322488817984}, 'timestamp': '2025-09-10 02:26:57.362171', 'step': 822, 'epoch': 1} {'type': 'loss', 'content': 0.14399674534797668, 'timestamp': '2025-09-10 02:26:57.364258', 'step': 823, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 3322488817984}, 'timestamp': '2025-09-10 02:26:57.394446', 'step': 823, 'epoch': 1} {'type': 'loss', 'content': 0.14457359910011292, 'timestamp': '2025-09-10 02:26:57.417977', 'step': 824, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 144], 'flops': 4271696270016}, 'timestamp': '2025-09-10 02:26:57.449595', 'step': 824, 'epoch': 1} {'type': 'loss', 'content': 0.2552657127380371, 'timestamp': '2025-09-10 02:26:57.451798', 'step': 825, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 2847885091968}, 'timestamp': '2025-09-10 02:26:57.481419', 'step': 825, 'epoch': 1} {'type': 'loss', 'content': 0.1431102454662323, 'timestamp': '2025-09-10 02:26:57.483222', 'step': 826, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 3322488817984}, 'timestamp': '2025-09-10 02:26:57.512985', 'step': 826, 'epoch': 1} {'type': 'loss', 'content': 0.30477970838546753, 'timestamp': '2025-09-10 02:26:57.515137', 'step': 827, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 2847885091968}, 'timestamp': '2025-09-10 02:26:57.544362', 'step': 827, 'epoch': 1} {'type': 'loss', 'content': 0.23879915475845337, 'timestamp': '2025-09-10 02:26:57.567570', 'step': 828, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 144], 'flops': 4271696270016}, 'timestamp': '2025-09-10 02:26:57.597909', 'step': 828, 'epoch': 1} {'type': 'loss', 'content': 0.1376725286245346, 'timestamp': '2025-09-10 02:26:57.600338', 'step': 829, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 128], 'flops': 3797092544000}, 'timestamp': '2025-09-10 02:26:57.631402', 'step': 829, 'epoch': 1} {'type': 'loss', 'content': 0.14113518595695496, 'timestamp': '2025-09-10 02:26:57.633696', 'step': 830, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 3322488817984}, 'timestamp': '2025-09-10 02:26:57.664042', 'step': 830, 'epoch': 1} {'type': 'loss', 'content': 0.3542945981025696, 'timestamp': '2025-09-10 02:26:57.666098', 'step': 831, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 3322488817984}, 'timestamp': '2025-09-10 02:26:57.695541', 'step': 831, 'epoch': 1} {'type': 'loss', 'content': 0.1861773133277893, 'timestamp': '2025-09-10 02:26:57.718994', 'step': 832, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 3322488817984}, 'timestamp': '2025-09-10 02:26:57.748771', 'step': 832, 'epoch': 1} {'type': 'loss', 'content': 0.12411831319332123, 'timestamp': '2025-09-10 02:26:57.750977', 'step': 833, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 128], 'flops': 3797092544000}, 'timestamp': '2025-09-10 02:26:57.780300', 'step': 833, 'epoch': 1} {'type': 'loss', 'content': 0.1555291712284088, 'timestamp': '2025-09-10 02:26:57.783086', 'step': 834, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 144], 'flops': 4271696270016}, 'timestamp': '2025-09-10 02:26:57.812325', 'step': 834, 'epoch': 1} {'type': 'loss', 'content': 0.1412263661623001, 'timestamp': '2025-09-10 02:26:57.814867', 'step': 835, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 80], 'flops': 2373281365952}, 'timestamp': '2025-09-10 02:26:57.844669', 'step': 835, 'epoch': 1} {'type': 'loss', 'content': 0.16040334105491638, 'timestamp': '2025-09-10 02:26:57.868164', 'step': 836, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 3322488817984}, 'timestamp': '2025-09-10 02:26:57.897871', 'step': 836, 'epoch': 1} {'type': 'loss', 'content': 0.1807011514902115, 'timestamp': '2025-09-10 02:26:57.900018', 'step': 837, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 144], 'flops': 4271696270016}, 'timestamp': '2025-09-10 02:26:57.929750', 'step': 837, 'epoch': 1} {'type': 'loss', 'content': 0.20187819004058838, 'timestamp': '2025-09-10 02:26:57.932076', 'step': 838, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 128], 'flops': 3797092544000}, 'timestamp': '2025-09-10 02:26:57.961531', 'step': 838, 'epoch': 1} {'type': 'loss', 'content': 0.30034807324409485, 'timestamp': '2025-09-10 02:26:57.963863', 'step': 839, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 3322488817984}, 'timestamp': '2025-09-10 02:26:57.993202', 'step': 839, 'epoch': 1} {'type': 'loss', 'content': 0.18488618731498718, 'timestamp': '2025-09-10 02:26:58.016439', 'step': 840, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 128], 'flops': 3797092544000}, 'timestamp': '2025-09-10 02:26:58.047703', 'step': 840, 'epoch': 1} {'type': 'loss', 'content': 0.1781824827194214, 'timestamp': '2025-09-10 02:26:58.049825', 'step': 841, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 3322488817984}, 'timestamp': '2025-09-10 02:26:58.079437', 'step': 841, 'epoch': 1} {'type': 'loss', 'content': 0.10874950885772705, 'timestamp': '2025-09-10 02:26:58.081822', 'step': 842, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 3322488817984}, 'timestamp': '2025-09-10 02:26:58.111520', 'step': 842, 'epoch': 1} {'type': 'loss', 'content': 0.2416393905878067, 'timestamp': '2025-09-10 02:26:58.113857', 'step': 843, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 3322488817984}, 'timestamp': '2025-09-10 02:26:58.143574', 'step': 843, 'epoch': 1} {'type': 'loss', 'content': 0.24296671152114868, 'timestamp': '2025-09-10 02:26:58.167151', 'step': 844, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 128], 'flops': 3797092544000}, 'timestamp': '2025-09-10 02:26:58.198130', 'step': 844, 'epoch': 1} {'type': 'loss', 'content': 0.1931033879518509, 'timestamp': '2025-09-10 02:26:58.200270', 'step': 845, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 128], 'flops': 3797092544000}, 'timestamp': '2025-09-10 02:26:58.230413', 'step': 845, 'epoch': 1} {'type': 'loss', 'content': 0.22297483682632446, 'timestamp': '2025-09-10 02:26:58.232615', 'step': 846, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 3322488817984}, 'timestamp': '2025-09-10 02:26:58.263423', 'step': 846, 'epoch': 1} {'type': 'loss', 'content': 0.18249709904193878, 'timestamp': '2025-09-10 02:26:58.265853', 'step': 847, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 160], 'flops': 4746299996032}, 'timestamp': '2025-09-10 02:26:58.295800', 'step': 847, 'epoch': 1} {'type': 'loss', 'content': 0.23678404092788696, 'timestamp': '2025-09-10 02:26:58.319226', 'step': 848, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 3322488817984}, 'timestamp': '2025-09-10 02:26:58.348621', 'step': 848, 'epoch': 1} {'type': 'loss', 'content': 0.14414329826831818, 'timestamp': '2025-09-10 02:26:58.351290', 'step': 849, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 128], 'flops': 3797092544000}, 'timestamp': '2025-09-10 02:26:58.381804', 'step': 849, 'epoch': 1} {'type': 'loss', 'content': 0.16826874017715454, 'timestamp': '2025-09-10 02:26:58.383980', 'step': 850, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 2847885091968}, 'timestamp': '2025-09-10 02:26:58.413128', 'step': 850, 'epoch': 1} {'type': 'loss', 'content': 0.2338331788778305, 'timestamp': '2025-09-10 02:26:58.415792', 'step': 851, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 3322488817984}, 'timestamp': '2025-09-10 02:26:58.446699', 'step': 851, 'epoch': 1} {'type': 'loss', 'content': 0.15801475942134857, 'timestamp': '2025-09-10 02:26:58.470531', 'step': 852, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 3322488817984}, 'timestamp': '2025-09-10 02:26:58.501032', 'step': 852, 'epoch': 1} {'type': 'loss', 'content': 0.16254065930843353, 'timestamp': '2025-09-10 02:26:58.503255', 'step': 853, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 144], 'flops': 4271696270016}, 'timestamp': '2025-09-10 02:26:58.532866', 'step': 853, 'epoch': 1} {'type': 'loss', 'content': 0.18168526887893677, 'timestamp': '2025-09-10 02:26:58.535116', 'step': 854, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 2847885091968}, 'timestamp': '2025-09-10 02:26:58.564598', 'step': 854, 'epoch': 1} {'type': 'loss', 'content': 0.17613035440444946, 'timestamp': '2025-09-10 02:26:58.566963', 'step': 855, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 3322488817984}, 'timestamp': '2025-09-10 02:26:58.595572', 'step': 855, 'epoch': 1} {'type': 'loss', 'content': 0.15644565224647522, 'timestamp': '2025-09-10 02:26:58.618782', 'step': 856, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 3322488817984}, 'timestamp': '2025-09-10 02:26:58.656045', 'step': 856, 'epoch': 1} {'type': 'loss', 'content': 0.29875853657722473, 'timestamp': '2025-09-10 02:26:58.658301', 'step': 857, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 128], 'flops': 3797092544000}, 'timestamp': '2025-09-10 02:26:58.687712', 'step': 857, 'epoch': 1} {'type': 'loss', 'content': 0.21782837808132172, 'timestamp': '2025-09-10 02:26:58.689981', 'step': 858, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 2847885091968}, 'timestamp': '2025-09-10 02:26:58.721767', 'step': 858, 'epoch': 1} {'type': 'loss', 'content': 0.1275743991136551, 'timestamp': '2025-09-10 02:26:58.726249', 'step': 859, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 80], 'flops': 2373281365952}, 'timestamp': '2025-09-10 02:26:58.761056', 'step': 859, 'epoch': 1} {'type': 'loss', 'content': 0.19102409482002258, 'timestamp': '2025-09-10 02:26:58.784756', 'step': 860, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 144], 'flops': 4271696270016}, 'timestamp': '2025-09-10 02:26:58.826774', 'step': 860, 'epoch': 1} {'type': 'loss', 'content': 0.14373363554477692, 'timestamp': '2025-09-10 02:26:58.828410', 'step': 861, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 2847885091968}, 'timestamp': '2025-09-10 02:26:58.857875', 'step': 861, 'epoch': 1} {'type': 'loss', 'content': 0.07887914776802063, 'timestamp': '2025-09-10 02:26:58.860618', 'step': 862, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 144], 'flops': 4271696270016}, 'timestamp': '2025-09-10 02:26:58.899792', 'step': 862, 'epoch': 1} {'type': 'loss', 'content': 0.2737746238708496, 'timestamp': '2025-09-10 02:26:58.902175', 'step': 863, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 80], 'flops': 2373281365952}, 'timestamp': '2025-09-10 02:26:58.932150', 'step': 863, 'epoch': 1} {'type': 'loss', 'content': 0.1470339447259903, 'timestamp': '2025-09-10 02:26:58.955436', 'step': 864, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 2847885091968}, 'timestamp': '2025-09-10 02:26:58.988718', 'step': 864, 'epoch': 1} {'type': 'loss', 'content': 0.18529824912548065, 'timestamp': '2025-09-10 02:26:58.990436', 'step': 865, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 2847885091968}, 'timestamp': '2025-09-10 02:26:59.019380', 'step': 865, 'epoch': 1} {'type': 'loss', 'content': 0.2222997397184372, 'timestamp': '2025-09-10 02:26:59.022742', 'step': 866, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 128], 'flops': 3797092544000}, 'timestamp': '2025-09-10 02:26:59.051549', 'step': 866, 'epoch': 1} {'type': 'loss', 'content': 0.1900091916322708, 'timestamp': '2025-09-10 02:26:59.053561', 'step': 867, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 2847885091968}, 'timestamp': '2025-09-10 02:26:59.083323', 'step': 867, 'epoch': 1} {'type': 'loss', 'content': 0.22741921246051788, 'timestamp': '2025-09-10 02:26:59.107678', 'step': 868, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 128], 'flops': 3797092544000}, 'timestamp': '2025-09-10 02:26:59.141850', 'step': 868, 'epoch': 1} {'type': 'loss', 'content': 0.12436936795711517, 'timestamp': '2025-09-10 02:26:59.148545', 'step': 869, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 2847885091968}, 'timestamp': '2025-09-10 02:26:59.178974', 'step': 869, 'epoch': 1} {'type': 'loss', 'content': 0.21770405769348145, 'timestamp': '2025-09-10 02:26:59.180986', 'step': 870, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 2847885091968}, 'timestamp': '2025-09-10 02:26:59.210010', 'step': 870, 'epoch': 1} {'type': 'loss', 'content': 0.10838493704795837, 'timestamp': '2025-09-10 02:26:59.211972', 'step': 871, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 144], 'flops': 4271696270016}, 'timestamp': '2025-09-10 02:26:59.242257', 'step': 871, 'epoch': 1} {'type': 'loss', 'content': 0.15644800662994385, 'timestamp': '2025-09-10 02:26:59.265849', 'step': 872, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 144], 'flops': 4271696270016}, 'timestamp': '2025-09-10 02:26:59.296788', 'step': 872, 'epoch': 1} {'type': 'loss', 'content': 0.22278259694576263, 'timestamp': '2025-09-10 02:26:59.298694', 'step': 873, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 3322488817984}, 'timestamp': '2025-09-10 02:26:59.329673', 'step': 873, 'epoch': 1} {'type': 'loss', 'content': 0.2543168067932129, 'timestamp': '2025-09-10 02:26:59.333023', 'step': 874, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 2847885091968}, 'timestamp': '2025-09-10 02:26:59.366847', 'step': 874, 'epoch': 1} {'type': 'loss', 'content': 0.2118016630411148, 'timestamp': '2025-09-10 02:26:59.369566', 'step': 875, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 3322488817984}, 'timestamp': '2025-09-10 02:26:59.400499', 'step': 875, 'epoch': 1} {'type': 'loss', 'content': 0.17625409364700317, 'timestamp': '2025-09-10 02:26:59.424094', 'step': 876, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 128], 'flops': 3797092544000}, 'timestamp': '2025-09-10 02:26:59.457176', 'step': 876, 'epoch': 1} {'type': 'loss', 'content': 0.18069957196712494, 'timestamp': '2025-09-10 02:26:59.459673', 'step': 877, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 2847885091968}, 'timestamp': '2025-09-10 02:26:59.492107', 'step': 877, 'epoch': 1} {'type': 'loss', 'content': 0.16053299605846405, 'timestamp': '2025-09-10 02:26:59.494351', 'step': 878, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 3322488817984}, 'timestamp': '2025-09-10 02:26:59.525486', 'step': 878, 'epoch': 1} {'type': 'loss', 'content': 0.159897580742836, 'timestamp': '2025-09-10 02:26:59.530171', 'step': 879, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 3322488817984}, 'timestamp': '2025-09-10 02:26:59.561274', 'step': 879, 'epoch': 1} {'type': 'loss', 'content': 0.14742915332317352, 'timestamp': '2025-09-10 02:26:59.584691', 'step': 880, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 128], 'flops': 3797092544000}, 'timestamp': '2025-09-10 02:26:59.614728', 'step': 880, 'epoch': 1} {'type': 'loss', 'content': 0.1313169002532959, 'timestamp': '2025-09-10 02:26:59.616956', 'step': 881, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 128], 'flops': 3797092544000}, 'timestamp': '2025-09-10 02:26:59.646997', 'step': 881, 'epoch': 1} {'type': 'loss', 'content': 0.2311997413635254, 'timestamp': '2025-09-10 02:26:59.649809', 'step': 882, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 128], 'flops': 3797092544000}, 'timestamp': '2025-09-10 02:26:59.680467', 'step': 882, 'epoch': 1} {'type': 'loss', 'content': 0.17339332401752472, 'timestamp': '2025-09-10 02:26:59.682995', 'step': 883, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 128], 'flops': 3797092544000}, 'timestamp': '2025-09-10 02:26:59.712995', 'step': 883, 'epoch': 1} {'type': 'loss', 'content': 0.2083035558462143, 'timestamp': '2025-09-10 02:26:59.736421', 'step': 884, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 3322488817984}, 'timestamp': '2025-09-10 02:26:59.765928', 'step': 884, 'epoch': 1} {'type': 'loss', 'content': 0.16570188105106354, 'timestamp': '2025-09-10 02:26:59.768073', 'step': 885, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 2847885091968}, 'timestamp': '2025-09-10 02:26:59.797730', 'step': 885, 'epoch': 1} {'type': 'loss', 'content': 0.3467956483364105, 'timestamp': '2025-09-10 02:26:59.799714', 'step': 886, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 160], 'flops': 4746299996032}, 'timestamp': '2025-09-10 02:26:59.830316', 'step': 886, 'epoch': 1} {'type': 'loss', 'content': 0.24967479705810547, 'timestamp': '2025-09-10 02:26:59.833815', 'step': 887, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 3322488817984}, 'timestamp': '2025-09-10 02:26:59.863345', 'step': 887, 'epoch': 1} {'type': 'loss', 'content': 0.2776481509208679, 'timestamp': '2025-09-10 02:26:59.886770', 'step': 888, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 3322488817984}, 'timestamp': '2025-09-10 02:26:59.916318', 'step': 888, 'epoch': 1} {'type': 'loss', 'content': 0.23089995980262756, 'timestamp': '2025-09-10 02:26:59.918243', 'step': 889, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 144], 'flops': 4271696270016}, 'timestamp': '2025-09-10 02:26:59.947531', 'step': 889, 'epoch': 1} {'type': 'loss', 'content': 0.12950654327869415, 'timestamp': '2025-09-10 02:26:59.949936', 'step': 890, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 3322488817984}, 'timestamp': '2025-09-10 02:26:59.979748', 'step': 890, 'epoch': 1} {'type': 'loss', 'content': 0.25356027483940125, 'timestamp': '2025-09-10 02:26:59.982205', 'step': 891, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 2847885091968}, 'timestamp': '2025-09-10 02:27:00.012050', 'step': 891, 'epoch': 1} {'type': 'loss', 'content': 0.18435131013393402, 'timestamp': '2025-09-10 02:27:00.035548', 'step': 892, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 128], 'flops': 3797092544000}, 'timestamp': '2025-09-10 02:27:00.066909', 'step': 892, 'epoch': 1} {'type': 'loss', 'content': 0.14287158846855164, 'timestamp': '2025-09-10 02:27:00.069265', 'step': 893, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 128], 'flops': 3797092544000}, 'timestamp': '2025-09-10 02:27:00.099911', 'step': 893, 'epoch': 1} {'type': 'loss', 'content': 0.20362520217895508, 'timestamp': '2025-09-10 02:27:00.101915', 'step': 894, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 2847885091968}, 'timestamp': '2025-09-10 02:27:00.131674', 'step': 894, 'epoch': 1} {'type': 'loss', 'content': 0.1347762495279312, 'timestamp': '2025-09-10 02:27:00.134449', 'step': 895, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 2847885091968}, 'timestamp': '2025-09-10 02:27:00.167728', 'step': 895, 'epoch': 1} {'type': 'loss', 'content': 0.21847005188465118, 'timestamp': '2025-09-10 02:27:00.191223', 'step': 896, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 80], 'flops': 2373281365952}, 'timestamp': '2025-09-10 02:27:00.223207', 'step': 896, 'epoch': 1} {'type': 'loss', 'content': 0.18318770825862885, 'timestamp': '2025-09-10 02:27:00.225515', 'step': 897, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 128], 'flops': 3797092544000}, 'timestamp': '2025-09-10 02:27:00.255776', 'step': 897, 'epoch': 1} {'type': 'loss', 'content': 0.16777247190475464, 'timestamp': '2025-09-10 02:27:00.258363', 'step': 898, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 128], 'flops': 3797092544000}, 'timestamp': '2025-09-10 02:27:00.288303', 'step': 898, 'epoch': 1} {'type': 'loss', 'content': 0.24956735968589783, 'timestamp': '2025-09-10 02:27:00.290874', 'step': 899, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 2847885091968}, 'timestamp': '2025-09-10 02:27:00.320994', 'step': 899, 'epoch': 1} {'type': 'loss', 'content': 0.36609184741973877, 'timestamp': '2025-09-10 02:27:00.344633', 'step': 900, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 128], 'flops': 3797092544000}, 'timestamp': '2025-09-10 02:27:00.374219', 'step': 900, 'epoch': 1} {'type': 'loss', 'content': 0.14964911341667175, 'timestamp': '2025-09-10 02:27:00.376223', 'step': 901, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 3322488817984}, 'timestamp': '2025-09-10 02:27:00.406015', 'step': 901, 'epoch': 1} {'type': 'loss', 'content': 0.15116672217845917, 'timestamp': '2025-09-10 02:27:00.408354', 'step': 902, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 144], 'flops': 4271696270016}, 'timestamp': '2025-09-10 02:27:00.439226', 'step': 902, 'epoch': 1} {'type': 'loss', 'content': 0.1695345938205719, 'timestamp': '2025-09-10 02:27:00.442372', 'step': 903, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 3322488817984}, 'timestamp': '2025-09-10 02:27:00.472598', 'step': 903, 'epoch': 1} {'type': 'loss', 'content': 0.20657800137996674, 'timestamp': '2025-09-10 02:27:00.496115', 'step': 904, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 160], 'flops': 4746299996032}, 'timestamp': '2025-09-10 02:27:00.526498', 'step': 904, 'epoch': 1} {'type': 'loss', 'content': 0.17900457978248596, 'timestamp': '2025-09-10 02:27:00.528876', 'step': 905, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 144], 'flops': 4271696270016}, 'timestamp': '2025-09-10 02:27:00.558525', 'step': 905, 'epoch': 1} {'type': 'loss', 'content': 0.20687519013881683, 'timestamp': '2025-09-10 02:27:00.561035', 'step': 906, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 2847885091968}, 'timestamp': '2025-09-10 02:27:00.591323', 'step': 906, 'epoch': 1} {'type': 'loss', 'content': 0.14041286706924438, 'timestamp': '2025-09-10 02:27:00.593602', 'step': 907, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 128], 'flops': 3797092544000}, 'timestamp': '2025-09-10 02:27:00.623525', 'step': 907, 'epoch': 1} {'type': 'loss', 'content': 0.2089933305978775, 'timestamp': '2025-09-10 02:27:00.646988', 'step': 908, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 160], 'flops': 4746299996032}, 'timestamp': '2025-09-10 02:27:00.678167', 'step': 908, 'epoch': 1} {'type': 'loss', 'content': 0.27686911821365356, 'timestamp': '2025-09-10 02:27:00.680336', 'step': 909, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 2847885091968}, 'timestamp': '2025-09-10 02:27:00.709717', 'step': 909, 'epoch': 1} {'type': 'loss', 'content': 0.1881999373435974, 'timestamp': '2025-09-10 02:27:00.711564', 'step': 910, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 3322488817984}, 'timestamp': '2025-09-10 02:27:00.741190', 'step': 910, 'epoch': 1} {'type': 'loss', 'content': 0.20919302105903625, 'timestamp': '2025-09-10 02:27:00.745272', 'step': 911, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 128], 'flops': 3797092544000}, 'timestamp': '2025-09-10 02:27:00.775608', 'step': 911, 'epoch': 1} {'type': 'loss', 'content': 0.22174608707427979, 'timestamp': '2025-09-10 02:27:00.799229', 'step': 912, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 2847885091968}, 'timestamp': '2025-09-10 02:27:00.829563', 'step': 912, 'epoch': 1} {'type': 'loss', 'content': 0.14575433731079102, 'timestamp': '2025-09-10 02:27:00.831853', 'step': 913, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 3322488817984}, 'timestamp': '2025-09-10 02:27:00.862775', 'step': 913, 'epoch': 1} {'type': 'loss', 'content': 0.08973012119531631, 'timestamp': '2025-09-10 02:27:00.864819', 'step': 914, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 224], 'flops': 6644714900096}, 'timestamp': '2025-09-10 02:27:00.895062', 'step': 914, 'epoch': 1} {'type': 'loss', 'content': 0.2584691643714905, 'timestamp': '2025-09-10 02:27:00.902339', 'step': 915, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 128], 'flops': 3797092544000}, 'timestamp': '2025-09-10 02:27:00.932266', 'step': 915, 'epoch': 1} {'type': 'loss', 'content': 0.18223364651203156, 'timestamp': '2025-09-10 02:27:00.957638', 'step': 916, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 3322488817984}, 'timestamp': '2025-09-10 02:27:00.988323', 'step': 916, 'epoch': 1} {'type': 'loss', 'content': 0.3042639195919037, 'timestamp': '2025-09-10 02:27:00.990981', 'step': 917, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 2847885091968}, 'timestamp': '2025-09-10 02:27:01.021040', 'step': 917, 'epoch': 1} {'type': 'loss', 'content': 0.1693146526813507, 'timestamp': '2025-09-10 02:27:01.023389', 'step': 918, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 2847885091968}, 'timestamp': '2025-09-10 02:27:01.053021', 'step': 918, 'epoch': 1} {'type': 'loss', 'content': 0.312094509601593, 'timestamp': '2025-09-10 02:27:01.055210', 'step': 919, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 2847885091968}, 'timestamp': '2025-09-10 02:27:01.085868', 'step': 919, 'epoch': 1} {'type': 'loss', 'content': 0.11956065148115158, 'timestamp': '2025-09-10 02:27:01.109426', 'step': 920, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 2847885091968}, 'timestamp': '2025-09-10 02:27:01.139198', 'step': 920, 'epoch': 1} {'type': 'loss', 'content': 0.24242012202739716, 'timestamp': '2025-09-10 02:27:01.141362', 'step': 921, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 2847885091968}, 'timestamp': '2025-09-10 02:27:01.183153', 'step': 921, 'epoch': 1} {'type': 'loss', 'content': 0.17104840278625488, 'timestamp': '2025-09-10 02:27:01.185386', 'step': 922, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 128], 'flops': 3797092544000}, 'timestamp': '2025-09-10 02:27:01.215243', 'step': 922, 'epoch': 1} {'type': 'loss', 'content': 0.18399159610271454, 'timestamp': '2025-09-10 02:27:01.217656', 'step': 923, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 128], 'flops': 3797092544000}, 'timestamp': '2025-09-10 02:27:01.247660', 'step': 923, 'epoch': 1} {'type': 'loss', 'content': 0.28134843707084656, 'timestamp': '2025-09-10 02:27:01.272708', 'step': 924, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 144], 'flops': 4271696270016}, 'timestamp': '2025-09-10 02:27:01.310460', 'step': 924, 'epoch': 1} {'type': 'loss', 'content': 0.15754903852939606, 'timestamp': '2025-09-10 02:27:01.315585', 'step': 925, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 3322488817984}, 'timestamp': '2025-09-10 02:27:01.350788', 'step': 925, 'epoch': 1} {'type': 'loss', 'content': 0.2544834315776825, 'timestamp': '2025-09-10 02:27:01.354874', 'step': 926, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 2847885091968}, 'timestamp': '2025-09-10 02:27:01.386517', 'step': 926, 'epoch': 1} {'type': 'loss', 'content': 0.27110010385513306, 'timestamp': '2025-09-10 02:27:01.388915', 'step': 927, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 128], 'flops': 3797092544000}, 'timestamp': '2025-09-10 02:27:01.418676', 'step': 927, 'epoch': 1} {'type': 'loss', 'content': 0.17095603048801422, 'timestamp': '2025-09-10 02:27:01.442363', 'step': 928, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 144], 'flops': 4271696270016}, 'timestamp': '2025-09-10 02:27:01.473141', 'step': 928, 'epoch': 1} {'type': 'loss', 'content': 0.14950257539749146, 'timestamp': '2025-09-10 02:27:01.475100', 'step': 929, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 128], 'flops': 3797092544000}, 'timestamp': '2025-09-10 02:27:01.504531', 'step': 929, 'epoch': 1} {'type': 'loss', 'content': 0.18924953043460846, 'timestamp': '2025-09-10 02:27:01.507773', 'step': 930, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 3322488817984}, 'timestamp': '2025-09-10 02:27:01.539034', 'step': 930, 'epoch': 1} {'type': 'loss', 'content': 0.15540562570095062, 'timestamp': '2025-09-10 02:27:01.541672', 'step': 931, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 128], 'flops': 3797092544000}, 'timestamp': '2025-09-10 02:27:01.571841', 'step': 931, 'epoch': 1} {'type': 'loss', 'content': 0.3297311067581177, 'timestamp': '2025-09-10 02:27:01.595395', 'step': 932, 'epoch': 1} {'type': 'flops', 'content': [{'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1582003754624}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1898404492032}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1898404492032}, {'type': 'perplexity', 'in_batch_dim': [8, 144], 'batch_size': 8, 'flops': 2847606704256}, {'type': 'perplexity', 'in_batch_dim': [8, 64], 'batch_size': 8, 'flops': 1265603017216}, {'type': 'perplexity', 'in_batch_dim': [8, 112], 'batch_size': 8, 'flops': 2214805229440}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1898404492032}, {'type': 'perplexity', 'in_batch_dim': [8, 112], 'batch_size': 8, 'flops': 2214805229440}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1898404492032}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1898404492032}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1898404492032}, {'type': 'perplexity', 'in_batch_dim': [8, 112], 'batch_size': 8, 'flops': 2214805229440}, {'type': 'perplexity', 'in_batch_dim': [8, 112], 'batch_size': 8, 'flops': 2214805229440}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1582003754624}, {'type': 'perplexity', 'in_batch_dim': [8, 144], 'batch_size': 8, 'flops': 2847606704256}, {'type': 'perplexity', 'in_batch_dim': [8, 112], 'batch_size': 8, 'flops': 2214805229440}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1582003754624}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1898404492032}, {'type': 'perplexity', 'in_batch_dim': [8, 64], 'batch_size': 8, 'flops': 1265603017216}, {'type': 'perplexity', 'in_batch_dim': [8, 64], 'batch_size': 8, 'flops': 1265603017216}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1898404492032}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1582003754624}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1582003754624}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1582003754624}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1582003754624}, {'type': 'perplexity', 'in_batch_dim': [8, 64], 'batch_size': 8, 'flops': 1265603017216}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1898404492032}, {'type': 'perplexity', 'in_batch_dim': [8, 64], 'batch_size': 8, 'flops': 1265603017216}, {'type': 'perplexity', 'in_batch_dim': [8, 64], 'batch_size': 8, 'flops': 1265603017216}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1582003754624}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1582003754624}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1582003754624}, {'type': 'perplexity', 'in_batch_dim': [8, 112], 'batch_size': 8, 'flops': 2214805229440}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1582003754624}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1582003754624}, {'type': 'perplexity', 'in_batch_dim': [8, 160], 'batch_size': 8, 'flops': 3164007441664}, {'type': 'perplexity', 'in_batch_dim': [8, 64], 'batch_size': 8, 'flops': 1265603017216}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1898404492032}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1898404492032}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1898404492032}, {'type': 'perplexity', 'in_batch_dim': [8, 64], 'batch_size': 8, 'flops': 1265603017216}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1582003754624}, {'type': 'perplexity', 'in_batch_dim': [8, 64], 'batch_size': 8, 'flops': 1265603017216}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1582003754624}, {'type': 'perplexity', 'in_batch_dim': [8, 64], 'batch_size': 8, 'flops': 1265603017216}, {'type': 'perplexity', 'in_batch_dim': [8, 112], 'batch_size': 8, 'flops': 2214805229440}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1582003754624}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1898404492032}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1898404492032}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1898404492032}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1898404492032}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1898404492032}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1898404492032}, {'type': 'perplexity', 'in_batch_dim': [8, 64], 'batch_size': 8, 'flops': 1265603017216}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1898404492032}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1582003754624}, {'type': 'perplexity', 'in_batch_dim': [8, 64], 'batch_size': 8, 'flops': 1265603017216}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1898404492032}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1898404492032}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1898404492032}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1898404492032}, {'type': 'perplexity', 'in_batch_dim': [8, 128], 'batch_size': 8, 'flops': 2531205966848}, {'type': 'perplexity', 'in_batch_dim': [8, 112], 'batch_size': 8, 'flops': 2214805229440}, {'type': 'perplexity', 'in_batch_dim': [8, 64], 'batch_size': 8, 'flops': 1265603017216}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1582003754624}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1898404492032}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1582003754624}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1898404492032}, {'type': 'perplexity', 'in_batch_dim': [8, 64], 'batch_size': 8, 'flops': 1265603017216}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1898404492032}, {'type': 'perplexity', 'in_batch_dim': [8, 112], 'batch_size': 8, 'flops': 2214805229440}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1898404492032}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1582003754624}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1582003754624}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1582003754624}, {'type': 'perplexity', 'in_batch_dim': [8, 64], 'batch_size': 8, 'flops': 1265603017216}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1582003754624}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1898404492032}, {'type': 'perplexity', 'in_batch_dim': [8, 64], 'batch_size': 8, 'flops': 1265603017216}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1582003754624}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1898404492032}, {'type': 'perplexity', 'in_batch_dim': [8, 64], 'batch_size': 8, 'flops': 1265603017216}, {'type': 'perplexity', 'in_batch_dim': [8, 112], 'batch_size': 8, 'flops': 2214805229440}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1582003754624}, {'type': 'perplexity', 'in_batch_dim': [8, 144], 'batch_size': 8, 'flops': 2847606704256}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1582003754624}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1582003754624}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1582003754624}, {'type': 'perplexity', 'in_batch_dim': [8, 64], 'batch_size': 8, 'flops': 1265603017216}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1898404492032}, {'type': 'perplexity', 'in_batch_dim': [8, 112], 'batch_size': 8, 'flops': 2214805229440}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1898404492032}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1582003754624}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1582003754624}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1582003754624}, {'type': 'perplexity', 'in_batch_dim': [8, 112], 'batch_size': 8, 'flops': 2214805229440}, {'type': 'perplexity', 'in_batch_dim': [8, 64], 'batch_size': 8, 'flops': 1265603017216}, {'type': 'perplexity', 'in_batch_dim': [8, 112], 'batch_size': 8, 'flops': 2214805229440}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1582003754624}, {'type': 'perplexity', 'in_batch_dim': [8, 64], 'batch_size': 8, 'flops': 1265603017216}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1582003754624}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1582003754624}, {'type': 'perplexity', 'in_batch_dim': [8, 64], 'batch_size': 8, 'flops': 1265603017216}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1582003754624}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1582003754624}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1582003754624}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1898404492032}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1582003754624}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1582003754624}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1898404492032}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1898404492032}, {'type': 'perplexity', 'in_batch_dim': [8, 112], 'batch_size': 8, 'flops': 2214805229440}, {'type': 'perplexity', 'in_batch_dim': [8, 64], 'batch_size': 8, 'flops': 1265603017216}, {'type': 'perplexity', 'in_batch_dim': [8, 112], 'batch_size': 8, 'flops': 2214805229440}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1898404492032}, {'type': 'perplexity', 'in_batch_dim': [8, 112], 'batch_size': 8, 'flops': 2214805229440}, {'type': 'perplexity', 'in_batch_dim': [8, 128], 'batch_size': 8, 'flops': 2531205966848}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1582003754624}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1898404492032}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1898404492032}, {'type': 'perplexity', 'in_batch_dim': [8, 192], 'batch_size': 8, 'flops': 3796808916480}, {'type': 'perplexity', 'in_batch_dim': [8, 64], 'batch_size': 8, 'flops': 1265603017216}, {'type': 'perplexity', 'in_batch_dim': [8, 144], 'batch_size': 8, 'flops': 2847606704256}, {'type': 'perplexity', 'in_batch_dim': [8, 64], 'batch_size': 8, 'flops': 1265603017216}, {'type': 'perplexity', 'in_batch_dim': [8, 144], 'batch_size': 8, 'flops': 2847606704256}, {'type': 'perplexity', 'in_batch_dim': [8, 64], 'batch_size': 8, 'flops': 1265603017216}, {'type': 'perplexity', 'in_batch_dim': [8, 64], 'batch_size': 8, 'flops': 1265603017216}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1898404492032}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1898404492032}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1582003754624}, {'type': 'perplexity', 'in_batch_dim': [8, 128], 'batch_size': 8, 'flops': 2531205966848}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1898404492032}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1898404492032}, {'type': 'perplexity', 'in_batch_dim': [8, 112], 'batch_size': 8, 'flops': 2214805229440}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1582003754624}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1582003754624}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1898404492032}, {'type': 'perplexity', 'in_batch_dim': [8, 64], 'batch_size': 8, 'flops': 1265603017216}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1898404492032}, {'type': 'perplexity', 'in_batch_dim': [8, 112], 'batch_size': 8, 'flops': 2214805229440}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1898404492032}, {'type': 'perplexity', 'in_batch_dim': [8, 64], 'batch_size': 8, 'flops': 1265603017216}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1582003754624}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1898404492032}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1898404492032}, {'type': 'perplexity', 'in_batch_dim': [8, 64], 'batch_size': 8, 'flops': 1265603017216}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1898404492032}, {'type': 'perplexity', 'in_batch_dim': [8, 64], 'batch_size': 8, 'flops': 1265603017216}, {'type': 'perplexity', 'in_batch_dim': [8, 112], 'batch_size': 8, 'flops': 2214805229440}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1582003754624}, {'type': 'perplexity', 'in_batch_dim': [8, 128], 'batch_size': 8, 'flops': 2531205966848}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1582003754624}, {'type': 'perplexity', 'in_batch_dim': [8, 112], 'batch_size': 8, 'flops': 2214805229440}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1898404492032}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1582003754624}, {'type': 'perplexity', 'in_batch_dim': [8, 112], 'batch_size': 8, 'flops': 2214805229440}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1582003754624}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1898404492032}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1898404492032}, {'type': 'perplexity', 'in_batch_dim': [8, 128], 'batch_size': 8, 'flops': 2531205966848}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1898404492032}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1898404492032}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1582003754624}, {'type': 'perplexity', 'in_batch_dim': [8, 112], 'batch_size': 8, 'flops': 2214805229440}, {'type': 'perplexity', 'in_batch_dim': [8, 128], 'batch_size': 8, 'flops': 2531205966848}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1898404492032}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1582003754624}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1582003754624}, {'type': 'perplexity', 'in_batch_dim': [8, 128], 'batch_size': 8, 'flops': 2531205966848}, {'type': 'perplexity', 'in_batch_dim': [8, 112], 'batch_size': 8, 'flops': 2214805229440}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1582003754624}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1582003754624}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1898404492032}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1582003754624}, {'type': 'perplexity', 'in_batch_dim': [8, 64], 'batch_size': 8, 'flops': 1265603017216}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1582003754624}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1582003754624}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1582003754624}, {'type': 'perplexity', 'in_batch_dim': [8, 112], 'batch_size': 8, 'flops': 2214805229440}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1582003754624}, {'type': 'perplexity', 'in_batch_dim': [8, 64], 'batch_size': 8, 'flops': 1265603017216}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1582003754624}, {'type': 'perplexity', 'in_batch_dim': [8, 128], 'batch_size': 8, 'flops': 2531205966848}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1898404492032}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1898404492032}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1582003754624}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1898404492032}, {'type': 'perplexity', 'in_batch_dim': [8, 64], 'batch_size': 8, 'flops': 1265603017216}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1898404492032}, {'type': 'perplexity', 'in_batch_dim': [8, 112], 'batch_size': 8, 'flops': 2214805229440}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1898404492032}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1582003754624}, {'type': 'perplexity', 'in_batch_dim': [8, 112], 'batch_size': 8, 'flops': 2214805229440}, {'type': 'perplexity', 'in_batch_dim': [8, 112], 'batch_size': 8, 'flops': 2214805229440}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1898404492032}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1898404492032}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1898404492032}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1898404492032}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1582003754624}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1582003754624}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1582003754624}, {'type': 'perplexity', 'in_batch_dim': [8, 112], 'batch_size': 8, 'flops': 2214805229440}, {'type': 'perplexity', 'in_batch_dim': [8, 64], 'batch_size': 8, 'flops': 1265603017216}, {'type': 'perplexity', 'in_batch_dim': [8, 144], 'batch_size': 8, 'flops': 2847606704256}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1898404492032}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1898404492032}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1898404492032}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1898404492032}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1898404492032}, {'type': 'perplexity', 'in_batch_dim': [8, 112], 'batch_size': 8, 'flops': 2214805229440}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1582003754624}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1582003754624}, {'type': 'perplexity', 'in_batch_dim': [8, 64], 'batch_size': 8, 'flops': 1265603017216}, {'type': 'perplexity', 'in_batch_dim': [8, 112], 'batch_size': 8, 'flops': 2214805229440}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1582003754624}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1898404492032}, {'type': 'perplexity', 'in_batch_dim': [8, 64], 'batch_size': 8, 'flops': 1265603017216}, {'type': 'perplexity', 'in_batch_dim': [8, 112], 'batch_size': 8, 'flops': 2214805229440}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1898404492032}, {'type': 'perplexity', 'in_batch_dim': [8, 64], 'batch_size': 8, 'flops': 1265603017216}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1582003754624}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1582003754624}, {'type': 'perplexity', 'in_batch_dim': [8, 64], 'batch_size': 8, 'flops': 1265603017216}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1898404492032}, {'type': 'perplexity', 'in_batch_dim': [8, 64], 'batch_size': 8, 'flops': 1265603017216}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1582003754624}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1898404492032}, {'type': 'perplexity', 'in_batch_dim': [8, 112], 'batch_size': 8, 'flops': 2214805229440}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1582003754624}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1898404492032}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1582003754624}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1898404492032}, {'type': 'perplexity', 'in_batch_dim': [8, 112], 'batch_size': 8, 'flops': 2214805229440}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1898404492032}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1898404492032}, {'type': 'perplexity', 'in_batch_dim': [8, 64], 'batch_size': 8, 'flops': 1265603017216}, {'type': 'perplexity', 'in_batch_dim': [8, 64], 'batch_size': 8, 'flops': 1265603017216}, {'type': 'perplexity', 'in_batch_dim': [8, 128], 'batch_size': 8, 'flops': 2531205966848}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1582003754624}, {'type': 'perplexity', 'in_batch_dim': [8, 64], 'batch_size': 8, 'flops': 1265603017216}, {'type': 'perplexity', 'in_batch_dim': [8, 112], 'batch_size': 8, 'flops': 2214805229440}, {'type': 'perplexity', 'in_batch_dim': [8, 112], 'batch_size': 8, 'flops': 2214805229440}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1898404492032}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1582003754624}, {'type': 'perplexity', 'in_batch_dim': [8, 128], 'batch_size': 8, 'flops': 2531205966848}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1898404492032}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1582003754624}, {'type': 'perplexity', 'in_batch_dim': [8, 112], 'batch_size': 8, 'flops': 2214805229440}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1898404492032}, {'type': 'perplexity', 'in_batch_dim': [8, 64], 'batch_size': 8, 'flops': 1265603017216}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1582003754624}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1898404492032}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1898404492032}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1582003754624}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1582003754624}, {'type': 'perplexity', 'in_batch_dim': [8, 64], 'batch_size': 8, 'flops': 1265603017216}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1898404492032}, {'type': 'perplexity', 'in_batch_dim': [8, 112], 'batch_size': 8, 'flops': 2214805229440}, {'type': 'perplexity', 'in_batch_dim': [8, 64], 'batch_size': 8, 'flops': 1265603017216}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1898404492032}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1582003754624}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1582003754624}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1898404492032}, {'type': 'perplexity', 'in_batch_dim': [8, 112], 'batch_size': 8, 'flops': 2214805229440}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1898404492032}, {'type': 'perplexity', 'in_batch_dim': [8, 112], 'batch_size': 8, 'flops': 2214805229440}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1582003754624}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1582003754624}, {'type': 'perplexity', 'in_batch_dim': [8, 64], 'batch_size': 8, 'flops': 1265603017216}, {'type': 'perplexity', 'in_batch_dim': [8, 112], 'batch_size': 8, 'flops': 2214805229440}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1582003754624}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1898404492032}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1582003754624}, {'type': 'perplexity', 'in_batch_dim': [8, 112], 'batch_size': 8, 'flops': 2214805229440}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1898404492032}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1898404492032}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1582003754624}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1898404492032}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1582003754624}, {'type': 'perplexity', 'in_batch_dim': [8, 112], 'batch_size': 8, 'flops': 2214805229440}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1582003754624}, {'type': 'perplexity', 'in_batch_dim': [8, 64], 'batch_size': 8, 'flops': 1265603017216}, {'type': 'perplexity', 'in_batch_dim': [8, 64], 'batch_size': 8, 'flops': 1265603017216}, {'type': 'perplexity', 'in_batch_dim': [8, 48], 'batch_size': 8, 'flops': 949202279808}, {'type': 'perplexity', 'in_batch_dim': [8, 112], 'batch_size': 8, 'flops': 2214805229440}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1582003754624}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1898404492032}, {'type': 'perplexity', 'in_batch_dim': [8, 128], 'batch_size': 8, 'flops': 2531205966848}, {'type': 'perplexity', 'in_batch_dim': [8, 112], 'batch_size': 8, 'flops': 2214805229440}, {'type': 'perplexity', 'in_batch_dim': [8, 128], 'batch_size': 8, 'flops': 2531205966848}, {'type': 'perplexity', 'in_batch_dim': [8, 112], 'batch_size': 8, 'flops': 2214805229440}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1582003754624}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1898404492032}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1582003754624}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1582003754624}, {'type': 'perplexity', 'in_batch_dim': [8, 112], 'batch_size': 8, 'flops': 2214805229440}, {'type': 'perplexity', 'in_batch_dim': [8, 112], 'batch_size': 8, 'flops': 2214805229440}, {'type': 'perplexity', 'in_batch_dim': [8, 64], 'batch_size': 8, 'flops': 1265603017216}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1898404492032}, {'type': 'perplexity', 'in_batch_dim': [8, 112], 'batch_size': 8, 'flops': 2214805229440}, {'type': 'perplexity', 'in_batch_dim': [8, 64], 'batch_size': 8, 'flops': 1265603017216}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1898404492032}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1582003754624}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1898404492032}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1898404492032}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1898404492032}, {'type': 'perplexity', 'in_batch_dim': [8, 48], 'batch_size': 8, 'flops': 949202279808}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1898404492032}, {'type': 'perplexity', 'in_batch_dim': [8, 112], 'batch_size': 8, 'flops': 2214805229440}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1582003754624}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1898404492032}, {'type': 'perplexity', 'in_batch_dim': [8, 64], 'batch_size': 8, 'flops': 1265603017216}, {'type': 'perplexity', 'in_batch_dim': [8, 112], 'batch_size': 8, 'flops': 2214805229440}, {'type': 'perplexity', 'in_batch_dim': [8, 64], 'batch_size': 8, 'flops': 1265603017216}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1582003754624}, {'type': 'perplexity', 'in_batch_dim': [8, 64], 'batch_size': 8, 'flops': 1265603017216}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1582003754624}, {'type': 'perplexity', 'in_batch_dim': [8, 112], 'batch_size': 8, 'flops': 2214805229440}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1582003754624}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1582003754624}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1582003754624}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1582003754624}, {'type': 'perplexity', 'in_batch_dim': [8, 112], 'batch_size': 8, 'flops': 2214805229440}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1582003754624}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1898404492032}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1898404492032}, {'type': 'perplexity', 'in_batch_dim': [8, 112], 'batch_size': 8, 'flops': 2214805229440}, {'type': 'perplexity', 'in_batch_dim': [8, 144], 'batch_size': 8, 'flops': 2847606704256}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1582003754624}, {'type': 'perplexity', 'in_batch_dim': [8, 64], 'batch_size': 8, 'flops': 1265603017216}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1898404492032}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1898404492032}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1582003754624}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1898404492032}, {'type': 'perplexity', 'in_batch_dim': [8, 128], 'batch_size': 8, 'flops': 2531205966848}, {'type': 'perplexity', 'in_batch_dim': [8, 112], 'batch_size': 8, 'flops': 2214805229440}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1898404492032}, {'type': 'perplexity', 'in_batch_dim': [8, 64], 'batch_size': 8, 'flops': 1265603017216}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1898404492032}, {'type': 'perplexity', 'in_batch_dim': [8, 160], 'batch_size': 8, 'flops': 3164007441664}, {'type': 'perplexity', 'in_batch_dim': [8, 64], 'batch_size': 8, 'flops': 1265603017216}, {'type': 'perplexity', 'in_batch_dim': [8, 112], 'batch_size': 8, 'flops': 2214805229440}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1898404492032}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1898404492032}, {'type': 'perplexity', 'in_batch_dim': [8, 144], 'batch_size': 8, 'flops': 2847606704256}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1582003754624}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1898404492032}, {'type': 'perplexity', 'in_batch_dim': [8, 64], 'batch_size': 8, 'flops': 1265603017216}, {'type': 'perplexity', 'in_batch_dim': [8, 64], 'batch_size': 8, 'flops': 1265603017216}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1898404492032}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1898404492032}, {'type': 'perplexity', 'in_batch_dim': [8, 112], 'batch_size': 8, 'flops': 2214805229440}, {'type': 'perplexity', 'in_batch_dim': [8, 112], 'batch_size': 8, 'flops': 2214805229440}, {'type': 'perplexity', 'in_batch_dim': [8, 128], 'batch_size': 8, 'flops': 2531205966848}, {'type': 'perplexity', 'in_batch_dim': [8, 112], 'batch_size': 8, 'flops': 2214805229440}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1582003754624}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1582003754624}, {'type': 'perplexity', 'in_batch_dim': [8, 112], 'batch_size': 8, 'flops': 2214805229440}, {'type': 'perplexity', 'in_batch_dim': [8, 128], 'batch_size': 8, 'flops': 2531205966848}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1582003754624}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1582003754624}, {'type': 'perplexity', 'in_batch_dim': [8, 64], 'batch_size': 8, 'flops': 1265603017216}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1898404492032}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1582003754624}, {'type': 'perplexity', 'in_batch_dim': [8, 112], 'batch_size': 8, 'flops': 2214805229440}, {'type': 'perplexity', 'in_batch_dim': [8, 64], 'batch_size': 8, 'flops': 1265603017216}, {'type': 'perplexity', 'in_batch_dim': [8, 64], 'batch_size': 8, 'flops': 1265603017216}, {'type': 'perplexity', 'in_batch_dim': [8, 128], 'batch_size': 8, 'flops': 2531205966848}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1898404492032}, {'type': 'perplexity', 'in_batch_dim': [8, 112], 'batch_size': 8, 'flops': 2214805229440}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1582003754624}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1582003754624}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1582003754624}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1582003754624}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1582003754624}, {'type': 'perplexity', 'in_batch_dim': [8, 112], 'batch_size': 8, 'flops': 2214805229440}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1898404492032}, {'type': 'perplexity', 'in_batch_dim': [8, 112], 'batch_size': 8, 'flops': 2214805229440}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1898404492032}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1582003754624}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1582003754624}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1898404492032}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1898404492032}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1582003754624}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1582003754624}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1582003754624}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1582003754624}, {'type': 'perplexity', 'in_batch_dim': [8, 48], 'batch_size': 8, 'flops': 949202279808}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1582003754624}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1898404492032}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1582003754624}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1898404492032}, {'type': 'perplexity', 'in_batch_dim': [8, 112], 'batch_size': 8, 'flops': 2214805229440}, {'type': 'perplexity', 'in_batch_dim': [8, 128], 'batch_size': 8, 'flops': 2531205966848}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1898404492032}, {'type': 'perplexity', 'in_batch_dim': [8, 64], 'batch_size': 8, 'flops': 1265603017216}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1898404492032}, {'type': 'perplexity', 'in_batch_dim': [8, 64], 'batch_size': 8, 'flops': 1265603017216}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1582003754624}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1582003754624}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1582003754624}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1582003754624}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1898404492032}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1582003754624}, {'type': 'perplexity', 'in_batch_dim': [8, 112], 'batch_size': 8, 'flops': 2214805229440}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1898404492032}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1582003754624}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1898404492032}, {'type': 'perplexity', 'in_batch_dim': [8, 64], 'batch_size': 8, 'flops': 1265603017216}, {'type': 'perplexity', 'in_batch_dim': [8, 64], 'batch_size': 8, 'flops': 1265603017216}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1898404492032}, {'type': 'perplexity', 'in_batch_dim': [8, 64], 'batch_size': 8, 'flops': 1265603017216}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1582003754624}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1582003754624}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1582003754624}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1582003754624}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1582003754624}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1582003754624}, {'type': 'perplexity', 'in_batch_dim': [8, 64], 'batch_size': 8, 'flops': 1265603017216}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1582003754624}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1582003754624}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1898404492032}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1898404492032}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1898404492032}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1898404492032}, {'type': 'perplexity', 'in_batch_dim': [8, 128], 'batch_size': 8, 'flops': 2531205966848}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1582003754624}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1898404492032}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1898404492032}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1898404492032}, {'type': 'perplexity', 'in_batch_dim': [8, 64], 'batch_size': 8, 'flops': 1265603017216}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1898404492032}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1898404492032}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1582003754624}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1898404492032}, {'type': 'perplexity', 'in_batch_dim': [8, 112], 'batch_size': 8, 'flops': 2214805229440}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1898404492032}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1582003754624}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1898404492032}, {'type': 'perplexity', 'in_batch_dim': [8, 64], 'batch_size': 8, 'flops': 1265603017216}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1898404492032}, {'type': 'perplexity', 'in_batch_dim': [8, 112], 'batch_size': 8, 'flops': 2214805229440}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1898404492032}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1898404492032}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1582003754624}, {'type': 'perplexity', 'in_batch_dim': [8, 64], 'batch_size': 8, 'flops': 1265603017216}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1582003754624}, {'type': 'perplexity', 'in_batch_dim': [8, 128], 'batch_size': 8, 'flops': 2531205966848}, {'type': 'perplexity', 'in_batch_dim': [8, 64], 'batch_size': 8, 'flops': 1265603017216}, {'type': 'perplexity', 'in_batch_dim': [8, 128], 'batch_size': 8, 'flops': 2531205966848}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1898404492032}, {'type': 'perplexity', 'in_batch_dim': [8, 112], 'batch_size': 8, 'flops': 2214805229440}, {'type': 'perplexity', 'in_batch_dim': [8, 112], 'batch_size': 8, 'flops': 2214805229440}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1898404492032}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1898404492032}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1582003754624}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1582003754624}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1582003754624}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1898404492032}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1582003754624}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1582003754624}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1898404492032}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1582003754624}, {'type': 'perplexity', 'in_batch_dim': [8, 64], 'batch_size': 8, 'flops': 1265603017216}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1898404492032}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1898404492032}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1898404492032}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1898404492032}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1582003754624}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1898404492032}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1582003754624}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1582003754624}, {'type': 'perplexity', 'in_batch_dim': [8, 112], 'batch_size': 8, 'flops': 2214805229440}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1582003754624}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1898404492032}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1582003754624}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1582003754624}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1582003754624}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1898404492032}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1582003754624}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1898404492032}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1582003754624}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1582003754624}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1898404492032}, {'type': 'perplexity', 'in_batch_dim': [8, 112], 'batch_size': 8, 'flops': 2214805229440}, {'type': 'perplexity', 'in_batch_dim': [8, 128], 'batch_size': 8, 'flops': 2531205966848}, {'type': 'perplexity', 'in_batch_dim': [8, 112], 'batch_size': 8, 'flops': 2214805229440}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1582003754624}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1898404492032}, {'type': 'perplexity', 'in_batch_dim': [8, 64], 'batch_size': 8, 'flops': 1265603017216}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1898404492032}, {'type': 'perplexity', 'in_batch_dim': [8, 64], 'batch_size': 8, 'flops': 1265603017216}, {'type': 'perplexity', 'in_batch_dim': [8, 144], 'batch_size': 8, 'flops': 2847606704256}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1582003754624}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1898404492032}, {'type': 'perplexity', 'in_batch_dim': [8, 112], 'batch_size': 8, 'flops': 2214805229440}, {'type': 'perplexity', 'in_batch_dim': [8, 144], 'batch_size': 8, 'flops': 2847606704256}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1582003754624}, {'type': 'perplexity', 'in_batch_dim': [8, 112], 'batch_size': 8, 'flops': 2214805229440}, {'type': 'perplexity', 'in_batch_dim': [8, 64], 'batch_size': 8, 'flops': 1265603017216}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1582003754624}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1898404492032}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1898404492032}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1582003754624}, {'type': 'perplexity', 'in_batch_dim': [8, 112], 'batch_size': 8, 'flops': 2214805229440}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1582003754624}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1582003754624}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1582003754624}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1582003754624}, {'type': 'perplexity', 'in_batch_dim': [8, 112], 'batch_size': 8, 'flops': 2214805229440}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1898404492032}, {'type': 'perplexity', 'in_batch_dim': [8, 128], 'batch_size': 8, 'flops': 2531205966848}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1582003754624}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1582003754624}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1898404492032}, {'type': 'perplexity', 'in_batch_dim': [8, 64], 'batch_size': 8, 'flops': 1265603017216}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1898404492032}, {'type': 'perplexity', 'in_batch_dim': [8, 64], 'batch_size': 8, 'flops': 1265603017216}, {'type': 'perplexity', 'in_batch_dim': [8, 112], 'batch_size': 8, 'flops': 2214805229440}, {'type': 'perplexity', 'in_batch_dim': [8, 112], 'batch_size': 8, 'flops': 2214805229440}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1582003754624}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1898404492032}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1582003754624}, {'type': 'perplexity', 'in_batch_dim': [8, 64], 'batch_size': 8, 'flops': 1265603017216}, {'type': 'perplexity', 'in_batch_dim': [8, 64], 'batch_size': 8, 'flops': 1265603017216}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1898404492032}, {'type': 'perplexity', 'in_batch_dim': [8, 64], 'batch_size': 8, 'flops': 1265603017216}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1898404492032}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1898404492032}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1582003754624}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1898404492032}, {'type': 'perplexity', 'in_batch_dim': [8, 144], 'batch_size': 8, 'flops': 2847606704256}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1898404492032}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1582003754624}, {'type': 'perplexity', 'in_batch_dim': [8, 128], 'batch_size': 8, 'flops': 2531205966848}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1898404492032}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1898404492032}, {'type': 'perplexity', 'in_batch_dim': [8, 112], 'batch_size': 8, 'flops': 2214805229440}, {'type': 'perplexity', 'in_batch_dim': [8, 112], 'batch_size': 8, 'flops': 2214805229440}, {'type': 'perplexity', 'in_batch_dim': [8, 64], 'batch_size': 8, 'flops': 1265603017216}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1582003754624}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1898404492032}, {'type': 'perplexity', 'in_batch_dim': [8, 128], 'batch_size': 8, 'flops': 2531205966848}, {'type': 'perplexity', 'in_batch_dim': [8, 144], 'batch_size': 8, 'flops': 2847606704256}, {'type': 'perplexity', 'in_batch_dim': [8, 112], 'batch_size': 8, 'flops': 2214805229440}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1582003754624}, {'type': 'perplexity', 'in_batch_dim': [8, 64], 'batch_size': 8, 'flops': 1265603017216}, {'type': 'perplexity', 'in_batch_dim': [8, 64], 'batch_size': 8, 'flops': 1265603017216}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1898404492032}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1582003754624}, {'type': 'perplexity', 'in_batch_dim': [8, 64], 'batch_size': 8, 'flops': 1265603017216}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1898404492032}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1582003754624}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1898404492032}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1898404492032}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1582003754624}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1582003754624}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1582003754624}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1582003754624}, {'type': 'perplexity', 'in_batch_dim': [3, 48], 'batch_size': 8, 'flops': 949202279808}], 'timestamp': '2025-09-10 02:27:09.308408', 'step': 932, 'epoch': 1} {'type': 'pplx', 'content': 7597.507864978297, 'timestamp': '2025-09-10 02:27:09.318442', 'step': 932, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 3322488817984}, 'timestamp': '2025-09-10 02:27:09.349747', 'step': 932, 'epoch': 1} {'type': 'loss', 'content': 0.11863166838884354, 'timestamp': '2025-09-10 02:27:09.351876', 'step': 933, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 2847885091968}, 'timestamp': '2025-09-10 02:27:09.383698', 'step': 933, 'epoch': 1} {'type': 'loss', 'content': 0.14585715532302856, 'timestamp': '2025-09-10 02:27:09.385825', 'step': 934, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 2847885091968}, 'timestamp': '2025-09-10 02:27:09.416603', 'step': 934, 'epoch': 1} {'type': 'loss', 'content': 0.27185508608818054, 'timestamp': '2025-09-10 02:27:09.419199', 'step': 935, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 2847885091968}, 'timestamp': '2025-09-10 02:27:09.450705', 'step': 935, 'epoch': 1} {'type': 'loss', 'content': 0.14034272730350494, 'timestamp': '2025-09-10 02:27:09.475240', 'step': 936, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 2847885091968}, 'timestamp': '2025-09-10 02:27:09.507268', 'step': 936, 'epoch': 1} {'type': 'loss', 'content': 0.24134226143360138, 'timestamp': '2025-09-10 02:27:09.509874', 'step': 937, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 128], 'flops': 3797092544000}, 'timestamp': '2025-09-10 02:27:09.540860', 'step': 937, 'epoch': 1} {'type': 'loss', 'content': 0.29246985912323, 'timestamp': '2025-09-10 02:27:09.543117', 'step': 938, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 176], 'flops': 5220903722048}, 'timestamp': '2025-09-10 02:27:09.573178', 'step': 938, 'epoch': 1} {'type': 'loss', 'content': 0.2275475412607193, 'timestamp': '2025-09-10 02:27:09.577559', 'step': 939, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 3322488817984}, 'timestamp': '2025-09-10 02:27:09.607439', 'step': 939, 'epoch': 1} {'type': 'loss', 'content': 0.22577905654907227, 'timestamp': '2025-09-10 02:27:09.630906', 'step': 940, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 3322488817984}, 'timestamp': '2025-09-10 02:27:09.660521', 'step': 940, 'epoch': 1} {'type': 'loss', 'content': 0.15362223982810974, 'timestamp': '2025-09-10 02:27:09.662661', 'step': 941, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 3322488817984}, 'timestamp': '2025-09-10 02:27:09.692211', 'step': 941, 'epoch': 1} {'type': 'loss', 'content': 0.2507408857345581, 'timestamp': '2025-09-10 02:27:09.694191', 'step': 942, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 2847885091968}, 'timestamp': '2025-09-10 02:27:09.725194', 'step': 942, 'epoch': 1} {'type': 'loss', 'content': 0.21640831232070923, 'timestamp': '2025-09-10 02:27:09.727250', 'step': 943, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 3322488817984}, 'timestamp': '2025-09-10 02:27:09.756635', 'step': 943, 'epoch': 1} {'type': 'loss', 'content': 0.1532989889383316, 'timestamp': '2025-09-10 02:27:09.779870', 'step': 944, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 2847885091968}, 'timestamp': '2025-09-10 02:27:09.809225', 'step': 944, 'epoch': 1} {'type': 'loss', 'content': 0.19195526838302612, 'timestamp': '2025-09-10 02:27:09.811464', 'step': 945, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 2847885091968}, 'timestamp': '2025-09-10 02:27:09.840595', 'step': 945, 'epoch': 1} {'type': 'loss', 'content': 0.12260866165161133, 'timestamp': '2025-09-10 02:27:09.842747', 'step': 946, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 3322488817984}, 'timestamp': '2025-09-10 02:27:09.872340', 'step': 946, 'epoch': 1} {'type': 'loss', 'content': 0.25784096121788025, 'timestamp': '2025-09-10 02:27:09.874203', 'step': 947, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 2847885091968}, 'timestamp': '2025-09-10 02:27:09.904357', 'step': 947, 'epoch': 1} {'type': 'loss', 'content': 0.23959016799926758, 'timestamp': '2025-09-10 02:27:09.928001', 'step': 948, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 80], 'flops': 2373281365952}, 'timestamp': '2025-09-10 02:27:09.958475', 'step': 948, 'epoch': 1} {'type': 'loss', 'content': 0.20824910700321198, 'timestamp': '2025-09-10 02:27:09.960752', 'step': 949, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 3322488817984}, 'timestamp': '2025-09-10 02:27:09.992250', 'step': 949, 'epoch': 1} {'type': 'loss', 'content': 0.16875068843364716, 'timestamp': '2025-09-10 02:27:09.994477', 'step': 950, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 128], 'flops': 3797092544000}, 'timestamp': '2025-09-10 02:27:10.023740', 'step': 950, 'epoch': 1} {'type': 'loss', 'content': 0.23582598567008972, 'timestamp': '2025-09-10 02:27:10.026707', 'step': 951, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 128], 'flops': 3797092544000}, 'timestamp': '2025-09-10 02:27:10.057588', 'step': 951, 'epoch': 1} {'type': 'loss', 'content': 0.18785466253757477, 'timestamp': '2025-09-10 02:27:10.081325', 'step': 952, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 3322488817984}, 'timestamp': '2025-09-10 02:27:10.112024', 'step': 952, 'epoch': 1} {'type': 'loss', 'content': 0.11664988100528717, 'timestamp': '2025-09-10 02:27:10.115298', 'step': 953, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 80], 'flops': 2373281365952}, 'timestamp': '2025-09-10 02:27:10.145453', 'step': 953, 'epoch': 1} {'type': 'loss', 'content': 0.13442488014698029, 'timestamp': '2025-09-10 02:27:10.147613', 'step': 954, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 3322488817984}, 'timestamp': '2025-09-10 02:27:10.177257', 'step': 954, 'epoch': 1} {'type': 'loss', 'content': 0.14678749442100525, 'timestamp': '2025-09-10 02:27:10.183146', 'step': 955, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 3322488817984}, 'timestamp': '2025-09-10 02:27:10.212696', 'step': 955, 'epoch': 1} {'type': 'loss', 'content': 0.12725917994976044, 'timestamp': '2025-09-10 02:27:10.236199', 'step': 956, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 3322488817984}, 'timestamp': '2025-09-10 02:27:10.266840', 'step': 956, 'epoch': 1} {'type': 'loss', 'content': 0.20168191194534302, 'timestamp': '2025-09-10 02:27:10.269184', 'step': 957, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 128], 'flops': 3797092544000}, 'timestamp': '2025-09-10 02:27:10.298551', 'step': 957, 'epoch': 1} {'type': 'loss', 'content': 0.19329911470413208, 'timestamp': '2025-09-10 02:27:10.300685', 'step': 958, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 3322488817984}, 'timestamp': '2025-09-10 02:27:10.330444', 'step': 958, 'epoch': 1} {'type': 'loss', 'content': 0.24769721925258636, 'timestamp': '2025-09-10 02:27:10.332811', 'step': 959, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 3322488817984}, 'timestamp': '2025-09-10 02:27:10.362199', 'step': 959, 'epoch': 1} {'type': 'loss', 'content': 0.17005087435245514, 'timestamp': '2025-09-10 02:27:10.385610', 'step': 960, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 3322488817984}, 'timestamp': '2025-09-10 02:27:10.415298', 'step': 960, 'epoch': 1} {'type': 'loss', 'content': 0.15700003504753113, 'timestamp': '2025-09-10 02:27:10.417326', 'step': 961, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 2847885091968}, 'timestamp': '2025-09-10 02:27:10.447635', 'step': 961, 'epoch': 1} {'type': 'loss', 'content': 0.17773790657520294, 'timestamp': '2025-09-10 02:27:10.450079', 'step': 962, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 3322488817984}, 'timestamp': '2025-09-10 02:27:10.480214', 'step': 962, 'epoch': 1} {'type': 'loss', 'content': 0.18954318761825562, 'timestamp': '2025-09-10 02:27:10.482332', 'step': 963, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 3322488817984}, 'timestamp': '2025-09-10 02:27:10.512027', 'step': 963, 'epoch': 1} {'type': 'loss', 'content': 0.24997751414775848, 'timestamp': '2025-09-10 02:27:10.535700', 'step': 964, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 80], 'flops': 2373281365952}, 'timestamp': '2025-09-10 02:27:10.566222', 'step': 964, 'epoch': 1} {'type': 'loss', 'content': 0.1994652897119522, 'timestamp': '2025-09-10 02:27:10.569359', 'step': 965, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 3322488817984}, 'timestamp': '2025-09-10 02:27:10.601459', 'step': 965, 'epoch': 1} {'type': 'loss', 'content': 0.21845319867134094, 'timestamp': '2025-09-10 02:27:10.603452', 'step': 966, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 160], 'flops': 4746299996032}, 'timestamp': '2025-09-10 02:27:10.633280', 'step': 966, 'epoch': 1} {'type': 'loss', 'content': 0.2024301141500473, 'timestamp': '2025-09-10 02:27:10.636115', 'step': 967, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 128], 'flops': 3797092544000}, 'timestamp': '2025-09-10 02:27:10.665858', 'step': 967, 'epoch': 1} {'type': 'loss', 'content': 0.25534912943840027, 'timestamp': '2025-09-10 02:27:10.689271', 'step': 968, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 3322488817984}, 'timestamp': '2025-09-10 02:27:10.719784', 'step': 968, 'epoch': 1} {'type': 'loss', 'content': 0.16552934050559998, 'timestamp': '2025-09-10 02:27:10.721878', 'step': 969, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 128], 'flops': 3797092544000}, 'timestamp': '2025-09-10 02:27:10.751637', 'step': 969, 'epoch': 1} {'type': 'loss', 'content': 0.2727462649345398, 'timestamp': '2025-09-10 02:27:10.753742', 'step': 970, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 3322488817984}, 'timestamp': '2025-09-10 02:27:10.783907', 'step': 970, 'epoch': 1} {'type': 'loss', 'content': 0.1560090184211731, 'timestamp': '2025-09-10 02:27:10.786220', 'step': 971, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 128], 'flops': 3797092544000}, 'timestamp': '2025-09-10 02:27:10.816019', 'step': 971, 'epoch': 1} {'type': 'loss', 'content': 0.2790234386920929, 'timestamp': '2025-09-10 02:27:10.839460', 'step': 972, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 2847885091968}, 'timestamp': '2025-09-10 02:27:10.869625', 'step': 972, 'epoch': 1} {'type': 'loss', 'content': 0.19261248409748077, 'timestamp': '2025-09-10 02:27:10.871766', 'step': 973, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 2847885091968}, 'timestamp': '2025-09-10 02:27:10.902681', 'step': 973, 'epoch': 1} {'type': 'loss', 'content': 0.15216779708862305, 'timestamp': '2025-09-10 02:27:10.904747', 'step': 974, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 128], 'flops': 3797092544000}, 'timestamp': '2025-09-10 02:27:10.934501', 'step': 974, 'epoch': 1} {'type': 'loss', 'content': 0.24322494864463806, 'timestamp': '2025-09-10 02:27:10.936507', 'step': 975, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 144], 'flops': 4271696270016}, 'timestamp': '2025-09-10 02:27:10.966484', 'step': 975, 'epoch': 1} {'type': 'loss', 'content': 0.16922324895858765, 'timestamp': '2025-09-10 02:27:10.990048', 'step': 976, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 128], 'flops': 3797092544000}, 'timestamp': '2025-09-10 02:27:11.020237', 'step': 976, 'epoch': 1} {'type': 'loss', 'content': 0.18006718158721924, 'timestamp': '2025-09-10 02:27:11.022293', 'step': 977, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 2847885091968}, 'timestamp': '2025-09-10 02:27:11.051808', 'step': 977, 'epoch': 1} {'type': 'loss', 'content': 0.21169695258140564, 'timestamp': '2025-09-10 02:27:11.054095', 'step': 978, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 3322488817984}, 'timestamp': '2025-09-10 02:27:11.083902', 'step': 978, 'epoch': 1} {'type': 'loss', 'content': 0.2314988076686859, 'timestamp': '2025-09-10 02:27:11.086329', 'step': 979, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 2847885091968}, 'timestamp': '2025-09-10 02:27:11.115870', 'step': 979, 'epoch': 1} {'type': 'loss', 'content': 0.1910230666399002, 'timestamp': '2025-09-10 02:27:11.139134', 'step': 980, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 2847885091968}, 'timestamp': '2025-09-10 02:27:11.168970', 'step': 980, 'epoch': 1} {'type': 'loss', 'content': 0.12954120337963104, 'timestamp': '2025-09-10 02:27:11.171260', 'step': 981, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 3322488817984}, 'timestamp': '2025-09-10 02:27:11.200828', 'step': 981, 'epoch': 1} {'type': 'loss', 'content': 0.13209041953086853, 'timestamp': '2025-09-10 02:27:11.203148', 'step': 982, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 2847885091968}, 'timestamp': '2025-09-10 02:27:11.233672', 'step': 982, 'epoch': 1} {'type': 'loss', 'content': 0.13711461424827576, 'timestamp': '2025-09-10 02:27:11.235979', 'step': 983, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 2847885091968}, 'timestamp': '2025-09-10 02:27:11.265197', 'step': 983, 'epoch': 1} {'type': 'loss', 'content': 0.31963181495666504, 'timestamp': '2025-09-10 02:27:11.288759', 'step': 984, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 128], 'flops': 3797092544000}, 'timestamp': '2025-09-10 02:27:11.320799', 'step': 984, 'epoch': 1} {'type': 'loss', 'content': 0.17975135147571564, 'timestamp': '2025-09-10 02:27:11.323108', 'step': 985, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 3322488817984}, 'timestamp': '2025-09-10 02:27:11.354075', 'step': 985, 'epoch': 1} {'type': 'loss', 'content': 0.08972207456827164, 'timestamp': '2025-09-10 02:27:11.356220', 'step': 986, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 128], 'flops': 3797092544000}, 'timestamp': '2025-09-10 02:27:11.388484', 'step': 986, 'epoch': 1} {'type': 'loss', 'content': 0.14344486594200134, 'timestamp': '2025-09-10 02:27:11.390679', 'step': 987, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 2847885091968}, 'timestamp': '2025-09-10 02:27:11.420935', 'step': 987, 'epoch': 1} {'type': 'loss', 'content': 0.25129079818725586, 'timestamp': '2025-09-10 02:27:11.444262', 'step': 988, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 3322488817984}, 'timestamp': '2025-09-10 02:27:11.474084', 'step': 988, 'epoch': 1} {'type': 'loss', 'content': 0.22979092597961426, 'timestamp': '2025-09-10 02:27:11.476147', 'step': 989, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 3322488817984}, 'timestamp': '2025-09-10 02:27:11.507214', 'step': 989, 'epoch': 1} {'type': 'loss', 'content': 0.16741050779819489, 'timestamp': '2025-09-10 02:27:11.509438', 'step': 990, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 3322488817984}, 'timestamp': '2025-09-10 02:27:11.539417', 'step': 990, 'epoch': 1} {'type': 'loss', 'content': 0.1082465723156929, 'timestamp': '2025-09-10 02:27:11.541631', 'step': 991, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 3322488817984}, 'timestamp': '2025-09-10 02:27:11.571651', 'step': 991, 'epoch': 1} {'type': 'loss', 'content': 0.1878109574317932, 'timestamp': '2025-09-10 02:27:11.595871', 'step': 992, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 128], 'flops': 3797092544000}, 'timestamp': '2025-09-10 02:27:11.625992', 'step': 992, 'epoch': 1} {'type': 'loss', 'content': 0.2013893723487854, 'timestamp': '2025-09-10 02:27:11.628193', 'step': 993, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 144], 'flops': 4271696270016}, 'timestamp': '2025-09-10 02:27:11.658399', 'step': 993, 'epoch': 1} {'type': 'loss', 'content': 0.24016988277435303, 'timestamp': '2025-09-10 02:27:11.661777', 'step': 994, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 3322488817984}, 'timestamp': '2025-09-10 02:27:11.692548', 'step': 994, 'epoch': 1} {'type': 'loss', 'content': 0.15641319751739502, 'timestamp': '2025-09-10 02:27:11.694835', 'step': 995, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 144], 'flops': 4271696270016}, 'timestamp': '2025-09-10 02:27:11.724753', 'step': 995, 'epoch': 1} {'type': 'loss', 'content': 0.22670170664787292, 'timestamp': '2025-09-10 02:27:11.748989', 'step': 996, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 128], 'flops': 3797092544000}, 'timestamp': '2025-09-10 02:27:11.779028', 'step': 996, 'epoch': 1} {'type': 'loss', 'content': 0.17176158726215363, 'timestamp': '2025-09-10 02:27:11.781195', 'step': 997, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 128], 'flops': 3797092544000}, 'timestamp': '2025-09-10 02:27:11.811508', 'step': 997, 'epoch': 1} {'type': 'loss', 'content': 0.1986025869846344, 'timestamp': '2025-09-10 02:27:11.813734', 'step': 998, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 3322488817984}, 'timestamp': '2025-09-10 02:27:11.844195', 'step': 998, 'epoch': 1} {'type': 'loss', 'content': 0.18298189342021942, 'timestamp': '2025-09-10 02:27:11.846715', 'step': 999, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 2847885091968}, 'timestamp': '2025-09-10 02:27:11.876670', 'step': 999, 'epoch': 1} {'type': 'loss', 'content': 0.14773091673851013, 'timestamp': '2025-09-10 02:27:11.900045', 'step': 1000, 'epoch': 1} {'type': 'info', 'content': 'Checkpoint saved at step 1000', 'timestamp': '2025-09-10 02:27:16.443202', 'step': 1000, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 128], 'flops': 3797092544000}, 'timestamp': '2025-09-10 02:27:16.479320', 'step': 1000, 'epoch': 1} {'type': 'loss', 'content': 0.18360276520252228, 'timestamp': '2025-09-10 02:27:16.481528', 'step': 1001, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 128], 'flops': 3797092544000}, 'timestamp': '2025-09-10 02:27:16.512373', 'step': 1001, 'epoch': 1} {'type': 'loss', 'content': 0.22811006009578705, 'timestamp': '2025-09-10 02:27:16.514472', 'step': 1002, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 2847885091968}, 'timestamp': '2025-09-10 02:27:16.545626', 'step': 1002, 'epoch': 1} {'type': 'loss', 'content': 0.3004962205886841, 'timestamp': '2025-09-10 02:27:16.549008', 'step': 1003, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 128], 'flops': 3797092544000}, 'timestamp': '2025-09-10 02:27:16.581038', 'step': 1003, 'epoch': 1} {'type': 'loss', 'content': 0.14789311587810516, 'timestamp': '2025-09-10 02:27:16.604592', 'step': 1004, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 2847885091968}, 'timestamp': '2025-09-10 02:27:16.636705', 'step': 1004, 'epoch': 1} {'type': 'loss', 'content': 0.1925252079963684, 'timestamp': '2025-09-10 02:27:16.639087', 'step': 1005, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 2847885091968}, 'timestamp': '2025-09-10 02:27:16.670453', 'step': 1005, 'epoch': 1} {'type': 'loss', 'content': 0.16256017982959747, 'timestamp': '2025-09-10 02:27:16.672651', 'step': 1006, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 128], 'flops': 3797092544000}, 'timestamp': '2025-09-10 02:27:16.704048', 'step': 1006, 'epoch': 1} {'type': 'loss', 'content': 0.20819640159606934, 'timestamp': '2025-09-10 02:27:16.706271', 'step': 1007, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 2847885091968}, 'timestamp': '2025-09-10 02:27:16.736105', 'step': 1007, 'epoch': 1} {'type': 'loss', 'content': 0.15134862065315247, 'timestamp': '2025-09-10 02:27:16.759759', 'step': 1008, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 3322488817984}, 'timestamp': '2025-09-10 02:27:16.790690', 'step': 1008, 'epoch': 1} {'type': 'loss', 'content': 0.2348453402519226, 'timestamp': '2025-09-10 02:27:16.793666', 'step': 1009, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 128], 'flops': 3797092544000}, 'timestamp': '2025-09-10 02:27:16.825633', 'step': 1009, 'epoch': 1} {'type': 'loss', 'content': 0.12984946370124817, 'timestamp': '2025-09-10 02:27:16.828745', 'step': 1010, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 144], 'flops': 4271696270016}, 'timestamp': '2025-09-10 02:27:16.861364', 'step': 1010, 'epoch': 1} {'type': 'loss', 'content': 0.2405894547700882, 'timestamp': '2025-09-10 02:27:16.863836', 'step': 1011, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 3322488817984}, 'timestamp': '2025-09-10 02:27:16.894972', 'step': 1011, 'epoch': 1} {'type': 'loss', 'content': 0.19891224801540375, 'timestamp': '2025-09-10 02:27:16.918822', 'step': 1012, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 80], 'flops': 2373281365952}, 'timestamp': '2025-09-10 02:27:16.950351', 'step': 1012, 'epoch': 1} {'type': 'loss', 'content': 0.2387792021036148, 'timestamp': '2025-09-10 02:27:16.954102', 'step': 1013, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 2847885091968}, 'timestamp': '2025-09-10 02:27:16.985594', 'step': 1013, 'epoch': 1} {'type': 'loss', 'content': 0.19056440889835358, 'timestamp': '2025-09-10 02:27:16.988120', 'step': 1014, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 2847885091968}, 'timestamp': '2025-09-10 02:27:17.018709', 'step': 1014, 'epoch': 1} {'type': 'loss', 'content': 0.12232933938503265, 'timestamp': '2025-09-10 02:27:17.021023', 'step': 1015, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 128], 'flops': 3797092544000}, 'timestamp': '2025-09-10 02:27:17.050647', 'step': 1015, 'epoch': 1} {'type': 'loss', 'content': 0.30387625098228455, 'timestamp': '2025-09-10 02:27:17.075574', 'step': 1016, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 128], 'flops': 3797092544000}, 'timestamp': '2025-09-10 02:27:17.106071', 'step': 1016, 'epoch': 1} {'type': 'loss', 'content': 0.14485669136047363, 'timestamp': '2025-09-10 02:27:17.108240', 'step': 1017, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 3322488817984}, 'timestamp': '2025-09-10 02:27:17.137949', 'step': 1017, 'epoch': 1} {'type': 'loss', 'content': 0.1257137805223465, 'timestamp': '2025-09-10 02:27:17.141918', 'step': 1018, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 2847885091968}, 'timestamp': '2025-09-10 02:27:17.175032', 'step': 1018, 'epoch': 1} {'type': 'loss', 'content': 0.14915227890014648, 'timestamp': '2025-09-10 02:27:17.177183', 'step': 1019, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 128], 'flops': 3797092544000}, 'timestamp': '2025-09-10 02:27:17.207182', 'step': 1019, 'epoch': 1} {'type': 'loss', 'content': 0.21181020140647888, 'timestamp': '2025-09-10 02:27:17.231276', 'step': 1020, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 128], 'flops': 3797092544000}, 'timestamp': '2025-09-10 02:27:17.261348', 'step': 1020, 'epoch': 1} {'type': 'loss', 'content': 0.1728980988264084, 'timestamp': '2025-09-10 02:27:17.263445', 'step': 1021, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 3322488817984}, 'timestamp': '2025-09-10 02:27:17.293150', 'step': 1021, 'epoch': 1} {'type': 'loss', 'content': 0.2833641767501831, 'timestamp': '2025-09-10 02:27:17.295203', 'step': 1022, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 128], 'flops': 3797092544000}, 'timestamp': '2025-09-10 02:27:17.324717', 'step': 1022, 'epoch': 1} {'type': 'loss', 'content': 0.1845497041940689, 'timestamp': '2025-09-10 02:27:17.327106', 'step': 1023, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 3322488817984}, 'timestamp': '2025-09-10 02:27:17.357329', 'step': 1023, 'epoch': 1} {'type': 'loss', 'content': 0.16210585832595825, 'timestamp': '2025-09-10 02:27:17.381415', 'step': 1024, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 2847885091968}, 'timestamp': '2025-09-10 02:27:17.411871', 'step': 1024, 'epoch': 1} {'type': 'loss', 'content': 0.13100995123386383, 'timestamp': '2025-09-10 02:27:17.413785', 'step': 1025, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 2847885091968}, 'timestamp': '2025-09-10 02:27:17.443368', 'step': 1025, 'epoch': 1} {'type': 'loss', 'content': 0.20044218003749847, 'timestamp': '2025-09-10 02:27:17.445724', 'step': 1026, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 3322488817984}, 'timestamp': '2025-09-10 02:27:17.475364', 'step': 1026, 'epoch': 1} {'type': 'loss', 'content': 0.14783288538455963, 'timestamp': '2025-09-10 02:27:17.477528', 'step': 1027, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 3322488817984}, 'timestamp': '2025-09-10 02:27:17.507358', 'step': 1027, 'epoch': 1} {'type': 'loss', 'content': 0.2663753032684326, 'timestamp': '2025-09-10 02:27:17.530898', 'step': 1028, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 3322488817984}, 'timestamp': '2025-09-10 02:27:17.560728', 'step': 1028, 'epoch': 1} {'type': 'loss', 'content': 0.256290078163147, 'timestamp': '2025-09-10 02:27:17.564283', 'step': 1029, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 160], 'flops': 4746299996032}, 'timestamp': '2025-09-10 02:27:17.598764', 'step': 1029, 'epoch': 1} {'type': 'loss', 'content': 0.1458352953195572, 'timestamp': '2025-09-10 02:27:17.601305', 'step': 1030, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 2847885091968}, 'timestamp': '2025-09-10 02:27:17.633417', 'step': 1030, 'epoch': 1} {'type': 'loss', 'content': 0.29890626668930054, 'timestamp': '2025-09-10 02:27:17.636176', 'step': 1031, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 3322488817984}, 'timestamp': '2025-09-10 02:27:17.667238', 'step': 1031, 'epoch': 1} {'type': 'loss', 'content': 0.19144555926322937, 'timestamp': '2025-09-10 02:27:17.690671', 'step': 1032, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 128], 'flops': 3797092544000}, 'timestamp': '2025-09-10 02:27:17.722293', 'step': 1032, 'epoch': 1} {'type': 'loss', 'content': 0.2737179398536682, 'timestamp': '2025-09-10 02:27:17.724517', 'step': 1033, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 3322488817984}, 'timestamp': '2025-09-10 02:27:17.754478', 'step': 1033, 'epoch': 1} {'type': 'loss', 'content': 0.14221656322479248, 'timestamp': '2025-09-10 02:27:17.757106', 'step': 1034, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 128], 'flops': 3797092544000}, 'timestamp': '2025-09-10 02:27:17.790278', 'step': 1034, 'epoch': 1} {'type': 'loss', 'content': 0.290533185005188, 'timestamp': '2025-09-10 02:27:17.792392', 'step': 1035, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 2847885091968}, 'timestamp': '2025-09-10 02:27:17.822197', 'step': 1035, 'epoch': 1} {'type': 'loss', 'content': 0.25461718440055847, 'timestamp': '2025-09-10 02:27:17.845863', 'step': 1036, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 2847885091968}, 'timestamp': '2025-09-10 02:27:17.880450', 'step': 1036, 'epoch': 1} {'type': 'loss', 'content': 0.21682392060756683, 'timestamp': '2025-09-10 02:27:17.882647', 'step': 1037, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 128], 'flops': 3797092544000}, 'timestamp': '2025-09-10 02:27:17.913323', 'step': 1037, 'epoch': 1} {'type': 'loss', 'content': 0.15734152495861053, 'timestamp': '2025-09-10 02:27:17.915637', 'step': 1038, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 3322488817984}, 'timestamp': '2025-09-10 02:27:17.946578', 'step': 1038, 'epoch': 1} {'type': 'loss', 'content': 0.12433726340532303, 'timestamp': '2025-09-10 02:27:17.950184', 'step': 1039, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 176], 'flops': 5220903722048}, 'timestamp': '2025-09-10 02:27:17.979806', 'step': 1039, 'epoch': 1} {'type': 'loss', 'content': 0.21165816485881805, 'timestamp': '2025-09-10 02:27:18.004744', 'step': 1040, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 3322488817984}, 'timestamp': '2025-09-10 02:27:18.035203', 'step': 1040, 'epoch': 1} {'type': 'loss', 'content': 0.24032491445541382, 'timestamp': '2025-09-10 02:27:18.039392', 'step': 1041, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 3322488817984}, 'timestamp': '2025-09-10 02:27:18.069727', 'step': 1041, 'epoch': 1} {'type': 'loss', 'content': 0.2344922125339508, 'timestamp': '2025-09-10 02:27:18.072008', 'step': 1042, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 144], 'flops': 4271696270016}, 'timestamp': '2025-09-10 02:27:18.102510', 'step': 1042, 'epoch': 1} {'type': 'loss', 'content': 0.15674595534801483, 'timestamp': '2025-09-10 02:27:18.108587', 'step': 1043, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 3322488817984}, 'timestamp': '2025-09-10 02:27:18.139654', 'step': 1043, 'epoch': 1} {'type': 'loss', 'content': 0.22358353435993195, 'timestamp': '2025-09-10 02:27:18.163460', 'step': 1044, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 128], 'flops': 3797092544000}, 'timestamp': '2025-09-10 02:27:18.194368', 'step': 1044, 'epoch': 1} {'type': 'loss', 'content': 0.15776410698890686, 'timestamp': '2025-09-10 02:27:18.196725', 'step': 1045, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 2847885091968}, 'timestamp': '2025-09-10 02:27:18.239577', 'step': 1045, 'epoch': 1} {'type': 'loss', 'content': 0.10697008669376373, 'timestamp': '2025-09-10 02:27:18.241705', 'step': 1046, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 2847885091968}, 'timestamp': '2025-09-10 02:27:18.275132', 'step': 1046, 'epoch': 1} {'type': 'loss', 'content': 0.21414180099964142, 'timestamp': '2025-09-10 02:27:18.279740', 'step': 1047, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 128], 'flops': 3797092544000}, 'timestamp': '2025-09-10 02:27:18.311127', 'step': 1047, 'epoch': 1} {'type': 'loss', 'content': 0.23152010142803192, 'timestamp': '2025-09-10 02:27:18.335557', 'step': 1048, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 2847885091968}, 'timestamp': '2025-09-10 02:27:18.365246', 'step': 1048, 'epoch': 1} {'type': 'loss', 'content': 0.244158074259758, 'timestamp': '2025-09-10 02:27:18.367704', 'step': 1049, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 128], 'flops': 3797092544000}, 'timestamp': '2025-09-10 02:27:18.401355', 'step': 1049, 'epoch': 1} {'type': 'loss', 'content': 0.17582497000694275, 'timestamp': '2025-09-10 02:27:18.403794', 'step': 1050, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 2847885091968}, 'timestamp': '2025-09-10 02:27:18.434477', 'step': 1050, 'epoch': 1} {'type': 'loss', 'content': 0.14334389567375183, 'timestamp': '2025-09-10 02:27:18.437232', 'step': 1051, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 128], 'flops': 3797092544000}, 'timestamp': '2025-09-10 02:27:18.474009', 'step': 1051, 'epoch': 1} {'type': 'loss', 'content': 0.14967747032642365, 'timestamp': '2025-09-10 02:27:18.497493', 'step': 1052, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 2847885091968}, 'timestamp': '2025-09-10 02:27:18.527990', 'step': 1052, 'epoch': 1} {'type': 'loss', 'content': 0.17475464940071106, 'timestamp': '2025-09-10 02:27:18.530196', 'step': 1053, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 128], 'flops': 3797092544000}, 'timestamp': '2025-09-10 02:27:18.560204', 'step': 1053, 'epoch': 1} {'type': 'loss', 'content': 0.15708476305007935, 'timestamp': '2025-09-10 02:27:18.561983', 'step': 1054, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 3322488817984}, 'timestamp': '2025-09-10 02:27:18.591460', 'step': 1054, 'epoch': 1} {'type': 'loss', 'content': 0.13739891350269318, 'timestamp': '2025-09-10 02:27:18.594156', 'step': 1055, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 176], 'flops': 5220903722048}, 'timestamp': '2025-09-10 02:27:18.624967', 'step': 1055, 'epoch': 1} {'type': 'loss', 'content': 0.24162130057811737, 'timestamp': '2025-09-10 02:27:18.649962', 'step': 1056, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 3322488817984}, 'timestamp': '2025-09-10 02:27:18.680551', 'step': 1056, 'epoch': 1} {'type': 'loss', 'content': 0.2697772979736328, 'timestamp': '2025-09-10 02:27:18.682932', 'step': 1057, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 3322488817984}, 'timestamp': '2025-09-10 02:27:18.712200', 'step': 1057, 'epoch': 1} {'type': 'loss', 'content': 0.1672133207321167, 'timestamp': '2025-09-10 02:27:18.714445', 'step': 1058, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 2847885091968}, 'timestamp': '2025-09-10 02:27:18.743845', 'step': 1058, 'epoch': 1} {'type': 'loss', 'content': 0.20198608934879303, 'timestamp': '2025-09-10 02:27:18.746252', 'step': 1059, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 128], 'flops': 3797092544000}, 'timestamp': '2025-09-10 02:27:18.776298', 'step': 1059, 'epoch': 1} {'type': 'loss', 'content': 0.2567630410194397, 'timestamp': '2025-09-10 02:27:18.800028', 'step': 1060, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 2847885091968}, 'timestamp': '2025-09-10 02:27:18.830382', 'step': 1060, 'epoch': 1} {'type': 'loss', 'content': 0.3222164213657379, 'timestamp': '2025-09-10 02:27:18.832464', 'step': 1061, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 2847885091968}, 'timestamp': '2025-09-10 02:27:18.862420', 'step': 1061, 'epoch': 1} {'type': 'loss', 'content': 0.14489583671092987, 'timestamp': '2025-09-10 02:27:18.864606', 'step': 1062, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 3322488817984}, 'timestamp': '2025-09-10 02:27:18.894159', 'step': 1062, 'epoch': 1} {'type': 'loss', 'content': 0.17025449872016907, 'timestamp': '2025-09-10 02:27:18.896453', 'step': 1063, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 2847885091968}, 'timestamp': '2025-09-10 02:27:18.926540', 'step': 1063, 'epoch': 1} {'type': 'loss', 'content': 0.16756120324134827, 'timestamp': '2025-09-10 02:27:18.950076', 'step': 1064, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 3322488817984}, 'timestamp': '2025-09-10 02:27:18.979698', 'step': 1064, 'epoch': 1} {'type': 'loss', 'content': 0.1847585290670395, 'timestamp': '2025-09-10 02:27:18.984575', 'step': 1065, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 3322488817984}, 'timestamp': '2025-09-10 02:27:19.015182', 'step': 1065, 'epoch': 1} {'type': 'loss', 'content': 0.2073465883731842, 'timestamp': '2025-09-10 02:27:19.017371', 'step': 1066, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 144], 'flops': 4271696270016}, 'timestamp': '2025-09-10 02:27:19.047192', 'step': 1066, 'epoch': 1} {'type': 'loss', 'content': 0.20617592334747314, 'timestamp': '2025-09-10 02:27:19.049612', 'step': 1067, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 2847885091968}, 'timestamp': '2025-09-10 02:27:19.079339', 'step': 1067, 'epoch': 1} {'type': 'loss', 'content': 0.182595893740654, 'timestamp': '2025-09-10 02:27:19.104474', 'step': 1068, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 2847885091968}, 'timestamp': '2025-09-10 02:27:19.135069', 'step': 1068, 'epoch': 1} {'type': 'loss', 'content': 0.14778681099414825, 'timestamp': '2025-09-10 02:27:19.137243', 'step': 1069, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 2847885091968}, 'timestamp': '2025-09-10 02:27:19.166625', 'step': 1069, 'epoch': 1} {'type': 'loss', 'content': 0.18771407008171082, 'timestamp': '2025-09-10 02:27:19.168643', 'step': 1070, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 3322488817984}, 'timestamp': '2025-09-10 02:27:19.200241', 'step': 1070, 'epoch': 1} {'type': 'loss', 'content': 0.23027975857257843, 'timestamp': '2025-09-10 02:27:19.202286', 'step': 1071, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 2847885091968}, 'timestamp': '2025-09-10 02:27:19.238218', 'step': 1071, 'epoch': 1} {'type': 'loss', 'content': 0.20603835582733154, 'timestamp': '2025-09-10 02:27:19.261744', 'step': 1072, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 3322488817984}, 'timestamp': '2025-09-10 02:27:19.292299', 'step': 1072, 'epoch': 1} {'type': 'loss', 'content': 0.1793052852153778, 'timestamp': '2025-09-10 02:27:19.294584', 'step': 1073, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 3322488817984}, 'timestamp': '2025-09-10 02:27:19.324716', 'step': 1073, 'epoch': 1} {'type': 'loss', 'content': 0.18491433560848236, 'timestamp': '2025-09-10 02:27:19.326738', 'step': 1074, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 128], 'flops': 3797092544000}, 'timestamp': '2025-09-10 02:27:19.356046', 'step': 1074, 'epoch': 1} {'type': 'loss', 'content': 0.175201416015625, 'timestamp': '2025-09-10 02:27:19.358287', 'step': 1075, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 3322488817984}, 'timestamp': '2025-09-10 02:27:19.388436', 'step': 1075, 'epoch': 1} {'type': 'loss', 'content': 0.12857085466384888, 'timestamp': '2025-09-10 02:27:19.412040', 'step': 1076, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 3322488817984}, 'timestamp': '2025-09-10 02:27:19.442520', 'step': 1076, 'epoch': 1} {'type': 'loss', 'content': 0.2599168121814728, 'timestamp': '2025-09-10 02:27:19.444685', 'step': 1077, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 3322488817984}, 'timestamp': '2025-09-10 02:27:19.474413', 'step': 1077, 'epoch': 1} {'type': 'loss', 'content': 0.17724213004112244, 'timestamp': '2025-09-10 02:27:19.476304', 'step': 1078, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 128], 'flops': 3797092544000}, 'timestamp': '2025-09-10 02:27:19.505474', 'step': 1078, 'epoch': 1} {'type': 'loss', 'content': 0.10456297546625137, 'timestamp': '2025-09-10 02:27:19.507978', 'step': 1079, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 128], 'flops': 3797092544000}, 'timestamp': '2025-09-10 02:27:19.538003', 'step': 1079, 'epoch': 1} {'type': 'loss', 'content': 0.25731584429740906, 'timestamp': '2025-09-10 02:27:19.561645', 'step': 1080, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 144], 'flops': 4271696270016}, 'timestamp': '2025-09-10 02:27:19.591495', 'step': 1080, 'epoch': 1} {'type': 'loss', 'content': 0.1364588439464569, 'timestamp': '2025-09-10 02:27:19.593723', 'step': 1081, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 128], 'flops': 3797092544000}, 'timestamp': '2025-09-10 02:27:19.624946', 'step': 1081, 'epoch': 1} {'type': 'loss', 'content': 0.14426998794078827, 'timestamp': '2025-09-10 02:27:19.627298', 'step': 1082, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 2847885091968}, 'timestamp': '2025-09-10 02:27:19.657932', 'step': 1082, 'epoch': 1} {'type': 'loss', 'content': 0.25740042328834534, 'timestamp': '2025-09-10 02:27:19.660744', 'step': 1083, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 128], 'flops': 3797092544000}, 'timestamp': '2025-09-10 02:27:19.690693', 'step': 1083, 'epoch': 1} {'type': 'loss', 'content': 0.08850234746932983, 'timestamp': '2025-09-10 02:27:19.714170', 'step': 1084, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 3322488817984}, 'timestamp': '2025-09-10 02:27:19.745780', 'step': 1084, 'epoch': 1} {'type': 'loss', 'content': 0.2270834594964981, 'timestamp': '2025-09-10 02:27:19.748008', 'step': 1085, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 144], 'flops': 4271696270016}, 'timestamp': '2025-09-10 02:27:19.777502', 'step': 1085, 'epoch': 1} {'type': 'loss', 'content': 0.1859290897846222, 'timestamp': '2025-09-10 02:27:19.780114', 'step': 1086, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 128], 'flops': 3797092544000}, 'timestamp': '2025-09-10 02:27:19.810361', 'step': 1086, 'epoch': 1} {'type': 'loss', 'content': 0.1905265748500824, 'timestamp': '2025-09-10 02:27:19.812616', 'step': 1087, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 3322488817984}, 'timestamp': '2025-09-10 02:27:19.842030', 'step': 1087, 'epoch': 1} {'type': 'loss', 'content': 0.24052219092845917, 'timestamp': '2025-09-10 02:27:19.865671', 'step': 1088, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 80], 'flops': 2373281365952}, 'timestamp': '2025-09-10 02:27:19.895502', 'step': 1088, 'epoch': 1} {'type': 'loss', 'content': 0.1225886344909668, 'timestamp': '2025-09-10 02:27:19.898421', 'step': 1089, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 2847885091968}, 'timestamp': '2025-09-10 02:27:19.928049', 'step': 1089, 'epoch': 1} {'type': 'loss', 'content': 0.15299268066883087, 'timestamp': '2025-09-10 02:27:19.930295', 'step': 1090, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 3322488817984}, 'timestamp': '2025-09-10 02:27:19.960912', 'step': 1090, 'epoch': 1} {'type': 'loss', 'content': 0.13282351195812225, 'timestamp': '2025-09-10 02:27:19.962871', 'step': 1091, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 160], 'flops': 4746299996032}, 'timestamp': '2025-09-10 02:27:19.992861', 'step': 1091, 'epoch': 1} {'type': 'loss', 'content': 0.15323102474212646, 'timestamp': '2025-09-10 02:27:20.016453', 'step': 1092, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 3322488817984}, 'timestamp': '2025-09-10 02:27:20.054958', 'step': 1092, 'epoch': 1} {'type': 'loss', 'content': 0.17675672471523285, 'timestamp': '2025-09-10 02:27:20.057404', 'step': 1093, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 2847885091968}, 'timestamp': '2025-09-10 02:27:20.088323', 'step': 1093, 'epoch': 1} {'type': 'loss', 'content': 0.15298083424568176, 'timestamp': '2025-09-10 02:27:20.090742', 'step': 1094, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 2847885091968}, 'timestamp': '2025-09-10 02:27:20.121392', 'step': 1094, 'epoch': 1} {'type': 'loss', 'content': 0.20087750256061554, 'timestamp': '2025-09-10 02:27:20.123494', 'step': 1095, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 128], 'flops': 3797092544000}, 'timestamp': '2025-09-10 02:27:20.153771', 'step': 1095, 'epoch': 1} {'type': 'loss', 'content': 0.15960554778575897, 'timestamp': '2025-09-10 02:27:20.177647', 'step': 1096, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 80], 'flops': 2373281365952}, 'timestamp': '2025-09-10 02:27:20.210277', 'step': 1096, 'epoch': 1} {'type': 'loss', 'content': 0.24234160780906677, 'timestamp': '2025-09-10 02:27:20.219387', 'step': 1097, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 144], 'flops': 4271696270016}, 'timestamp': '2025-09-10 02:27:20.249042', 'step': 1097, 'epoch': 1} {'type': 'loss', 'content': 0.22034192085266113, 'timestamp': '2025-09-10 02:27:20.251669', 'step': 1098, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 128], 'flops': 3797092544000}, 'timestamp': '2025-09-10 02:27:20.284296', 'step': 1098, 'epoch': 1} {'type': 'loss', 'content': 0.1669074296951294, 'timestamp': '2025-09-10 02:27:20.286799', 'step': 1099, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 2847885091968}, 'timestamp': '2025-09-10 02:27:20.317370', 'step': 1099, 'epoch': 1} {'type': 'loss', 'content': 0.1545325368642807, 'timestamp': '2025-09-10 02:27:20.340864', 'step': 1100, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 144], 'flops': 4271696270016}, 'timestamp': '2025-09-10 02:27:20.371197', 'step': 1100, 'epoch': 1} {'type': 'loss', 'content': 0.11914857476949692, 'timestamp': '2025-09-10 02:27:20.373747', 'step': 1101, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 128], 'flops': 3797092544000}, 'timestamp': '2025-09-10 02:27:20.403592', 'step': 1101, 'epoch': 1} {'type': 'loss', 'content': 0.140835240483284, 'timestamp': '2025-09-10 02:27:20.405768', 'step': 1102, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 80], 'flops': 2373281365952}, 'timestamp': '2025-09-10 02:27:20.435769', 'step': 1102, 'epoch': 1} {'type': 'loss', 'content': 0.22200194001197815, 'timestamp': '2025-09-10 02:27:20.438175', 'step': 1103, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 3322488817984}, 'timestamp': '2025-09-10 02:27:20.468183', 'step': 1103, 'epoch': 1} {'type': 'loss', 'content': 0.14238335192203522, 'timestamp': '2025-09-10 02:27:20.491751', 'step': 1104, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 128], 'flops': 3797092544000}, 'timestamp': '2025-09-10 02:27:20.523432', 'step': 1104, 'epoch': 1} {'type': 'loss', 'content': 0.2379974126815796, 'timestamp': '2025-09-10 02:27:20.525701', 'step': 1105, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 128], 'flops': 3797092544000}, 'timestamp': '2025-09-10 02:27:20.555388', 'step': 1105, 'epoch': 1} {'type': 'loss', 'content': 0.16146284341812134, 'timestamp': '2025-09-10 02:27:20.557980', 'step': 1106, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 2847885091968}, 'timestamp': '2025-09-10 02:27:20.587317', 'step': 1106, 'epoch': 1} {'type': 'loss', 'content': 0.2217317372560501, 'timestamp': '2025-09-10 02:27:20.589619', 'step': 1107, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 144], 'flops': 4271696270016}, 'timestamp': '2025-09-10 02:27:20.621523', 'step': 1107, 'epoch': 1} {'type': 'loss', 'content': 0.23911094665527344, 'timestamp': '2025-09-10 02:27:20.645275', 'step': 1108, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 128], 'flops': 3797092544000}, 'timestamp': '2025-09-10 02:27:20.675193', 'step': 1108, 'epoch': 1} {'type': 'loss', 'content': 0.16677966713905334, 'timestamp': '2025-09-10 02:27:20.677395', 'step': 1109, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 2847885091968}, 'timestamp': '2025-09-10 02:27:20.707021', 'step': 1109, 'epoch': 1} {'type': 'loss', 'content': 0.1605684608221054, 'timestamp': '2025-09-10 02:27:20.709286', 'step': 1110, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 160], 'flops': 4746299996032}, 'timestamp': '2025-09-10 02:27:20.738933', 'step': 1110, 'epoch': 1} {'type': 'loss', 'content': 0.15189175307750702, 'timestamp': '2025-09-10 02:27:20.741658', 'step': 1111, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 144], 'flops': 4271696270016}, 'timestamp': '2025-09-10 02:27:20.771815', 'step': 1111, 'epoch': 1} {'type': 'loss', 'content': 0.21041585505008698, 'timestamp': '2025-09-10 02:27:20.795280', 'step': 1112, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 2847885091968}, 'timestamp': '2025-09-10 02:27:20.826365', 'step': 1112, 'epoch': 1} {'type': 'loss', 'content': 0.19347037374973297, 'timestamp': '2025-09-10 02:27:20.828505', 'step': 1113, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 3322488817984}, 'timestamp': '2025-09-10 02:27:20.858916', 'step': 1113, 'epoch': 1} {'type': 'loss', 'content': 0.1667727679014206, 'timestamp': '2025-09-10 02:27:20.861141', 'step': 1114, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 80], 'flops': 2373281365952}, 'timestamp': '2025-09-10 02:27:20.891793', 'step': 1114, 'epoch': 1} {'type': 'loss', 'content': 0.12645301222801208, 'timestamp': '2025-09-10 02:27:20.893905', 'step': 1115, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 128], 'flops': 3797092544000}, 'timestamp': '2025-09-10 02:27:20.923362', 'step': 1115, 'epoch': 1} {'type': 'loss', 'content': 0.14857307076454163, 'timestamp': '2025-09-10 02:27:20.949886', 'step': 1116, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 128], 'flops': 3797092544000}, 'timestamp': '2025-09-10 02:27:20.990389', 'step': 1116, 'epoch': 1} {'type': 'loss', 'content': 0.204845130443573, 'timestamp': '2025-09-10 02:27:20.992675', 'step': 1117, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 3322488817984}, 'timestamp': '2025-09-10 02:27:21.022762', 'step': 1117, 'epoch': 1} {'type': 'loss', 'content': 0.21016617119312286, 'timestamp': '2025-09-10 02:27:21.025102', 'step': 1118, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 128], 'flops': 3797092544000}, 'timestamp': '2025-09-10 02:27:21.055174', 'step': 1118, 'epoch': 1} {'type': 'loss', 'content': 0.20092740654945374, 'timestamp': '2025-09-10 02:27:21.057509', 'step': 1119, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 3322488817984}, 'timestamp': '2025-09-10 02:27:21.087218', 'step': 1119, 'epoch': 1} {'type': 'loss', 'content': 0.13095667958259583, 'timestamp': '2025-09-10 02:27:21.110791', 'step': 1120, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 2847885091968}, 'timestamp': '2025-09-10 02:27:21.140944', 'step': 1120, 'epoch': 1} {'type': 'loss', 'content': 0.2379988729953766, 'timestamp': '2025-09-10 02:27:21.142975', 'step': 1121, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 128], 'flops': 3797092544000}, 'timestamp': '2025-09-10 02:27:21.172223', 'step': 1121, 'epoch': 1} {'type': 'loss', 'content': 0.20324668288230896, 'timestamp': '2025-09-10 02:27:21.174408', 'step': 1122, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 128], 'flops': 3797092544000}, 'timestamp': '2025-09-10 02:27:21.204190', 'step': 1122, 'epoch': 1} {'type': 'loss', 'content': 0.14686931669712067, 'timestamp': '2025-09-10 02:27:21.206253', 'step': 1123, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 2847885091968}, 'timestamp': '2025-09-10 02:27:21.239894', 'step': 1123, 'epoch': 1} {'type': 'loss', 'content': 0.19900833070278168, 'timestamp': '2025-09-10 02:27:21.263726', 'step': 1124, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 128], 'flops': 3797092544000}, 'timestamp': '2025-09-10 02:27:21.293578', 'step': 1124, 'epoch': 1} {'type': 'loss', 'content': 0.14025944471359253, 'timestamp': '2025-09-10 02:27:21.295776', 'step': 1125, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 3322488817984}, 'timestamp': '2025-09-10 02:27:21.325496', 'step': 1125, 'epoch': 1} {'type': 'loss', 'content': 0.14987248182296753, 'timestamp': '2025-09-10 02:27:21.328291', 'step': 1126, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 160], 'flops': 4746299996032}, 'timestamp': '2025-09-10 02:27:21.358115', 'step': 1126, 'epoch': 1} {'type': 'loss', 'content': 0.18004384636878967, 'timestamp': '2025-09-10 02:27:21.360956', 'step': 1127, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 3322488817984}, 'timestamp': '2025-09-10 02:27:21.390889', 'step': 1127, 'epoch': 1} {'type': 'loss', 'content': 0.21972401440143585, 'timestamp': '2025-09-10 02:27:21.414422', 'step': 1128, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 3322488817984}, 'timestamp': '2025-09-10 02:27:21.444412', 'step': 1128, 'epoch': 1} {'type': 'loss', 'content': 0.1134793683886528, 'timestamp': '2025-09-10 02:27:21.446858', 'step': 1129, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 128], 'flops': 3797092544000}, 'timestamp': '2025-09-10 02:27:21.476957', 'step': 1129, 'epoch': 1} {'type': 'loss', 'content': 0.21592941880226135, 'timestamp': '2025-09-10 02:27:21.479086', 'step': 1130, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 80], 'flops': 2373281365952}, 'timestamp': '2025-09-10 02:27:21.509136', 'step': 1130, 'epoch': 1} {'type': 'loss', 'content': 0.2566622495651245, 'timestamp': '2025-09-10 02:27:21.512267', 'step': 1131, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 3322488817984}, 'timestamp': '2025-09-10 02:27:21.541478', 'step': 1131, 'epoch': 1} {'type': 'loss', 'content': 0.15046919882297516, 'timestamp': '2025-09-10 02:27:21.564954', 'step': 1132, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 2847885091968}, 'timestamp': '2025-09-10 02:27:21.595319', 'step': 1132, 'epoch': 1} {'type': 'loss', 'content': 0.2625887393951416, 'timestamp': '2025-09-10 02:27:21.598605', 'step': 1133, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 2847885091968}, 'timestamp': '2025-09-10 02:27:21.628347', 'step': 1133, 'epoch': 1} {'type': 'loss', 'content': 0.1586204171180725, 'timestamp': '2025-09-10 02:27:21.631393', 'step': 1134, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 80], 'flops': 2373281365952}, 'timestamp': '2025-09-10 02:27:21.660508', 'step': 1134, 'epoch': 1} {'type': 'loss', 'content': 0.3257511854171753, 'timestamp': '2025-09-10 02:27:21.662651', 'step': 1135, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 2847885091968}, 'timestamp': '2025-09-10 02:27:21.692456', 'step': 1135, 'epoch': 1} {'type': 'loss', 'content': 0.27844104170799255, 'timestamp': '2025-09-10 02:27:21.715872', 'step': 1136, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 3322488817984}, 'timestamp': '2025-09-10 02:27:21.745898', 'step': 1136, 'epoch': 1} {'type': 'loss', 'content': 0.16296476125717163, 'timestamp': '2025-09-10 02:27:21.748253', 'step': 1137, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 3322488817984}, 'timestamp': '2025-09-10 02:27:21.777625', 'step': 1137, 'epoch': 1} {'type': 'loss', 'content': 0.18752390146255493, 'timestamp': '2025-09-10 02:27:21.779779', 'step': 1138, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 80], 'flops': 2373281365952}, 'timestamp': '2025-09-10 02:27:21.810122', 'step': 1138, 'epoch': 1} {'type': 'loss', 'content': 0.18498621881008148, 'timestamp': '2025-09-10 02:27:21.812306', 'step': 1139, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 2847885091968}, 'timestamp': '2025-09-10 02:27:21.843143', 'step': 1139, 'epoch': 1} {'type': 'loss', 'content': 0.09246736764907837, 'timestamp': '2025-09-10 02:27:21.866800', 'step': 1140, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 3322488817984}, 'timestamp': '2025-09-10 02:27:21.896630', 'step': 1140, 'epoch': 1} {'type': 'loss', 'content': 0.1861693263053894, 'timestamp': '2025-09-10 02:27:21.899031', 'step': 1141, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 128], 'flops': 3797092544000}, 'timestamp': '2025-09-10 02:27:21.928675', 'step': 1141, 'epoch': 1} {'type': 'loss', 'content': 0.2257867455482483, 'timestamp': '2025-09-10 02:27:21.930921', 'step': 1142, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 80], 'flops': 2373281365952}, 'timestamp': '2025-09-10 02:27:21.960330', 'step': 1142, 'epoch': 1} {'type': 'loss', 'content': 0.1283630132675171, 'timestamp': '2025-09-10 02:27:21.962350', 'step': 1143, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 128], 'flops': 3797092544000}, 'timestamp': '2025-09-10 02:27:21.991489', 'step': 1143, 'epoch': 1} {'type': 'loss', 'content': 0.21647438406944275, 'timestamp': '2025-09-10 02:27:22.014900', 'step': 1144, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 176], 'flops': 5220903722048}, 'timestamp': '2025-09-10 02:27:22.046033', 'step': 1144, 'epoch': 1} {'type': 'loss', 'content': 0.1828519105911255, 'timestamp': '2025-09-10 02:27:22.048238', 'step': 1145, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 128], 'flops': 3797092544000}, 'timestamp': '2025-09-10 02:27:22.078195', 'step': 1145, 'epoch': 1} {'type': 'loss', 'content': 0.1279613971710205, 'timestamp': '2025-09-10 02:27:22.080191', 'step': 1146, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 128], 'flops': 3797092544000}, 'timestamp': '2025-09-10 02:27:22.110034', 'step': 1146, 'epoch': 1} {'type': 'loss', 'content': 0.17025798559188843, 'timestamp': '2025-09-10 02:27:22.112487', 'step': 1147, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 3322488817984}, 'timestamp': '2025-09-10 02:27:22.144676', 'step': 1147, 'epoch': 1} {'type': 'loss', 'content': 0.25578346848487854, 'timestamp': '2025-09-10 02:27:22.168180', 'step': 1148, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 3322488817984}, 'timestamp': '2025-09-10 02:27:22.197662', 'step': 1148, 'epoch': 1} {'type': 'loss', 'content': 0.25346896052360535, 'timestamp': '2025-09-10 02:27:22.199676', 'step': 1149, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 128], 'flops': 3797092544000}, 'timestamp': '2025-09-10 02:27:22.229477', 'step': 1149, 'epoch': 1} {'type': 'loss', 'content': 0.21284711360931396, 'timestamp': '2025-09-10 02:27:22.231516', 'step': 1150, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 3322488817984}, 'timestamp': '2025-09-10 02:27:22.261482', 'step': 1150, 'epoch': 1} {'type': 'loss', 'content': 0.10725290328264236, 'timestamp': '2025-09-10 02:27:22.263647', 'step': 1151, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 3322488817984}, 'timestamp': '2025-09-10 02:27:22.295165', 'step': 1151, 'epoch': 1} {'type': 'loss', 'content': 0.16615983843803406, 'timestamp': '2025-09-10 02:27:22.318455', 'step': 1152, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 128], 'flops': 3797092544000}, 'timestamp': '2025-09-10 02:27:22.349633', 'step': 1152, 'epoch': 1} {'type': 'loss', 'content': 0.1582360714673996, 'timestamp': '2025-09-10 02:27:22.351667', 'step': 1153, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 128], 'flops': 3797092544000}, 'timestamp': '2025-09-10 02:27:22.381330', 'step': 1153, 'epoch': 1} {'type': 'loss', 'content': 0.14120861887931824, 'timestamp': '2025-09-10 02:27:22.383479', 'step': 1154, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 128], 'flops': 3797092544000}, 'timestamp': '2025-09-10 02:27:22.415715', 'step': 1154, 'epoch': 1} {'type': 'loss', 'content': 0.17175720632076263, 'timestamp': '2025-09-10 02:27:22.417905', 'step': 1155, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 224], 'flops': 6644714900096}, 'timestamp': '2025-09-10 02:27:22.448771', 'step': 1155, 'epoch': 1} {'type': 'loss', 'content': 0.2217513918876648, 'timestamp': '2025-09-10 02:27:22.476788', 'step': 1156, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 144], 'flops': 4271696270016}, 'timestamp': '2025-09-10 02:27:22.507294', 'step': 1156, 'epoch': 1} {'type': 'loss', 'content': 0.21228773891925812, 'timestamp': '2025-09-10 02:27:22.509406', 'step': 1157, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 2847885091968}, 'timestamp': '2025-09-10 02:27:22.538568', 'step': 1157, 'epoch': 1} {'type': 'loss', 'content': 0.20630013942718506, 'timestamp': '2025-09-10 02:27:22.540620', 'step': 1158, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 3322488817984}, 'timestamp': '2025-09-10 02:27:22.570644', 'step': 1158, 'epoch': 1} {'type': 'loss', 'content': 0.19022291898727417, 'timestamp': '2025-09-10 02:27:22.572611', 'step': 1159, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 2847885091968}, 'timestamp': '2025-09-10 02:27:22.602026', 'step': 1159, 'epoch': 1} {'type': 'loss', 'content': 0.17103895545005798, 'timestamp': '2025-09-10 02:27:22.625601', 'step': 1160, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 128], 'flops': 3797092544000}, 'timestamp': '2025-09-10 02:27:22.655991', 'step': 1160, 'epoch': 1} {'type': 'loss', 'content': 0.12673848867416382, 'timestamp': '2025-09-10 02:27:22.659521', 'step': 1161, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 144], 'flops': 4271696270016}, 'timestamp': '2025-09-10 02:27:22.693034', 'step': 1161, 'epoch': 1} {'type': 'loss', 'content': 0.2520807981491089, 'timestamp': '2025-09-10 02:27:22.695566', 'step': 1162, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 2847885091968}, 'timestamp': '2025-09-10 02:27:22.725671', 'step': 1162, 'epoch': 1} {'type': 'loss', 'content': 0.16468684375286102, 'timestamp': '2025-09-10 02:27:22.728803', 'step': 1163, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 144], 'flops': 4271696270016}, 'timestamp': '2025-09-10 02:27:22.758947', 'step': 1163, 'epoch': 1} {'type': 'loss', 'content': 0.12468663603067398, 'timestamp': '2025-09-10 02:27:22.782533', 'step': 1164, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 128], 'flops': 3797092544000}, 'timestamp': '2025-09-10 02:27:22.812134', 'step': 1164, 'epoch': 1} {'type': 'loss', 'content': 0.14272025227546692, 'timestamp': '2025-09-10 02:27:22.814295', 'step': 1165, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 3322488817984}, 'timestamp': '2025-09-10 02:27:22.844346', 'step': 1165, 'epoch': 1} {'type': 'loss', 'content': 0.1424809992313385, 'timestamp': '2025-09-10 02:27:22.846838', 'step': 1166, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 3322488817984}, 'timestamp': '2025-09-10 02:27:22.877241', 'step': 1166, 'epoch': 1} {'type': 'loss', 'content': 0.17278794944286346, 'timestamp': '2025-09-10 02:27:22.879686', 'step': 1167, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 128], 'flops': 3797092544000}, 'timestamp': '2025-09-10 02:27:22.909645', 'step': 1167, 'epoch': 1} {'type': 'loss', 'content': 0.22457732260227203, 'timestamp': '2025-09-10 02:27:22.933099', 'step': 1168, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 3322488817984}, 'timestamp': '2025-09-10 02:27:22.962996', 'step': 1168, 'epoch': 1} {'type': 'loss', 'content': 0.177296981215477, 'timestamp': '2025-09-10 02:27:22.965298', 'step': 1169, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 3322488817984}, 'timestamp': '2025-09-10 02:27:22.995299', 'step': 1169, 'epoch': 1} {'type': 'loss', 'content': 0.1954987794160843, 'timestamp': '2025-09-10 02:27:22.997508', 'step': 1170, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 2847885091968}, 'timestamp': '2025-09-10 02:27:23.027389', 'step': 1170, 'epoch': 1} {'type': 'loss', 'content': 0.1606765240430832, 'timestamp': '2025-09-10 02:27:23.029587', 'step': 1171, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 2847885091968}, 'timestamp': '2025-09-10 02:27:23.067418', 'step': 1171, 'epoch': 1} {'type': 'loss', 'content': 0.19029930233955383, 'timestamp': '2025-09-10 02:27:23.092436', 'step': 1172, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 2847885091968}, 'timestamp': '2025-09-10 02:27:23.122709', 'step': 1172, 'epoch': 1} {'type': 'loss', 'content': 0.16553905606269836, 'timestamp': '2025-09-10 02:27:23.125006', 'step': 1173, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 144], 'flops': 4271696270016}, 'timestamp': '2025-09-10 02:27:23.158036', 'step': 1173, 'epoch': 1} {'type': 'loss', 'content': 0.16301876306533813, 'timestamp': '2025-09-10 02:27:23.160394', 'step': 1174, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 3322488817984}, 'timestamp': '2025-09-10 02:27:23.190020', 'step': 1174, 'epoch': 1} {'type': 'loss', 'content': 0.15115313231945038, 'timestamp': '2025-09-10 02:27:23.192461', 'step': 1175, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 128], 'flops': 3797092544000}, 'timestamp': '2025-09-10 02:27:23.222967', 'step': 1175, 'epoch': 1} {'type': 'loss', 'content': 0.21349337697029114, 'timestamp': '2025-09-10 02:27:23.250387', 'step': 1176, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 128], 'flops': 3797092544000}, 'timestamp': '2025-09-10 02:27:23.280839', 'step': 1176, 'epoch': 1} {'type': 'loss', 'content': 0.172472283244133, 'timestamp': '2025-09-10 02:27:23.282975', 'step': 1177, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 128], 'flops': 3797092544000}, 'timestamp': '2025-09-10 02:27:23.313760', 'step': 1177, 'epoch': 1} {'type': 'loss', 'content': 0.2586784064769745, 'timestamp': '2025-09-10 02:27:23.315965', 'step': 1178, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 3322488817984}, 'timestamp': '2025-09-10 02:27:23.348338', 'step': 1178, 'epoch': 1} {'type': 'loss', 'content': 0.13602590560913086, 'timestamp': '2025-09-10 02:27:23.350160', 'step': 1179, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 3322488817984}, 'timestamp': '2025-09-10 02:27:23.380199', 'step': 1179, 'epoch': 1} {'type': 'loss', 'content': 0.20548343658447266, 'timestamp': '2025-09-10 02:27:23.403560', 'step': 1180, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 2847885091968}, 'timestamp': '2025-09-10 02:27:23.439553', 'step': 1180, 'epoch': 1} {'type': 'loss', 'content': 0.1948164999485016, 'timestamp': '2025-09-10 02:27:23.441773', 'step': 1181, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 2847885091968}, 'timestamp': '2025-09-10 02:27:23.471308', 'step': 1181, 'epoch': 1} {'type': 'loss', 'content': 0.19398358464241028, 'timestamp': '2025-09-10 02:27:23.473413', 'step': 1182, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 3322488817984}, 'timestamp': '2025-09-10 02:27:23.503663', 'step': 1182, 'epoch': 1} {'type': 'loss', 'content': 0.17782314121723175, 'timestamp': '2025-09-10 02:27:23.506266', 'step': 1183, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 144], 'flops': 4271696270016}, 'timestamp': '2025-09-10 02:27:23.548790', 'step': 1183, 'epoch': 1} {'type': 'loss', 'content': 0.12569144368171692, 'timestamp': '2025-09-10 02:27:23.572436', 'step': 1184, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 144], 'flops': 4271696270016}, 'timestamp': '2025-09-10 02:27:23.602947', 'step': 1184, 'epoch': 1} {'type': 'loss', 'content': 0.11558061838150024, 'timestamp': '2025-09-10 02:27:23.606655', 'step': 1185, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 3322488817984}, 'timestamp': '2025-09-10 02:27:23.637120', 'step': 1185, 'epoch': 1} {'type': 'loss', 'content': 0.15625880658626556, 'timestamp': '2025-09-10 02:27:23.640391', 'step': 1186, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 128], 'flops': 3797092544000}, 'timestamp': '2025-09-10 02:27:23.669607', 'step': 1186, 'epoch': 1} {'type': 'loss', 'content': 0.25044122338294983, 'timestamp': '2025-09-10 02:27:23.671805', 'step': 1187, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 3322488817984}, 'timestamp': '2025-09-10 02:27:23.701571', 'step': 1187, 'epoch': 1} {'type': 'loss', 'content': 0.19989173114299774, 'timestamp': '2025-09-10 02:27:23.725122', 'step': 1188, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 3322488817984}, 'timestamp': '2025-09-10 02:27:23.754869', 'step': 1188, 'epoch': 1} {'type': 'loss', 'content': 0.12416711449623108, 'timestamp': '2025-09-10 02:27:23.756975', 'step': 1189, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 128], 'flops': 3797092544000}, 'timestamp': '2025-09-10 02:27:23.786622', 'step': 1189, 'epoch': 1} {'type': 'loss', 'content': 0.15946365892887115, 'timestamp': '2025-09-10 02:27:23.788838', 'step': 1190, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 128], 'flops': 3797092544000}, 'timestamp': '2025-09-10 02:27:23.819991', 'step': 1190, 'epoch': 1} {'type': 'loss', 'content': 0.1964629590511322, 'timestamp': '2025-09-10 02:27:23.821843', 'step': 1191, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 2847885091968}, 'timestamp': '2025-09-10 02:27:23.851129', 'step': 1191, 'epoch': 1} {'type': 'loss', 'content': 0.09807317703962326, 'timestamp': '2025-09-10 02:27:23.874646', 'step': 1192, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 3322488817984}, 'timestamp': '2025-09-10 02:27:23.905121', 'step': 1192, 'epoch': 1} {'type': 'loss', 'content': 0.15583959221839905, 'timestamp': '2025-09-10 02:27:23.907340', 'step': 1193, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 3322488817984}, 'timestamp': '2025-09-10 02:27:23.936757', 'step': 1193, 'epoch': 1} {'type': 'loss', 'content': 0.23979470133781433, 'timestamp': '2025-09-10 02:27:23.938930', 'step': 1194, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 3322488817984}, 'timestamp': '2025-09-10 02:27:23.970097', 'step': 1194, 'epoch': 1} {'type': 'loss', 'content': 0.19081945717334747, 'timestamp': '2025-09-10 02:27:23.971904', 'step': 1195, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 3322488817984}, 'timestamp': '2025-09-10 02:27:24.001132', 'step': 1195, 'epoch': 1} {'type': 'loss', 'content': 0.2088068723678589, 'timestamp': '2025-09-10 02:27:24.024295', 'step': 1196, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 3322488817984}, 'timestamp': '2025-09-10 02:27:24.053643', 'step': 1196, 'epoch': 1} {'type': 'loss', 'content': 0.2488379180431366, 'timestamp': '2025-09-10 02:27:24.055978', 'step': 1197, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 3322488817984}, 'timestamp': '2025-09-10 02:27:24.085615', 'step': 1197, 'epoch': 1} {'type': 'loss', 'content': 0.14837945997714996, 'timestamp': '2025-09-10 02:27:24.087877', 'step': 1198, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 3322488817984}, 'timestamp': '2025-09-10 02:27:24.118054', 'step': 1198, 'epoch': 1} {'type': 'loss', 'content': 0.1462887078523636, 'timestamp': '2025-09-10 02:27:24.120440', 'step': 1199, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 2847885091968}, 'timestamp': '2025-09-10 02:27:24.149711', 'step': 1199, 'epoch': 1} {'type': 'loss', 'content': 0.1352570652961731, 'timestamp': '2025-09-10 02:27:24.173214', 'step': 1200, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 3322488817984}, 'timestamp': '2025-09-10 02:27:24.203119', 'step': 1200, 'epoch': 1} {'type': 'loss', 'content': 0.14517918229103088, 'timestamp': '2025-09-10 02:27:24.205792', 'step': 1201, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 128], 'flops': 3797092544000}, 'timestamp': '2025-09-10 02:27:24.236143', 'step': 1201, 'epoch': 1} {'type': 'loss', 'content': 0.1800251305103302, 'timestamp': '2025-09-10 02:27:24.243234', 'step': 1202, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 128], 'flops': 3797092544000}, 'timestamp': '2025-09-10 02:27:24.275386', 'step': 1202, 'epoch': 1} {'type': 'loss', 'content': 0.19834424555301666, 'timestamp': '2025-09-10 02:27:24.277447', 'step': 1203, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 2847885091968}, 'timestamp': '2025-09-10 02:27:24.307581', 'step': 1203, 'epoch': 1} {'type': 'loss', 'content': 0.19712011516094208, 'timestamp': '2025-09-10 02:27:24.330998', 'step': 1204, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 3322488817984}, 'timestamp': '2025-09-10 02:27:24.362062', 'step': 1204, 'epoch': 1} {'type': 'loss', 'content': 0.15347176790237427, 'timestamp': '2025-09-10 02:27:24.364028', 'step': 1205, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 80], 'flops': 2373281365952}, 'timestamp': '2025-09-10 02:27:24.393025', 'step': 1205, 'epoch': 1} {'type': 'loss', 'content': 0.24168799817562103, 'timestamp': '2025-09-10 02:27:24.395882', 'step': 1206, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 3322488817984}, 'timestamp': '2025-09-10 02:27:24.427031', 'step': 1206, 'epoch': 1} {'type': 'loss', 'content': 0.1777494102716446, 'timestamp': '2025-09-10 02:27:24.429435', 'step': 1207, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 3322488817984}, 'timestamp': '2025-09-10 02:27:24.460172', 'step': 1207, 'epoch': 1} {'type': 'loss', 'content': 0.1463669091463089, 'timestamp': '2025-09-10 02:27:24.483964', 'step': 1208, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 128], 'flops': 3797092544000}, 'timestamp': '2025-09-10 02:27:24.517515', 'step': 1208, 'epoch': 1} {'type': 'loss', 'content': 0.11112049221992493, 'timestamp': '2025-09-10 02:27:24.519953', 'step': 1209, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 144], 'flops': 4271696270016}, 'timestamp': '2025-09-10 02:27:24.551539', 'step': 1209, 'epoch': 1} {'type': 'loss', 'content': 0.22922036051750183, 'timestamp': '2025-09-10 02:27:24.554263', 'step': 1210, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 3322488817984}, 'timestamp': '2025-09-10 02:27:24.589498', 'step': 1210, 'epoch': 1} {'type': 'loss', 'content': 0.16069519519805908, 'timestamp': '2025-09-10 02:27:24.591728', 'step': 1211, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 208], 'flops': 6170111174080}, 'timestamp': '2025-09-10 02:27:24.621553', 'step': 1211, 'epoch': 1} {'type': 'loss', 'content': 0.1923283040523529, 'timestamp': '2025-09-10 02:27:24.649192', 'step': 1212, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 128], 'flops': 3797092544000}, 'timestamp': '2025-09-10 02:27:24.681257', 'step': 1212, 'epoch': 1} {'type': 'loss', 'content': 0.14011415839195251, 'timestamp': '2025-09-10 02:27:24.683605', 'step': 1213, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 2847885091968}, 'timestamp': '2025-09-10 02:27:24.714217', 'step': 1213, 'epoch': 1} {'type': 'loss', 'content': 0.14336515963077545, 'timestamp': '2025-09-10 02:27:24.716662', 'step': 1214, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 3322488817984}, 'timestamp': '2025-09-10 02:27:24.747563', 'step': 1214, 'epoch': 1} {'type': 'loss', 'content': 0.14526641368865967, 'timestamp': '2025-09-10 02:27:24.749574', 'step': 1215, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 3322488817984}, 'timestamp': '2025-09-10 02:27:24.781020', 'step': 1215, 'epoch': 1} {'type': 'loss', 'content': 0.14472565054893494, 'timestamp': '2025-09-10 02:27:24.804537', 'step': 1216, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 3322488817984}, 'timestamp': '2025-09-10 02:27:24.834524', 'step': 1216, 'epoch': 1} {'type': 'loss', 'content': 0.17413456737995148, 'timestamp': '2025-09-10 02:27:24.836707', 'step': 1217, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 3322488817984}, 'timestamp': '2025-09-10 02:27:24.866138', 'step': 1217, 'epoch': 1} {'type': 'loss', 'content': 0.15910303592681885, 'timestamp': '2025-09-10 02:27:24.868399', 'step': 1218, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 128], 'flops': 3797092544000}, 'timestamp': '2025-09-10 02:27:24.900363', 'step': 1218, 'epoch': 1} {'type': 'loss', 'content': 0.19721052050590515, 'timestamp': '2025-09-10 02:27:24.902398', 'step': 1219, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 144], 'flops': 4271696270016}, 'timestamp': '2025-09-10 02:27:24.932043', 'step': 1219, 'epoch': 1} {'type': 'loss', 'content': 0.12901341915130615, 'timestamp': '2025-09-10 02:27:24.955423', 'step': 1220, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 2847885091968}, 'timestamp': '2025-09-10 02:27:24.988505', 'step': 1220, 'epoch': 1} {'type': 'loss', 'content': 0.18016375601291656, 'timestamp': '2025-09-10 02:27:24.990938', 'step': 1221, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 2847885091968}, 'timestamp': '2025-09-10 02:27:25.020641', 'step': 1221, 'epoch': 1} {'type': 'loss', 'content': 0.12177105993032455, 'timestamp': '2025-09-10 02:27:25.022803', 'step': 1222, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 3322488817984}, 'timestamp': '2025-09-10 02:27:25.052281', 'step': 1222, 'epoch': 1} {'type': 'loss', 'content': 0.1952940821647644, 'timestamp': '2025-09-10 02:27:25.054461', 'step': 1223, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 128], 'flops': 3797092544000}, 'timestamp': '2025-09-10 02:27:25.085281', 'step': 1223, 'epoch': 1} {'type': 'loss', 'content': 0.12831783294677734, 'timestamp': '2025-09-10 02:27:25.108769', 'step': 1224, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 3322488817984}, 'timestamp': '2025-09-10 02:27:25.139614', 'step': 1224, 'epoch': 1} {'type': 'loss', 'content': 0.2552708387374878, 'timestamp': '2025-09-10 02:27:25.142459', 'step': 1225, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 3322488817984}, 'timestamp': '2025-09-10 02:27:25.172204', 'step': 1225, 'epoch': 1} {'type': 'loss', 'content': 0.2811063230037689, 'timestamp': '2025-09-10 02:27:25.174251', 'step': 1226, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 2847885091968}, 'timestamp': '2025-09-10 02:27:25.203511', 'step': 1226, 'epoch': 1} {'type': 'loss', 'content': 0.23076534271240234, 'timestamp': '2025-09-10 02:27:25.205726', 'step': 1227, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 128], 'flops': 3797092544000}, 'timestamp': '2025-09-10 02:27:25.236423', 'step': 1227, 'epoch': 1} {'type': 'loss', 'content': 0.13344578444957733, 'timestamp': '2025-09-10 02:27:25.259860', 'step': 1228, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 3322488817984}, 'timestamp': '2025-09-10 02:27:25.290100', 'step': 1228, 'epoch': 1} {'type': 'loss', 'content': 0.21364633738994598, 'timestamp': '2025-09-10 02:27:25.292432', 'step': 1229, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 2847885091968}, 'timestamp': '2025-09-10 02:27:25.321927', 'step': 1229, 'epoch': 1} {'type': 'loss', 'content': 0.28841546177864075, 'timestamp': '2025-09-10 02:27:25.323970', 'step': 1230, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 144], 'flops': 4271696270016}, 'timestamp': '2025-09-10 02:27:25.353581', 'step': 1230, 'epoch': 1} {'type': 'loss', 'content': 0.21010546386241913, 'timestamp': '2025-09-10 02:27:25.355754', 'step': 1231, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 128], 'flops': 3797092544000}, 'timestamp': '2025-09-10 02:27:25.385723', 'step': 1231, 'epoch': 1} {'type': 'loss', 'content': 0.19847190380096436, 'timestamp': '2025-09-10 02:27:25.409189', 'step': 1232, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 3322488817984}, 'timestamp': '2025-09-10 02:27:25.439126', 'step': 1232, 'epoch': 1} {'type': 'loss', 'content': 0.1871524304151535, 'timestamp': '2025-09-10 02:27:25.441326', 'step': 1233, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 128], 'flops': 3797092544000}, 'timestamp': '2025-09-10 02:27:25.470703', 'step': 1233, 'epoch': 1} {'type': 'loss', 'content': 0.15353721380233765, 'timestamp': '2025-09-10 02:27:25.472667', 'step': 1234, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 3322488817984}, 'timestamp': '2025-09-10 02:27:25.502648', 'step': 1234, 'epoch': 1} {'type': 'loss', 'content': 0.13448329269886017, 'timestamp': '2025-09-10 02:27:25.505099', 'step': 1235, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 3322488817984}, 'timestamp': '2025-09-10 02:27:25.534492', 'step': 1235, 'epoch': 1} {'type': 'loss', 'content': 0.1332300454378128, 'timestamp': '2025-09-10 02:27:25.557954', 'step': 1236, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 128], 'flops': 3797092544000}, 'timestamp': '2025-09-10 02:27:25.589264', 'step': 1236, 'epoch': 1} {'type': 'loss', 'content': 0.211435467004776, 'timestamp': '2025-09-10 02:27:25.591515', 'step': 1237, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 272], 'flops': 8068526078144}, 'timestamp': '2025-09-10 02:27:25.642766', 'step': 1237, 'epoch': 1} {'type': 'loss', 'content': 0.43538278341293335, 'timestamp': '2025-09-10 02:27:25.648828', 'step': 1238, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 2847885091968}, 'timestamp': '2025-09-10 02:27:25.679449', 'step': 1238, 'epoch': 1} {'type': 'loss', 'content': 0.14927665889263153, 'timestamp': '2025-09-10 02:27:25.681436', 'step': 1239, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 3322488817984}, 'timestamp': '2025-09-10 02:27:25.710802', 'step': 1239, 'epoch': 1} {'type': 'loss', 'content': 0.24607634544372559, 'timestamp': '2025-09-10 02:27:25.734180', 'step': 1240, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 2847885091968}, 'timestamp': '2025-09-10 02:27:25.764323', 'step': 1240, 'epoch': 1} {'type': 'loss', 'content': 0.29963067173957825, 'timestamp': '2025-09-10 02:27:25.767494', 'step': 1241, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 2847885091968}, 'timestamp': '2025-09-10 02:27:25.797924', 'step': 1241, 'epoch': 1} {'type': 'loss', 'content': 0.16600024700164795, 'timestamp': '2025-09-10 02:27:25.800269', 'step': 1242, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 2847885091968}, 'timestamp': '2025-09-10 02:27:25.831027', 'step': 1242, 'epoch': 1} {'type': 'loss', 'content': 0.16750860214233398, 'timestamp': '2025-09-10 02:27:25.833486', 'step': 1243, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 3322488817984}, 'timestamp': '2025-09-10 02:27:25.863293', 'step': 1243, 'epoch': 1} {'type': 'loss', 'content': 0.11095331609249115, 'timestamp': '2025-09-10 02:27:25.886611', 'step': 1244, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 3322488817984}, 'timestamp': '2025-09-10 02:27:25.916974', 'step': 1244, 'epoch': 1} {'type': 'loss', 'content': 0.2249501347541809, 'timestamp': '2025-09-10 02:27:25.919466', 'step': 1245, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 144], 'flops': 4271696270016}, 'timestamp': '2025-09-10 02:27:25.949342', 'step': 1245, 'epoch': 1} {'type': 'loss', 'content': 0.1578671634197235, 'timestamp': '2025-09-10 02:27:25.951789', 'step': 1246, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 2847885091968}, 'timestamp': '2025-09-10 02:27:25.981123', 'step': 1246, 'epoch': 1} {'type': 'loss', 'content': 0.2526402771472931, 'timestamp': '2025-09-10 02:27:25.983687', 'step': 1247, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 3322488817984}, 'timestamp': '2025-09-10 02:27:26.014945', 'step': 1247, 'epoch': 1} {'type': 'loss', 'content': 0.09727950394153595, 'timestamp': '2025-09-10 02:27:26.038604', 'step': 1248, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 3322488817984}, 'timestamp': '2025-09-10 02:27:26.068638', 'step': 1248, 'epoch': 1} {'type': 'loss', 'content': 0.175349622964859, 'timestamp': '2025-09-10 02:27:26.071827', 'step': 1249, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 3322488817984}, 'timestamp': '2025-09-10 02:27:26.101655', 'step': 1249, 'epoch': 1} {'type': 'loss', 'content': 0.1412641704082489, 'timestamp': '2025-09-10 02:27:26.104492', 'step': 1250, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 128], 'flops': 3797092544000}, 'timestamp': '2025-09-10 02:27:26.135462', 'step': 1250, 'epoch': 1} {'type': 'loss', 'content': 0.18046776950359344, 'timestamp': '2025-09-10 02:27:26.137460', 'step': 1251, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 128], 'flops': 3797092544000}, 'timestamp': '2025-09-10 02:27:26.166405', 'step': 1251, 'epoch': 1} {'type': 'loss', 'content': 0.14762336015701294, 'timestamp': '2025-09-10 02:27:26.189877', 'step': 1252, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 3322488817984}, 'timestamp': '2025-09-10 02:27:26.220785', 'step': 1252, 'epoch': 1} {'type': 'loss', 'content': 0.1446884125471115, 'timestamp': '2025-09-10 02:27:26.222889', 'step': 1253, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 3322488817984}, 'timestamp': '2025-09-10 02:27:26.252300', 'step': 1253, 'epoch': 1} {'type': 'loss', 'content': 0.1438644677400589, 'timestamp': '2025-09-10 02:27:26.254388', 'step': 1254, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 3322488817984}, 'timestamp': '2025-09-10 02:27:26.284359', 'step': 1254, 'epoch': 1} {'type': 'loss', 'content': 0.1572432667016983, 'timestamp': '2025-09-10 02:27:26.286430', 'step': 1255, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 128], 'flops': 3797092544000}, 'timestamp': '2025-09-10 02:27:26.316367', 'step': 1255, 'epoch': 1} {'type': 'loss', 'content': 0.18543653190135956, 'timestamp': '2025-09-10 02:27:26.339981', 'step': 1256, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 3322488817984}, 'timestamp': '2025-09-10 02:27:26.371302', 'step': 1256, 'epoch': 1} {'type': 'loss', 'content': 0.1722637414932251, 'timestamp': '2025-09-10 02:27:26.373658', 'step': 1257, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 3322488817984}, 'timestamp': '2025-09-10 02:27:26.403828', 'step': 1257, 'epoch': 1} {'type': 'loss', 'content': 0.22217746078968048, 'timestamp': '2025-09-10 02:27:26.406198', 'step': 1258, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 3322488817984}, 'timestamp': '2025-09-10 02:27:26.435540', 'step': 1258, 'epoch': 1} {'type': 'loss', 'content': 0.1888876110315323, 'timestamp': '2025-09-10 02:27:26.437704', 'step': 1259, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 3322488817984}, 'timestamp': '2025-09-10 02:27:26.467620', 'step': 1259, 'epoch': 1} {'type': 'loss', 'content': 0.25756675004959106, 'timestamp': '2025-09-10 02:27:26.491101', 'step': 1260, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 144], 'flops': 4271696270016}, 'timestamp': '2025-09-10 02:27:26.521518', 'step': 1260, 'epoch': 1} {'type': 'loss', 'content': 0.217755988240242, 'timestamp': '2025-09-10 02:27:26.523655', 'step': 1261, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 128], 'flops': 3797092544000}, 'timestamp': '2025-09-10 02:27:26.553814', 'step': 1261, 'epoch': 1} {'type': 'loss', 'content': 0.24559324979782104, 'timestamp': '2025-09-10 02:27:26.555843', 'step': 1262, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 3322488817984}, 'timestamp': '2025-09-10 02:27:26.585131', 'step': 1262, 'epoch': 1} {'type': 'loss', 'content': 0.19934843480587006, 'timestamp': '2025-09-10 02:27:26.587399', 'step': 1263, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 3322488817984}, 'timestamp': '2025-09-10 02:27:26.616442', 'step': 1263, 'epoch': 1} {'type': 'loss', 'content': 0.14346976578235626, 'timestamp': '2025-09-10 02:27:26.640199', 'step': 1264, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 3322488817984}, 'timestamp': '2025-09-10 02:27:26.670479', 'step': 1264, 'epoch': 1} {'type': 'loss', 'content': 0.2759956419467926, 'timestamp': '2025-09-10 02:27:26.672535', 'step': 1265, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 2847885091968}, 'timestamp': '2025-09-10 02:27:26.703047', 'step': 1265, 'epoch': 1} {'type': 'loss', 'content': 0.17826345562934875, 'timestamp': '2025-09-10 02:27:26.704968', 'step': 1266, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 3322488817984}, 'timestamp': '2025-09-10 02:27:26.734161', 'step': 1266, 'epoch': 1} {'type': 'loss', 'content': 0.2614977955818176, 'timestamp': '2025-09-10 02:27:26.736327', 'step': 1267, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 2847885091968}, 'timestamp': '2025-09-10 02:27:26.765968', 'step': 1267, 'epoch': 1} {'type': 'loss', 'content': 0.05883055552840233, 'timestamp': '2025-09-10 02:27:26.789272', 'step': 1268, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 2847885091968}, 'timestamp': '2025-09-10 02:27:26.819243', 'step': 1268, 'epoch': 1} {'type': 'loss', 'content': 0.14606055617332458, 'timestamp': '2025-09-10 02:27:26.821457', 'step': 1269, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 128], 'flops': 3797092544000}, 'timestamp': '2025-09-10 02:27:26.851043', 'step': 1269, 'epoch': 1} {'type': 'loss', 'content': 0.34187233448028564, 'timestamp': '2025-09-10 02:27:26.853432', 'step': 1270, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 3322488817984}, 'timestamp': '2025-09-10 02:27:26.884168', 'step': 1270, 'epoch': 1} {'type': 'loss', 'content': 0.25180208683013916, 'timestamp': '2025-09-10 02:27:26.886391', 'step': 1271, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 144], 'flops': 4271696270016}, 'timestamp': '2025-09-10 02:27:26.916464', 'step': 1271, 'epoch': 1} {'type': 'loss', 'content': 0.18178686499595642, 'timestamp': '2025-09-10 02:27:26.939935', 'step': 1272, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 3322488817984}, 'timestamp': '2025-09-10 02:27:26.969711', 'step': 1272, 'epoch': 1} {'type': 'loss', 'content': 0.19301150739192963, 'timestamp': '2025-09-10 02:27:26.971904', 'step': 1273, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 2847885091968}, 'timestamp': '2025-09-10 02:27:27.003525', 'step': 1273, 'epoch': 1} {'type': 'loss', 'content': 0.1253863424062729, 'timestamp': '2025-09-10 02:27:27.005818', 'step': 1274, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 2847885091968}, 'timestamp': '2025-09-10 02:27:27.036380', 'step': 1274, 'epoch': 1} {'type': 'loss', 'content': 0.09767373651266098, 'timestamp': '2025-09-10 02:27:27.038363', 'step': 1275, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 3322488817984}, 'timestamp': '2025-09-10 02:27:27.068205', 'step': 1275, 'epoch': 1} {'type': 'loss', 'content': 0.21294590830802917, 'timestamp': '2025-09-10 02:27:27.091678', 'step': 1276, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 2847885091968}, 'timestamp': '2025-09-10 02:27:27.122603', 'step': 1276, 'epoch': 1} {'type': 'loss', 'content': 0.27123910188674927, 'timestamp': '2025-09-10 02:27:27.125291', 'step': 1277, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 3322488817984}, 'timestamp': '2025-09-10 02:27:27.156342', 'step': 1277, 'epoch': 1} {'type': 'loss', 'content': 0.16980178654193878, 'timestamp': '2025-09-10 02:27:27.158489', 'step': 1278, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 128], 'flops': 3797092544000}, 'timestamp': '2025-09-10 02:27:27.188268', 'step': 1278, 'epoch': 1} {'type': 'loss', 'content': 0.15758730471134186, 'timestamp': '2025-09-10 02:27:27.190268', 'step': 1279, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 3322488817984}, 'timestamp': '2025-09-10 02:27:27.219703', 'step': 1279, 'epoch': 1} {'type': 'loss', 'content': 0.20183837413787842, 'timestamp': '2025-09-10 02:27:27.243487', 'step': 1280, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 128], 'flops': 3797092544000}, 'timestamp': '2025-09-10 02:27:27.274597', 'step': 1280, 'epoch': 1} {'type': 'loss', 'content': 0.16190017759799957, 'timestamp': '2025-09-10 02:27:27.276974', 'step': 1281, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 2847885091968}, 'timestamp': '2025-09-10 02:27:27.306706', 'step': 1281, 'epoch': 1} {'type': 'loss', 'content': 0.13619810342788696, 'timestamp': '2025-09-10 02:27:27.308736', 'step': 1282, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 128], 'flops': 3797092544000}, 'timestamp': '2025-09-10 02:27:27.338370', 'step': 1282, 'epoch': 1} {'type': 'loss', 'content': 0.19330936670303345, 'timestamp': '2025-09-10 02:27:27.340649', 'step': 1283, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 2847885091968}, 'timestamp': '2025-09-10 02:27:27.371704', 'step': 1283, 'epoch': 1} {'type': 'loss', 'content': 0.15648627281188965, 'timestamp': '2025-09-10 02:27:27.395398', 'step': 1284, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 144], 'flops': 4271696270016}, 'timestamp': '2025-09-10 02:27:27.426062', 'step': 1284, 'epoch': 1} {'type': 'loss', 'content': 0.17981819808483124, 'timestamp': '2025-09-10 02:27:27.428329', 'step': 1285, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 2847885091968}, 'timestamp': '2025-09-10 02:27:27.457711', 'step': 1285, 'epoch': 1} {'type': 'loss', 'content': 0.22550630569458008, 'timestamp': '2025-09-10 02:27:27.459873', 'step': 1286, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 2847885091968}, 'timestamp': '2025-09-10 02:27:27.489410', 'step': 1286, 'epoch': 1} {'type': 'loss', 'content': 0.26591604948043823, 'timestamp': '2025-09-10 02:27:27.491502', 'step': 1287, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 2847885091968}, 'timestamp': '2025-09-10 02:27:27.521567', 'step': 1287, 'epoch': 1} {'type': 'loss', 'content': 0.12882870435714722, 'timestamp': '2025-09-10 02:27:27.545544', 'step': 1288, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 2847885091968}, 'timestamp': '2025-09-10 02:27:27.575574', 'step': 1288, 'epoch': 1} {'type': 'loss', 'content': 0.14318561553955078, 'timestamp': '2025-09-10 02:27:27.577465', 'step': 1289, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 128], 'flops': 3797092544000}, 'timestamp': '2025-09-10 02:27:27.607405', 'step': 1289, 'epoch': 1} {'type': 'loss', 'content': 0.18097074329853058, 'timestamp': '2025-09-10 02:27:27.609471', 'step': 1290, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 2847885091968}, 'timestamp': '2025-09-10 02:27:27.639026', 'step': 1290, 'epoch': 1} {'type': 'loss', 'content': 0.20488789677619934, 'timestamp': '2025-09-10 02:27:27.641292', 'step': 1291, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 2847885091968}, 'timestamp': '2025-09-10 02:27:27.671488', 'step': 1291, 'epoch': 1} {'type': 'loss', 'content': 0.17203259468078613, 'timestamp': '2025-09-10 02:27:27.694658', 'step': 1292, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 3322488817984}, 'timestamp': '2025-09-10 02:27:27.727207', 'step': 1292, 'epoch': 1} {'type': 'loss', 'content': 0.14986389875411987, 'timestamp': '2025-09-10 02:27:27.729500', 'step': 1293, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 2847885091968}, 'timestamp': '2025-09-10 02:27:27.759461', 'step': 1293, 'epoch': 1} {'type': 'loss', 'content': 0.16176772117614746, 'timestamp': '2025-09-10 02:27:27.761703', 'step': 1294, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 80], 'flops': 2373281365952}, 'timestamp': '2025-09-10 02:27:27.791663', 'step': 1294, 'epoch': 1} {'type': 'loss', 'content': 0.21509921550750732, 'timestamp': '2025-09-10 02:27:27.794622', 'step': 1295, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 3322488817984}, 'timestamp': '2025-09-10 02:27:27.824211', 'step': 1295, 'epoch': 1} {'type': 'loss', 'content': 0.23076945543289185, 'timestamp': '2025-09-10 02:27:27.847625', 'step': 1296, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 3322488817984}, 'timestamp': '2025-09-10 02:27:27.878288', 'step': 1296, 'epoch': 1} {'type': 'loss', 'content': 0.12912121415138245, 'timestamp': '2025-09-10 02:27:27.880415', 'step': 1297, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 3322488817984}, 'timestamp': '2025-09-10 02:27:27.911602', 'step': 1297, 'epoch': 1} {'type': 'loss', 'content': 0.1855529099702835, 'timestamp': '2025-09-10 02:27:27.913759', 'step': 1298, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 3322488817984}, 'timestamp': '2025-09-10 02:27:27.943762', 'step': 1298, 'epoch': 1} {'type': 'loss', 'content': 0.17447572946548462, 'timestamp': '2025-09-10 02:27:27.946873', 'step': 1299, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 128], 'flops': 3797092544000}, 'timestamp': '2025-09-10 02:27:27.978661', 'step': 1299, 'epoch': 1} {'type': 'loss', 'content': 0.16367946565151215, 'timestamp': '2025-09-10 02:27:28.002448', 'step': 1300, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 3322488817984}, 'timestamp': '2025-09-10 02:27:28.033560', 'step': 1300, 'epoch': 1} {'type': 'loss', 'content': 0.24138733744621277, 'timestamp': '2025-09-10 02:27:28.036370', 'step': 1301, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 3322488817984}, 'timestamp': '2025-09-10 02:27:28.072512', 'step': 1301, 'epoch': 1} {'type': 'loss', 'content': 0.19814011454582214, 'timestamp': '2025-09-10 02:27:28.074686', 'step': 1302, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 2847885091968}, 'timestamp': '2025-09-10 02:27:28.105131', 'step': 1302, 'epoch': 1} {'type': 'loss', 'content': 0.26490679383277893, 'timestamp': '2025-09-10 02:27:28.107368', 'step': 1303, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 2847885091968}, 'timestamp': '2025-09-10 02:27:28.137487', 'step': 1303, 'epoch': 1} {'type': 'loss', 'content': 0.1887141466140747, 'timestamp': '2025-09-10 02:27:28.161112', 'step': 1304, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 128], 'flops': 3797092544000}, 'timestamp': '2025-09-10 02:27:28.192643', 'step': 1304, 'epoch': 1} {'type': 'loss', 'content': 0.16513670980930328, 'timestamp': '2025-09-10 02:27:28.197750', 'step': 1305, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 128], 'flops': 3797092544000}, 'timestamp': '2025-09-10 02:27:28.231741', 'step': 1305, 'epoch': 1} {'type': 'loss', 'content': 0.3324218690395355, 'timestamp': '2025-09-10 02:27:28.234345', 'step': 1306, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 3322488817984}, 'timestamp': '2025-09-10 02:27:28.272795', 'step': 1306, 'epoch': 1} {'type': 'loss', 'content': 0.1861693263053894, 'timestamp': '2025-09-10 02:27:28.274973', 'step': 1307, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 128], 'flops': 3797092544000}, 'timestamp': '2025-09-10 02:27:28.304943', 'step': 1307, 'epoch': 1} {'type': 'loss', 'content': 0.24053621292114258, 'timestamp': '2025-09-10 02:27:28.328444', 'step': 1308, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 3322488817984}, 'timestamp': '2025-09-10 02:27:28.359317', 'step': 1308, 'epoch': 1} {'type': 'loss', 'content': 0.14212560653686523, 'timestamp': '2025-09-10 02:27:28.361884', 'step': 1309, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 128], 'flops': 3797092544000}, 'timestamp': '2025-09-10 02:27:28.391477', 'step': 1309, 'epoch': 1} {'type': 'loss', 'content': 0.10069824755191803, 'timestamp': '2025-09-10 02:27:28.393592', 'step': 1310, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 2847885091968}, 'timestamp': '2025-09-10 02:27:28.432856', 'step': 1310, 'epoch': 1} {'type': 'loss', 'content': 0.147059828042984, 'timestamp': '2025-09-10 02:27:28.434996', 'step': 1311, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 160], 'flops': 4746299996032}, 'timestamp': '2025-09-10 02:27:28.465118', 'step': 1311, 'epoch': 1} {'type': 'loss', 'content': 0.1469826102256775, 'timestamp': '2025-09-10 02:27:28.488697', 'step': 1312, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 3322488817984}, 'timestamp': '2025-09-10 02:27:28.519048', 'step': 1312, 'epoch': 1} {'type': 'loss', 'content': 0.19957025349140167, 'timestamp': '2025-09-10 02:27:28.521387', 'step': 1313, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 144], 'flops': 4271696270016}, 'timestamp': '2025-09-10 02:27:28.551394', 'step': 1313, 'epoch': 1} {'type': 'loss', 'content': 0.1618897169828415, 'timestamp': '2025-09-10 02:27:28.553917', 'step': 1314, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 144], 'flops': 4271696270016}, 'timestamp': '2025-09-10 02:27:28.584687', 'step': 1314, 'epoch': 1} {'type': 'loss', 'content': 0.19369731843471527, 'timestamp': '2025-09-10 02:27:28.587754', 'step': 1315, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 3322488817984}, 'timestamp': '2025-09-10 02:27:28.627214', 'step': 1315, 'epoch': 1} {'type': 'loss', 'content': 0.22400572896003723, 'timestamp': '2025-09-10 02:27:28.651619', 'step': 1316, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 3322488817984}, 'timestamp': '2025-09-10 02:27:28.681749', 'step': 1316, 'epoch': 1} {'type': 'loss', 'content': 0.15650027990341187, 'timestamp': '2025-09-10 02:27:28.684118', 'step': 1317, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 3322488817984}, 'timestamp': '2025-09-10 02:27:28.713561', 'step': 1317, 'epoch': 1} {'type': 'loss', 'content': 0.13408711552619934, 'timestamp': '2025-09-10 02:27:28.716171', 'step': 1318, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 144], 'flops': 4271696270016}, 'timestamp': '2025-09-10 02:27:28.750564', 'step': 1318, 'epoch': 1} {'type': 'loss', 'content': 0.16770993173122406, 'timestamp': '2025-09-10 02:27:28.753143', 'step': 1319, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 3322488817984}, 'timestamp': '2025-09-10 02:27:28.787192', 'step': 1319, 'epoch': 1} {'type': 'loss', 'content': 0.07450095564126968, 'timestamp': '2025-09-10 02:27:28.810697', 'step': 1320, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 128], 'flops': 3797092544000}, 'timestamp': '2025-09-10 02:27:28.846144', 'step': 1320, 'epoch': 1} {'type': 'loss', 'content': 0.19350554049015045, 'timestamp': '2025-09-10 02:27:28.848298', 'step': 1321, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 176], 'flops': 5220903722048}, 'timestamp': '2025-09-10 02:27:28.878504', 'step': 1321, 'epoch': 1} {'type': 'loss', 'content': 0.11865957826375961, 'timestamp': '2025-09-10 02:27:28.882768', 'step': 1322, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 3322488817984}, 'timestamp': '2025-09-10 02:27:28.914505', 'step': 1322, 'epoch': 1} {'type': 'loss', 'content': 0.12210803478956223, 'timestamp': '2025-09-10 02:27:28.916777', 'step': 1323, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 144], 'flops': 4271696270016}, 'timestamp': '2025-09-10 02:27:28.947322', 'step': 1323, 'epoch': 1} {'type': 'loss', 'content': 0.14235977828502655, 'timestamp': '2025-09-10 02:27:28.977048', 'step': 1324, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 128], 'flops': 3797092544000}, 'timestamp': '2025-09-10 02:27:29.006922', 'step': 1324, 'epoch': 1} {'type': 'loss', 'content': 0.28138163685798645, 'timestamp': '2025-09-10 02:27:29.009687', 'step': 1325, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 2847885091968}, 'timestamp': '2025-09-10 02:27:29.039247', 'step': 1325, 'epoch': 1} {'type': 'loss', 'content': 0.10644259303808212, 'timestamp': '2025-09-10 02:27:29.041272', 'step': 1326, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 3322488817984}, 'timestamp': '2025-09-10 02:27:29.071284', 'step': 1326, 'epoch': 1} {'type': 'loss', 'content': 0.16340643167495728, 'timestamp': '2025-09-10 02:27:29.073251', 'step': 1327, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 2847885091968}, 'timestamp': '2025-09-10 02:27:29.102347', 'step': 1327, 'epoch': 1} {'type': 'loss', 'content': 0.1404794603586197, 'timestamp': '2025-09-10 02:27:29.125839', 'step': 1328, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 2847885091968}, 'timestamp': '2025-09-10 02:27:29.156005', 'step': 1328, 'epoch': 1} {'type': 'loss', 'content': 0.13673104345798492, 'timestamp': '2025-09-10 02:27:29.158425', 'step': 1329, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 128], 'flops': 3797092544000}, 'timestamp': '2025-09-10 02:27:29.189184', 'step': 1329, 'epoch': 1} {'type': 'loss', 'content': 0.11551151424646378, 'timestamp': '2025-09-10 02:27:29.191595', 'step': 1330, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 2847885091968}, 'timestamp': '2025-09-10 02:27:29.221474', 'step': 1330, 'epoch': 1} {'type': 'loss', 'content': 0.14338473975658417, 'timestamp': '2025-09-10 02:27:29.223644', 'step': 1331, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 128], 'flops': 3797092544000}, 'timestamp': '2025-09-10 02:27:29.253307', 'step': 1331, 'epoch': 1} {'type': 'loss', 'content': 0.25673991441726685, 'timestamp': '2025-09-10 02:27:29.280844', 'step': 1332, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 3322488817984}, 'timestamp': '2025-09-10 02:27:29.311775', 'step': 1332, 'epoch': 1} {'type': 'loss', 'content': 0.1944417953491211, 'timestamp': '2025-09-10 02:27:29.313957', 'step': 1333, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 80], 'flops': 2373281365952}, 'timestamp': '2025-09-10 02:27:29.342876', 'step': 1333, 'epoch': 1} {'type': 'loss', 'content': 0.2606304883956909, 'timestamp': '2025-09-10 02:27:29.345191', 'step': 1334, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 2847885091968}, 'timestamp': '2025-09-10 02:27:29.374733', 'step': 1334, 'epoch': 1} {'type': 'loss', 'content': 0.1823713779449463, 'timestamp': '2025-09-10 02:27:29.376909', 'step': 1335, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 3322488817984}, 'timestamp': '2025-09-10 02:27:29.406680', 'step': 1335, 'epoch': 1} {'type': 'loss', 'content': 0.17182791233062744, 'timestamp': '2025-09-10 02:27:29.430417', 'step': 1336, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 144], 'flops': 4271696270016}, 'timestamp': '2025-09-10 02:27:29.461413', 'step': 1336, 'epoch': 1} {'type': 'loss', 'content': 0.10679792612791061, 'timestamp': '2025-09-10 02:27:29.463680', 'step': 1337, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 2847885091968}, 'timestamp': '2025-09-10 02:27:29.493638', 'step': 1337, 'epoch': 1} {'type': 'loss', 'content': 0.10664232075214386, 'timestamp': '2025-09-10 02:27:29.495820', 'step': 1338, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 128], 'flops': 3797092544000}, 'timestamp': '2025-09-10 02:27:29.526699', 'step': 1338, 'epoch': 1} {'type': 'loss', 'content': 0.14816196262836456, 'timestamp': '2025-09-10 02:27:29.528708', 'step': 1339, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 2847885091968}, 'timestamp': '2025-09-10 02:27:29.558593', 'step': 1339, 'epoch': 1} {'type': 'loss', 'content': 0.25352391600608826, 'timestamp': '2025-09-10 02:27:29.582275', 'step': 1340, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 3322488817984}, 'timestamp': '2025-09-10 02:27:29.611927', 'step': 1340, 'epoch': 1} {'type': 'loss', 'content': 0.19970066845417023, 'timestamp': '2025-09-10 02:27:29.615785', 'step': 1341, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 3322488817984}, 'timestamp': '2025-09-10 02:27:29.645656', 'step': 1341, 'epoch': 1} {'type': 'loss', 'content': 0.21126793324947357, 'timestamp': '2025-09-10 02:27:29.647811', 'step': 1342, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 128], 'flops': 3797092544000}, 'timestamp': '2025-09-10 02:27:29.678208', 'step': 1342, 'epoch': 1} {'type': 'loss', 'content': 0.1301194429397583, 'timestamp': '2025-09-10 02:27:29.680486', 'step': 1343, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 2847885091968}, 'timestamp': '2025-09-10 02:27:29.710639', 'step': 1343, 'epoch': 1} {'type': 'loss', 'content': 0.12485362589359283, 'timestamp': '2025-09-10 02:27:29.734497', 'step': 1344, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 2847885091968}, 'timestamp': '2025-09-10 02:27:29.764762', 'step': 1344, 'epoch': 1} {'type': 'loss', 'content': 0.322079062461853, 'timestamp': '2025-09-10 02:27:29.767058', 'step': 1345, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 3322488817984}, 'timestamp': '2025-09-10 02:27:29.797448', 'step': 1345, 'epoch': 1} {'type': 'loss', 'content': 0.1959877610206604, 'timestamp': '2025-09-10 02:27:29.799494', 'step': 1346, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 128], 'flops': 3797092544000}, 'timestamp': '2025-09-10 02:27:29.830651', 'step': 1346, 'epoch': 1} {'type': 'loss', 'content': 0.11408790946006775, 'timestamp': '2025-09-10 02:27:29.833213', 'step': 1347, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 3322488817984}, 'timestamp': '2025-09-10 02:27:29.864358', 'step': 1347, 'epoch': 1} {'type': 'loss', 'content': 0.23323889076709747, 'timestamp': '2025-09-10 02:27:29.887896', 'step': 1348, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 3322488817984}, 'timestamp': '2025-09-10 02:27:29.919326', 'step': 1348, 'epoch': 1} {'type': 'loss', 'content': 0.21372084319591522, 'timestamp': '2025-09-10 02:27:29.921624', 'step': 1349, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 144], 'flops': 4271696270016}, 'timestamp': '2025-09-10 02:27:29.951444', 'step': 1349, 'epoch': 1} {'type': 'loss', 'content': 0.1908271163702011, 'timestamp': '2025-09-10 02:27:29.953747', 'step': 1350, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 3322488817984}, 'timestamp': '2025-09-10 02:27:29.983740', 'step': 1350, 'epoch': 1} {'type': 'loss', 'content': 0.14099372923374176, 'timestamp': '2025-09-10 02:27:29.985716', 'step': 1351, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 128], 'flops': 3797092544000}, 'timestamp': '2025-09-10 02:27:30.015848', 'step': 1351, 'epoch': 1} {'type': 'loss', 'content': 0.1570969969034195, 'timestamp': '2025-09-10 02:27:30.039354', 'step': 1352, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 3322488817984}, 'timestamp': '2025-09-10 02:27:30.069370', 'step': 1352, 'epoch': 1} {'type': 'loss', 'content': 0.22041313350200653, 'timestamp': '2025-09-10 02:27:30.071383', 'step': 1353, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 3322488817984}, 'timestamp': '2025-09-10 02:27:30.101026', 'step': 1353, 'epoch': 1} {'type': 'loss', 'content': 0.14919856190681458, 'timestamp': '2025-09-10 02:27:30.103167', 'step': 1354, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 3322488817984}, 'timestamp': '2025-09-10 02:27:30.132715', 'step': 1354, 'epoch': 1} {'type': 'loss', 'content': 0.3338930904865265, 'timestamp': '2025-09-10 02:27:30.134694', 'step': 1355, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 2847885091968}, 'timestamp': '2025-09-10 02:27:30.164652', 'step': 1355, 'epoch': 1} {'type': 'loss', 'content': 0.13588669896125793, 'timestamp': '2025-09-10 02:27:30.188200', 'step': 1356, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 144], 'flops': 4271696270016}, 'timestamp': '2025-09-10 02:27:30.218673', 'step': 1356, 'epoch': 1} {'type': 'loss', 'content': 0.1960083246231079, 'timestamp': '2025-09-10 02:27:30.220733', 'step': 1357, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 3322488817984}, 'timestamp': '2025-09-10 02:27:30.252310', 'step': 1357, 'epoch': 1} {'type': 'loss', 'content': 0.29045411944389343, 'timestamp': '2025-09-10 02:27:30.254913', 'step': 1358, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 144], 'flops': 4271696270016}, 'timestamp': '2025-09-10 02:27:30.286795', 'step': 1358, 'epoch': 1} {'type': 'loss', 'content': 0.2397114485502243, 'timestamp': '2025-09-10 02:27:30.289125', 'step': 1359, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 2847885091968}, 'timestamp': '2025-09-10 02:27:30.318484', 'step': 1359, 'epoch': 1} {'type': 'loss', 'content': 0.19530469179153442, 'timestamp': '2025-09-10 02:27:30.341801', 'step': 1360, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 3322488817984}, 'timestamp': '2025-09-10 02:27:30.372517', 'step': 1360, 'epoch': 1} {'type': 'loss', 'content': 0.20148156583309174, 'timestamp': '2025-09-10 02:27:30.375027', 'step': 1361, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 2847885091968}, 'timestamp': '2025-09-10 02:27:30.406398', 'step': 1361, 'epoch': 1} {'type': 'loss', 'content': 0.09225887805223465, 'timestamp': '2025-09-10 02:27:30.408685', 'step': 1362, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 160], 'flops': 4746299996032}, 'timestamp': '2025-09-10 02:27:30.439353', 'step': 1362, 'epoch': 1} {'type': 'loss', 'content': 0.11829516291618347, 'timestamp': '2025-09-10 02:27:30.442062', 'step': 1363, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 3322488817984}, 'timestamp': '2025-09-10 02:27:30.471845', 'step': 1363, 'epoch': 1} {'type': 'loss', 'content': 0.23407652974128723, 'timestamp': '2025-09-10 02:27:30.495618', 'step': 1364, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 2847885091968}, 'timestamp': '2025-09-10 02:27:30.526440', 'step': 1364, 'epoch': 1} {'type': 'loss', 'content': 0.09790180623531342, 'timestamp': '2025-09-10 02:27:30.529348', 'step': 1365, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 160], 'flops': 4746299996032}, 'timestamp': '2025-09-10 02:27:30.559359', 'step': 1365, 'epoch': 1} {'type': 'loss', 'content': 0.13113577663898468, 'timestamp': '2025-09-10 02:27:30.562117', 'step': 1366, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 3322488817984}, 'timestamp': '2025-09-10 02:27:30.592634', 'step': 1366, 'epoch': 1} {'type': 'loss', 'content': 0.18697568774223328, 'timestamp': '2025-09-10 02:27:30.594776', 'step': 1367, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 2847885091968}, 'timestamp': '2025-09-10 02:27:30.624708', 'step': 1367, 'epoch': 1} {'type': 'loss', 'content': 0.08803489059209824, 'timestamp': '2025-09-10 02:27:30.649845', 'step': 1368, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 3322488817984}, 'timestamp': '2025-09-10 02:27:30.679475', 'step': 1368, 'epoch': 1} {'type': 'loss', 'content': 0.1868707537651062, 'timestamp': '2025-09-10 02:27:30.682468', 'step': 1369, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 128], 'flops': 3797092544000}, 'timestamp': '2025-09-10 02:27:30.712876', 'step': 1369, 'epoch': 1} {'type': 'loss', 'content': 0.17357444763183594, 'timestamp': '2025-09-10 02:27:30.714986', 'step': 1370, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 2847885091968}, 'timestamp': '2025-09-10 02:27:30.745344', 'step': 1370, 'epoch': 1} {'type': 'loss', 'content': 0.19598248600959778, 'timestamp': '2025-09-10 02:27:30.747335', 'step': 1371, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 128], 'flops': 3797092544000}, 'timestamp': '2025-09-10 02:27:30.784777', 'step': 1371, 'epoch': 1} {'type': 'loss', 'content': 0.13153858482837677, 'timestamp': '2025-09-10 02:27:30.808436', 'step': 1372, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 3322488817984}, 'timestamp': '2025-09-10 02:27:30.839390', 'step': 1372, 'epoch': 1} {'type': 'loss', 'content': 0.08202915638685226, 'timestamp': '2025-09-10 02:27:30.841692', 'step': 1373, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 2847885091968}, 'timestamp': '2025-09-10 02:27:30.882486', 'step': 1373, 'epoch': 1} {'type': 'loss', 'content': 0.21203741431236267, 'timestamp': '2025-09-10 02:27:30.884728', 'step': 1374, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 160], 'flops': 4746299996032}, 'timestamp': '2025-09-10 02:27:30.914688', 'step': 1374, 'epoch': 1} {'type': 'loss', 'content': 0.15136292576789856, 'timestamp': '2025-09-10 02:27:30.917619', 'step': 1375, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 3322488817984}, 'timestamp': '2025-09-10 02:27:30.948191', 'step': 1375, 'epoch': 1} {'type': 'loss', 'content': 0.23266860842704773, 'timestamp': '2025-09-10 02:27:30.971895', 'step': 1376, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 160], 'flops': 4746299996032}, 'timestamp': '2025-09-10 02:27:31.002132', 'step': 1376, 'epoch': 1} {'type': 'loss', 'content': 0.16797734797000885, 'timestamp': '2025-09-10 02:27:31.006748', 'step': 1377, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 3322488817984}, 'timestamp': '2025-09-10 02:27:31.037139', 'step': 1377, 'epoch': 1} {'type': 'loss', 'content': 0.21111780405044556, 'timestamp': '2025-09-10 02:27:31.039310', 'step': 1378, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 128], 'flops': 3797092544000}, 'timestamp': '2025-09-10 02:27:31.069672', 'step': 1378, 'epoch': 1} {'type': 'loss', 'content': 0.14053891599178314, 'timestamp': '2025-09-10 02:27:31.072250', 'step': 1379, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 3322488817984}, 'timestamp': '2025-09-10 02:27:31.102941', 'step': 1379, 'epoch': 1} {'type': 'loss', 'content': 0.15684516727924347, 'timestamp': '2025-09-10 02:27:31.126322', 'step': 1380, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 144], 'flops': 4271696270016}, 'timestamp': '2025-09-10 02:27:31.156553', 'step': 1380, 'epoch': 1} {'type': 'loss', 'content': 0.16707229614257812, 'timestamp': '2025-09-10 02:27:31.158566', 'step': 1381, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 128], 'flops': 3797092544000}, 'timestamp': '2025-09-10 02:27:31.188443', 'step': 1381, 'epoch': 1} {'type': 'loss', 'content': 0.13233551383018494, 'timestamp': '2025-09-10 02:27:31.191525', 'step': 1382, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 3322488817984}, 'timestamp': '2025-09-10 02:27:31.222165', 'step': 1382, 'epoch': 1} {'type': 'loss', 'content': 0.23691847920417786, 'timestamp': '2025-09-10 02:27:31.224411', 'step': 1383, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 3322488817984}, 'timestamp': '2025-09-10 02:27:31.254516', 'step': 1383, 'epoch': 1} {'type': 'loss', 'content': 0.14894317090511322, 'timestamp': '2025-09-10 02:27:31.285560', 'step': 1384, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 2847885091968}, 'timestamp': '2025-09-10 02:27:31.319869', 'step': 1384, 'epoch': 1} {'type': 'loss', 'content': 0.2302226722240448, 'timestamp': '2025-09-10 02:27:31.322104', 'step': 1385, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 144], 'flops': 4271696270016}, 'timestamp': '2025-09-10 02:27:31.354653', 'step': 1385, 'epoch': 1} {'type': 'loss', 'content': 0.1886342316865921, 'timestamp': '2025-09-10 02:27:31.357213', 'step': 1386, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 128], 'flops': 3797092544000}, 'timestamp': '2025-09-10 02:27:31.389609', 'step': 1386, 'epoch': 1} {'type': 'loss', 'content': 0.15914183855056763, 'timestamp': '2025-09-10 02:27:31.391730', 'step': 1387, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 2847885091968}, 'timestamp': '2025-09-10 02:27:31.421926', 'step': 1387, 'epoch': 1} {'type': 'loss', 'content': 0.19292514026165009, 'timestamp': '2025-09-10 02:27:31.445659', 'step': 1388, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 3322488817984}, 'timestamp': '2025-09-10 02:27:31.475774', 'step': 1388, 'epoch': 1} {'type': 'loss', 'content': 0.20341342687606812, 'timestamp': '2025-09-10 02:27:31.478146', 'step': 1389, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 3322488817984}, 'timestamp': '2025-09-10 02:27:31.508817', 'step': 1389, 'epoch': 1} {'type': 'loss', 'content': 0.16831813752651215, 'timestamp': '2025-09-10 02:27:31.511253', 'step': 1390, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 144], 'flops': 4271696270016}, 'timestamp': '2025-09-10 02:27:31.541528', 'step': 1390, 'epoch': 1} {'type': 'loss', 'content': 0.19894906878471375, 'timestamp': '2025-09-10 02:27:31.543962', 'step': 1391, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 80], 'flops': 2373281365952}, 'timestamp': '2025-09-10 02:27:31.573254', 'step': 1391, 'epoch': 1} {'type': 'loss', 'content': 0.07710407674312592, 'timestamp': '2025-09-10 02:27:31.597133', 'step': 1392, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 3322488817984}, 'timestamp': '2025-09-10 02:27:31.627140', 'step': 1392, 'epoch': 1} {'type': 'loss', 'content': 0.2214154154062271, 'timestamp': '2025-09-10 02:27:31.629357', 'step': 1393, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 3322488817984}, 'timestamp': '2025-09-10 02:27:31.659597', 'step': 1393, 'epoch': 1} {'type': 'loss', 'content': 0.16894033551216125, 'timestamp': '2025-09-10 02:27:31.661624', 'step': 1394, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 3322488817984}, 'timestamp': '2025-09-10 02:27:31.691534', 'step': 1394, 'epoch': 1} {'type': 'loss', 'content': 0.12957531213760376, 'timestamp': '2025-09-10 02:27:31.693590', 'step': 1395, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 2847885091968}, 'timestamp': '2025-09-10 02:27:31.724411', 'step': 1395, 'epoch': 1} {'type': 'loss', 'content': 0.2558973431587219, 'timestamp': '2025-09-10 02:27:31.747915', 'step': 1396, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 128], 'flops': 3797092544000}, 'timestamp': '2025-09-10 02:27:31.777809', 'step': 1396, 'epoch': 1} {'type': 'loss', 'content': 0.18501520156860352, 'timestamp': '2025-09-10 02:27:31.779839', 'step': 1397, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 2847885091968}, 'timestamp': '2025-09-10 02:27:31.810623', 'step': 1397, 'epoch': 1} {'type': 'loss', 'content': 0.1912100911140442, 'timestamp': '2025-09-10 02:27:31.812939', 'step': 1398, 'epoch': 1} {'type': 'flops', 'content': [{'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1582003754624}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1898404492032}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1898404492032}, {'type': 'perplexity', 'in_batch_dim': [8, 144], 'batch_size': 8, 'flops': 2847606704256}, {'type': 'perplexity', 'in_batch_dim': [8, 64], 'batch_size': 8, 'flops': 1265603017216}, {'type': 'perplexity', 'in_batch_dim': [8, 112], 'batch_size': 8, 'flops': 2214805229440}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1898404492032}, {'type': 'perplexity', 'in_batch_dim': [8, 112], 'batch_size': 8, 'flops': 2214805229440}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1898404492032}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1898404492032}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1898404492032}, {'type': 'perplexity', 'in_batch_dim': [8, 112], 'batch_size': 8, 'flops': 2214805229440}, {'type': 'perplexity', 'in_batch_dim': [8, 112], 'batch_size': 8, 'flops': 2214805229440}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1582003754624}, {'type': 'perplexity', 'in_batch_dim': [8, 144], 'batch_size': 8, 'flops': 2847606704256}, {'type': 'perplexity', 'in_batch_dim': [8, 112], 'batch_size': 8, 'flops': 2214805229440}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1582003754624}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1898404492032}, {'type': 'perplexity', 'in_batch_dim': [8, 64], 'batch_size': 8, 'flops': 1265603017216}, {'type': 'perplexity', 'in_batch_dim': [8, 64], 'batch_size': 8, 'flops': 1265603017216}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1898404492032}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1582003754624}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1582003754624}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1582003754624}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1582003754624}, {'type': 'perplexity', 'in_batch_dim': [8, 64], 'batch_size': 8, 'flops': 1265603017216}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1898404492032}, {'type': 'perplexity', 'in_batch_dim': [8, 64], 'batch_size': 8, 'flops': 1265603017216}, {'type': 'perplexity', 'in_batch_dim': [8, 64], 'batch_size': 8, 'flops': 1265603017216}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1582003754624}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1582003754624}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1582003754624}, {'type': 'perplexity', 'in_batch_dim': [8, 112], 'batch_size': 8, 'flops': 2214805229440}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1582003754624}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1582003754624}, {'type': 'perplexity', 'in_batch_dim': [8, 160], 'batch_size': 8, 'flops': 3164007441664}, {'type': 'perplexity', 'in_batch_dim': [8, 64], 'batch_size': 8, 'flops': 1265603017216}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1898404492032}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1898404492032}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1898404492032}, {'type': 'perplexity', 'in_batch_dim': [8, 64], 'batch_size': 8, 'flops': 1265603017216}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1582003754624}, {'type': 'perplexity', 'in_batch_dim': [8, 64], 'batch_size': 8, 'flops': 1265603017216}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1582003754624}, {'type': 'perplexity', 'in_batch_dim': [8, 64], 'batch_size': 8, 'flops': 1265603017216}, {'type': 'perplexity', 'in_batch_dim': [8, 112], 'batch_size': 8, 'flops': 2214805229440}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1582003754624}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1898404492032}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1898404492032}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1898404492032}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1898404492032}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1898404492032}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1898404492032}, {'type': 'perplexity', 'in_batch_dim': [8, 64], 'batch_size': 8, 'flops': 1265603017216}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1898404492032}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1582003754624}, {'type': 'perplexity', 'in_batch_dim': [8, 64], 'batch_size': 8, 'flops': 1265603017216}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1898404492032}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1898404492032}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1898404492032}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1898404492032}, {'type': 'perplexity', 'in_batch_dim': [8, 128], 'batch_size': 8, 'flops': 2531205966848}, {'type': 'perplexity', 'in_batch_dim': [8, 112], 'batch_size': 8, 'flops': 2214805229440}, {'type': 'perplexity', 'in_batch_dim': [8, 64], 'batch_size': 8, 'flops': 1265603017216}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1582003754624}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1898404492032}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1582003754624}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1898404492032}, {'type': 'perplexity', 'in_batch_dim': [8, 64], 'batch_size': 8, 'flops': 1265603017216}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1898404492032}, {'type': 'perplexity', 'in_batch_dim': [8, 112], 'batch_size': 8, 'flops': 2214805229440}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1898404492032}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1582003754624}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1582003754624}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1582003754624}, {'type': 'perplexity', 'in_batch_dim': [8, 64], 'batch_size': 8, 'flops': 1265603017216}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1582003754624}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1898404492032}, {'type': 'perplexity', 'in_batch_dim': [8, 64], 'batch_size': 8, 'flops': 1265603017216}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1582003754624}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1898404492032}, {'type': 'perplexity', 'in_batch_dim': [8, 64], 'batch_size': 8, 'flops': 1265603017216}, {'type': 'perplexity', 'in_batch_dim': [8, 112], 'batch_size': 8, 'flops': 2214805229440}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1582003754624}, {'type': 'perplexity', 'in_batch_dim': [8, 144], 'batch_size': 8, 'flops': 2847606704256}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1582003754624}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1582003754624}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1582003754624}, {'type': 'perplexity', 'in_batch_dim': [8, 64], 'batch_size': 8, 'flops': 1265603017216}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1898404492032}, {'type': 'perplexity', 'in_batch_dim': [8, 112], 'batch_size': 8, 'flops': 2214805229440}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1898404492032}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1582003754624}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1582003754624}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1582003754624}, {'type': 'perplexity', 'in_batch_dim': [8, 112], 'batch_size': 8, 'flops': 2214805229440}, {'type': 'perplexity', 'in_batch_dim': [8, 64], 'batch_size': 8, 'flops': 1265603017216}, {'type': 'perplexity', 'in_batch_dim': [8, 112], 'batch_size': 8, 'flops': 2214805229440}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1582003754624}, {'type': 'perplexity', 'in_batch_dim': [8, 64], 'batch_size': 8, 'flops': 1265603017216}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1582003754624}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1582003754624}, {'type': 'perplexity', 'in_batch_dim': [8, 64], 'batch_size': 8, 'flops': 1265603017216}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1582003754624}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1582003754624}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1582003754624}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1898404492032}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1582003754624}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1582003754624}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1898404492032}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1898404492032}, {'type': 'perplexity', 'in_batch_dim': [8, 112], 'batch_size': 8, 'flops': 2214805229440}, {'type': 'perplexity', 'in_batch_dim': [8, 64], 'batch_size': 8, 'flops': 1265603017216}, {'type': 'perplexity', 'in_batch_dim': [8, 112], 'batch_size': 8, 'flops': 2214805229440}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1898404492032}, {'type': 'perplexity', 'in_batch_dim': [8, 112], 'batch_size': 8, 'flops': 2214805229440}, {'type': 'perplexity', 'in_batch_dim': [8, 128], 'batch_size': 8, 'flops': 2531205966848}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1582003754624}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1898404492032}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1898404492032}, {'type': 'perplexity', 'in_batch_dim': [8, 192], 'batch_size': 8, 'flops': 3796808916480}, {'type': 'perplexity', 'in_batch_dim': [8, 64], 'batch_size': 8, 'flops': 1265603017216}, {'type': 'perplexity', 'in_batch_dim': [8, 144], 'batch_size': 8, 'flops': 2847606704256}, {'type': 'perplexity', 'in_batch_dim': [8, 64], 'batch_size': 8, 'flops': 1265603017216}, {'type': 'perplexity', 'in_batch_dim': [8, 144], 'batch_size': 8, 'flops': 2847606704256}, {'type': 'perplexity', 'in_batch_dim': [8, 64], 'batch_size': 8, 'flops': 1265603017216}, {'type': 'perplexity', 'in_batch_dim': [8, 64], 'batch_size': 8, 'flops': 1265603017216}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1898404492032}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1898404492032}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1582003754624}, {'type': 'perplexity', 'in_batch_dim': [8, 128], 'batch_size': 8, 'flops': 2531205966848}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1898404492032}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1898404492032}, {'type': 'perplexity', 'in_batch_dim': [8, 112], 'batch_size': 8, 'flops': 2214805229440}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1582003754624}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1582003754624}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1898404492032}, {'type': 'perplexity', 'in_batch_dim': [8, 64], 'batch_size': 8, 'flops': 1265603017216}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1898404492032}, {'type': 'perplexity', 'in_batch_dim': [8, 112], 'batch_size': 8, 'flops': 2214805229440}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1898404492032}, {'type': 'perplexity', 'in_batch_dim': [8, 64], 'batch_size': 8, 'flops': 1265603017216}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1582003754624}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1898404492032}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1898404492032}, {'type': 'perplexity', 'in_batch_dim': [8, 64], 'batch_size': 8, 'flops': 1265603017216}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1898404492032}, {'type': 'perplexity', 'in_batch_dim': [8, 64], 'batch_size': 8, 'flops': 1265603017216}, {'type': 'perplexity', 'in_batch_dim': [8, 112], 'batch_size': 8, 'flops': 2214805229440}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1582003754624}, {'type': 'perplexity', 'in_batch_dim': [8, 128], 'batch_size': 8, 'flops': 2531205966848}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1582003754624}, {'type': 'perplexity', 'in_batch_dim': [8, 112], 'batch_size': 8, 'flops': 2214805229440}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1898404492032}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1582003754624}, {'type': 'perplexity', 'in_batch_dim': [8, 112], 'batch_size': 8, 'flops': 2214805229440}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1582003754624}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1898404492032}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1898404492032}, {'type': 'perplexity', 'in_batch_dim': [8, 128], 'batch_size': 8, 'flops': 2531205966848}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1898404492032}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1898404492032}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1582003754624}, {'type': 'perplexity', 'in_batch_dim': [8, 112], 'batch_size': 8, 'flops': 2214805229440}, {'type': 'perplexity', 'in_batch_dim': [8, 128], 'batch_size': 8, 'flops': 2531205966848}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1898404492032}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1582003754624}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1582003754624}, {'type': 'perplexity', 'in_batch_dim': [8, 128], 'batch_size': 8, 'flops': 2531205966848}, {'type': 'perplexity', 'in_batch_dim': [8, 112], 'batch_size': 8, 'flops': 2214805229440}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1582003754624}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1582003754624}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1898404492032}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1582003754624}, {'type': 'perplexity', 'in_batch_dim': [8, 64], 'batch_size': 8, 'flops': 1265603017216}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1582003754624}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1582003754624}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1582003754624}, {'type': 'perplexity', 'in_batch_dim': [8, 112], 'batch_size': 8, 'flops': 2214805229440}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1582003754624}, {'type': 'perplexity', 'in_batch_dim': [8, 64], 'batch_size': 8, 'flops': 1265603017216}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1582003754624}, {'type': 'perplexity', 'in_batch_dim': [8, 128], 'batch_size': 8, 'flops': 2531205966848}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1898404492032}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1898404492032}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1582003754624}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1898404492032}, {'type': 'perplexity', 'in_batch_dim': [8, 64], 'batch_size': 8, 'flops': 1265603017216}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1898404492032}, {'type': 'perplexity', 'in_batch_dim': [8, 112], 'batch_size': 8, 'flops': 2214805229440}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1898404492032}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1582003754624}, {'type': 'perplexity', 'in_batch_dim': [8, 112], 'batch_size': 8, 'flops': 2214805229440}, {'type': 'perplexity', 'in_batch_dim': [8, 112], 'batch_size': 8, 'flops': 2214805229440}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1898404492032}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1898404492032}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1898404492032}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1898404492032}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1582003754624}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1582003754624}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1582003754624}, {'type': 'perplexity', 'in_batch_dim': [8, 112], 'batch_size': 8, 'flops': 2214805229440}, {'type': 'perplexity', 'in_batch_dim': [8, 64], 'batch_size': 8, 'flops': 1265603017216}, {'type': 'perplexity', 'in_batch_dim': [8, 144], 'batch_size': 8, 'flops': 2847606704256}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1898404492032}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1898404492032}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1898404492032}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1898404492032}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1898404492032}, {'type': 'perplexity', 'in_batch_dim': [8, 112], 'batch_size': 8, 'flops': 2214805229440}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1582003754624}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1582003754624}, {'type': 'perplexity', 'in_batch_dim': [8, 64], 'batch_size': 8, 'flops': 1265603017216}, {'type': 'perplexity', 'in_batch_dim': [8, 112], 'batch_size': 8, 'flops': 2214805229440}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1582003754624}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1898404492032}, {'type': 'perplexity', 'in_batch_dim': [8, 64], 'batch_size': 8, 'flops': 1265603017216}, {'type': 'perplexity', 'in_batch_dim': [8, 112], 'batch_size': 8, 'flops': 2214805229440}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1898404492032}, {'type': 'perplexity', 'in_batch_dim': [8, 64], 'batch_size': 8, 'flops': 1265603017216}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1582003754624}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1582003754624}, {'type': 'perplexity', 'in_batch_dim': [8, 64], 'batch_size': 8, 'flops': 1265603017216}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1898404492032}, {'type': 'perplexity', 'in_batch_dim': [8, 64], 'batch_size': 8, 'flops': 1265603017216}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1582003754624}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1898404492032}, {'type': 'perplexity', 'in_batch_dim': [8, 112], 'batch_size': 8, 'flops': 2214805229440}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1582003754624}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1898404492032}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1582003754624}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1898404492032}, {'type': 'perplexity', 'in_batch_dim': [8, 112], 'batch_size': 8, 'flops': 2214805229440}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1898404492032}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1898404492032}, {'type': 'perplexity', 'in_batch_dim': [8, 64], 'batch_size': 8, 'flops': 1265603017216}, {'type': 'perplexity', 'in_batch_dim': [8, 64], 'batch_size': 8, 'flops': 1265603017216}, {'type': 'perplexity', 'in_batch_dim': [8, 128], 'batch_size': 8, 'flops': 2531205966848}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1582003754624}, {'type': 'perplexity', 'in_batch_dim': [8, 64], 'batch_size': 8, 'flops': 1265603017216}, {'type': 'perplexity', 'in_batch_dim': [8, 112], 'batch_size': 8, 'flops': 2214805229440}, {'type': 'perplexity', 'in_batch_dim': [8, 112], 'batch_size': 8, 'flops': 2214805229440}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1898404492032}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1582003754624}, {'type': 'perplexity', 'in_batch_dim': [8, 128], 'batch_size': 8, 'flops': 2531205966848}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1898404492032}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1582003754624}, {'type': 'perplexity', 'in_batch_dim': [8, 112], 'batch_size': 8, 'flops': 2214805229440}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1898404492032}, {'type': 'perplexity', 'in_batch_dim': [8, 64], 'batch_size': 8, 'flops': 1265603017216}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1582003754624}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1898404492032}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1898404492032}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1582003754624}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1582003754624}, {'type': 'perplexity', 'in_batch_dim': [8, 64], 'batch_size': 8, 'flops': 1265603017216}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1898404492032}, {'type': 'perplexity', 'in_batch_dim': [8, 112], 'batch_size': 8, 'flops': 2214805229440}, {'type': 'perplexity', 'in_batch_dim': [8, 64], 'batch_size': 8, 'flops': 1265603017216}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1898404492032}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1582003754624}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1582003754624}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1898404492032}, {'type': 'perplexity', 'in_batch_dim': [8, 112], 'batch_size': 8, 'flops': 2214805229440}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1898404492032}, {'type': 'perplexity', 'in_batch_dim': [8, 112], 'batch_size': 8, 'flops': 2214805229440}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1582003754624}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1582003754624}, {'type': 'perplexity', 'in_batch_dim': [8, 64], 'batch_size': 8, 'flops': 1265603017216}, {'type': 'perplexity', 'in_batch_dim': [8, 112], 'batch_size': 8, 'flops': 2214805229440}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1582003754624}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1898404492032}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1582003754624}, {'type': 'perplexity', 'in_batch_dim': [8, 112], 'batch_size': 8, 'flops': 2214805229440}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1898404492032}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1898404492032}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1582003754624}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1898404492032}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1582003754624}, {'type': 'perplexity', 'in_batch_dim': [8, 112], 'batch_size': 8, 'flops': 2214805229440}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1582003754624}, {'type': 'perplexity', 'in_batch_dim': [8, 64], 'batch_size': 8, 'flops': 1265603017216}, {'type': 'perplexity', 'in_batch_dim': [8, 64], 'batch_size': 8, 'flops': 1265603017216}, {'type': 'perplexity', 'in_batch_dim': [8, 48], 'batch_size': 8, 'flops': 949202279808}, {'type': 'perplexity', 'in_batch_dim': [8, 112], 'batch_size': 8, 'flops': 2214805229440}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1582003754624}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1898404492032}, {'type': 'perplexity', 'in_batch_dim': [8, 128], 'batch_size': 8, 'flops': 2531205966848}, {'type': 'perplexity', 'in_batch_dim': [8, 112], 'batch_size': 8, 'flops': 2214805229440}, {'type': 'perplexity', 'in_batch_dim': [8, 128], 'batch_size': 8, 'flops': 2531205966848}, {'type': 'perplexity', 'in_batch_dim': [8, 112], 'batch_size': 8, 'flops': 2214805229440}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1582003754624}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1898404492032}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1582003754624}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1582003754624}, {'type': 'perplexity', 'in_batch_dim': [8, 112], 'batch_size': 8, 'flops': 2214805229440}, {'type': 'perplexity', 'in_batch_dim': [8, 112], 'batch_size': 8, 'flops': 2214805229440}, {'type': 'perplexity', 'in_batch_dim': [8, 64], 'batch_size': 8, 'flops': 1265603017216}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1898404492032}, {'type': 'perplexity', 'in_batch_dim': [8, 112], 'batch_size': 8, 'flops': 2214805229440}, {'type': 'perplexity', 'in_batch_dim': [8, 64], 'batch_size': 8, 'flops': 1265603017216}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1898404492032}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1582003754624}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1898404492032}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1898404492032}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1898404492032}, {'type': 'perplexity', 'in_batch_dim': [8, 48], 'batch_size': 8, 'flops': 949202279808}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1898404492032}, {'type': 'perplexity', 'in_batch_dim': [8, 112], 'batch_size': 8, 'flops': 2214805229440}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1582003754624}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1898404492032}, {'type': 'perplexity', 'in_batch_dim': [8, 64], 'batch_size': 8, 'flops': 1265603017216}, {'type': 'perplexity', 'in_batch_dim': [8, 112], 'batch_size': 8, 'flops': 2214805229440}, {'type': 'perplexity', 'in_batch_dim': [8, 64], 'batch_size': 8, 'flops': 1265603017216}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1582003754624}, {'type': 'perplexity', 'in_batch_dim': [8, 64], 'batch_size': 8, 'flops': 1265603017216}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1582003754624}, {'type': 'perplexity', 'in_batch_dim': [8, 112], 'batch_size': 8, 'flops': 2214805229440}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1582003754624}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1582003754624}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1582003754624}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1582003754624}, {'type': 'perplexity', 'in_batch_dim': [8, 112], 'batch_size': 8, 'flops': 2214805229440}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1582003754624}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1898404492032}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1898404492032}, {'type': 'perplexity', 'in_batch_dim': [8, 112], 'batch_size': 8, 'flops': 2214805229440}, {'type': 'perplexity', 'in_batch_dim': [8, 144], 'batch_size': 8, 'flops': 2847606704256}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1582003754624}, {'type': 'perplexity', 'in_batch_dim': [8, 64], 'batch_size': 8, 'flops': 1265603017216}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1898404492032}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1898404492032}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1582003754624}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1898404492032}, {'type': 'perplexity', 'in_batch_dim': [8, 128], 'batch_size': 8, 'flops': 2531205966848}, {'type': 'perplexity', 'in_batch_dim': [8, 112], 'batch_size': 8, 'flops': 2214805229440}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1898404492032}, {'type': 'perplexity', 'in_batch_dim': [8, 64], 'batch_size': 8, 'flops': 1265603017216}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1898404492032}, {'type': 'perplexity', 'in_batch_dim': [8, 160], 'batch_size': 8, 'flops': 3164007441664}, {'type': 'perplexity', 'in_batch_dim': [8, 64], 'batch_size': 8, 'flops': 1265603017216}, {'type': 'perplexity', 'in_batch_dim': [8, 112], 'batch_size': 8, 'flops': 2214805229440}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1898404492032}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1898404492032}, {'type': 'perplexity', 'in_batch_dim': [8, 144], 'batch_size': 8, 'flops': 2847606704256}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1582003754624}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1898404492032}, {'type': 'perplexity', 'in_batch_dim': [8, 64], 'batch_size': 8, 'flops': 1265603017216}, {'type': 'perplexity', 'in_batch_dim': [8, 64], 'batch_size': 8, 'flops': 1265603017216}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1898404492032}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1898404492032}, {'type': 'perplexity', 'in_batch_dim': [8, 112], 'batch_size': 8, 'flops': 2214805229440}, {'type': 'perplexity', 'in_batch_dim': [8, 112], 'batch_size': 8, 'flops': 2214805229440}, {'type': 'perplexity', 'in_batch_dim': [8, 128], 'batch_size': 8, 'flops': 2531205966848}, {'type': 'perplexity', 'in_batch_dim': [8, 112], 'batch_size': 8, 'flops': 2214805229440}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1582003754624}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1582003754624}, {'type': 'perplexity', 'in_batch_dim': [8, 112], 'batch_size': 8, 'flops': 2214805229440}, {'type': 'perplexity', 'in_batch_dim': [8, 128], 'batch_size': 8, 'flops': 2531205966848}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1582003754624}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1582003754624}, {'type': 'perplexity', 'in_batch_dim': [8, 64], 'batch_size': 8, 'flops': 1265603017216}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1898404492032}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1582003754624}, {'type': 'perplexity', 'in_batch_dim': [8, 112], 'batch_size': 8, 'flops': 2214805229440}, {'type': 'perplexity', 'in_batch_dim': [8, 64], 'batch_size': 8, 'flops': 1265603017216}, {'type': 'perplexity', 'in_batch_dim': [8, 64], 'batch_size': 8, 'flops': 1265603017216}, {'type': 'perplexity', 'in_batch_dim': [8, 128], 'batch_size': 8, 'flops': 2531205966848}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1898404492032}, {'type': 'perplexity', 'in_batch_dim': [8, 112], 'batch_size': 8, 'flops': 2214805229440}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1582003754624}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1582003754624}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1582003754624}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1582003754624}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1582003754624}, {'type': 'perplexity', 'in_batch_dim': [8, 112], 'batch_size': 8, 'flops': 2214805229440}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1898404492032}, {'type': 'perplexity', 'in_batch_dim': [8, 112], 'batch_size': 8, 'flops': 2214805229440}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1898404492032}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1582003754624}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1582003754624}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1898404492032}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1898404492032}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1582003754624}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1582003754624}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1582003754624}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1582003754624}, {'type': 'perplexity', 'in_batch_dim': [8, 48], 'batch_size': 8, 'flops': 949202279808}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1582003754624}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1898404492032}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1582003754624}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1898404492032}, {'type': 'perplexity', 'in_batch_dim': [8, 112], 'batch_size': 8, 'flops': 2214805229440}, {'type': 'perplexity', 'in_batch_dim': [8, 128], 'batch_size': 8, 'flops': 2531205966848}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1898404492032}, {'type': 'perplexity', 'in_batch_dim': [8, 64], 'batch_size': 8, 'flops': 1265603017216}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1898404492032}, {'type': 'perplexity', 'in_batch_dim': [8, 64], 'batch_size': 8, 'flops': 1265603017216}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1582003754624}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1582003754624}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1582003754624}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1582003754624}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1898404492032}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1582003754624}, {'type': 'perplexity', 'in_batch_dim': [8, 112], 'batch_size': 8, 'flops': 2214805229440}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1898404492032}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1582003754624}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1898404492032}, {'type': 'perplexity', 'in_batch_dim': [8, 64], 'batch_size': 8, 'flops': 1265603017216}, {'type': 'perplexity', 'in_batch_dim': [8, 64], 'batch_size': 8, 'flops': 1265603017216}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1898404492032}, {'type': 'perplexity', 'in_batch_dim': [8, 64], 'batch_size': 8, 'flops': 1265603017216}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1582003754624}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1582003754624}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1582003754624}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1582003754624}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1582003754624}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1582003754624}, {'type': 'perplexity', 'in_batch_dim': [8, 64], 'batch_size': 8, 'flops': 1265603017216}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1582003754624}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1582003754624}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1898404492032}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1898404492032}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1898404492032}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1898404492032}, {'type': 'perplexity', 'in_batch_dim': [8, 128], 'batch_size': 8, 'flops': 2531205966848}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1582003754624}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1898404492032}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1898404492032}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1898404492032}, {'type': 'perplexity', 'in_batch_dim': [8, 64], 'batch_size': 8, 'flops': 1265603017216}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1898404492032}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1898404492032}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1582003754624}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1898404492032}, {'type': 'perplexity', 'in_batch_dim': [8, 112], 'batch_size': 8, 'flops': 2214805229440}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1898404492032}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1582003754624}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1898404492032}, {'type': 'perplexity', 'in_batch_dim': [8, 64], 'batch_size': 8, 'flops': 1265603017216}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1898404492032}, {'type': 'perplexity', 'in_batch_dim': [8, 112], 'batch_size': 8, 'flops': 2214805229440}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1898404492032}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1898404492032}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1582003754624}, {'type': 'perplexity', 'in_batch_dim': [8, 64], 'batch_size': 8, 'flops': 1265603017216}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1582003754624}, {'type': 'perplexity', 'in_batch_dim': [8, 128], 'batch_size': 8, 'flops': 2531205966848}, {'type': 'perplexity', 'in_batch_dim': [8, 64], 'batch_size': 8, 'flops': 1265603017216}, {'type': 'perplexity', 'in_batch_dim': [8, 128], 'batch_size': 8, 'flops': 2531205966848}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1898404492032}, {'type': 'perplexity', 'in_batch_dim': [8, 112], 'batch_size': 8, 'flops': 2214805229440}, {'type': 'perplexity', 'in_batch_dim': [8, 112], 'batch_size': 8, 'flops': 2214805229440}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1898404492032}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1898404492032}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1582003754624}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1582003754624}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1582003754624}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1898404492032}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1582003754624}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1582003754624}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1898404492032}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1582003754624}, {'type': 'perplexity', 'in_batch_dim': [8, 64], 'batch_size': 8, 'flops': 1265603017216}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1898404492032}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1898404492032}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1898404492032}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1898404492032}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1582003754624}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1898404492032}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1582003754624}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1582003754624}, {'type': 'perplexity', 'in_batch_dim': [8, 112], 'batch_size': 8, 'flops': 2214805229440}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1582003754624}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1898404492032}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1582003754624}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1582003754624}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1582003754624}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1898404492032}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1582003754624}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1898404492032}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1582003754624}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1582003754624}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1898404492032}, {'type': 'perplexity', 'in_batch_dim': [8, 112], 'batch_size': 8, 'flops': 2214805229440}, {'type': 'perplexity', 'in_batch_dim': [8, 128], 'batch_size': 8, 'flops': 2531205966848}, {'type': 'perplexity', 'in_batch_dim': [8, 112], 'batch_size': 8, 'flops': 2214805229440}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1582003754624}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1898404492032}, {'type': 'perplexity', 'in_batch_dim': [8, 64], 'batch_size': 8, 'flops': 1265603017216}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1898404492032}, {'type': 'perplexity', 'in_batch_dim': [8, 64], 'batch_size': 8, 'flops': 1265603017216}, {'type': 'perplexity', 'in_batch_dim': [8, 144], 'batch_size': 8, 'flops': 2847606704256}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1582003754624}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1898404492032}, {'type': 'perplexity', 'in_batch_dim': [8, 112], 'batch_size': 8, 'flops': 2214805229440}, {'type': 'perplexity', 'in_batch_dim': [8, 144], 'batch_size': 8, 'flops': 2847606704256}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1582003754624}, {'type': 'perplexity', 'in_batch_dim': [8, 112], 'batch_size': 8, 'flops': 2214805229440}, {'type': 'perplexity', 'in_batch_dim': [8, 64], 'batch_size': 8, 'flops': 1265603017216}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1582003754624}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1898404492032}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1898404492032}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1582003754624}, {'type': 'perplexity', 'in_batch_dim': [8, 112], 'batch_size': 8, 'flops': 2214805229440}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1582003754624}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1582003754624}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1582003754624}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1582003754624}, {'type': 'perplexity', 'in_batch_dim': [8, 112], 'batch_size': 8, 'flops': 2214805229440}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1898404492032}, {'type': 'perplexity', 'in_batch_dim': [8, 128], 'batch_size': 8, 'flops': 2531205966848}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1582003754624}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1582003754624}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1898404492032}, {'type': 'perplexity', 'in_batch_dim': [8, 64], 'batch_size': 8, 'flops': 1265603017216}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1898404492032}, {'type': 'perplexity', 'in_batch_dim': [8, 64], 'batch_size': 8, 'flops': 1265603017216}, {'type': 'perplexity', 'in_batch_dim': [8, 112], 'batch_size': 8, 'flops': 2214805229440}, {'type': 'perplexity', 'in_batch_dim': [8, 112], 'batch_size': 8, 'flops': 2214805229440}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1582003754624}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1898404492032}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1582003754624}, {'type': 'perplexity', 'in_batch_dim': [8, 64], 'batch_size': 8, 'flops': 1265603017216}, {'type': 'perplexity', 'in_batch_dim': [8, 64], 'batch_size': 8, 'flops': 1265603017216}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1898404492032}, {'type': 'perplexity', 'in_batch_dim': [8, 64], 'batch_size': 8, 'flops': 1265603017216}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1898404492032}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1898404492032}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1582003754624}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1898404492032}, {'type': 'perplexity', 'in_batch_dim': [8, 144], 'batch_size': 8, 'flops': 2847606704256}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1898404492032}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1582003754624}, {'type': 'perplexity', 'in_batch_dim': [8, 128], 'batch_size': 8, 'flops': 2531205966848}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1898404492032}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1898404492032}, {'type': 'perplexity', 'in_batch_dim': [8, 112], 'batch_size': 8, 'flops': 2214805229440}, {'type': 'perplexity', 'in_batch_dim': [8, 112], 'batch_size': 8, 'flops': 2214805229440}, {'type': 'perplexity', 'in_batch_dim': [8, 64], 'batch_size': 8, 'flops': 1265603017216}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1582003754624}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1898404492032}, {'type': 'perplexity', 'in_batch_dim': [8, 128], 'batch_size': 8, 'flops': 2531205966848}, {'type': 'perplexity', 'in_batch_dim': [8, 144], 'batch_size': 8, 'flops': 2847606704256}, {'type': 'perplexity', 'in_batch_dim': [8, 112], 'batch_size': 8, 'flops': 2214805229440}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1582003754624}, {'type': 'perplexity', 'in_batch_dim': [8, 64], 'batch_size': 8, 'flops': 1265603017216}, {'type': 'perplexity', 'in_batch_dim': [8, 64], 'batch_size': 8, 'flops': 1265603017216}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1898404492032}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1582003754624}, {'type': 'perplexity', 'in_batch_dim': [8, 64], 'batch_size': 8, 'flops': 1265603017216}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1898404492032}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1582003754624}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1898404492032}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1898404492032}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1582003754624}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1582003754624}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1582003754624}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1582003754624}, {'type': 'perplexity', 'in_batch_dim': [3, 48], 'batch_size': 8, 'flops': 949202279808}], 'timestamp': '2025-09-10 02:27:39.424164', 'step': 1398, 'epoch': 1} {'type': 'pplx', 'content': 8462.783070122881, 'timestamp': '2025-09-10 02:27:39.429041', 'step': 1398, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 3322488817984}, 'timestamp': '2025-09-10 02:27:39.458560', 'step': 1398, 'epoch': 1} {'type': 'loss', 'content': 0.24110820889472961, 'timestamp': '2025-09-10 02:27:39.460986', 'step': 1399, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 128], 'flops': 3797092544000}, 'timestamp': '2025-09-10 02:27:39.492277', 'step': 1399, 'epoch': 1} {'type': 'loss', 'content': 0.14832869172096252, 'timestamp': '2025-09-10 02:27:39.515903', 'step': 1400, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 160], 'flops': 4746299996032}, 'timestamp': '2025-09-10 02:27:39.546083', 'step': 1400, 'epoch': 1} {'type': 'loss', 'content': 0.2165868729352951, 'timestamp': '2025-09-10 02:27:39.548248', 'step': 1401, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 128], 'flops': 3797092544000}, 'timestamp': '2025-09-10 02:27:39.578072', 'step': 1401, 'epoch': 1} {'type': 'loss', 'content': 0.20303641259670258, 'timestamp': '2025-09-10 02:27:39.580286', 'step': 1402, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 80], 'flops': 2373281365952}, 'timestamp': '2025-09-10 02:27:39.609839', 'step': 1402, 'epoch': 1} {'type': 'loss', 'content': 0.1708463430404663, 'timestamp': '2025-09-10 02:27:39.612022', 'step': 1403, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 3322488817984}, 'timestamp': '2025-09-10 02:27:39.642047', 'step': 1403, 'epoch': 1} {'type': 'loss', 'content': 0.16709274053573608, 'timestamp': '2025-09-10 02:27:39.666931', 'step': 1404, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 128], 'flops': 3797092544000}, 'timestamp': '2025-09-10 02:27:39.697324', 'step': 1404, 'epoch': 1} {'type': 'loss', 'content': 0.17663481831550598, 'timestamp': '2025-09-10 02:27:39.699367', 'step': 1405, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 160], 'flops': 4746299996032}, 'timestamp': '2025-09-10 02:27:39.729476', 'step': 1405, 'epoch': 1} {'type': 'loss', 'content': 0.14807169139385223, 'timestamp': '2025-09-10 02:27:39.732249', 'step': 1406, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 144], 'flops': 4271696270016}, 'timestamp': '2025-09-10 02:27:39.762240', 'step': 1406, 'epoch': 1} {'type': 'loss', 'content': 0.13966389000415802, 'timestamp': '2025-09-10 02:27:39.764513', 'step': 1407, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 128], 'flops': 3797092544000}, 'timestamp': '2025-09-10 02:27:39.794113', 'step': 1407, 'epoch': 1} {'type': 'loss', 'content': 0.2519131302833557, 'timestamp': '2025-09-10 02:27:39.817806', 'step': 1408, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 3322488817984}, 'timestamp': '2025-09-10 02:27:39.848404', 'step': 1408, 'epoch': 1} {'type': 'loss', 'content': 0.1578877866268158, 'timestamp': '2025-09-10 02:27:39.850538', 'step': 1409, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 3322488817984}, 'timestamp': '2025-09-10 02:27:39.880175', 'step': 1409, 'epoch': 1} {'type': 'loss', 'content': 0.16815891861915588, 'timestamp': '2025-09-10 02:27:39.882507', 'step': 1410, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 3322488817984}, 'timestamp': '2025-09-10 02:27:39.912223', 'step': 1410, 'epoch': 1} {'type': 'loss', 'content': 0.17134374380111694, 'timestamp': '2025-09-10 02:27:39.914888', 'step': 1411, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 3322488817984}, 'timestamp': '2025-09-10 02:27:39.944446', 'step': 1411, 'epoch': 1} {'type': 'loss', 'content': 0.1269114762544632, 'timestamp': '2025-09-10 02:27:39.968252', 'step': 1412, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 128], 'flops': 3797092544000}, 'timestamp': '2025-09-10 02:27:39.998050', 'step': 1412, 'epoch': 1} {'type': 'loss', 'content': 0.22287848591804504, 'timestamp': '2025-09-10 02:27:39.999849', 'step': 1413, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 144], 'flops': 4271696270016}, 'timestamp': '2025-09-10 02:27:40.029616', 'step': 1413, 'epoch': 1} {'type': 'loss', 'content': 0.19346722960472107, 'timestamp': '2025-09-10 02:27:40.031881', 'step': 1414, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 128], 'flops': 3797092544000}, 'timestamp': '2025-09-10 02:27:40.061774', 'step': 1414, 'epoch': 1} {'type': 'loss', 'content': 0.15368320047855377, 'timestamp': '2025-09-10 02:27:40.064478', 'step': 1415, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 128], 'flops': 3797092544000}, 'timestamp': '2025-09-10 02:27:40.095172', 'step': 1415, 'epoch': 1} {'type': 'loss', 'content': 0.10326653718948364, 'timestamp': '2025-09-10 02:27:40.119243', 'step': 1416, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 3322488817984}, 'timestamp': '2025-09-10 02:27:40.153732', 'step': 1416, 'epoch': 1} {'type': 'loss', 'content': 0.3002311885356903, 'timestamp': '2025-09-10 02:27:40.156097', 'step': 1417, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 160], 'flops': 4746299996032}, 'timestamp': '2025-09-10 02:27:40.187467', 'step': 1417, 'epoch': 1} {'type': 'loss', 'content': 0.18574275076389313, 'timestamp': '2025-09-10 02:27:40.190268', 'step': 1418, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 2847885091968}, 'timestamp': '2025-09-10 02:27:40.221547', 'step': 1418, 'epoch': 1} {'type': 'loss', 'content': 0.21580468118190765, 'timestamp': '2025-09-10 02:27:40.223746', 'step': 1419, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 80], 'flops': 2373281365952}, 'timestamp': '2025-09-10 02:27:40.255013', 'step': 1419, 'epoch': 1} {'type': 'loss', 'content': 0.28283295035362244, 'timestamp': '2025-09-10 02:27:40.278411', 'step': 1420, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 128], 'flops': 3797092544000}, 'timestamp': '2025-09-10 02:27:40.313577', 'step': 1420, 'epoch': 1} {'type': 'loss', 'content': 0.15508276224136353, 'timestamp': '2025-09-10 02:27:40.315884', 'step': 1421, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 160], 'flops': 4746299996032}, 'timestamp': '2025-09-10 02:27:40.346495', 'step': 1421, 'epoch': 1} {'type': 'loss', 'content': 0.17316778004169464, 'timestamp': '2025-09-10 02:27:40.349147', 'step': 1422, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 3322488817984}, 'timestamp': '2025-09-10 02:27:40.379753', 'step': 1422, 'epoch': 1} {'type': 'loss', 'content': 0.13530956208705902, 'timestamp': '2025-09-10 02:27:40.381779', 'step': 1423, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 3322488817984}, 'timestamp': '2025-09-10 02:27:40.411137', 'step': 1423, 'epoch': 1} {'type': 'loss', 'content': 0.20697049796581268, 'timestamp': '2025-09-10 02:27:40.434679', 'step': 1424, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 3322488817984}, 'timestamp': '2025-09-10 02:27:40.464900', 'step': 1424, 'epoch': 1} {'type': 'loss', 'content': 0.16505642235279083, 'timestamp': '2025-09-10 02:27:40.467185', 'step': 1425, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 128], 'flops': 3797092544000}, 'timestamp': '2025-09-10 02:27:40.497738', 'step': 1425, 'epoch': 1} {'type': 'loss', 'content': 0.2559031546115875, 'timestamp': '2025-09-10 02:27:40.500185', 'step': 1426, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 160], 'flops': 4746299996032}, 'timestamp': '2025-09-10 02:27:40.530068', 'step': 1426, 'epoch': 1} {'type': 'loss', 'content': 0.20212917029857635, 'timestamp': '2025-09-10 02:27:40.532802', 'step': 1427, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 3322488817984}, 'timestamp': '2025-09-10 02:27:40.562224', 'step': 1427, 'epoch': 1} {'type': 'loss', 'content': 0.1673550307750702, 'timestamp': '2025-09-10 02:27:40.585856', 'step': 1428, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 2847885091968}, 'timestamp': '2025-09-10 02:27:40.615638', 'step': 1428, 'epoch': 1} {'type': 'loss', 'content': 0.15183541178703308, 'timestamp': '2025-09-10 02:27:40.617573', 'step': 1429, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 128], 'flops': 3797092544000}, 'timestamp': '2025-09-10 02:27:40.647464', 'step': 1429, 'epoch': 1} {'type': 'loss', 'content': 0.19864122569561005, 'timestamp': '2025-09-10 02:27:40.649855', 'step': 1430, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 2847885091968}, 'timestamp': '2025-09-10 02:27:40.683475', 'step': 1430, 'epoch': 1} {'type': 'loss', 'content': 0.25577154755592346, 'timestamp': '2025-09-10 02:27:40.685775', 'step': 1431, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 80], 'flops': 2373281365952}, 'timestamp': '2025-09-10 02:27:40.715146', 'step': 1431, 'epoch': 1} {'type': 'loss', 'content': 0.21723061800003052, 'timestamp': '2025-09-10 02:27:40.738562', 'step': 1432, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 3322488817984}, 'timestamp': '2025-09-10 02:27:40.768748', 'step': 1432, 'epoch': 1} {'type': 'loss', 'content': 0.1383102387189865, 'timestamp': '2025-09-10 02:27:40.770555', 'step': 1433, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 144], 'flops': 4271696270016}, 'timestamp': '2025-09-10 02:27:40.799806', 'step': 1433, 'epoch': 1} {'type': 'loss', 'content': 0.2904488444328308, 'timestamp': '2025-09-10 02:27:40.802062', 'step': 1434, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 2847885091968}, 'timestamp': '2025-09-10 02:27:40.833181', 'step': 1434, 'epoch': 1} {'type': 'loss', 'content': 0.12656357884407043, 'timestamp': '2025-09-10 02:27:40.835254', 'step': 1435, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 2847885091968}, 'timestamp': '2025-09-10 02:27:40.864667', 'step': 1435, 'epoch': 1} {'type': 'loss', 'content': 0.13765905797481537, 'timestamp': '2025-09-10 02:27:40.888003', 'step': 1436, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 3322488817984}, 'timestamp': '2025-09-10 02:27:40.917708', 'step': 1436, 'epoch': 1} {'type': 'loss', 'content': 0.20871031284332275, 'timestamp': '2025-09-10 02:27:40.919886', 'step': 1437, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 3322488817984}, 'timestamp': '2025-09-10 02:27:40.951557', 'step': 1437, 'epoch': 1} {'type': 'loss', 'content': 0.1571863442659378, 'timestamp': '2025-09-10 02:27:40.953532', 'step': 1438, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 160], 'flops': 4746299996032}, 'timestamp': '2025-09-10 02:27:40.983189', 'step': 1438, 'epoch': 1} {'type': 'loss', 'content': 0.1968434453010559, 'timestamp': '2025-09-10 02:27:40.985809', 'step': 1439, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 128], 'flops': 3797092544000}, 'timestamp': '2025-09-10 02:27:41.016859', 'step': 1439, 'epoch': 1} {'type': 'loss', 'content': 0.1920316070318222, 'timestamp': '2025-09-10 02:27:41.040971', 'step': 1440, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 2847885091968}, 'timestamp': '2025-09-10 02:27:41.071196', 'step': 1440, 'epoch': 1} {'type': 'loss', 'content': 0.14187264442443848, 'timestamp': '2025-09-10 02:27:41.073520', 'step': 1441, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 3322488817984}, 'timestamp': '2025-09-10 02:27:41.103373', 'step': 1441, 'epoch': 1} {'type': 'loss', 'content': 0.24554018676280975, 'timestamp': '2025-09-10 02:27:41.105594', 'step': 1442, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 2847885091968}, 'timestamp': '2025-09-10 02:27:41.136186', 'step': 1442, 'epoch': 1} {'type': 'loss', 'content': 0.18737241625785828, 'timestamp': '2025-09-10 02:27:41.138255', 'step': 1443, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 3322488817984}, 'timestamp': '2025-09-10 02:27:41.168567', 'step': 1443, 'epoch': 1} {'type': 'loss', 'content': 0.26146048307418823, 'timestamp': '2025-09-10 02:27:41.191947', 'step': 1444, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 2847885091968}, 'timestamp': '2025-09-10 02:27:41.221624', 'step': 1444, 'epoch': 1} {'type': 'loss', 'content': 0.3453620672225952, 'timestamp': '2025-09-10 02:27:41.223881', 'step': 1445, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 128], 'flops': 3797092544000}, 'timestamp': '2025-09-10 02:27:41.254454', 'step': 1445, 'epoch': 1} {'type': 'loss', 'content': 0.13598206639289856, 'timestamp': '2025-09-10 02:27:41.256664', 'step': 1446, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 128], 'flops': 3797092544000}, 'timestamp': '2025-09-10 02:27:41.286395', 'step': 1446, 'epoch': 1} {'type': 'loss', 'content': 0.15525448322296143, 'timestamp': '2025-09-10 02:27:41.289257', 'step': 1447, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 3322488817984}, 'timestamp': '2025-09-10 02:27:41.318955', 'step': 1447, 'epoch': 1} {'type': 'loss', 'content': 0.2544618248939514, 'timestamp': '2025-09-10 02:27:41.343317', 'step': 1448, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 3322488817984}, 'timestamp': '2025-09-10 02:27:41.373381', 'step': 1448, 'epoch': 1} {'type': 'loss', 'content': 0.1638638973236084, 'timestamp': '2025-09-10 02:27:41.375394', 'step': 1449, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 128], 'flops': 3797092544000}, 'timestamp': '2025-09-10 02:27:41.405307', 'step': 1449, 'epoch': 1} {'type': 'loss', 'content': 0.2170371413230896, 'timestamp': '2025-09-10 02:27:41.407446', 'step': 1450, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 3322488817984}, 'timestamp': '2025-09-10 02:27:41.437394', 'step': 1450, 'epoch': 1} {'type': 'loss', 'content': 0.2609911561012268, 'timestamp': '2025-09-10 02:27:41.439774', 'step': 1451, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 3322488817984}, 'timestamp': '2025-09-10 02:27:41.470130', 'step': 1451, 'epoch': 1} {'type': 'loss', 'content': 0.17182454466819763, 'timestamp': '2025-09-10 02:27:41.495102', 'step': 1452, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 3322488817984}, 'timestamp': '2025-09-10 02:27:41.530423', 'step': 1452, 'epoch': 1} {'type': 'loss', 'content': 0.13314905762672424, 'timestamp': '2025-09-10 02:27:41.534690', 'step': 1453, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 2847885091968}, 'timestamp': '2025-09-10 02:27:41.564638', 'step': 1453, 'epoch': 1} {'type': 'loss', 'content': 0.10836134105920792, 'timestamp': '2025-09-10 02:27:41.566796', 'step': 1454, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 3322488817984}, 'timestamp': '2025-09-10 02:27:41.597811', 'step': 1454, 'epoch': 1} {'type': 'loss', 'content': 0.2741886377334595, 'timestamp': '2025-09-10 02:27:41.600479', 'step': 1455, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 2847885091968}, 'timestamp': '2025-09-10 02:27:41.630924', 'step': 1455, 'epoch': 1} {'type': 'loss', 'content': 0.09891335666179657, 'timestamp': '2025-09-10 02:27:41.654569', 'step': 1456, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 2847885091968}, 'timestamp': '2025-09-10 02:27:41.690326', 'step': 1456, 'epoch': 1} {'type': 'loss', 'content': 0.17132413387298584, 'timestamp': '2025-09-10 02:27:41.693675', 'step': 1457, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 2847885091968}, 'timestamp': '2025-09-10 02:27:41.734238', 'step': 1457, 'epoch': 1} {'type': 'loss', 'content': 0.15038354694843292, 'timestamp': '2025-09-10 02:27:41.738982', 'step': 1458, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 2847885091968}, 'timestamp': '2025-09-10 02:27:41.771599', 'step': 1458, 'epoch': 1} {'type': 'loss', 'content': 0.19473235309123993, 'timestamp': '2025-09-10 02:27:41.774214', 'step': 1459, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 3322488817984}, 'timestamp': '2025-09-10 02:27:41.804243', 'step': 1459, 'epoch': 1} {'type': 'loss', 'content': 0.1255856156349182, 'timestamp': '2025-09-10 02:27:41.828009', 'step': 1460, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 128], 'flops': 3797092544000}, 'timestamp': '2025-09-10 02:27:41.857610', 'step': 1460, 'epoch': 1} {'type': 'loss', 'content': 0.1909676045179367, 'timestamp': '2025-09-10 02:27:41.870210', 'step': 1461, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 3322488817984}, 'timestamp': '2025-09-10 02:27:41.902303', 'step': 1461, 'epoch': 1} {'type': 'loss', 'content': 0.19082017242908478, 'timestamp': '2025-09-10 02:27:41.904414', 'step': 1462, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 2847885091968}, 'timestamp': '2025-09-10 02:27:41.936247', 'step': 1462, 'epoch': 1} {'type': 'loss', 'content': 0.27961423993110657, 'timestamp': '2025-09-10 02:27:41.938679', 'step': 1463, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 3322488817984}, 'timestamp': '2025-09-10 02:27:41.969024', 'step': 1463, 'epoch': 1} {'type': 'loss', 'content': 0.1893368512392044, 'timestamp': '2025-09-10 02:27:41.997850', 'step': 1464, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 128], 'flops': 3797092544000}, 'timestamp': '2025-09-10 02:27:42.029692', 'step': 1464, 'epoch': 1} {'type': 'loss', 'content': 0.20091255009174347, 'timestamp': '2025-09-10 02:27:42.033272', 'step': 1465, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 3322488817984}, 'timestamp': '2025-09-10 02:27:42.064965', 'step': 1465, 'epoch': 1} {'type': 'loss', 'content': 0.1995769441127777, 'timestamp': '2025-09-10 02:27:42.067372', 'step': 1466, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 2847885091968}, 'timestamp': '2025-09-10 02:27:42.098134', 'step': 1466, 'epoch': 1} {'type': 'loss', 'content': 0.1512543261051178, 'timestamp': '2025-09-10 02:27:42.101753', 'step': 1467, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 3322488817984}, 'timestamp': '2025-09-10 02:27:42.135533', 'step': 1467, 'epoch': 1} {'type': 'loss', 'content': 0.24283869564533234, 'timestamp': '2025-09-10 02:27:42.161362', 'step': 1468, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 2847885091968}, 'timestamp': '2025-09-10 02:27:42.192049', 'step': 1468, 'epoch': 1} {'type': 'loss', 'content': 0.3283241093158722, 'timestamp': '2025-09-10 02:27:42.195867', 'step': 1469, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 2847885091968}, 'timestamp': '2025-09-10 02:27:42.232026', 'step': 1469, 'epoch': 1} {'type': 'loss', 'content': 0.24464347958564758, 'timestamp': '2025-09-10 02:27:42.235114', 'step': 1470, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 128], 'flops': 3797092544000}, 'timestamp': '2025-09-10 02:27:42.266290', 'step': 1470, 'epoch': 1} {'type': 'loss', 'content': 0.15799148380756378, 'timestamp': '2025-09-10 02:27:42.268552', 'step': 1471, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 3322488817984}, 'timestamp': '2025-09-10 02:27:42.299382', 'step': 1471, 'epoch': 1} {'type': 'loss', 'content': 0.1330559253692627, 'timestamp': '2025-09-10 02:27:42.326391', 'step': 1472, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 2847885091968}, 'timestamp': '2025-09-10 02:27:42.357504', 'step': 1472, 'epoch': 1} {'type': 'loss', 'content': 0.12853507697582245, 'timestamp': '2025-09-10 02:27:42.359768', 'step': 1473, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 128], 'flops': 3797092544000}, 'timestamp': '2025-09-10 02:27:42.389222', 'step': 1473, 'epoch': 1} {'type': 'loss', 'content': 0.20328828692436218, 'timestamp': '2025-09-10 02:27:42.391318', 'step': 1474, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 128], 'flops': 3797092544000}, 'timestamp': '2025-09-10 02:27:42.422716', 'step': 1474, 'epoch': 1} {'type': 'loss', 'content': 0.14749926328659058, 'timestamp': '2025-09-10 02:27:42.424697', 'step': 1475, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 2847885091968}, 'timestamp': '2025-09-10 02:27:42.454541', 'step': 1475, 'epoch': 1} {'type': 'loss', 'content': 0.15892282128334045, 'timestamp': '2025-09-10 02:27:42.478049', 'step': 1476, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 3322488817984}, 'timestamp': '2025-09-10 02:27:42.509688', 'step': 1476, 'epoch': 1} {'type': 'loss', 'content': 0.21887089312076569, 'timestamp': '2025-09-10 02:27:42.511987', 'step': 1477, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 128], 'flops': 3797092544000}, 'timestamp': '2025-09-10 02:27:42.541387', 'step': 1477, 'epoch': 1} {'type': 'loss', 'content': 0.0773259699344635, 'timestamp': '2025-09-10 02:27:42.543543', 'step': 1478, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 2847885091968}, 'timestamp': '2025-09-10 02:27:42.574461', 'step': 1478, 'epoch': 1} {'type': 'loss', 'content': 0.22911711037158966, 'timestamp': '2025-09-10 02:27:42.576762', 'step': 1479, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 3322488817984}, 'timestamp': '2025-09-10 02:27:42.608046', 'step': 1479, 'epoch': 1} {'type': 'loss', 'content': 0.1879202276468277, 'timestamp': '2025-09-10 02:27:42.631782', 'step': 1480, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 128], 'flops': 3797092544000}, 'timestamp': '2025-09-10 02:27:42.663913', 'step': 1480, 'epoch': 1} {'type': 'loss', 'content': 0.11660662293434143, 'timestamp': '2025-09-10 02:27:42.666086', 'step': 1481, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 3322488817984}, 'timestamp': '2025-09-10 02:27:42.697362', 'step': 1481, 'epoch': 1} {'type': 'loss', 'content': 0.18091174960136414, 'timestamp': '2025-09-10 02:27:42.699557', 'step': 1482, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 128], 'flops': 3797092544000}, 'timestamp': '2025-09-10 02:27:42.729708', 'step': 1482, 'epoch': 1} {'type': 'loss', 'content': 0.15936437249183655, 'timestamp': '2025-09-10 02:27:42.731974', 'step': 1483, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 3322488817984}, 'timestamp': '2025-09-10 02:27:42.762219', 'step': 1483, 'epoch': 1} {'type': 'loss', 'content': 0.12736283242702484, 'timestamp': '2025-09-10 02:27:42.786142', 'step': 1484, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 3322488817984}, 'timestamp': '2025-09-10 02:27:42.816195', 'step': 1484, 'epoch': 1} {'type': 'loss', 'content': 0.2585950791835785, 'timestamp': '2025-09-10 02:27:42.818483', 'step': 1485, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 3322488817984}, 'timestamp': '2025-09-10 02:27:42.849783', 'step': 1485, 'epoch': 1} {'type': 'loss', 'content': 0.19489987194538116, 'timestamp': '2025-09-10 02:27:42.852106', 'step': 1486, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 128], 'flops': 3797092544000}, 'timestamp': '2025-09-10 02:27:42.881633', 'step': 1486, 'epoch': 1} {'type': 'loss', 'content': 0.12769420444965363, 'timestamp': '2025-09-10 02:27:42.883968', 'step': 1487, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 128], 'flops': 3797092544000}, 'timestamp': '2025-09-10 02:27:42.914784', 'step': 1487, 'epoch': 1} {'type': 'loss', 'content': 0.1663716435432434, 'timestamp': '2025-09-10 02:27:42.938297', 'step': 1488, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 160], 'flops': 4746299996032}, 'timestamp': '2025-09-10 02:27:42.969209', 'step': 1488, 'epoch': 1} {'type': 'loss', 'content': 0.20089690387248993, 'timestamp': '2025-09-10 02:27:42.971320', 'step': 1489, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 128], 'flops': 3797092544000}, 'timestamp': '2025-09-10 02:27:43.000549', 'step': 1489, 'epoch': 1} {'type': 'loss', 'content': 0.21185971796512604, 'timestamp': '2025-09-10 02:27:43.002701', 'step': 1490, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 128], 'flops': 3797092544000}, 'timestamp': '2025-09-10 02:27:43.032544', 'step': 1490, 'epoch': 1} {'type': 'loss', 'content': 0.16383486986160278, 'timestamp': '2025-09-10 02:27:43.035216', 'step': 1491, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 2847885091968}, 'timestamp': '2025-09-10 02:27:43.065507', 'step': 1491, 'epoch': 1} {'type': 'loss', 'content': 0.18418952822685242, 'timestamp': '2025-09-10 02:27:43.088881', 'step': 1492, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 2847885091968}, 'timestamp': '2025-09-10 02:27:43.119822', 'step': 1492, 'epoch': 1} {'type': 'loss', 'content': 0.2618730664253235, 'timestamp': '2025-09-10 02:27:43.122132', 'step': 1493, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 3322488817984}, 'timestamp': '2025-09-10 02:27:43.151498', 'step': 1493, 'epoch': 1} {'type': 'loss', 'content': 0.15450426936149597, 'timestamp': '2025-09-10 02:27:43.153624', 'step': 1494, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 128], 'flops': 3797092544000}, 'timestamp': '2025-09-10 02:27:43.183501', 'step': 1494, 'epoch': 1} {'type': 'loss', 'content': 0.19903071224689484, 'timestamp': '2025-09-10 02:27:43.185764', 'step': 1495, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 3322488817984}, 'timestamp': '2025-09-10 02:27:43.217458', 'step': 1495, 'epoch': 1} {'type': 'loss', 'content': 0.33043667674064636, 'timestamp': '2025-09-10 02:27:43.240780', 'step': 1496, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 2847885091968}, 'timestamp': '2025-09-10 02:27:43.270808', 'step': 1496, 'epoch': 1} {'type': 'loss', 'content': 0.17400269210338593, 'timestamp': '2025-09-10 02:27:43.272896', 'step': 1497, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 144], 'flops': 4271696270016}, 'timestamp': '2025-09-10 02:27:43.304394', 'step': 1497, 'epoch': 1} {'type': 'loss', 'content': 0.13464659452438354, 'timestamp': '2025-09-10 02:27:43.306753', 'step': 1498, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 80], 'flops': 2373281365952}, 'timestamp': '2025-09-10 02:27:43.337189', 'step': 1498, 'epoch': 1} {'type': 'loss', 'content': 0.1151791661977768, 'timestamp': '2025-09-10 02:27:43.340260', 'step': 1499, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 3322488817984}, 'timestamp': '2025-09-10 02:27:43.369989', 'step': 1499, 'epoch': 1} {'type': 'loss', 'content': 0.1916481852531433, 'timestamp': '2025-09-10 02:27:43.393555', 'step': 1500, 'epoch': 1} {'type': 'info', 'content': 'Checkpoint saved at step 1500', 'timestamp': '2025-09-10 02:27:48.018421', 'step': 1500, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 3322488817984}, 'timestamp': '2025-09-10 02:27:48.063521', 'step': 1500, 'epoch': 1} {'type': 'loss', 'content': 0.208436518907547, 'timestamp': '2025-09-10 02:27:48.065755', 'step': 1501, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 3322488817984}, 'timestamp': '2025-09-10 02:27:48.096355', 'step': 1501, 'epoch': 1} {'type': 'loss', 'content': 0.30194470286369324, 'timestamp': '2025-09-10 02:27:48.098386', 'step': 1502, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 3322488817984}, 'timestamp': '2025-09-10 02:27:48.127806', 'step': 1502, 'epoch': 1} {'type': 'loss', 'content': 0.16653193533420563, 'timestamp': '2025-09-10 02:27:48.130249', 'step': 1503, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 2847885091968}, 'timestamp': '2025-09-10 02:27:48.159864', 'step': 1503, 'epoch': 1} {'type': 'loss', 'content': 0.1229623481631279, 'timestamp': '2025-09-10 02:27:48.183280', 'step': 1504, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 2847885091968}, 'timestamp': '2025-09-10 02:27:48.212903', 'step': 1504, 'epoch': 1} {'type': 'loss', 'content': 0.18015535175800323, 'timestamp': '2025-09-10 02:27:48.214993', 'step': 1505, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 3322488817984}, 'timestamp': '2025-09-10 02:27:48.244639', 'step': 1505, 'epoch': 1} {'type': 'loss', 'content': 0.1812284141778946, 'timestamp': '2025-09-10 02:27:48.247314', 'step': 1506, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 2847885091968}, 'timestamp': '2025-09-10 02:27:48.277250', 'step': 1506, 'epoch': 1} {'type': 'loss', 'content': 0.1946370005607605, 'timestamp': '2025-09-10 02:27:48.279485', 'step': 1507, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 3322488817984}, 'timestamp': '2025-09-10 02:27:48.309390', 'step': 1507, 'epoch': 1} {'type': 'loss', 'content': 0.1698225438594818, 'timestamp': '2025-09-10 02:27:48.336505', 'step': 1508, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 128], 'flops': 3797092544000}, 'timestamp': '2025-09-10 02:27:48.365906', 'step': 1508, 'epoch': 1} {'type': 'loss', 'content': 0.16314013302326202, 'timestamp': '2025-09-10 02:27:48.368843', 'step': 1509, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 3322488817984}, 'timestamp': '2025-09-10 02:27:48.398848', 'step': 1509, 'epoch': 1} {'type': 'loss', 'content': 0.19676242768764496, 'timestamp': '2025-09-10 02:27:48.401077', 'step': 1510, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 176], 'flops': 5220903722048}, 'timestamp': '2025-09-10 02:27:48.431034', 'step': 1510, 'epoch': 1} {'type': 'loss', 'content': 0.14766865968704224, 'timestamp': '2025-09-10 02:27:48.435346', 'step': 1511, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 2847885091968}, 'timestamp': '2025-09-10 02:27:48.465373', 'step': 1511, 'epoch': 1} {'type': 'loss', 'content': 0.17597605288028717, 'timestamp': '2025-09-10 02:27:48.490614', 'step': 1512, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 2847885091968}, 'timestamp': '2025-09-10 02:27:48.521142', 'step': 1512, 'epoch': 1} {'type': 'loss', 'content': 0.16231153905391693, 'timestamp': '2025-09-10 02:27:48.523504', 'step': 1513, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 2847885091968}, 'timestamp': '2025-09-10 02:27:48.553019', 'step': 1513, 'epoch': 1} {'type': 'loss', 'content': 0.3748335838317871, 'timestamp': '2025-09-10 02:27:48.555194', 'step': 1514, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 80], 'flops': 2373281365952}, 'timestamp': '2025-09-10 02:27:48.586037', 'step': 1514, 'epoch': 1} {'type': 'loss', 'content': 0.24270129203796387, 'timestamp': '2025-09-10 02:27:48.588184', 'step': 1515, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 2847885091968}, 'timestamp': '2025-09-10 02:27:48.617861', 'step': 1515, 'epoch': 1} {'type': 'loss', 'content': 0.1895674616098404, 'timestamp': '2025-09-10 02:27:48.641413', 'step': 1516, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 2847885091968}, 'timestamp': '2025-09-10 02:27:48.671585', 'step': 1516, 'epoch': 1} {'type': 'loss', 'content': 0.15213176608085632, 'timestamp': '2025-09-10 02:27:48.673947', 'step': 1517, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 2847885091968}, 'timestamp': '2025-09-10 02:27:48.703619', 'step': 1517, 'epoch': 1} {'type': 'loss', 'content': 0.21298444271087646, 'timestamp': '2025-09-10 02:27:48.709395', 'step': 1518, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 144], 'flops': 4271696270016}, 'timestamp': '2025-09-10 02:27:48.748616', 'step': 1518, 'epoch': 1} {'type': 'loss', 'content': 0.3402062952518463, 'timestamp': '2025-09-10 02:27:48.753468', 'step': 1519, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 2847885091968}, 'timestamp': '2025-09-10 02:27:48.790275', 'step': 1519, 'epoch': 1} {'type': 'loss', 'content': 0.09619363397359848, 'timestamp': '2025-09-10 02:27:48.813735', 'step': 1520, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 2847885091968}, 'timestamp': '2025-09-10 02:27:48.843451', 'step': 1520, 'epoch': 1} {'type': 'loss', 'content': 0.05416851118206978, 'timestamp': '2025-09-10 02:27:48.845903', 'step': 1521, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 128], 'flops': 3797092544000}, 'timestamp': '2025-09-10 02:27:48.877572', 'step': 1521, 'epoch': 1} {'type': 'loss', 'content': 0.19083616137504578, 'timestamp': '2025-09-10 02:27:48.879623', 'step': 1522, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 160], 'flops': 4746299996032}, 'timestamp': '2025-09-10 02:27:48.910362', 'step': 1522, 'epoch': 1} {'type': 'loss', 'content': 0.19336295127868652, 'timestamp': '2025-09-10 02:27:48.913030', 'step': 1523, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 144], 'flops': 4271696270016}, 'timestamp': '2025-09-10 02:27:48.942931', 'step': 1523, 'epoch': 1} {'type': 'loss', 'content': 0.20927836000919342, 'timestamp': '2025-09-10 02:27:48.966438', 'step': 1524, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 3322488817984}, 'timestamp': '2025-09-10 02:27:48.997295', 'step': 1524, 'epoch': 1} {'type': 'loss', 'content': 0.21224692463874817, 'timestamp': '2025-09-10 02:27:48.999532', 'step': 1525, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 128], 'flops': 3797092544000}, 'timestamp': '2025-09-10 02:27:49.029933', 'step': 1525, 'epoch': 1} {'type': 'loss', 'content': 0.2039719820022583, 'timestamp': '2025-09-10 02:27:49.032198', 'step': 1526, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 2847885091968}, 'timestamp': '2025-09-10 02:27:49.061737', 'step': 1526, 'epoch': 1} {'type': 'loss', 'content': 0.23102615773677826, 'timestamp': '2025-09-10 02:27:49.063999', 'step': 1527, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 144], 'flops': 4271696270016}, 'timestamp': '2025-09-10 02:27:49.095327', 'step': 1527, 'epoch': 1} {'type': 'loss', 'content': 0.11967630684375763, 'timestamp': '2025-09-10 02:27:49.119225', 'step': 1528, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 2847885091968}, 'timestamp': '2025-09-10 02:27:49.149398', 'step': 1528, 'epoch': 1} {'type': 'loss', 'content': 0.2091810703277588, 'timestamp': '2025-09-10 02:27:49.151543', 'step': 1529, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 3322488817984}, 'timestamp': '2025-09-10 02:27:49.181181', 'step': 1529, 'epoch': 1} {'type': 'loss', 'content': 0.1466706097126007, 'timestamp': '2025-09-10 02:27:49.183719', 'step': 1530, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 3322488817984}, 'timestamp': '2025-09-10 02:27:49.214055', 'step': 1530, 'epoch': 1} {'type': 'loss', 'content': 0.1558985412120819, 'timestamp': '2025-09-10 02:27:49.216154', 'step': 1531, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 3322488817984}, 'timestamp': '2025-09-10 02:27:49.246000', 'step': 1531, 'epoch': 1} {'type': 'loss', 'content': 0.2174876183271408, 'timestamp': '2025-09-10 02:27:49.269199', 'step': 1532, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 128], 'flops': 3797092544000}, 'timestamp': '2025-09-10 02:27:49.300620', 'step': 1532, 'epoch': 1} {'type': 'loss', 'content': 0.16664589941501617, 'timestamp': '2025-09-10 02:27:49.303058', 'step': 1533, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 3322488817984}, 'timestamp': '2025-09-10 02:27:49.340508', 'step': 1533, 'epoch': 1} {'type': 'loss', 'content': 0.14721104502677917, 'timestamp': '2025-09-10 02:27:49.342678', 'step': 1534, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 2847885091968}, 'timestamp': '2025-09-10 02:27:49.373947', 'step': 1534, 'epoch': 1} {'type': 'loss', 'content': 0.2503291964530945, 'timestamp': '2025-09-10 02:27:49.376466', 'step': 1535, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 144], 'flops': 4271696270016}, 'timestamp': '2025-09-10 02:27:49.406603', 'step': 1535, 'epoch': 1} {'type': 'loss', 'content': 0.09313204139471054, 'timestamp': '2025-09-10 02:27:49.430096', 'step': 1536, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 2847885091968}, 'timestamp': '2025-09-10 02:27:49.459924', 'step': 1536, 'epoch': 1} {'type': 'loss', 'content': 0.12808629870414734, 'timestamp': '2025-09-10 02:27:49.462200', 'step': 1537, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 128], 'flops': 3797092544000}, 'timestamp': '2025-09-10 02:27:49.491950', 'step': 1537, 'epoch': 1} {'type': 'loss', 'content': 0.2737681567668915, 'timestamp': '2025-09-10 02:27:49.494322', 'step': 1538, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 3322488817984}, 'timestamp': '2025-09-10 02:27:49.523799', 'step': 1538, 'epoch': 1} {'type': 'loss', 'content': 0.18486657738685608, 'timestamp': '2025-09-10 02:27:49.525902', 'step': 1539, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 128], 'flops': 3797092544000}, 'timestamp': '2025-09-10 02:27:49.555552', 'step': 1539, 'epoch': 1} {'type': 'loss', 'content': 0.18226724863052368, 'timestamp': '2025-09-10 02:27:49.579428', 'step': 1540, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 3322488817984}, 'timestamp': '2025-09-10 02:27:49.610201', 'step': 1540, 'epoch': 1} {'type': 'loss', 'content': 0.2509365677833557, 'timestamp': '2025-09-10 02:27:49.612612', 'step': 1541, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 128], 'flops': 3797092544000}, 'timestamp': '2025-09-10 02:27:49.642370', 'step': 1541, 'epoch': 1} {'type': 'loss', 'content': 0.2147054821252823, 'timestamp': '2025-09-10 02:27:49.644609', 'step': 1542, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 3322488817984}, 'timestamp': '2025-09-10 02:27:49.674517', 'step': 1542, 'epoch': 1} {'type': 'loss', 'content': 0.09834672510623932, 'timestamp': '2025-09-10 02:27:49.677520', 'step': 1543, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 3322488817984}, 'timestamp': '2025-09-10 02:27:49.707142', 'step': 1543, 'epoch': 1} {'type': 'loss', 'content': 0.12396986037492752, 'timestamp': '2025-09-10 02:27:49.730511', 'step': 1544, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 3322488817984}, 'timestamp': '2025-09-10 02:27:49.760627', 'step': 1544, 'epoch': 1} {'type': 'loss', 'content': 0.31529855728149414, 'timestamp': '2025-09-10 02:27:49.762858', 'step': 1545, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 3322488817984}, 'timestamp': '2025-09-10 02:27:49.792393', 'step': 1545, 'epoch': 1} {'type': 'loss', 'content': 0.12219271808862686, 'timestamp': '2025-09-10 02:27:49.794573', 'step': 1546, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 2847885091968}, 'timestamp': '2025-09-10 02:27:49.824349', 'step': 1546, 'epoch': 1} {'type': 'loss', 'content': 0.10178235173225403, 'timestamp': '2025-09-10 02:27:49.826517', 'step': 1547, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 128], 'flops': 3797092544000}, 'timestamp': '2025-09-10 02:27:49.857027', 'step': 1547, 'epoch': 1} {'type': 'loss', 'content': 0.2005450576543808, 'timestamp': '2025-09-10 02:27:49.880636', 'step': 1548, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 2847885091968}, 'timestamp': '2025-09-10 02:27:49.911300', 'step': 1548, 'epoch': 1} {'type': 'loss', 'content': 0.17169404029846191, 'timestamp': '2025-09-10 02:27:49.913863', 'step': 1549, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 2847885091968}, 'timestamp': '2025-09-10 02:27:49.943599', 'step': 1549, 'epoch': 1} {'type': 'loss', 'content': 0.11784131824970245, 'timestamp': '2025-09-10 02:27:49.945753', 'step': 1550, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 144], 'flops': 4271696270016}, 'timestamp': '2025-09-10 02:27:49.975386', 'step': 1550, 'epoch': 1} {'type': 'loss', 'content': 0.14091670513153076, 'timestamp': '2025-09-10 02:27:49.977636', 'step': 1551, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 3322488817984}, 'timestamp': '2025-09-10 02:27:50.009887', 'step': 1551, 'epoch': 1} {'type': 'loss', 'content': 0.10270613431930542, 'timestamp': '2025-09-10 02:27:50.033273', 'step': 1552, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 3322488817984}, 'timestamp': '2025-09-10 02:27:50.064591', 'step': 1552, 'epoch': 1} {'type': 'loss', 'content': 0.18356768786907196, 'timestamp': '2025-09-10 02:27:50.066727', 'step': 1553, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 2847885091968}, 'timestamp': '2025-09-10 02:27:50.096957', 'step': 1553, 'epoch': 1} {'type': 'loss', 'content': 0.17817917466163635, 'timestamp': '2025-09-10 02:27:50.099123', 'step': 1554, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 3322488817984}, 'timestamp': '2025-09-10 02:27:50.129804', 'step': 1554, 'epoch': 1} {'type': 'loss', 'content': 0.30347609519958496, 'timestamp': '2025-09-10 02:27:50.132012', 'step': 1555, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 2847885091968}, 'timestamp': '2025-09-10 02:27:50.162047', 'step': 1555, 'epoch': 1} {'type': 'loss', 'content': 0.18345065414905548, 'timestamp': '2025-09-10 02:27:50.185812', 'step': 1556, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 3322488817984}, 'timestamp': '2025-09-10 02:27:50.217237', 'step': 1556, 'epoch': 1} {'type': 'loss', 'content': 0.19137924909591675, 'timestamp': '2025-09-10 02:27:50.234414', 'step': 1557, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 3322488817984}, 'timestamp': '2025-09-10 02:27:50.303961', 'step': 1557, 'epoch': 1} {'type': 'loss', 'content': 0.13973063230514526, 'timestamp': '2025-09-10 02:27:50.309972', 'step': 1558, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 144], 'flops': 4271696270016}, 'timestamp': '2025-09-10 02:27:50.371215', 'step': 1558, 'epoch': 1} {'type': 'loss', 'content': 0.18832065165042877, 'timestamp': '2025-09-10 02:27:50.376509', 'step': 1559, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 3322488817984}, 'timestamp': '2025-09-10 02:27:50.425008', 'step': 1559, 'epoch': 1} {'type': 'loss', 'content': 0.20411741733551025, 'timestamp': '2025-09-10 02:27:50.450720', 'step': 1560, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 3322488817984}, 'timestamp': '2025-09-10 02:27:50.505264', 'step': 1560, 'epoch': 1} {'type': 'loss', 'content': 0.2138664424419403, 'timestamp': '2025-09-10 02:27:50.513246', 'step': 1561, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 2847885091968}, 'timestamp': '2025-09-10 02:27:50.555544', 'step': 1561, 'epoch': 1} {'type': 'loss', 'content': 0.17984925210475922, 'timestamp': '2025-09-10 02:27:50.562199', 'step': 1562, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 144], 'flops': 4271696270016}, 'timestamp': '2025-09-10 02:27:50.603239', 'step': 1562, 'epoch': 1} {'type': 'loss', 'content': 0.19969528913497925, 'timestamp': '2025-09-10 02:27:50.606473', 'step': 1563, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 3322488817984}, 'timestamp': '2025-09-10 02:27:50.644657', 'step': 1563, 'epoch': 1} {'type': 'loss', 'content': 0.13769634068012238, 'timestamp': '2025-09-10 02:27:50.670162', 'step': 1564, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 2847885091968}, 'timestamp': '2025-09-10 02:27:50.711570', 'step': 1564, 'epoch': 1} {'type': 'loss', 'content': 0.15553095936775208, 'timestamp': '2025-09-10 02:27:50.719302', 'step': 1565, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 128], 'flops': 3797092544000}, 'timestamp': '2025-09-10 02:27:50.762762', 'step': 1565, 'epoch': 1} {'type': 'loss', 'content': 0.24129781126976013, 'timestamp': '2025-09-10 02:27:50.771748', 'step': 1566, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 128], 'flops': 3797092544000}, 'timestamp': '2025-09-10 02:27:50.818605', 'step': 1566, 'epoch': 1} {'type': 'loss', 'content': 0.10086103528738022, 'timestamp': '2025-09-10 02:27:50.825612', 'step': 1567, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 3322488817984}, 'timestamp': '2025-09-10 02:27:50.876994', 'step': 1567, 'epoch': 1} {'type': 'loss', 'content': 0.24905040860176086, 'timestamp': '2025-09-10 02:27:50.905953', 'step': 1568, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 128], 'flops': 3797092544000}, 'timestamp': '2025-09-10 02:27:50.948484', 'step': 1568, 'epoch': 1} {'type': 'loss', 'content': 0.14435957372188568, 'timestamp': '2025-09-10 02:27:50.952730', 'step': 1569, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 2847885091968}, 'timestamp': '2025-09-10 02:27:50.994606', 'step': 1569, 'epoch': 1} {'type': 'loss', 'content': 0.19666153192520142, 'timestamp': '2025-09-10 02:27:51.000192', 'step': 1570, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 128], 'flops': 3797092544000}, 'timestamp': '2025-09-10 02:27:51.038066', 'step': 1570, 'epoch': 1} {'type': 'loss', 'content': 0.13256269693374634, 'timestamp': '2025-09-10 02:27:51.041574', 'step': 1571, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 2847885091968}, 'timestamp': '2025-09-10 02:27:51.076667', 'step': 1571, 'epoch': 1} {'type': 'loss', 'content': 0.3093937933444977, 'timestamp': '2025-09-10 02:27:51.102503', 'step': 1572, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 160], 'flops': 4746299996032}, 'timestamp': '2025-09-10 02:27:51.133198', 'step': 1572, 'epoch': 1} {'type': 'loss', 'content': 0.14535735547542572, 'timestamp': '2025-09-10 02:27:51.135745', 'step': 1573, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 176], 'flops': 5220903722048}, 'timestamp': '2025-09-10 02:27:51.166745', 'step': 1573, 'epoch': 1} {'type': 'loss', 'content': 0.10331781208515167, 'timestamp': '2025-09-10 02:27:51.171089', 'step': 1574, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 3322488817984}, 'timestamp': '2025-09-10 02:27:51.201112', 'step': 1574, 'epoch': 1} {'type': 'loss', 'content': 0.19316962361335754, 'timestamp': '2025-09-10 02:27:51.202959', 'step': 1575, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 2847885091968}, 'timestamp': '2025-09-10 02:27:51.232344', 'step': 1575, 'epoch': 1} {'type': 'loss', 'content': 0.1923162192106247, 'timestamp': '2025-09-10 02:27:51.255780', 'step': 1576, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 3322488817984}, 'timestamp': '2025-09-10 02:27:51.286552', 'step': 1576, 'epoch': 1} {'type': 'loss', 'content': 0.1785462349653244, 'timestamp': '2025-09-10 02:27:51.289701', 'step': 1577, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 3322488817984}, 'timestamp': '2025-09-10 02:27:51.319447', 'step': 1577, 'epoch': 1} {'type': 'loss', 'content': 0.14821365475654602, 'timestamp': '2025-09-10 02:27:51.323676', 'step': 1578, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 3322488817984}, 'timestamp': '2025-09-10 02:27:51.353651', 'step': 1578, 'epoch': 1} {'type': 'loss', 'content': 0.1564808338880539, 'timestamp': '2025-09-10 02:27:51.355723', 'step': 1579, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 3322488817984}, 'timestamp': '2025-09-10 02:27:51.386267', 'step': 1579, 'epoch': 1} {'type': 'loss', 'content': 0.20652960240840912, 'timestamp': '2025-09-10 02:27:51.409843', 'step': 1580, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 80], 'flops': 2373281365952}, 'timestamp': '2025-09-10 02:27:51.439562', 'step': 1580, 'epoch': 1} {'type': 'loss', 'content': 0.1362038105726242, 'timestamp': '2025-09-10 02:27:51.441747', 'step': 1581, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 128], 'flops': 3797092544000}, 'timestamp': '2025-09-10 02:27:51.471581', 'step': 1581, 'epoch': 1} {'type': 'loss', 'content': 0.20531262457370758, 'timestamp': '2025-09-10 02:27:51.473779', 'step': 1582, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 128], 'flops': 3797092544000}, 'timestamp': '2025-09-10 02:27:51.504355', 'step': 1582, 'epoch': 1} {'type': 'loss', 'content': 0.22259899973869324, 'timestamp': '2025-09-10 02:27:51.506462', 'step': 1583, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 128], 'flops': 3797092544000}, 'timestamp': '2025-09-10 02:27:51.536412', 'step': 1583, 'epoch': 1} {'type': 'loss', 'content': 0.1760658621788025, 'timestamp': '2025-09-10 02:27:51.559835', 'step': 1584, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 2847885091968}, 'timestamp': '2025-09-10 02:27:51.589932', 'step': 1584, 'epoch': 1} {'type': 'loss', 'content': 0.1469094455242157, 'timestamp': '2025-09-10 02:27:51.592256', 'step': 1585, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 2847885091968}, 'timestamp': '2025-09-10 02:27:51.621672', 'step': 1585, 'epoch': 1} {'type': 'loss', 'content': 0.14751288294792175, 'timestamp': '2025-09-10 02:27:51.623752', 'step': 1586, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 3322488817984}, 'timestamp': '2025-09-10 02:27:51.653682', 'step': 1586, 'epoch': 1} {'type': 'loss', 'content': 0.27035853266716003, 'timestamp': '2025-09-10 02:27:51.656078', 'step': 1587, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 128], 'flops': 3797092544000}, 'timestamp': '2025-09-10 02:27:51.686357', 'step': 1587, 'epoch': 1} {'type': 'loss', 'content': 0.16404743492603302, 'timestamp': '2025-09-10 02:27:51.709970', 'step': 1588, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 3322488817984}, 'timestamp': '2025-09-10 02:27:51.740328', 'step': 1588, 'epoch': 1} {'type': 'loss', 'content': 0.13676762580871582, 'timestamp': '2025-09-10 02:27:51.742576', 'step': 1589, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 128], 'flops': 3797092544000}, 'timestamp': '2025-09-10 02:27:51.771618', 'step': 1589, 'epoch': 1} {'type': 'loss', 'content': 0.2185717672109604, 'timestamp': '2025-09-10 02:27:51.774167', 'step': 1590, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 128], 'flops': 3797092544000}, 'timestamp': '2025-09-10 02:27:51.803812', 'step': 1590, 'epoch': 1} {'type': 'loss', 'content': 0.17439430952072144, 'timestamp': '2025-09-10 02:27:51.807668', 'step': 1591, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 3322488817984}, 'timestamp': '2025-09-10 02:27:51.837601', 'step': 1591, 'epoch': 1} {'type': 'loss', 'content': 0.23885121941566467, 'timestamp': '2025-09-10 02:27:51.861363', 'step': 1592, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 2847885091968}, 'timestamp': '2025-09-10 02:27:51.891917', 'step': 1592, 'epoch': 1} {'type': 'loss', 'content': 0.15082360804080963, 'timestamp': '2025-09-10 02:27:51.894098', 'step': 1593, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 2847885091968}, 'timestamp': '2025-09-10 02:27:51.923433', 'step': 1593, 'epoch': 1} {'type': 'loss', 'content': 0.29262039065361023, 'timestamp': '2025-09-10 02:27:51.926963', 'step': 1594, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 128], 'flops': 3797092544000}, 'timestamp': '2025-09-10 02:27:51.958674', 'step': 1594, 'epoch': 1} {'type': 'loss', 'content': 0.16079376637935638, 'timestamp': '2025-09-10 02:27:51.960906', 'step': 1595, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 3322488817984}, 'timestamp': '2025-09-10 02:27:51.993203', 'step': 1595, 'epoch': 1} {'type': 'loss', 'content': 0.2269735485315323, 'timestamp': '2025-09-10 02:27:52.016847', 'step': 1596, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 2847885091968}, 'timestamp': '2025-09-10 02:27:52.046649', 'step': 1596, 'epoch': 1} {'type': 'loss', 'content': 0.19752180576324463, 'timestamp': '2025-09-10 02:27:52.048627', 'step': 1597, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 2847885091968}, 'timestamp': '2025-09-10 02:27:52.078722', 'step': 1597, 'epoch': 1} {'type': 'loss', 'content': 0.14452818036079407, 'timestamp': '2025-09-10 02:27:52.081000', 'step': 1598, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 2847885091968}, 'timestamp': '2025-09-10 02:27:52.110828', 'step': 1598, 'epoch': 1} {'type': 'loss', 'content': 0.2029300332069397, 'timestamp': '2025-09-10 02:27:52.113141', 'step': 1599, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 3322488817984}, 'timestamp': '2025-09-10 02:27:52.142906', 'step': 1599, 'epoch': 1} {'type': 'loss', 'content': 0.1970805525779724, 'timestamp': '2025-09-10 02:27:52.166309', 'step': 1600, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 128], 'flops': 3797092544000}, 'timestamp': '2025-09-10 02:27:52.196314', 'step': 1600, 'epoch': 1} {'type': 'loss', 'content': 0.17307020723819733, 'timestamp': '2025-09-10 02:27:52.198955', 'step': 1601, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 128], 'flops': 3797092544000}, 'timestamp': '2025-09-10 02:27:52.228660', 'step': 1601, 'epoch': 1} {'type': 'loss', 'content': 0.15423499047756195, 'timestamp': '2025-09-10 02:27:52.230929', 'step': 1602, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 176], 'flops': 5220903722048}, 'timestamp': '2025-09-10 02:27:52.261713', 'step': 1602, 'epoch': 1} {'type': 'loss', 'content': 0.1378912478685379, 'timestamp': '2025-09-10 02:27:52.265877', 'step': 1603, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 128], 'flops': 3797092544000}, 'timestamp': '2025-09-10 02:27:52.295311', 'step': 1603, 'epoch': 1} {'type': 'loss', 'content': 0.17092697322368622, 'timestamp': '2025-09-10 02:27:52.318853', 'step': 1604, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 3322488817984}, 'timestamp': '2025-09-10 02:27:52.353666', 'step': 1604, 'epoch': 1} {'type': 'loss', 'content': 0.15078343451023102, 'timestamp': '2025-09-10 02:27:52.355745', 'step': 1605, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 2847885091968}, 'timestamp': '2025-09-10 02:27:52.386884', 'step': 1605, 'epoch': 1} {'type': 'loss', 'content': 0.2120949923992157, 'timestamp': '2025-09-10 02:27:52.389148', 'step': 1606, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 144], 'flops': 4271696270016}, 'timestamp': '2025-09-10 02:27:52.418791', 'step': 1606, 'epoch': 1} {'type': 'loss', 'content': 0.10811123251914978, 'timestamp': '2025-09-10 02:27:52.421176', 'step': 1607, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 128], 'flops': 3797092544000}, 'timestamp': '2025-09-10 02:27:52.451230', 'step': 1607, 'epoch': 1} {'type': 'loss', 'content': 0.19671262800693512, 'timestamp': '2025-09-10 02:27:52.474748', 'step': 1608, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 128], 'flops': 3797092544000}, 'timestamp': '2025-09-10 02:27:52.505129', 'step': 1608, 'epoch': 1} {'type': 'loss', 'content': 0.18606719374656677, 'timestamp': '2025-09-10 02:27:52.507338', 'step': 1609, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 2847885091968}, 'timestamp': '2025-09-10 02:27:52.537118', 'step': 1609, 'epoch': 1} {'type': 'loss', 'content': 0.220616415143013, 'timestamp': '2025-09-10 02:27:52.539445', 'step': 1610, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 2847885091968}, 'timestamp': '2025-09-10 02:27:52.569906', 'step': 1610, 'epoch': 1} {'type': 'loss', 'content': 0.22546008229255676, 'timestamp': '2025-09-10 02:27:52.572142', 'step': 1611, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 128], 'flops': 3797092544000}, 'timestamp': '2025-09-10 02:27:52.602030', 'step': 1611, 'epoch': 1} {'type': 'loss', 'content': 0.2293155938386917, 'timestamp': '2025-09-10 02:27:52.625617', 'step': 1612, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 3322488817984}, 'timestamp': '2025-09-10 02:27:52.655666', 'step': 1612, 'epoch': 1} {'type': 'loss', 'content': 0.14028455317020416, 'timestamp': '2025-09-10 02:27:52.658014', 'step': 1613, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 128], 'flops': 3797092544000}, 'timestamp': '2025-09-10 02:27:52.687929', 'step': 1613, 'epoch': 1} {'type': 'loss', 'content': 0.15013614296913147, 'timestamp': '2025-09-10 02:27:52.690065', 'step': 1614, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 3322488817984}, 'timestamp': '2025-09-10 02:27:52.720245', 'step': 1614, 'epoch': 1} {'type': 'loss', 'content': 0.28590458631515503, 'timestamp': '2025-09-10 02:27:52.722530', 'step': 1615, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 3322488817984}, 'timestamp': '2025-09-10 02:27:52.753013', 'step': 1615, 'epoch': 1} {'type': 'loss', 'content': 0.14563164114952087, 'timestamp': '2025-09-10 02:27:52.776735', 'step': 1616, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 3322488817984}, 'timestamp': '2025-09-10 02:27:52.842505', 'step': 1616, 'epoch': 1} {'type': 'loss', 'content': 0.2180987000465393, 'timestamp': '2025-09-10 02:27:52.847137', 'step': 1617, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 3322488817984}, 'timestamp': '2025-09-10 02:27:52.904904', 'step': 1617, 'epoch': 1} {'type': 'loss', 'content': 0.19393721222877502, 'timestamp': '2025-09-10 02:27:52.922758', 'step': 1618, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 2847885091968}, 'timestamp': '2025-09-10 02:27:53.007986', 'step': 1618, 'epoch': 1} {'type': 'loss', 'content': 0.18817120790481567, 'timestamp': '2025-09-10 02:27:53.025381', 'step': 1619, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 128], 'flops': 3797092544000}, 'timestamp': '2025-09-10 02:27:53.116914', 'step': 1619, 'epoch': 1} {'type': 'loss', 'content': 0.13851280510425568, 'timestamp': '2025-09-10 02:27:53.155470', 'step': 1620, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 2847885091968}, 'timestamp': '2025-09-10 02:27:53.220278', 'step': 1620, 'epoch': 1} {'type': 'loss', 'content': 0.24578163027763367, 'timestamp': '2025-09-10 02:27:53.237607', 'step': 1621, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 2847885091968}, 'timestamp': '2025-09-10 02:27:53.311923', 'step': 1621, 'epoch': 1} {'type': 'loss', 'content': 0.19874680042266846, 'timestamp': '2025-09-10 02:27:53.329387', 'step': 1622, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 160], 'flops': 4746299996032}, 'timestamp': '2025-09-10 02:27:53.407067', 'step': 1622, 'epoch': 1} {'type': 'loss', 'content': 0.1824241280555725, 'timestamp': '2025-09-10 02:27:53.421541', 'step': 1623, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 3322488817984}, 'timestamp': '2025-09-10 02:27:53.466679', 'step': 1623, 'epoch': 1} {'type': 'loss', 'content': 0.3200964033603668, 'timestamp': '2025-09-10 02:27:53.495005', 'step': 1624, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 3322488817984}, 'timestamp': '2025-09-10 02:27:53.550725', 'step': 1624, 'epoch': 1} {'type': 'loss', 'content': 0.14852704107761383, 'timestamp': '2025-09-10 02:27:53.554013', 'step': 1625, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 3322488817984}, 'timestamp': '2025-09-10 02:27:53.612696', 'step': 1625, 'epoch': 1} {'type': 'loss', 'content': 0.08743303269147873, 'timestamp': '2025-09-10 02:27:53.630620', 'step': 1626, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 144], 'flops': 4271696270016}, 'timestamp': '2025-09-10 02:27:53.720024', 'step': 1626, 'epoch': 1} {'type': 'loss', 'content': 0.146645188331604, 'timestamp': '2025-09-10 02:27:53.735144', 'step': 1627, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 3322488817984}, 'timestamp': '2025-09-10 02:27:53.771487', 'step': 1627, 'epoch': 1} {'type': 'loss', 'content': 0.1505172699689865, 'timestamp': '2025-09-10 02:27:53.796082', 'step': 1628, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 3322488817984}, 'timestamp': '2025-09-10 02:27:53.827001', 'step': 1628, 'epoch': 1} {'type': 'loss', 'content': 0.12328978627920151, 'timestamp': '2025-09-10 02:27:53.829422', 'step': 1629, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 80], 'flops': 2373281365952}, 'timestamp': '2025-09-10 02:27:53.859747', 'step': 1629, 'epoch': 1} {'type': 'loss', 'content': 0.14735378324985504, 'timestamp': '2025-09-10 02:27:53.862213', 'step': 1630, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 2847885091968}, 'timestamp': '2025-09-10 02:27:53.891872', 'step': 1630, 'epoch': 1} {'type': 'loss', 'content': 0.11257901042699814, 'timestamp': '2025-09-10 02:27:53.893996', 'step': 1631, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 144], 'flops': 4271696270016}, 'timestamp': '2025-09-10 02:27:53.924258', 'step': 1631, 'epoch': 1} {'type': 'loss', 'content': 0.1725803166627884, 'timestamp': '2025-09-10 02:27:53.947769', 'step': 1632, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 144], 'flops': 4271696270016}, 'timestamp': '2025-09-10 02:27:53.977310', 'step': 1632, 'epoch': 1} {'type': 'loss', 'content': 0.1635158360004425, 'timestamp': '2025-09-10 02:27:53.979411', 'step': 1633, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 3322488817984}, 'timestamp': '2025-09-10 02:27:54.009304', 'step': 1633, 'epoch': 1} {'type': 'loss', 'content': 0.1498817503452301, 'timestamp': '2025-09-10 02:27:54.011417', 'step': 1634, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 3322488817984}, 'timestamp': '2025-09-10 02:27:54.041344', 'step': 1634, 'epoch': 1} {'type': 'loss', 'content': 0.17684946954250336, 'timestamp': '2025-09-10 02:27:54.043793', 'step': 1635, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 2847885091968}, 'timestamp': '2025-09-10 02:27:54.074717', 'step': 1635, 'epoch': 1} {'type': 'loss', 'content': 0.18078084290027618, 'timestamp': '2025-09-10 02:27:54.098116', 'step': 1636, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 2847885091968}, 'timestamp': '2025-09-10 02:27:54.128474', 'step': 1636, 'epoch': 1} {'type': 'loss', 'content': 0.23092545568943024, 'timestamp': '2025-09-10 02:27:54.130720', 'step': 1637, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 2847885091968}, 'timestamp': '2025-09-10 02:27:54.160286', 'step': 1637, 'epoch': 1} {'type': 'loss', 'content': 0.3244459629058838, 'timestamp': '2025-09-10 02:27:54.163629', 'step': 1638, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 3322488817984}, 'timestamp': '2025-09-10 02:27:54.193760', 'step': 1638, 'epoch': 1} {'type': 'loss', 'content': 0.26177749037742615, 'timestamp': '2025-09-10 02:27:54.195924', 'step': 1639, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 128], 'flops': 3797092544000}, 'timestamp': '2025-09-10 02:27:54.225415', 'step': 1639, 'epoch': 1} {'type': 'loss', 'content': 0.21198830008506775, 'timestamp': '2025-09-10 02:27:54.249065', 'step': 1640, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 3322488817984}, 'timestamp': '2025-09-10 02:27:54.279688', 'step': 1640, 'epoch': 1} {'type': 'loss', 'content': 0.20319446921348572, 'timestamp': '2025-09-10 02:27:54.281906', 'step': 1641, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 128], 'flops': 3797092544000}, 'timestamp': '2025-09-10 02:27:54.312357', 'step': 1641, 'epoch': 1} {'type': 'loss', 'content': 0.13763369619846344, 'timestamp': '2025-09-10 02:27:54.314451', 'step': 1642, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 2847885091968}, 'timestamp': '2025-09-10 02:27:54.344395', 'step': 1642, 'epoch': 1} {'type': 'loss', 'content': 0.1629537045955658, 'timestamp': '2025-09-10 02:27:54.348062', 'step': 1643, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 128], 'flops': 3797092544000}, 'timestamp': '2025-09-10 02:27:54.387668', 'step': 1643, 'epoch': 1} {'type': 'loss', 'content': 0.267329603433609, 'timestamp': '2025-09-10 02:27:54.412395', 'step': 1644, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 128], 'flops': 3797092544000}, 'timestamp': '2025-09-10 02:27:54.443046', 'step': 1644, 'epoch': 1} {'type': 'loss', 'content': 0.11481105536222458, 'timestamp': '2025-09-10 02:27:54.450982', 'step': 1645, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 2847885091968}, 'timestamp': '2025-09-10 02:27:54.482050', 'step': 1645, 'epoch': 1} {'type': 'loss', 'content': 0.11753712594509125, 'timestamp': '2025-09-10 02:27:54.484573', 'step': 1646, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 2847885091968}, 'timestamp': '2025-09-10 02:27:54.515109', 'step': 1646, 'epoch': 1} {'type': 'loss', 'content': 0.12929177284240723, 'timestamp': '2025-09-10 02:27:54.517277', 'step': 1647, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 160], 'flops': 4746299996032}, 'timestamp': '2025-09-10 02:27:54.547402', 'step': 1647, 'epoch': 1} {'type': 'loss', 'content': 0.17020612955093384, 'timestamp': '2025-09-10 02:27:54.577504', 'step': 1648, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 2847885091968}, 'timestamp': '2025-09-10 02:27:54.653630', 'step': 1648, 'epoch': 1} {'type': 'loss', 'content': 0.16380922496318817, 'timestamp': '2025-09-10 02:27:54.657441', 'step': 1649, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 2847885091968}, 'timestamp': '2025-09-10 02:27:54.706208', 'step': 1649, 'epoch': 1} {'type': 'loss', 'content': 0.18438288569450378, 'timestamp': '2025-09-10 02:27:54.711004', 'step': 1650, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 2847885091968}, 'timestamp': '2025-09-10 02:27:54.743443', 'step': 1650, 'epoch': 1} {'type': 'loss', 'content': 0.17619092762470245, 'timestamp': '2025-09-10 02:27:54.746689', 'step': 1651, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 2847885091968}, 'timestamp': '2025-09-10 02:27:54.781136', 'step': 1651, 'epoch': 1} {'type': 'loss', 'content': 0.1105962023139, 'timestamp': '2025-09-10 02:27:54.804995', 'step': 1652, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 128], 'flops': 3797092544000}, 'timestamp': '2025-09-10 02:27:54.837134', 'step': 1652, 'epoch': 1} {'type': 'loss', 'content': 0.2041759490966797, 'timestamp': '2025-09-10 02:27:54.839783', 'step': 1653, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 2847885091968}, 'timestamp': '2025-09-10 02:27:54.871035', 'step': 1653, 'epoch': 1} {'type': 'loss', 'content': 0.1243283748626709, 'timestamp': '2025-09-10 02:27:54.877275', 'step': 1654, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 2847885091968}, 'timestamp': '2025-09-10 02:27:54.914347', 'step': 1654, 'epoch': 1} {'type': 'loss', 'content': 0.20452246069908142, 'timestamp': '2025-09-10 02:27:54.916433', 'step': 1655, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 128], 'flops': 3797092544000}, 'timestamp': '2025-09-10 02:27:54.945586', 'step': 1655, 'epoch': 1} {'type': 'loss', 'content': 0.2978971600532532, 'timestamp': '2025-09-10 02:27:54.969502', 'step': 1656, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 3322488817984}, 'timestamp': '2025-09-10 02:27:55.000613', 'step': 1656, 'epoch': 1} {'type': 'loss', 'content': 0.1647036224603653, 'timestamp': '2025-09-10 02:27:55.002770', 'step': 1657, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 128], 'flops': 3797092544000}, 'timestamp': '2025-09-10 02:27:55.032902', 'step': 1657, 'epoch': 1} {'type': 'loss', 'content': 0.11210331320762634, 'timestamp': '2025-09-10 02:27:55.034972', 'step': 1658, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 2847885091968}, 'timestamp': '2025-09-10 02:27:55.065168', 'step': 1658, 'epoch': 1} {'type': 'loss', 'content': 0.15418416261672974, 'timestamp': '2025-09-10 02:27:55.067568', 'step': 1659, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 2847885091968}, 'timestamp': '2025-09-10 02:27:55.100597', 'step': 1659, 'epoch': 1} {'type': 'loss', 'content': 0.14553411304950714, 'timestamp': '2025-09-10 02:27:55.124133', 'step': 1660, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 80], 'flops': 2373281365952}, 'timestamp': '2025-09-10 02:27:55.153709', 'step': 1660, 'epoch': 1} {'type': 'loss', 'content': 0.15138208866119385, 'timestamp': '2025-09-10 02:27:55.155940', 'step': 1661, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 144], 'flops': 4271696270016}, 'timestamp': '2025-09-10 02:27:55.188131', 'step': 1661, 'epoch': 1} {'type': 'loss', 'content': 0.20209358632564545, 'timestamp': '2025-09-10 02:27:55.190481', 'step': 1662, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 2847885091968}, 'timestamp': '2025-09-10 02:27:55.219921', 'step': 1662, 'epoch': 1} {'type': 'loss', 'content': 0.12472864240407944, 'timestamp': '2025-09-10 02:27:55.222155', 'step': 1663, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 160], 'flops': 4746299996032}, 'timestamp': '2025-09-10 02:27:55.252181', 'step': 1663, 'epoch': 1} {'type': 'loss', 'content': 0.12352113425731659, 'timestamp': '2025-09-10 02:27:55.275600', 'step': 1664, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 128], 'flops': 3797092544000}, 'timestamp': '2025-09-10 02:27:55.306331', 'step': 1664, 'epoch': 1} {'type': 'loss', 'content': 0.13142912089824677, 'timestamp': '2025-09-10 02:27:55.308407', 'step': 1665, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 2847885091968}, 'timestamp': '2025-09-10 02:27:55.338059', 'step': 1665, 'epoch': 1} {'type': 'loss', 'content': 0.1565628945827484, 'timestamp': '2025-09-10 02:27:55.340252', 'step': 1666, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 3322488817984}, 'timestamp': '2025-09-10 02:27:55.370118', 'step': 1666, 'epoch': 1} {'type': 'loss', 'content': 0.19447042047977448, 'timestamp': '2025-09-10 02:27:55.372463', 'step': 1667, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 2847885091968}, 'timestamp': '2025-09-10 02:27:55.402600', 'step': 1667, 'epoch': 1} {'type': 'loss', 'content': 0.20538586378097534, 'timestamp': '2025-09-10 02:27:55.426180', 'step': 1668, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 128], 'flops': 3797092544000}, 'timestamp': '2025-09-10 02:27:55.457186', 'step': 1668, 'epoch': 1} {'type': 'loss', 'content': 0.1197618842124939, 'timestamp': '2025-09-10 02:27:55.459541', 'step': 1669, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 128], 'flops': 3797092544000}, 'timestamp': '2025-09-10 02:27:55.491188', 'step': 1669, 'epoch': 1} {'type': 'loss', 'content': 0.1976143717765808, 'timestamp': '2025-09-10 02:27:55.494531', 'step': 1670, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 3322488817984}, 'timestamp': '2025-09-10 02:27:55.526454', 'step': 1670, 'epoch': 1} {'type': 'loss', 'content': 0.12862008810043335, 'timestamp': '2025-09-10 02:27:55.528515', 'step': 1671, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 144], 'flops': 4271696270016}, 'timestamp': '2025-09-10 02:27:55.558694', 'step': 1671, 'epoch': 1} {'type': 'loss', 'content': 0.1980125606060028, 'timestamp': '2025-09-10 02:27:55.582444', 'step': 1672, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 160], 'flops': 4746299996032}, 'timestamp': '2025-09-10 02:27:55.613344', 'step': 1672, 'epoch': 1} {'type': 'loss', 'content': 0.1849460005760193, 'timestamp': '2025-09-10 02:27:55.616570', 'step': 1673, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 128], 'flops': 3797092544000}, 'timestamp': '2025-09-10 02:27:55.649362', 'step': 1673, 'epoch': 1} {'type': 'loss', 'content': 0.16364562511444092, 'timestamp': '2025-09-10 02:27:55.651486', 'step': 1674, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 2847885091968}, 'timestamp': '2025-09-10 02:27:55.681189', 'step': 1674, 'epoch': 1} {'type': 'loss', 'content': 0.20906366407871246, 'timestamp': '2025-09-10 02:27:55.683492', 'step': 1675, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 3322488817984}, 'timestamp': '2025-09-10 02:27:55.713367', 'step': 1675, 'epoch': 1} {'type': 'loss', 'content': 0.1730397343635559, 'timestamp': '2025-09-10 02:27:55.737723', 'step': 1676, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 3322488817984}, 'timestamp': '2025-09-10 02:27:55.767750', 'step': 1676, 'epoch': 1} {'type': 'loss', 'content': 0.13876479864120483, 'timestamp': '2025-09-10 02:27:55.769849', 'step': 1677, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 3322488817984}, 'timestamp': '2025-09-10 02:27:55.800503', 'step': 1677, 'epoch': 1} {'type': 'loss', 'content': 0.14962521195411682, 'timestamp': '2025-09-10 02:27:55.802603', 'step': 1678, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 208], 'flops': 6170111174080}, 'timestamp': '2025-09-10 02:27:55.832139', 'step': 1678, 'epoch': 1} {'type': 'loss', 'content': 0.1507449448108673, 'timestamp': '2025-09-10 02:27:55.839185', 'step': 1679, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 3322488817984}, 'timestamp': '2025-09-10 02:27:55.869469', 'step': 1679, 'epoch': 1} {'type': 'loss', 'content': 0.1780378371477127, 'timestamp': '2025-09-10 02:27:55.892795', 'step': 1680, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 128], 'flops': 3797092544000}, 'timestamp': '2025-09-10 02:27:55.923166', 'step': 1680, 'epoch': 1} {'type': 'loss', 'content': 0.10757889598608017, 'timestamp': '2025-09-10 02:27:55.925250', 'step': 1681, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 128], 'flops': 3797092544000}, 'timestamp': '2025-09-10 02:27:55.955146', 'step': 1681, 'epoch': 1} {'type': 'loss', 'content': 0.12116662412881851, 'timestamp': '2025-09-10 02:27:55.957403', 'step': 1682, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 80], 'flops': 2373281365952}, 'timestamp': '2025-09-10 02:27:55.987765', 'step': 1682, 'epoch': 1} {'type': 'loss', 'content': 0.16025927662849426, 'timestamp': '2025-09-10 02:27:55.989884', 'step': 1683, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 3322488817984}, 'timestamp': '2025-09-10 02:27:56.019610', 'step': 1683, 'epoch': 1} {'type': 'loss', 'content': 0.20587848126888275, 'timestamp': '2025-09-10 02:27:56.043130', 'step': 1684, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 3322488817984}, 'timestamp': '2025-09-10 02:27:56.073653', 'step': 1684, 'epoch': 1} {'type': 'loss', 'content': 0.3277137577533722, 'timestamp': '2025-09-10 02:27:56.076041', 'step': 1685, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 3322488817984}, 'timestamp': '2025-09-10 02:27:56.105951', 'step': 1685, 'epoch': 1} {'type': 'loss', 'content': 0.1732572466135025, 'timestamp': '2025-09-10 02:27:56.108231', 'step': 1686, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 2847885091968}, 'timestamp': '2025-09-10 02:27:56.137895', 'step': 1686, 'epoch': 1} {'type': 'loss', 'content': 0.2574252188205719, 'timestamp': '2025-09-10 02:27:56.140304', 'step': 1687, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 2847885091968}, 'timestamp': '2025-09-10 02:27:56.170912', 'step': 1687, 'epoch': 1} {'type': 'loss', 'content': 0.1123284325003624, 'timestamp': '2025-09-10 02:27:56.194421', 'step': 1688, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 3322488817984}, 'timestamp': '2025-09-10 02:27:56.224391', 'step': 1688, 'epoch': 1} {'type': 'loss', 'content': 0.13595792651176453, 'timestamp': '2025-09-10 02:27:56.226532', 'step': 1689, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 128], 'flops': 3797092544000}, 'timestamp': '2025-09-10 02:27:56.256197', 'step': 1689, 'epoch': 1} {'type': 'loss', 'content': 0.25236666202545166, 'timestamp': '2025-09-10 02:27:56.258504', 'step': 1690, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 80], 'flops': 2373281365952}, 'timestamp': '2025-09-10 02:27:56.288457', 'step': 1690, 'epoch': 1} {'type': 'loss', 'content': 0.30855005979537964, 'timestamp': '2025-09-10 02:27:56.290541', 'step': 1691, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 2847885091968}, 'timestamp': '2025-09-10 02:27:56.320692', 'step': 1691, 'epoch': 1} {'type': 'loss', 'content': 0.14601625502109528, 'timestamp': '2025-09-10 02:27:56.344159', 'step': 1692, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 144], 'flops': 4271696270016}, 'timestamp': '2025-09-10 02:27:56.377385', 'step': 1692, 'epoch': 1} {'type': 'loss', 'content': 0.24607355892658234, 'timestamp': '2025-09-10 02:27:56.379715', 'step': 1693, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 128], 'flops': 3797092544000}, 'timestamp': '2025-09-10 02:27:56.410195', 'step': 1693, 'epoch': 1} {'type': 'loss', 'content': 0.14575345814228058, 'timestamp': '2025-09-10 02:27:56.412438', 'step': 1694, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 160], 'flops': 4746299996032}, 'timestamp': '2025-09-10 02:27:56.442996', 'step': 1694, 'epoch': 1} {'type': 'loss', 'content': 0.13062724471092224, 'timestamp': '2025-09-10 02:27:56.445675', 'step': 1695, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 128], 'flops': 3797092544000}, 'timestamp': '2025-09-10 02:27:56.476545', 'step': 1695, 'epoch': 1} {'type': 'loss', 'content': 0.1283513456583023, 'timestamp': '2025-09-10 02:27:56.500032', 'step': 1696, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 144], 'flops': 4271696270016}, 'timestamp': '2025-09-10 02:27:56.531196', 'step': 1696, 'epoch': 1} {'type': 'loss', 'content': 0.2255048304796219, 'timestamp': '2025-09-10 02:27:56.533291', 'step': 1697, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 2847885091968}, 'timestamp': '2025-09-10 02:27:56.563224', 'step': 1697, 'epoch': 1} {'type': 'loss', 'content': 0.16981995105743408, 'timestamp': '2025-09-10 02:27:56.565371', 'step': 1698, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 144], 'flops': 4271696270016}, 'timestamp': '2025-09-10 02:27:56.595694', 'step': 1698, 'epoch': 1} {'type': 'loss', 'content': 0.16101804375648499, 'timestamp': '2025-09-10 02:27:56.597929', 'step': 1699, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 144], 'flops': 4271696270016}, 'timestamp': '2025-09-10 02:27:56.630933', 'step': 1699, 'epoch': 1} {'type': 'loss', 'content': 0.20146559178829193, 'timestamp': '2025-09-10 02:27:56.654557', 'step': 1700, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 80], 'flops': 2373281365952}, 'timestamp': '2025-09-10 02:27:56.685349', 'step': 1700, 'epoch': 1} {'type': 'loss', 'content': 0.09810498356819153, 'timestamp': '2025-09-10 02:27:56.687811', 'step': 1701, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 128], 'flops': 3797092544000}, 'timestamp': '2025-09-10 02:27:56.718828', 'step': 1701, 'epoch': 1} {'type': 'loss', 'content': 0.1625983864068985, 'timestamp': '2025-09-10 02:27:56.721067', 'step': 1702, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 2847885091968}, 'timestamp': '2025-09-10 02:27:56.751991', 'step': 1702, 'epoch': 1} {'type': 'loss', 'content': 0.14669369161128998, 'timestamp': '2025-09-10 02:27:56.754252', 'step': 1703, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 3322488817984}, 'timestamp': '2025-09-10 02:27:56.784984', 'step': 1703, 'epoch': 1} {'type': 'loss', 'content': 0.21239078044891357, 'timestamp': '2025-09-10 02:27:56.808593', 'step': 1704, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 3322488817984}, 'timestamp': '2025-09-10 02:27:56.839501', 'step': 1704, 'epoch': 1} {'type': 'loss', 'content': 0.24081483483314514, 'timestamp': '2025-09-10 02:27:56.841836', 'step': 1705, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 3322488817984}, 'timestamp': '2025-09-10 02:27:56.871477', 'step': 1705, 'epoch': 1} {'type': 'loss', 'content': 0.15844662487506866, 'timestamp': '2025-09-10 02:27:56.873723', 'step': 1706, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 160], 'flops': 4746299996032}, 'timestamp': '2025-09-10 02:27:56.904803', 'step': 1706, 'epoch': 1} {'type': 'loss', 'content': 0.222762331366539, 'timestamp': '2025-09-10 02:27:56.907600', 'step': 1707, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 128], 'flops': 3797092544000}, 'timestamp': '2025-09-10 02:27:56.939553', 'step': 1707, 'epoch': 1} {'type': 'loss', 'content': 0.13834746181964874, 'timestamp': '2025-09-10 02:27:56.963026', 'step': 1708, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 144], 'flops': 4271696270016}, 'timestamp': '2025-09-10 02:27:56.993311', 'step': 1708, 'epoch': 1} {'type': 'loss', 'content': 0.13758721947669983, 'timestamp': '2025-09-10 02:27:56.995548', 'step': 1709, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 128], 'flops': 3797092544000}, 'timestamp': '2025-09-10 02:27:57.025825', 'step': 1709, 'epoch': 1} {'type': 'loss', 'content': 0.20705989003181458, 'timestamp': '2025-09-10 02:27:57.028058', 'step': 1710, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 176], 'flops': 5220903722048}, 'timestamp': '2025-09-10 02:27:57.058389', 'step': 1710, 'epoch': 1} {'type': 'loss', 'content': 0.12865465879440308, 'timestamp': '2025-09-10 02:27:57.062596', 'step': 1711, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 2847885091968}, 'timestamp': '2025-09-10 02:27:57.092697', 'step': 1711, 'epoch': 1} {'type': 'loss', 'content': 0.1994190365076065, 'timestamp': '2025-09-10 02:27:57.115981', 'step': 1712, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 2847885091968}, 'timestamp': '2025-09-10 02:27:57.147389', 'step': 1712, 'epoch': 1} {'type': 'loss', 'content': 0.1315227895975113, 'timestamp': '2025-09-10 02:27:57.149984', 'step': 1713, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 3322488817984}, 'timestamp': '2025-09-10 02:27:57.180003', 'step': 1713, 'epoch': 1} {'type': 'loss', 'content': 0.2546180188655853, 'timestamp': '2025-09-10 02:27:57.182431', 'step': 1714, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 128], 'flops': 3797092544000}, 'timestamp': '2025-09-10 02:27:57.213948', 'step': 1714, 'epoch': 1} {'type': 'loss', 'content': 0.30710282921791077, 'timestamp': '2025-09-10 02:27:57.216211', 'step': 1715, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 3322488817984}, 'timestamp': '2025-09-10 02:27:57.246021', 'step': 1715, 'epoch': 1} {'type': 'loss', 'content': 0.15557393431663513, 'timestamp': '2025-09-10 02:27:57.270204', 'step': 1716, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 2847885091968}, 'timestamp': '2025-09-10 02:27:57.300309', 'step': 1716, 'epoch': 1} {'type': 'loss', 'content': 0.14082038402557373, 'timestamp': '2025-09-10 02:27:57.302633', 'step': 1717, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 3322488817984}, 'timestamp': '2025-09-10 02:27:57.332677', 'step': 1717, 'epoch': 1} {'type': 'loss', 'content': 0.15609702467918396, 'timestamp': '2025-09-10 02:27:57.334702', 'step': 1718, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 128], 'flops': 3797092544000}, 'timestamp': '2025-09-10 02:27:57.369601', 'step': 1718, 'epoch': 1} {'type': 'loss', 'content': 0.32348790764808655, 'timestamp': '2025-09-10 02:27:57.371613', 'step': 1719, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 2847885091968}, 'timestamp': '2025-09-10 02:27:57.401656', 'step': 1719, 'epoch': 1} {'type': 'loss', 'content': 0.13222447037696838, 'timestamp': '2025-09-10 02:27:57.425256', 'step': 1720, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 128], 'flops': 3797092544000}, 'timestamp': '2025-09-10 02:27:57.455022', 'step': 1720, 'epoch': 1} {'type': 'loss', 'content': 0.18214941024780273, 'timestamp': '2025-09-10 02:27:57.457100', 'step': 1721, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 2847885091968}, 'timestamp': '2025-09-10 02:27:57.487339', 'step': 1721, 'epoch': 1} {'type': 'loss', 'content': 0.15766936540603638, 'timestamp': '2025-09-10 02:27:57.489605', 'step': 1722, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 144], 'flops': 4271696270016}, 'timestamp': '2025-09-10 02:27:57.519662', 'step': 1722, 'epoch': 1} {'type': 'loss', 'content': 0.18848255276679993, 'timestamp': '2025-09-10 02:27:57.522076', 'step': 1723, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 128], 'flops': 3797092544000}, 'timestamp': '2025-09-10 02:27:57.553140', 'step': 1723, 'epoch': 1} {'type': 'loss', 'content': 0.17397300899028778, 'timestamp': '2025-09-10 02:27:57.576644', 'step': 1724, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 144], 'flops': 4271696270016}, 'timestamp': '2025-09-10 02:27:57.606972', 'step': 1724, 'epoch': 1} {'type': 'loss', 'content': 0.12007594853639603, 'timestamp': '2025-09-10 02:27:57.608908', 'step': 1725, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 2847885091968}, 'timestamp': '2025-09-10 02:27:57.638188', 'step': 1725, 'epoch': 1} {'type': 'loss', 'content': 0.15425576269626617, 'timestamp': '2025-09-10 02:27:57.640345', 'step': 1726, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 128], 'flops': 3797092544000}, 'timestamp': '2025-09-10 02:27:57.670280', 'step': 1726, 'epoch': 1} {'type': 'loss', 'content': 0.16922862827777863, 'timestamp': '2025-09-10 02:27:57.672741', 'step': 1727, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 128], 'flops': 3797092544000}, 'timestamp': '2025-09-10 02:27:57.702823', 'step': 1727, 'epoch': 1} {'type': 'loss', 'content': 0.10397733747959137, 'timestamp': '2025-09-10 02:27:57.726033', 'step': 1728, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 128], 'flops': 3797092544000}, 'timestamp': '2025-09-10 02:27:57.757581', 'step': 1728, 'epoch': 1} {'type': 'loss', 'content': 0.24107615649700165, 'timestamp': '2025-09-10 02:27:57.760091', 'step': 1729, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 3322488817984}, 'timestamp': '2025-09-10 02:27:57.790160', 'step': 1729, 'epoch': 1} {'type': 'loss', 'content': 0.18630771338939667, 'timestamp': '2025-09-10 02:27:57.793493', 'step': 1730, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 3322488817984}, 'timestamp': '2025-09-10 02:27:57.825050', 'step': 1730, 'epoch': 1} {'type': 'loss', 'content': 0.08622324466705322, 'timestamp': '2025-09-10 02:27:57.827343', 'step': 1731, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 3322488817984}, 'timestamp': '2025-09-10 02:27:57.857319', 'step': 1731, 'epoch': 1} {'type': 'loss', 'content': 0.13025914132595062, 'timestamp': '2025-09-10 02:27:57.880667', 'step': 1732, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 80], 'flops': 2373281365952}, 'timestamp': '2025-09-10 02:27:57.911644', 'step': 1732, 'epoch': 1} {'type': 'loss', 'content': 0.2729915976524353, 'timestamp': '2025-09-10 02:27:57.919081', 'step': 1733, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 2847885091968}, 'timestamp': '2025-09-10 02:27:57.959135', 'step': 1733, 'epoch': 1} {'type': 'loss', 'content': 0.20680738985538483, 'timestamp': '2025-09-10 02:27:57.960987', 'step': 1734, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 3322488817984}, 'timestamp': '2025-09-10 02:27:57.991313', 'step': 1734, 'epoch': 1} {'type': 'loss', 'content': 0.1931629627943039, 'timestamp': '2025-09-10 02:27:57.995239', 'step': 1735, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 3322488817984}, 'timestamp': '2025-09-10 02:27:58.026449', 'step': 1735, 'epoch': 1} {'type': 'loss', 'content': 0.17931881546974182, 'timestamp': '2025-09-10 02:27:58.050187', 'step': 1736, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 128], 'flops': 3797092544000}, 'timestamp': '2025-09-10 02:27:58.080494', 'step': 1736, 'epoch': 1} {'type': 'loss', 'content': 0.12654438614845276, 'timestamp': '2025-09-10 02:27:58.082689', 'step': 1737, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 144], 'flops': 4271696270016}, 'timestamp': '2025-09-10 02:27:58.112817', 'step': 1737, 'epoch': 1} {'type': 'loss', 'content': 0.22083450853824615, 'timestamp': '2025-09-10 02:27:58.115106', 'step': 1738, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 3322488817984}, 'timestamp': '2025-09-10 02:27:58.144780', 'step': 1738, 'epoch': 1} {'type': 'loss', 'content': 0.19953709840774536, 'timestamp': '2025-09-10 02:27:58.149991', 'step': 1739, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 3322488817984}, 'timestamp': '2025-09-10 02:27:58.184452', 'step': 1739, 'epoch': 1} {'type': 'loss', 'content': 0.22533640265464783, 'timestamp': '2025-09-10 02:27:58.210154', 'step': 1740, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 2847885091968}, 'timestamp': '2025-09-10 02:27:58.240645', 'step': 1740, 'epoch': 1} {'type': 'loss', 'content': 0.16640613973140717, 'timestamp': '2025-09-10 02:27:58.242879', 'step': 1741, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 128], 'flops': 3797092544000}, 'timestamp': '2025-09-10 02:27:58.274678', 'step': 1741, 'epoch': 1} {'type': 'loss', 'content': 0.17531903088092804, 'timestamp': '2025-09-10 02:27:58.276855', 'step': 1742, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 2847885091968}, 'timestamp': '2025-09-10 02:27:58.306744', 'step': 1742, 'epoch': 1} {'type': 'loss', 'content': 0.16988901793956757, 'timestamp': '2025-09-10 02:27:58.309302', 'step': 1743, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 2847885091968}, 'timestamp': '2025-09-10 02:27:58.338692', 'step': 1743, 'epoch': 1} {'type': 'loss', 'content': 0.16163156926631927, 'timestamp': '2025-09-10 02:27:58.362472', 'step': 1744, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 128], 'flops': 3797092544000}, 'timestamp': '2025-09-10 02:27:58.392468', 'step': 1744, 'epoch': 1} {'type': 'loss', 'content': 0.18162901699543, 'timestamp': '2025-09-10 02:27:58.394757', 'step': 1745, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 128], 'flops': 3797092544000}, 'timestamp': '2025-09-10 02:27:58.426513', 'step': 1745, 'epoch': 1} {'type': 'loss', 'content': 0.1424616128206253, 'timestamp': '2025-09-10 02:27:58.428750', 'step': 1746, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 2847885091968}, 'timestamp': '2025-09-10 02:27:58.459606', 'step': 1746, 'epoch': 1} {'type': 'loss', 'content': 0.1521730124950409, 'timestamp': '2025-09-10 02:27:58.462850', 'step': 1747, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 2847885091968}, 'timestamp': '2025-09-10 02:27:58.501443', 'step': 1747, 'epoch': 1} {'type': 'loss', 'content': 0.17499244213104248, 'timestamp': '2025-09-10 02:27:58.524730', 'step': 1748, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 2847885091968}, 'timestamp': '2025-09-10 02:27:58.556348', 'step': 1748, 'epoch': 1} {'type': 'loss', 'content': 0.1825929582118988, 'timestamp': '2025-09-10 02:27:58.558968', 'step': 1749, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 3322488817984}, 'timestamp': '2025-09-10 02:27:58.589933', 'step': 1749, 'epoch': 1} {'type': 'loss', 'content': 0.14883959293365479, 'timestamp': '2025-09-10 02:27:58.592106', 'step': 1750, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 2847885091968}, 'timestamp': '2025-09-10 02:27:58.626525', 'step': 1750, 'epoch': 1} {'type': 'loss', 'content': 0.15298494696617126, 'timestamp': '2025-09-10 02:27:58.628595', 'step': 1751, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 2847885091968}, 'timestamp': '2025-09-10 02:27:58.658428', 'step': 1751, 'epoch': 1} {'type': 'loss', 'content': 0.26923641562461853, 'timestamp': '2025-09-10 02:27:58.682165', 'step': 1752, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 3322488817984}, 'timestamp': '2025-09-10 02:27:58.712889', 'step': 1752, 'epoch': 1} {'type': 'loss', 'content': 0.18693144619464874, 'timestamp': '2025-09-10 02:27:58.719051', 'step': 1753, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 3322488817984}, 'timestamp': '2025-09-10 02:27:58.758055', 'step': 1753, 'epoch': 1} {'type': 'loss', 'content': 0.22223879396915436, 'timestamp': '2025-09-10 02:27:58.760917', 'step': 1754, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 2847885091968}, 'timestamp': '2025-09-10 02:27:58.791898', 'step': 1754, 'epoch': 1} {'type': 'loss', 'content': 0.09854970872402191, 'timestamp': '2025-09-10 02:27:58.795627', 'step': 1755, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 3322488817984}, 'timestamp': '2025-09-10 02:27:58.831748', 'step': 1755, 'epoch': 1} {'type': 'loss', 'content': 0.10553727298974991, 'timestamp': '2025-09-10 02:27:58.855756', 'step': 1756, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 128], 'flops': 3797092544000}, 'timestamp': '2025-09-10 02:27:58.885944', 'step': 1756, 'epoch': 1} {'type': 'loss', 'content': 0.139749214053154, 'timestamp': '2025-09-10 02:27:58.887916', 'step': 1757, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 3322488817984}, 'timestamp': '2025-09-10 02:27:58.917519', 'step': 1757, 'epoch': 1} {'type': 'loss', 'content': 0.22948960959911346, 'timestamp': '2025-09-10 02:27:58.919485', 'step': 1758, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 2847885091968}, 'timestamp': '2025-09-10 02:27:58.949334', 'step': 1758, 'epoch': 1} {'type': 'loss', 'content': 0.13459086418151855, 'timestamp': '2025-09-10 02:27:58.951939', 'step': 1759, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 160], 'flops': 4746299996032}, 'timestamp': '2025-09-10 02:27:58.982378', 'step': 1759, 'epoch': 1} {'type': 'loss', 'content': 0.1316644549369812, 'timestamp': '2025-09-10 02:27:59.006148', 'step': 1760, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 2847885091968}, 'timestamp': '2025-09-10 02:27:59.037348', 'step': 1760, 'epoch': 1} {'type': 'loss', 'content': 0.12826961278915405, 'timestamp': '2025-09-10 02:27:59.039436', 'step': 1761, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 2847885091968}, 'timestamp': '2025-09-10 02:27:59.070575', 'step': 1761, 'epoch': 1} {'type': 'loss', 'content': 0.18239828944206238, 'timestamp': '2025-09-10 02:27:59.072809', 'step': 1762, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 3322488817984}, 'timestamp': '2025-09-10 02:27:59.102528', 'step': 1762, 'epoch': 1} {'type': 'loss', 'content': 0.17725014686584473, 'timestamp': '2025-09-10 02:27:59.104747', 'step': 1763, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 3322488817984}, 'timestamp': '2025-09-10 02:27:59.137722', 'step': 1763, 'epoch': 1} {'type': 'loss', 'content': 0.21774743497371674, 'timestamp': '2025-09-10 02:27:59.161115', 'step': 1764, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 128], 'flops': 3797092544000}, 'timestamp': '2025-09-10 02:27:59.191548', 'step': 1764, 'epoch': 1} {'type': 'loss', 'content': 0.13094186782836914, 'timestamp': '2025-09-10 02:27:59.194186', 'step': 1765, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 80], 'flops': 2373281365952}, 'timestamp': '2025-09-10 02:27:59.224499', 'step': 1765, 'epoch': 1} {'type': 'loss', 'content': 0.1376778781414032, 'timestamp': '2025-09-10 02:27:59.228118', 'step': 1766, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 2847885091968}, 'timestamp': '2025-09-10 02:27:59.258678', 'step': 1766, 'epoch': 1} {'type': 'loss', 'content': 0.12191940099000931, 'timestamp': '2025-09-10 02:27:59.261304', 'step': 1767, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 128], 'flops': 3797092544000}, 'timestamp': '2025-09-10 02:27:59.290551', 'step': 1767, 'epoch': 1} {'type': 'loss', 'content': 0.16567887365818024, 'timestamp': '2025-09-10 02:27:59.313908', 'step': 1768, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 2847885091968}, 'timestamp': '2025-09-10 02:27:59.344350', 'step': 1768, 'epoch': 1} {'type': 'loss', 'content': 0.14355631172657013, 'timestamp': '2025-09-10 02:27:59.352378', 'step': 1769, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 3322488817984}, 'timestamp': '2025-09-10 02:27:59.407758', 'step': 1769, 'epoch': 1} {'type': 'loss', 'content': 0.1991024613380432, 'timestamp': '2025-09-10 02:27:59.410269', 'step': 1770, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 128], 'flops': 3797092544000}, 'timestamp': '2025-09-10 02:27:59.442375', 'step': 1770, 'epoch': 1} {'type': 'loss', 'content': 0.15009476244449615, 'timestamp': '2025-09-10 02:27:59.444625', 'step': 1771, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 3322488817984}, 'timestamp': '2025-09-10 02:27:59.475132', 'step': 1771, 'epoch': 1} {'type': 'loss', 'content': 0.16447600722312927, 'timestamp': '2025-09-10 02:27:59.498483', 'step': 1772, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 2847885091968}, 'timestamp': '2025-09-10 02:27:59.528785', 'step': 1772, 'epoch': 1} {'type': 'loss', 'content': 0.25372937321662903, 'timestamp': '2025-09-10 02:27:59.531319', 'step': 1773, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 144], 'flops': 4271696270016}, 'timestamp': '2025-09-10 02:27:59.561029', 'step': 1773, 'epoch': 1} {'type': 'loss', 'content': 0.23191595077514648, 'timestamp': '2025-09-10 02:27:59.563508', 'step': 1774, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 128], 'flops': 3797092544000}, 'timestamp': '2025-09-10 02:27:59.595547', 'step': 1774, 'epoch': 1} {'type': 'loss', 'content': 0.2212144434452057, 'timestamp': '2025-09-10 02:27:59.598016', 'step': 1775, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 2847885091968}, 'timestamp': '2025-09-10 02:27:59.628011', 'step': 1775, 'epoch': 1} {'type': 'loss', 'content': 0.16357958316802979, 'timestamp': '2025-09-10 02:27:59.651659', 'step': 1776, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 2847885091968}, 'timestamp': '2025-09-10 02:27:59.682304', 'step': 1776, 'epoch': 1} {'type': 'loss', 'content': 0.2485617995262146, 'timestamp': '2025-09-10 02:27:59.684458', 'step': 1777, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 2847885091968}, 'timestamp': '2025-09-10 02:27:59.714409', 'step': 1777, 'epoch': 1} {'type': 'loss', 'content': 0.18421851098537445, 'timestamp': '2025-09-10 02:27:59.717747', 'step': 1778, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 2847885091968}, 'timestamp': '2025-09-10 02:27:59.747792', 'step': 1778, 'epoch': 1} {'type': 'loss', 'content': 0.21185708045959473, 'timestamp': '2025-09-10 02:27:59.750080', 'step': 1779, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 144], 'flops': 4271696270016}, 'timestamp': '2025-09-10 02:27:59.781222', 'step': 1779, 'epoch': 1} {'type': 'loss', 'content': 0.20970658957958221, 'timestamp': '2025-09-10 02:27:59.804911', 'step': 1780, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 3322488817984}, 'timestamp': '2025-09-10 02:27:59.834941', 'step': 1780, 'epoch': 1} {'type': 'loss', 'content': 0.27869245409965515, 'timestamp': '2025-09-10 02:27:59.837377', 'step': 1781, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 2847885091968}, 'timestamp': '2025-09-10 02:27:59.867620', 'step': 1781, 'epoch': 1} {'type': 'loss', 'content': 0.16497802734375, 'timestamp': '2025-09-10 02:27:59.869949', 'step': 1782, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 3322488817984}, 'timestamp': '2025-09-10 02:27:59.899655', 'step': 1782, 'epoch': 1} {'type': 'loss', 'content': 0.17733316123485565, 'timestamp': '2025-09-10 02:27:59.901843', 'step': 1783, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 2847885091968}, 'timestamp': '2025-09-10 02:27:59.931645', 'step': 1783, 'epoch': 1} {'type': 'loss', 'content': 0.2358732521533966, 'timestamp': '2025-09-10 02:27:59.955310', 'step': 1784, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 144], 'flops': 4271696270016}, 'timestamp': '2025-09-10 02:27:59.986363', 'step': 1784, 'epoch': 1} {'type': 'loss', 'content': 0.15336157381534576, 'timestamp': '2025-09-10 02:27:59.988672', 'step': 1785, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 3322488817984}, 'timestamp': '2025-09-10 02:28:00.021311', 'step': 1785, 'epoch': 1} {'type': 'loss', 'content': 0.07321453839540482, 'timestamp': '2025-09-10 02:28:00.023478', 'step': 1786, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 3322488817984}, 'timestamp': '2025-09-10 02:28:00.053886', 'step': 1786, 'epoch': 1} {'type': 'loss', 'content': 0.18530048429965973, 'timestamp': '2025-09-10 02:28:00.055922', 'step': 1787, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 128], 'flops': 3797092544000}, 'timestamp': '2025-09-10 02:28:00.086755', 'step': 1787, 'epoch': 1} {'type': 'loss', 'content': 0.11332438886165619, 'timestamp': '2025-09-10 02:28:00.110479', 'step': 1788, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 3322488817984}, 'timestamp': '2025-09-10 02:28:00.141901', 'step': 1788, 'epoch': 1} {'type': 'loss', 'content': 0.23127715289592743, 'timestamp': '2025-09-10 02:28:00.144369', 'step': 1789, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 128], 'flops': 3797092544000}, 'timestamp': '2025-09-10 02:28:00.174332', 'step': 1789, 'epoch': 1} {'type': 'loss', 'content': 0.2221931368112564, 'timestamp': '2025-09-10 02:28:00.176644', 'step': 1790, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 128], 'flops': 3797092544000}, 'timestamp': '2025-09-10 02:28:00.207801', 'step': 1790, 'epoch': 1} {'type': 'loss', 'content': 0.1468026489019394, 'timestamp': '2025-09-10 02:28:00.210014', 'step': 1791, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 3322488817984}, 'timestamp': '2025-09-10 02:28:00.240549', 'step': 1791, 'epoch': 1} {'type': 'loss', 'content': 0.20001526176929474, 'timestamp': '2025-09-10 02:28:00.264152', 'step': 1792, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 3322488817984}, 'timestamp': '2025-09-10 02:28:00.294595', 'step': 1792, 'epoch': 1} {'type': 'loss', 'content': 0.14507892727851868, 'timestamp': '2025-09-10 02:28:00.296844', 'step': 1793, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 3322488817984}, 'timestamp': '2025-09-10 02:28:00.326826', 'step': 1793, 'epoch': 1} {'type': 'loss', 'content': 0.201049342751503, 'timestamp': '2025-09-10 02:28:00.328811', 'step': 1794, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 2847885091968}, 'timestamp': '2025-09-10 02:28:00.359282', 'step': 1794, 'epoch': 1} {'type': 'loss', 'content': 0.10216072201728821, 'timestamp': '2025-09-10 02:28:00.361705', 'step': 1795, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 3322488817984}, 'timestamp': '2025-09-10 02:28:00.392144', 'step': 1795, 'epoch': 1} {'type': 'loss', 'content': 0.2134372889995575, 'timestamp': '2025-09-10 02:28:00.415592', 'step': 1796, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 3322488817984}, 'timestamp': '2025-09-10 02:28:00.446304', 'step': 1796, 'epoch': 1} {'type': 'loss', 'content': 0.25386902689933777, 'timestamp': '2025-09-10 02:28:00.448626', 'step': 1797, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 80], 'flops': 2373281365952}, 'timestamp': '2025-09-10 02:28:00.478909', 'step': 1797, 'epoch': 1} {'type': 'loss', 'content': 0.26956707239151, 'timestamp': '2025-09-10 02:28:00.481209', 'step': 1798, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 3322488817984}, 'timestamp': '2025-09-10 02:28:00.510897', 'step': 1798, 'epoch': 1} {'type': 'loss', 'content': 0.21390816569328308, 'timestamp': '2025-09-10 02:28:00.513458', 'step': 1799, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 128], 'flops': 3797092544000}, 'timestamp': '2025-09-10 02:28:00.543395', 'step': 1799, 'epoch': 1} {'type': 'loss', 'content': 0.1643916815519333, 'timestamp': '2025-09-10 02:28:00.566861', 'step': 1800, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 2847885091968}, 'timestamp': '2025-09-10 02:28:00.598759', 'step': 1800, 'epoch': 1} {'type': 'loss', 'content': 0.16984476149082184, 'timestamp': '2025-09-10 02:28:00.601157', 'step': 1801, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 128], 'flops': 3797092544000}, 'timestamp': '2025-09-10 02:28:00.631602', 'step': 1801, 'epoch': 1} {'type': 'loss', 'content': 0.08413517475128174, 'timestamp': '2025-09-10 02:28:00.633914', 'step': 1802, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 3322488817984}, 'timestamp': '2025-09-10 02:28:00.663972', 'step': 1802, 'epoch': 1} {'type': 'loss', 'content': 0.13102778792381287, 'timestamp': '2025-09-10 02:28:00.666297', 'step': 1803, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 2847885091968}, 'timestamp': '2025-09-10 02:28:00.697254', 'step': 1803, 'epoch': 1} {'type': 'loss', 'content': 0.15519951283931732, 'timestamp': '2025-09-10 02:28:00.721040', 'step': 1804, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 144], 'flops': 4271696270016}, 'timestamp': '2025-09-10 02:28:00.753553', 'step': 1804, 'epoch': 1} {'type': 'loss', 'content': 0.19663743674755096, 'timestamp': '2025-09-10 02:28:00.755943', 'step': 1805, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 128], 'flops': 3797092544000}, 'timestamp': '2025-09-10 02:28:00.786439', 'step': 1805, 'epoch': 1} {'type': 'loss', 'content': 0.21406328678131104, 'timestamp': '2025-09-10 02:28:00.788482', 'step': 1806, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 2847885091968}, 'timestamp': '2025-09-10 02:28:00.819032', 'step': 1806, 'epoch': 1} {'type': 'loss', 'content': 0.15627829730510712, 'timestamp': '2025-09-10 02:28:00.821528', 'step': 1807, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 160], 'flops': 4746299996032}, 'timestamp': '2025-09-10 02:28:00.852431', 'step': 1807, 'epoch': 1} {'type': 'loss', 'content': 0.2084481120109558, 'timestamp': '2025-09-10 02:28:00.876226', 'step': 1808, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 2847885091968}, 'timestamp': '2025-09-10 02:28:00.906425', 'step': 1808, 'epoch': 1} {'type': 'loss', 'content': 0.23407307267189026, 'timestamp': '2025-09-10 02:28:00.908434', 'step': 1809, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 192], 'flops': 5695507448064}, 'timestamp': '2025-09-10 02:28:00.938816', 'step': 1809, 'epoch': 1} {'type': 'loss', 'content': 0.18210789561271667, 'timestamp': '2025-09-10 02:28:00.943468', 'step': 1810, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 128], 'flops': 3797092544000}, 'timestamp': '2025-09-10 02:28:00.973341', 'step': 1810, 'epoch': 1} {'type': 'loss', 'content': 0.14694710075855255, 'timestamp': '2025-09-10 02:28:00.975430', 'step': 1811, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 3322488817984}, 'timestamp': '2025-09-10 02:28:01.005205', 'step': 1811, 'epoch': 1} {'type': 'loss', 'content': 0.21100787818431854, 'timestamp': '2025-09-10 02:28:01.028760', 'step': 1812, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 128], 'flops': 3797092544000}, 'timestamp': '2025-09-10 02:28:01.060691', 'step': 1812, 'epoch': 1} {'type': 'loss', 'content': 0.14744804799556732, 'timestamp': '2025-09-10 02:28:01.063033', 'step': 1813, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 192], 'flops': 5695507448064}, 'timestamp': '2025-09-10 02:28:01.093759', 'step': 1813, 'epoch': 1} {'type': 'loss', 'content': 0.13446791470050812, 'timestamp': '2025-09-10 02:28:01.098263', 'step': 1814, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 2847885091968}, 'timestamp': '2025-09-10 02:28:01.128731', 'step': 1814, 'epoch': 1} {'type': 'loss', 'content': 0.1458081752061844, 'timestamp': '2025-09-10 02:28:01.130891', 'step': 1815, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 3322488817984}, 'timestamp': '2025-09-10 02:28:01.163137', 'step': 1815, 'epoch': 1} {'type': 'loss', 'content': 0.2330683320760727, 'timestamp': '2025-09-10 02:28:01.186593', 'step': 1816, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 2847885091968}, 'timestamp': '2025-09-10 02:28:01.217669', 'step': 1816, 'epoch': 1} {'type': 'loss', 'content': 0.15337592363357544, 'timestamp': '2025-09-10 02:28:01.219891', 'step': 1817, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 3322488817984}, 'timestamp': '2025-09-10 02:28:01.252963', 'step': 1817, 'epoch': 1} {'type': 'loss', 'content': 0.0933830663561821, 'timestamp': '2025-09-10 02:28:01.255432', 'step': 1818, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 2847885091968}, 'timestamp': '2025-09-10 02:28:01.286579', 'step': 1818, 'epoch': 1} {'type': 'loss', 'content': 0.15827497839927673, 'timestamp': '2025-09-10 02:28:01.288880', 'step': 1819, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 3322488817984}, 'timestamp': '2025-09-10 02:28:01.318839', 'step': 1819, 'epoch': 1} {'type': 'loss', 'content': 0.2064393162727356, 'timestamp': '2025-09-10 02:28:01.342431', 'step': 1820, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 3322488817984}, 'timestamp': '2025-09-10 02:28:01.373439', 'step': 1820, 'epoch': 1} {'type': 'loss', 'content': 0.13825584948062897, 'timestamp': '2025-09-10 02:28:01.375481', 'step': 1821, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 3322488817984}, 'timestamp': '2025-09-10 02:28:01.405269', 'step': 1821, 'epoch': 1} {'type': 'loss', 'content': 0.148639976978302, 'timestamp': '2025-09-10 02:28:01.407611', 'step': 1822, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 128], 'flops': 3797092544000}, 'timestamp': '2025-09-10 02:28:01.437997', 'step': 1822, 'epoch': 1} {'type': 'loss', 'content': 0.1361343264579773, 'timestamp': '2025-09-10 02:28:01.440252', 'step': 1823, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 128], 'flops': 3797092544000}, 'timestamp': '2025-09-10 02:28:01.470506', 'step': 1823, 'epoch': 1} {'type': 'loss', 'content': 0.13619518280029297, 'timestamp': '2025-09-10 02:28:01.495110', 'step': 1824, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 160], 'flops': 4746299996032}, 'timestamp': '2025-09-10 02:28:01.525482', 'step': 1824, 'epoch': 1} {'type': 'loss', 'content': 0.21726815402507782, 'timestamp': '2025-09-10 02:28:01.527810', 'step': 1825, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 3322488817984}, 'timestamp': '2025-09-10 02:28:01.558315', 'step': 1825, 'epoch': 1} {'type': 'loss', 'content': 0.16176070272922516, 'timestamp': '2025-09-10 02:28:01.560512', 'step': 1826, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 2847885091968}, 'timestamp': '2025-09-10 02:28:01.590631', 'step': 1826, 'epoch': 1} {'type': 'loss', 'content': 0.19065256416797638, 'timestamp': '2025-09-10 02:28:01.592686', 'step': 1827, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 2847885091968}, 'timestamp': '2025-09-10 02:28:01.622953', 'step': 1827, 'epoch': 1} {'type': 'loss', 'content': 0.09945395588874817, 'timestamp': '2025-09-10 02:28:01.646298', 'step': 1828, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 2847885091968}, 'timestamp': '2025-09-10 02:28:01.677396', 'step': 1828, 'epoch': 1} {'type': 'loss', 'content': 0.15330317616462708, 'timestamp': '2025-09-10 02:28:01.679767', 'step': 1829, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 2847885091968}, 'timestamp': '2025-09-10 02:28:01.709660', 'step': 1829, 'epoch': 1} {'type': 'loss', 'content': 0.0644049197435379, 'timestamp': '2025-09-10 02:28:01.711705', 'step': 1830, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 2847885091968}, 'timestamp': '2025-09-10 02:28:01.742250', 'step': 1830, 'epoch': 1} {'type': 'loss', 'content': 0.20391106605529785, 'timestamp': '2025-09-10 02:28:01.745441', 'step': 1831, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 3322488817984}, 'timestamp': '2025-09-10 02:28:01.776224', 'step': 1831, 'epoch': 1} {'type': 'loss', 'content': 0.17114588618278503, 'timestamp': '2025-09-10 02:28:01.803500', 'step': 1832, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 144], 'flops': 4271696270016}, 'timestamp': '2025-09-10 02:28:01.845168', 'step': 1832, 'epoch': 1} {'type': 'loss', 'content': 0.2059495896100998, 'timestamp': '2025-09-10 02:28:01.847647', 'step': 1833, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 3322488817984}, 'timestamp': '2025-09-10 02:28:01.878344', 'step': 1833, 'epoch': 1} {'type': 'loss', 'content': 0.23010441660881042, 'timestamp': '2025-09-10 02:28:01.880527', 'step': 1834, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 3322488817984}, 'timestamp': '2025-09-10 02:28:01.911127', 'step': 1834, 'epoch': 1} {'type': 'loss', 'content': 0.09750126302242279, 'timestamp': '2025-09-10 02:28:01.913224', 'step': 1835, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 128], 'flops': 3797092544000}, 'timestamp': '2025-09-10 02:28:01.943387', 'step': 1835, 'epoch': 1} {'type': 'loss', 'content': 0.2046097218990326, 'timestamp': '2025-09-10 02:28:01.967309', 'step': 1836, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 3322488817984}, 'timestamp': '2025-09-10 02:28:01.998128', 'step': 1836, 'epoch': 1} {'type': 'loss', 'content': 0.10935397446155548, 'timestamp': '2025-09-10 02:28:02.001130', 'step': 1837, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 80], 'flops': 2373281365952}, 'timestamp': '2025-09-10 02:28:02.031972', 'step': 1837, 'epoch': 1} {'type': 'loss', 'content': 0.21670523285865784, 'timestamp': '2025-09-10 02:28:02.034244', 'step': 1838, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 128], 'flops': 3797092544000}, 'timestamp': '2025-09-10 02:28:02.063961', 'step': 1838, 'epoch': 1} {'type': 'loss', 'content': 0.17498156428337097, 'timestamp': '2025-09-10 02:28:02.066324', 'step': 1839, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 128], 'flops': 3797092544000}, 'timestamp': '2025-09-10 02:28:02.096870', 'step': 1839, 'epoch': 1} {'type': 'loss', 'content': 0.23170694708824158, 'timestamp': '2025-09-10 02:28:02.120329', 'step': 1840, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 2847885091968}, 'timestamp': '2025-09-10 02:28:02.172030', 'step': 1840, 'epoch': 1} {'type': 'loss', 'content': 0.08964956551790237, 'timestamp': '2025-09-10 02:28:02.174562', 'step': 1841, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 128], 'flops': 3797092544000}, 'timestamp': '2025-09-10 02:28:02.205521', 'step': 1841, 'epoch': 1} {'type': 'loss', 'content': 0.14433030784130096, 'timestamp': '2025-09-10 02:28:02.208075', 'step': 1842, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 3322488817984}, 'timestamp': '2025-09-10 02:28:02.238723', 'step': 1842, 'epoch': 1} {'type': 'loss', 'content': 0.15476281940937042, 'timestamp': '2025-09-10 02:28:02.241263', 'step': 1843, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 2847885091968}, 'timestamp': '2025-09-10 02:28:02.274938', 'step': 1843, 'epoch': 1} {'type': 'loss', 'content': 0.16763898730278015, 'timestamp': '2025-09-10 02:28:02.298854', 'step': 1844, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 128], 'flops': 3797092544000}, 'timestamp': '2025-09-10 02:28:02.331187', 'step': 1844, 'epoch': 1} {'type': 'loss', 'content': 0.14807063341140747, 'timestamp': '2025-09-10 02:28:02.333968', 'step': 1845, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 2847885091968}, 'timestamp': '2025-09-10 02:28:02.365683', 'step': 1845, 'epoch': 1} {'type': 'loss', 'content': 0.20708119869232178, 'timestamp': '2025-09-10 02:28:02.368152', 'step': 1846, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 144], 'flops': 4271696270016}, 'timestamp': '2025-09-10 02:28:02.400126', 'step': 1846, 'epoch': 1} {'type': 'loss', 'content': 0.14876233041286469, 'timestamp': '2025-09-10 02:28:02.403121', 'step': 1847, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 128], 'flops': 3797092544000}, 'timestamp': '2025-09-10 02:28:02.435760', 'step': 1847, 'epoch': 1} {'type': 'loss', 'content': 0.16397763788700104, 'timestamp': '2025-09-10 02:28:02.459320', 'step': 1848, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 128], 'flops': 3797092544000}, 'timestamp': '2025-09-10 02:28:02.489248', 'step': 1848, 'epoch': 1} {'type': 'loss', 'content': 0.20524056255817413, 'timestamp': '2025-09-10 02:28:02.491300', 'step': 1849, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 2847885091968}, 'timestamp': '2025-09-10 02:28:02.522157', 'step': 1849, 'epoch': 1} {'type': 'loss', 'content': 0.1467934101819992, 'timestamp': '2025-09-10 02:28:02.524218', 'step': 1850, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 2847885091968}, 'timestamp': '2025-09-10 02:28:02.554180', 'step': 1850, 'epoch': 1} {'type': 'loss', 'content': 0.16665641963481903, 'timestamp': '2025-09-10 02:28:02.556257', 'step': 1851, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 128], 'flops': 3797092544000}, 'timestamp': '2025-09-10 02:28:02.586170', 'step': 1851, 'epoch': 1} {'type': 'loss', 'content': 0.1930660456418991, 'timestamp': '2025-09-10 02:28:02.609619', 'step': 1852, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 2847885091968}, 'timestamp': '2025-09-10 02:28:02.640673', 'step': 1852, 'epoch': 1} {'type': 'loss', 'content': 0.22910770773887634, 'timestamp': '2025-09-10 02:28:02.642634', 'step': 1853, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 128], 'flops': 3797092544000}, 'timestamp': '2025-09-10 02:28:02.672844', 'step': 1853, 'epoch': 1} {'type': 'loss', 'content': 0.11264991760253906, 'timestamp': '2025-09-10 02:28:02.674805', 'step': 1854, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 2847885091968}, 'timestamp': '2025-09-10 02:28:02.705731', 'step': 1854, 'epoch': 1} {'type': 'loss', 'content': 0.15041981637477875, 'timestamp': '2025-09-10 02:28:02.707843', 'step': 1855, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 2847885091968}, 'timestamp': '2025-09-10 02:28:02.737869', 'step': 1855, 'epoch': 1} {'type': 'loss', 'content': 0.12670522928237915, 'timestamp': '2025-09-10 02:28:02.761305', 'step': 1856, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 3322488817984}, 'timestamp': '2025-09-10 02:28:02.792667', 'step': 1856, 'epoch': 1} {'type': 'loss', 'content': 0.15612581372261047, 'timestamp': '2025-09-10 02:28:02.795856', 'step': 1857, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 2847885091968}, 'timestamp': '2025-09-10 02:28:02.826555', 'step': 1857, 'epoch': 1} {'type': 'loss', 'content': 0.07384287565946579, 'timestamp': '2025-09-10 02:28:02.829520', 'step': 1858, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 3322488817984}, 'timestamp': '2025-09-10 02:28:02.860661', 'step': 1858, 'epoch': 1} {'type': 'loss', 'content': 0.09814490377902985, 'timestamp': '2025-09-10 02:28:02.862716', 'step': 1859, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 3322488817984}, 'timestamp': '2025-09-10 02:28:02.894410', 'step': 1859, 'epoch': 1} {'type': 'loss', 'content': 0.13974197208881378, 'timestamp': '2025-09-10 02:28:02.917846', 'step': 1860, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 128], 'flops': 3797092544000}, 'timestamp': '2025-09-10 02:28:02.947713', 'step': 1860, 'epoch': 1} {'type': 'loss', 'content': 0.13781166076660156, 'timestamp': '2025-09-10 02:28:02.950136', 'step': 1861, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 3322488817984}, 'timestamp': '2025-09-10 02:28:02.983019', 'step': 1861, 'epoch': 1} {'type': 'loss', 'content': 0.19274666905403137, 'timestamp': '2025-09-10 02:28:02.985132', 'step': 1862, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 144], 'flops': 4271696270016}, 'timestamp': '2025-09-10 02:28:03.015484', 'step': 1862, 'epoch': 1} {'type': 'loss', 'content': 0.16862879693508148, 'timestamp': '2025-09-10 02:28:03.018296', 'step': 1863, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 3322488817984}, 'timestamp': '2025-09-10 02:28:03.048452', 'step': 1863, 'epoch': 1} {'type': 'loss', 'content': 0.32433217763900757, 'timestamp': '2025-09-10 02:28:03.073367', 'step': 1864, 'epoch': 1} {'type': 'flops', 'content': [{'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1582003754624}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1898404492032}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1898404492032}, {'type': 'perplexity', 'in_batch_dim': [8, 144], 'batch_size': 8, 'flops': 2847606704256}, {'type': 'perplexity', 'in_batch_dim': [8, 64], 'batch_size': 8, 'flops': 1265603017216}, {'type': 'perplexity', 'in_batch_dim': [8, 112], 'batch_size': 8, 'flops': 2214805229440}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1898404492032}, {'type': 'perplexity', 'in_batch_dim': [8, 112], 'batch_size': 8, 'flops': 2214805229440}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1898404492032}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1898404492032}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1898404492032}, {'type': 'perplexity', 'in_batch_dim': [8, 112], 'batch_size': 8, 'flops': 2214805229440}, {'type': 'perplexity', 'in_batch_dim': [8, 112], 'batch_size': 8, 'flops': 2214805229440}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1582003754624}, {'type': 'perplexity', 'in_batch_dim': [8, 144], 'batch_size': 8, 'flops': 2847606704256}, {'type': 'perplexity', 'in_batch_dim': [8, 112], 'batch_size': 8, 'flops': 2214805229440}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1582003754624}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1898404492032}, {'type': 'perplexity', 'in_batch_dim': [8, 64], 'batch_size': 8, 'flops': 1265603017216}, {'type': 'perplexity', 'in_batch_dim': [8, 64], 'batch_size': 8, 'flops': 1265603017216}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1898404492032}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1582003754624}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1582003754624}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1582003754624}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1582003754624}, {'type': 'perplexity', 'in_batch_dim': [8, 64], 'batch_size': 8, 'flops': 1265603017216}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1898404492032}, {'type': 'perplexity', 'in_batch_dim': [8, 64], 'batch_size': 8, 'flops': 1265603017216}, {'type': 'perplexity', 'in_batch_dim': [8, 64], 'batch_size': 8, 'flops': 1265603017216}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1582003754624}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1582003754624}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1582003754624}, {'type': 'perplexity', 'in_batch_dim': [8, 112], 'batch_size': 8, 'flops': 2214805229440}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1582003754624}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1582003754624}, {'type': 'perplexity', 'in_batch_dim': [8, 160], 'batch_size': 8, 'flops': 3164007441664}, {'type': 'perplexity', 'in_batch_dim': [8, 64], 'batch_size': 8, 'flops': 1265603017216}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1898404492032}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1898404492032}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1898404492032}, {'type': 'perplexity', 'in_batch_dim': [8, 64], 'batch_size': 8, 'flops': 1265603017216}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1582003754624}, {'type': 'perplexity', 'in_batch_dim': [8, 64], 'batch_size': 8, 'flops': 1265603017216}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1582003754624}, {'type': 'perplexity', 'in_batch_dim': [8, 64], 'batch_size': 8, 'flops': 1265603017216}, {'type': 'perplexity', 'in_batch_dim': [8, 112], 'batch_size': 8, 'flops': 2214805229440}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1582003754624}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1898404492032}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1898404492032}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1898404492032}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1898404492032}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1898404492032}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1898404492032}, {'type': 'perplexity', 'in_batch_dim': [8, 64], 'batch_size': 8, 'flops': 1265603017216}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1898404492032}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1582003754624}, {'type': 'perplexity', 'in_batch_dim': [8, 64], 'batch_size': 8, 'flops': 1265603017216}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1898404492032}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1898404492032}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1898404492032}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1898404492032}, {'type': 'perplexity', 'in_batch_dim': [8, 128], 'batch_size': 8, 'flops': 2531205966848}, {'type': 'perplexity', 'in_batch_dim': [8, 112], 'batch_size': 8, 'flops': 2214805229440}, {'type': 'perplexity', 'in_batch_dim': [8, 64], 'batch_size': 8, 'flops': 1265603017216}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1582003754624}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1898404492032}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1582003754624}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1898404492032}, {'type': 'perplexity', 'in_batch_dim': [8, 64], 'batch_size': 8, 'flops': 1265603017216}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1898404492032}, {'type': 'perplexity', 'in_batch_dim': [8, 112], 'batch_size': 8, 'flops': 2214805229440}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1898404492032}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1582003754624}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1582003754624}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1582003754624}, {'type': 'perplexity', 'in_batch_dim': [8, 64], 'batch_size': 8, 'flops': 1265603017216}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1582003754624}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1898404492032}, {'type': 'perplexity', 'in_batch_dim': [8, 64], 'batch_size': 8, 'flops': 1265603017216}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1582003754624}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1898404492032}, {'type': 'perplexity', 'in_batch_dim': [8, 64], 'batch_size': 8, 'flops': 1265603017216}, {'type': 'perplexity', 'in_batch_dim': [8, 112], 'batch_size': 8, 'flops': 2214805229440}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1582003754624}, {'type': 'perplexity', 'in_batch_dim': [8, 144], 'batch_size': 8, 'flops': 2847606704256}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1582003754624}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1582003754624}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1582003754624}, {'type': 'perplexity', 'in_batch_dim': [8, 64], 'batch_size': 8, 'flops': 1265603017216}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1898404492032}, {'type': 'perplexity', 'in_batch_dim': [8, 112], 'batch_size': 8, 'flops': 2214805229440}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1898404492032}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1582003754624}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1582003754624}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1582003754624}, {'type': 'perplexity', 'in_batch_dim': [8, 112], 'batch_size': 8, 'flops': 2214805229440}, {'type': 'perplexity', 'in_batch_dim': [8, 64], 'batch_size': 8, 'flops': 1265603017216}, {'type': 'perplexity', 'in_batch_dim': [8, 112], 'batch_size': 8, 'flops': 2214805229440}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1582003754624}, {'type': 'perplexity', 'in_batch_dim': [8, 64], 'batch_size': 8, 'flops': 1265603017216}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1582003754624}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1582003754624}, {'type': 'perplexity', 'in_batch_dim': [8, 64], 'batch_size': 8, 'flops': 1265603017216}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1582003754624}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1582003754624}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1582003754624}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1898404492032}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1582003754624}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1582003754624}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1898404492032}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1898404492032}, {'type': 'perplexity', 'in_batch_dim': [8, 112], 'batch_size': 8, 'flops': 2214805229440}, {'type': 'perplexity', 'in_batch_dim': [8, 64], 'batch_size': 8, 'flops': 1265603017216}, {'type': 'perplexity', 'in_batch_dim': [8, 112], 'batch_size': 8, 'flops': 2214805229440}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1898404492032}, {'type': 'perplexity', 'in_batch_dim': [8, 112], 'batch_size': 8, 'flops': 2214805229440}, {'type': 'perplexity', 'in_batch_dim': [8, 128], 'batch_size': 8, 'flops': 2531205966848}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1582003754624}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1898404492032}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1898404492032}, {'type': 'perplexity', 'in_batch_dim': [8, 192], 'batch_size': 8, 'flops': 3796808916480}, {'type': 'perplexity', 'in_batch_dim': [8, 64], 'batch_size': 8, 'flops': 1265603017216}, {'type': 'perplexity', 'in_batch_dim': [8, 144], 'batch_size': 8, 'flops': 2847606704256}, {'type': 'perplexity', 'in_batch_dim': [8, 64], 'batch_size': 8, 'flops': 1265603017216}, {'type': 'perplexity', 'in_batch_dim': [8, 144], 'batch_size': 8, 'flops': 2847606704256}, {'type': 'perplexity', 'in_batch_dim': [8, 64], 'batch_size': 8, 'flops': 1265603017216}, {'type': 'perplexity', 'in_batch_dim': [8, 64], 'batch_size': 8, 'flops': 1265603017216}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1898404492032}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1898404492032}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1582003754624}, {'type': 'perplexity', 'in_batch_dim': [8, 128], 'batch_size': 8, 'flops': 2531205966848}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1898404492032}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1898404492032}, {'type': 'perplexity', 'in_batch_dim': [8, 112], 'batch_size': 8, 'flops': 2214805229440}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1582003754624}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1582003754624}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1898404492032}, {'type': 'perplexity', 'in_batch_dim': [8, 64], 'batch_size': 8, 'flops': 1265603017216}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1898404492032}, {'type': 'perplexity', 'in_batch_dim': [8, 112], 'batch_size': 8, 'flops': 2214805229440}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1898404492032}, {'type': 'perplexity', 'in_batch_dim': [8, 64], 'batch_size': 8, 'flops': 1265603017216}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1582003754624}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1898404492032}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1898404492032}, {'type': 'perplexity', 'in_batch_dim': [8, 64], 'batch_size': 8, 'flops': 1265603017216}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1898404492032}, {'type': 'perplexity', 'in_batch_dim': [8, 64], 'batch_size': 8, 'flops': 1265603017216}, {'type': 'perplexity', 'in_batch_dim': [8, 112], 'batch_size': 8, 'flops': 2214805229440}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1582003754624}, {'type': 'perplexity', 'in_batch_dim': [8, 128], 'batch_size': 8, 'flops': 2531205966848}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1582003754624}, {'type': 'perplexity', 'in_batch_dim': [8, 112], 'batch_size': 8, 'flops': 2214805229440}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1898404492032}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1582003754624}, {'type': 'perplexity', 'in_batch_dim': [8, 112], 'batch_size': 8, 'flops': 2214805229440}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1582003754624}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1898404492032}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1898404492032}, {'type': 'perplexity', 'in_batch_dim': [8, 128], 'batch_size': 8, 'flops': 2531205966848}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1898404492032}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1898404492032}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1582003754624}, {'type': 'perplexity', 'in_batch_dim': [8, 112], 'batch_size': 8, 'flops': 2214805229440}, {'type': 'perplexity', 'in_batch_dim': [8, 128], 'batch_size': 8, 'flops': 2531205966848}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1898404492032}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1582003754624}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1582003754624}, {'type': 'perplexity', 'in_batch_dim': [8, 128], 'batch_size': 8, 'flops': 2531205966848}, {'type': 'perplexity', 'in_batch_dim': [8, 112], 'batch_size': 8, 'flops': 2214805229440}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1582003754624}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1582003754624}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1898404492032}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1582003754624}, {'type': 'perplexity', 'in_batch_dim': [8, 64], 'batch_size': 8, 'flops': 1265603017216}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1582003754624}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1582003754624}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1582003754624}, {'type': 'perplexity', 'in_batch_dim': [8, 112], 'batch_size': 8, 'flops': 2214805229440}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1582003754624}, {'type': 'perplexity', 'in_batch_dim': [8, 64], 'batch_size': 8, 'flops': 1265603017216}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1582003754624}, {'type': 'perplexity', 'in_batch_dim': [8, 128], 'batch_size': 8, 'flops': 2531205966848}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1898404492032}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1898404492032}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1582003754624}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1898404492032}, {'type': 'perplexity', 'in_batch_dim': [8, 64], 'batch_size': 8, 'flops': 1265603017216}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1898404492032}, {'type': 'perplexity', 'in_batch_dim': [8, 112], 'batch_size': 8, 'flops': 2214805229440}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1898404492032}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1582003754624}, {'type': 'perplexity', 'in_batch_dim': [8, 112], 'batch_size': 8, 'flops': 2214805229440}, {'type': 'perplexity', 'in_batch_dim': [8, 112], 'batch_size': 8, 'flops': 2214805229440}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1898404492032}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1898404492032}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1898404492032}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1898404492032}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1582003754624}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1582003754624}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1582003754624}, {'type': 'perplexity', 'in_batch_dim': [8, 112], 'batch_size': 8, 'flops': 2214805229440}, {'type': 'perplexity', 'in_batch_dim': [8, 64], 'batch_size': 8, 'flops': 1265603017216}, {'type': 'perplexity', 'in_batch_dim': [8, 144], 'batch_size': 8, 'flops': 2847606704256}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1898404492032}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1898404492032}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1898404492032}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1898404492032}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1898404492032}, {'type': 'perplexity', 'in_batch_dim': [8, 112], 'batch_size': 8, 'flops': 2214805229440}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1582003754624}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1582003754624}, {'type': 'perplexity', 'in_batch_dim': [8, 64], 'batch_size': 8, 'flops': 1265603017216}, {'type': 'perplexity', 'in_batch_dim': [8, 112], 'batch_size': 8, 'flops': 2214805229440}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1582003754624}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1898404492032}, {'type': 'perplexity', 'in_batch_dim': [8, 64], 'batch_size': 8, 'flops': 1265603017216}, {'type': 'perplexity', 'in_batch_dim': [8, 112], 'batch_size': 8, 'flops': 2214805229440}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1898404492032}, {'type': 'perplexity', 'in_batch_dim': [8, 64], 'batch_size': 8, 'flops': 1265603017216}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1582003754624}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1582003754624}, {'type': 'perplexity', 'in_batch_dim': [8, 64], 'batch_size': 8, 'flops': 1265603017216}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1898404492032}, {'type': 'perplexity', 'in_batch_dim': [8, 64], 'batch_size': 8, 'flops': 1265603017216}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1582003754624}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1898404492032}, {'type': 'perplexity', 'in_batch_dim': [8, 112], 'batch_size': 8, 'flops': 2214805229440}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1582003754624}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1898404492032}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1582003754624}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1898404492032}, {'type': 'perplexity', 'in_batch_dim': [8, 112], 'batch_size': 8, 'flops': 2214805229440}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1898404492032}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1898404492032}, {'type': 'perplexity', 'in_batch_dim': [8, 64], 'batch_size': 8, 'flops': 1265603017216}, {'type': 'perplexity', 'in_batch_dim': [8, 64], 'batch_size': 8, 'flops': 1265603017216}, {'type': 'perplexity', 'in_batch_dim': [8, 128], 'batch_size': 8, 'flops': 2531205966848}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1582003754624}, {'type': 'perplexity', 'in_batch_dim': [8, 64], 'batch_size': 8, 'flops': 1265603017216}, {'type': 'perplexity', 'in_batch_dim': [8, 112], 'batch_size': 8, 'flops': 2214805229440}, {'type': 'perplexity', 'in_batch_dim': [8, 112], 'batch_size': 8, 'flops': 2214805229440}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1898404492032}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1582003754624}, {'type': 'perplexity', 'in_batch_dim': [8, 128], 'batch_size': 8, 'flops': 2531205966848}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1898404492032}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1582003754624}, {'type': 'perplexity', 'in_batch_dim': [8, 112], 'batch_size': 8, 'flops': 2214805229440}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1898404492032}, {'type': 'perplexity', 'in_batch_dim': [8, 64], 'batch_size': 8, 'flops': 1265603017216}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1582003754624}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1898404492032}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1898404492032}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1582003754624}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1582003754624}, {'type': 'perplexity', 'in_batch_dim': [8, 64], 'batch_size': 8, 'flops': 1265603017216}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1898404492032}, {'type': 'perplexity', 'in_batch_dim': [8, 112], 'batch_size': 8, 'flops': 2214805229440}, {'type': 'perplexity', 'in_batch_dim': [8, 64], 'batch_size': 8, 'flops': 1265603017216}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1898404492032}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1582003754624}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1582003754624}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1898404492032}, {'type': 'perplexity', 'in_batch_dim': [8, 112], 'batch_size': 8, 'flops': 2214805229440}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1898404492032}, {'type': 'perplexity', 'in_batch_dim': [8, 112], 'batch_size': 8, 'flops': 2214805229440}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1582003754624}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1582003754624}, {'type': 'perplexity', 'in_batch_dim': [8, 64], 'batch_size': 8, 'flops': 1265603017216}, {'type': 'perplexity', 'in_batch_dim': [8, 112], 'batch_size': 8, 'flops': 2214805229440}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1582003754624}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1898404492032}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1582003754624}, {'type': 'perplexity', 'in_batch_dim': [8, 112], 'batch_size': 8, 'flops': 2214805229440}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1898404492032}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1898404492032}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1582003754624}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1898404492032}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1582003754624}, {'type': 'perplexity', 'in_batch_dim': [8, 112], 'batch_size': 8, 'flops': 2214805229440}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1582003754624}, {'type': 'perplexity', 'in_batch_dim': [8, 64], 'batch_size': 8, 'flops': 1265603017216}, {'type': 'perplexity', 'in_batch_dim': [8, 64], 'batch_size': 8, 'flops': 1265603017216}, {'type': 'perplexity', 'in_batch_dim': [8, 48], 'batch_size': 8, 'flops': 949202279808}, {'type': 'perplexity', 'in_batch_dim': [8, 112], 'batch_size': 8, 'flops': 2214805229440}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1582003754624}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1898404492032}, {'type': 'perplexity', 'in_batch_dim': [8, 128], 'batch_size': 8, 'flops': 2531205966848}, {'type': 'perplexity', 'in_batch_dim': [8, 112], 'batch_size': 8, 'flops': 2214805229440}, {'type': 'perplexity', 'in_batch_dim': [8, 128], 'batch_size': 8, 'flops': 2531205966848}, {'type': 'perplexity', 'in_batch_dim': [8, 112], 'batch_size': 8, 'flops': 2214805229440}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1582003754624}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1898404492032}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1582003754624}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1582003754624}, {'type': 'perplexity', 'in_batch_dim': [8, 112], 'batch_size': 8, 'flops': 2214805229440}, {'type': 'perplexity', 'in_batch_dim': [8, 112], 'batch_size': 8, 'flops': 2214805229440}, {'type': 'perplexity', 'in_batch_dim': [8, 64], 'batch_size': 8, 'flops': 1265603017216}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1898404492032}, {'type': 'perplexity', 'in_batch_dim': [8, 112], 'batch_size': 8, 'flops': 2214805229440}, {'type': 'perplexity', 'in_batch_dim': [8, 64], 'batch_size': 8, 'flops': 1265603017216}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1898404492032}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1582003754624}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1898404492032}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1898404492032}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1898404492032}, {'type': 'perplexity', 'in_batch_dim': [8, 48], 'batch_size': 8, 'flops': 949202279808}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1898404492032}, {'type': 'perplexity', 'in_batch_dim': [8, 112], 'batch_size': 8, 'flops': 2214805229440}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1582003754624}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1898404492032}, {'type': 'perplexity', 'in_batch_dim': [8, 64], 'batch_size': 8, 'flops': 1265603017216}, {'type': 'perplexity', 'in_batch_dim': [8, 112], 'batch_size': 8, 'flops': 2214805229440}, {'type': 'perplexity', 'in_batch_dim': [8, 64], 'batch_size': 8, 'flops': 1265603017216}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1582003754624}, {'type': 'perplexity', 'in_batch_dim': [8, 64], 'batch_size': 8, 'flops': 1265603017216}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1582003754624}, {'type': 'perplexity', 'in_batch_dim': [8, 112], 'batch_size': 8, 'flops': 2214805229440}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1582003754624}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1582003754624}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1582003754624}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1582003754624}, {'type': 'perplexity', 'in_batch_dim': [8, 112], 'batch_size': 8, 'flops': 2214805229440}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1582003754624}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1898404492032}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1898404492032}, {'type': 'perplexity', 'in_batch_dim': [8, 112], 'batch_size': 8, 'flops': 2214805229440}, {'type': 'perplexity', 'in_batch_dim': [8, 144], 'batch_size': 8, 'flops': 2847606704256}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1582003754624}, {'type': 'perplexity', 'in_batch_dim': [8, 64], 'batch_size': 8, 'flops': 1265603017216}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1898404492032}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1898404492032}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1582003754624}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1898404492032}, {'type': 'perplexity', 'in_batch_dim': [8, 128], 'batch_size': 8, 'flops': 2531205966848}, {'type': 'perplexity', 'in_batch_dim': [8, 112], 'batch_size': 8, 'flops': 2214805229440}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1898404492032}, {'type': 'perplexity', 'in_batch_dim': [8, 64], 'batch_size': 8, 'flops': 1265603017216}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1898404492032}, {'type': 'perplexity', 'in_batch_dim': [8, 160], 'batch_size': 8, 'flops': 3164007441664}, {'type': 'perplexity', 'in_batch_dim': [8, 64], 'batch_size': 8, 'flops': 1265603017216}, {'type': 'perplexity', 'in_batch_dim': [8, 112], 'batch_size': 8, 'flops': 2214805229440}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1898404492032}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1898404492032}, {'type': 'perplexity', 'in_batch_dim': [8, 144], 'batch_size': 8, 'flops': 2847606704256}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1582003754624}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1898404492032}, {'type': 'perplexity', 'in_batch_dim': [8, 64], 'batch_size': 8, 'flops': 1265603017216}, {'type': 'perplexity', 'in_batch_dim': [8, 64], 'batch_size': 8, 'flops': 1265603017216}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1898404492032}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1898404492032}, {'type': 'perplexity', 'in_batch_dim': [8, 112], 'batch_size': 8, 'flops': 2214805229440}, {'type': 'perplexity', 'in_batch_dim': [8, 112], 'batch_size': 8, 'flops': 2214805229440}, {'type': 'perplexity', 'in_batch_dim': [8, 128], 'batch_size': 8, 'flops': 2531205966848}, {'type': 'perplexity', 'in_batch_dim': [8, 112], 'batch_size': 8, 'flops': 2214805229440}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1582003754624}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1582003754624}, {'type': 'perplexity', 'in_batch_dim': [8, 112], 'batch_size': 8, 'flops': 2214805229440}, {'type': 'perplexity', 'in_batch_dim': [8, 128], 'batch_size': 8, 'flops': 2531205966848}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1582003754624}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1582003754624}, {'type': 'perplexity', 'in_batch_dim': [8, 64], 'batch_size': 8, 'flops': 1265603017216}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1898404492032}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1582003754624}, {'type': 'perplexity', 'in_batch_dim': [8, 112], 'batch_size': 8, 'flops': 2214805229440}, {'type': 'perplexity', 'in_batch_dim': [8, 64], 'batch_size': 8, 'flops': 1265603017216}, {'type': 'perplexity', 'in_batch_dim': [8, 64], 'batch_size': 8, 'flops': 1265603017216}, {'type': 'perplexity', 'in_batch_dim': [8, 128], 'batch_size': 8, 'flops': 2531205966848}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1898404492032}, {'type': 'perplexity', 'in_batch_dim': [8, 112], 'batch_size': 8, 'flops': 2214805229440}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1582003754624}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1582003754624}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1582003754624}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1582003754624}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1582003754624}, {'type': 'perplexity', 'in_batch_dim': [8, 112], 'batch_size': 8, 'flops': 2214805229440}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1898404492032}, {'type': 'perplexity', 'in_batch_dim': [8, 112], 'batch_size': 8, 'flops': 2214805229440}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1898404492032}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1582003754624}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1582003754624}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1898404492032}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1898404492032}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1582003754624}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1582003754624}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1582003754624}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1582003754624}, {'type': 'perplexity', 'in_batch_dim': [8, 48], 'batch_size': 8, 'flops': 949202279808}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1582003754624}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1898404492032}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1582003754624}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1898404492032}, {'type': 'perplexity', 'in_batch_dim': [8, 112], 'batch_size': 8, 'flops': 2214805229440}, {'type': 'perplexity', 'in_batch_dim': [8, 128], 'batch_size': 8, 'flops': 2531205966848}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1898404492032}, {'type': 'perplexity', 'in_batch_dim': [8, 64], 'batch_size': 8, 'flops': 1265603017216}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1898404492032}, {'type': 'perplexity', 'in_batch_dim': [8, 64], 'batch_size': 8, 'flops': 1265603017216}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1582003754624}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1582003754624}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1582003754624}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1582003754624}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1898404492032}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1582003754624}, {'type': 'perplexity', 'in_batch_dim': [8, 112], 'batch_size': 8, 'flops': 2214805229440}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1898404492032}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1582003754624}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1898404492032}, {'type': 'perplexity', 'in_batch_dim': [8, 64], 'batch_size': 8, 'flops': 1265603017216}, {'type': 'perplexity', 'in_batch_dim': [8, 64], 'batch_size': 8, 'flops': 1265603017216}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1898404492032}, {'type': 'perplexity', 'in_batch_dim': [8, 64], 'batch_size': 8, 'flops': 1265603017216}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1582003754624}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1582003754624}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1582003754624}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1582003754624}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1582003754624}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1582003754624}, {'type': 'perplexity', 'in_batch_dim': [8, 64], 'batch_size': 8, 'flops': 1265603017216}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1582003754624}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1582003754624}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1898404492032}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1898404492032}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1898404492032}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1898404492032}, {'type': 'perplexity', 'in_batch_dim': [8, 128], 'batch_size': 8, 'flops': 2531205966848}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1582003754624}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1898404492032}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1898404492032}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1898404492032}, {'type': 'perplexity', 'in_batch_dim': [8, 64], 'batch_size': 8, 'flops': 1265603017216}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1898404492032}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1898404492032}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1582003754624}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1898404492032}, {'type': 'perplexity', 'in_batch_dim': [8, 112], 'batch_size': 8, 'flops': 2214805229440}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1898404492032}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1582003754624}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1898404492032}, {'type': 'perplexity', 'in_batch_dim': [8, 64], 'batch_size': 8, 'flops': 1265603017216}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1898404492032}, {'type': 'perplexity', 'in_batch_dim': [8, 112], 'batch_size': 8, 'flops': 2214805229440}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1898404492032}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1898404492032}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1582003754624}, {'type': 'perplexity', 'in_batch_dim': [8, 64], 'batch_size': 8, 'flops': 1265603017216}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1582003754624}, {'type': 'perplexity', 'in_batch_dim': [8, 128], 'batch_size': 8, 'flops': 2531205966848}, {'type': 'perplexity', 'in_batch_dim': [8, 64], 'batch_size': 8, 'flops': 1265603017216}, {'type': 'perplexity', 'in_batch_dim': [8, 128], 'batch_size': 8, 'flops': 2531205966848}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1898404492032}, {'type': 'perplexity', 'in_batch_dim': [8, 112], 'batch_size': 8, 'flops': 2214805229440}, {'type': 'perplexity', 'in_batch_dim': [8, 112], 'batch_size': 8, 'flops': 2214805229440}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1898404492032}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1898404492032}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1582003754624}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1582003754624}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1582003754624}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1898404492032}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1582003754624}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1582003754624}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1898404492032}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1582003754624}, {'type': 'perplexity', 'in_batch_dim': [8, 64], 'batch_size': 8, 'flops': 1265603017216}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1898404492032}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1898404492032}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1898404492032}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1898404492032}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1582003754624}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1898404492032}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1582003754624}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1582003754624}, {'type': 'perplexity', 'in_batch_dim': [8, 112], 'batch_size': 8, 'flops': 2214805229440}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1582003754624}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1898404492032}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1582003754624}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1582003754624}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1582003754624}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1898404492032}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1582003754624}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1898404492032}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1582003754624}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1582003754624}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1898404492032}, {'type': 'perplexity', 'in_batch_dim': [8, 112], 'batch_size': 8, 'flops': 2214805229440}, {'type': 'perplexity', 'in_batch_dim': [8, 128], 'batch_size': 8, 'flops': 2531205966848}, {'type': 'perplexity', 'in_batch_dim': [8, 112], 'batch_size': 8, 'flops': 2214805229440}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1582003754624}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1898404492032}, {'type': 'perplexity', 'in_batch_dim': [8, 64], 'batch_size': 8, 'flops': 1265603017216}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1898404492032}, {'type': 'perplexity', 'in_batch_dim': [8, 64], 'batch_size': 8, 'flops': 1265603017216}, {'type': 'perplexity', 'in_batch_dim': [8, 144], 'batch_size': 8, 'flops': 2847606704256}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1582003754624}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1898404492032}, {'type': 'perplexity', 'in_batch_dim': [8, 112], 'batch_size': 8, 'flops': 2214805229440}, {'type': 'perplexity', 'in_batch_dim': [8, 144], 'batch_size': 8, 'flops': 2847606704256}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1582003754624}, {'type': 'perplexity', 'in_batch_dim': [8, 112], 'batch_size': 8, 'flops': 2214805229440}, {'type': 'perplexity', 'in_batch_dim': [8, 64], 'batch_size': 8, 'flops': 1265603017216}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1582003754624}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1898404492032}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1898404492032}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1582003754624}, {'type': 'perplexity', 'in_batch_dim': [8, 112], 'batch_size': 8, 'flops': 2214805229440}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1582003754624}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1582003754624}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1582003754624}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1582003754624}, {'type': 'perplexity', 'in_batch_dim': [8, 112], 'batch_size': 8, 'flops': 2214805229440}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1898404492032}, {'type': 'perplexity', 'in_batch_dim': [8, 128], 'batch_size': 8, 'flops': 2531205966848}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1582003754624}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1582003754624}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1898404492032}, {'type': 'perplexity', 'in_batch_dim': [8, 64], 'batch_size': 8, 'flops': 1265603017216}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1898404492032}, {'type': 'perplexity', 'in_batch_dim': [8, 64], 'batch_size': 8, 'flops': 1265603017216}, {'type': 'perplexity', 'in_batch_dim': [8, 112], 'batch_size': 8, 'flops': 2214805229440}, {'type': 'perplexity', 'in_batch_dim': [8, 112], 'batch_size': 8, 'flops': 2214805229440}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1582003754624}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1898404492032}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1582003754624}, {'type': 'perplexity', 'in_batch_dim': [8, 64], 'batch_size': 8, 'flops': 1265603017216}, {'type': 'perplexity', 'in_batch_dim': [8, 64], 'batch_size': 8, 'flops': 1265603017216}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1898404492032}, {'type': 'perplexity', 'in_batch_dim': [8, 64], 'batch_size': 8, 'flops': 1265603017216}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1898404492032}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1898404492032}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1582003754624}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1898404492032}, {'type': 'perplexity', 'in_batch_dim': [8, 144], 'batch_size': 8, 'flops': 2847606704256}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1898404492032}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1582003754624}, {'type': 'perplexity', 'in_batch_dim': [8, 128], 'batch_size': 8, 'flops': 2531205966848}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1898404492032}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1898404492032}, {'type': 'perplexity', 'in_batch_dim': [8, 112], 'batch_size': 8, 'flops': 2214805229440}, {'type': 'perplexity', 'in_batch_dim': [8, 112], 'batch_size': 8, 'flops': 2214805229440}, {'type': 'perplexity', 'in_batch_dim': [8, 64], 'batch_size': 8, 'flops': 1265603017216}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1582003754624}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1898404492032}, {'type': 'perplexity', 'in_batch_dim': [8, 128], 'batch_size': 8, 'flops': 2531205966848}, {'type': 'perplexity', 'in_batch_dim': [8, 144], 'batch_size': 8, 'flops': 2847606704256}, {'type': 'perplexity', 'in_batch_dim': [8, 112], 'batch_size': 8, 'flops': 2214805229440}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1582003754624}, {'type': 'perplexity', 'in_batch_dim': [8, 64], 'batch_size': 8, 'flops': 1265603017216}, {'type': 'perplexity', 'in_batch_dim': [8, 64], 'batch_size': 8, 'flops': 1265603017216}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1898404492032}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1582003754624}, {'type': 'perplexity', 'in_batch_dim': [8, 64], 'batch_size': 8, 'flops': 1265603017216}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1898404492032}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1582003754624}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1898404492032}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1898404492032}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1582003754624}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1582003754624}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1582003754624}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1582003754624}, {'type': 'perplexity', 'in_batch_dim': [3, 48], 'batch_size': 8, 'flops': 949202279808}], 'timestamp': '2025-09-10 02:28:11.158786', 'step': 1864, 'epoch': 1} {'type': 'pplx', 'content': 8742.215478015933, 'timestamp': '2025-09-10 02:28:11.161897', 'step': 1864, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 2847885091968}, 'timestamp': '2025-09-10 02:28:11.191532', 'step': 1864, 'epoch': 1} {'type': 'loss', 'content': 0.1782214492559433, 'timestamp': '2025-09-10 02:28:11.193629', 'step': 1865, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 3322488817984}, 'timestamp': '2025-09-10 02:28:11.223898', 'step': 1865, 'epoch': 1} {'type': 'loss', 'content': 0.22424182295799255, 'timestamp': '2025-09-10 02:28:11.226007', 'step': 1866, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 2847885091968}, 'timestamp': '2025-09-10 02:28:11.256093', 'step': 1866, 'epoch': 1} {'type': 'loss', 'content': 0.09020442515611649, 'timestamp': '2025-09-10 02:28:11.259662', 'step': 1867, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 3322488817984}, 'timestamp': '2025-09-10 02:28:11.298671', 'step': 1867, 'epoch': 1} {'type': 'loss', 'content': 0.2560775876045227, 'timestamp': '2025-09-10 02:28:11.324798', 'step': 1868, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 2847885091968}, 'timestamp': '2025-09-10 02:28:11.355175', 'step': 1868, 'epoch': 1} {'type': 'loss', 'content': 0.12297984957695007, 'timestamp': '2025-09-10 02:28:11.357910', 'step': 1869, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 2847885091968}, 'timestamp': '2025-09-10 02:28:11.388598', 'step': 1869, 'epoch': 1} {'type': 'loss', 'content': 0.15044844150543213, 'timestamp': '2025-09-10 02:28:11.393242', 'step': 1870, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 2847885091968}, 'timestamp': '2025-09-10 02:28:11.429339', 'step': 1870, 'epoch': 1} {'type': 'loss', 'content': 0.2187722623348236, 'timestamp': '2025-09-10 02:28:11.431708', 'step': 1871, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 2847885091968}, 'timestamp': '2025-09-10 02:28:11.461910', 'step': 1871, 'epoch': 1} {'type': 'loss', 'content': 0.19214722514152527, 'timestamp': '2025-09-10 02:28:11.485346', 'step': 1872, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 3322488817984}, 'timestamp': '2025-09-10 02:28:11.515554', 'step': 1872, 'epoch': 1} {'type': 'loss', 'content': 0.21121348440647125, 'timestamp': '2025-09-10 02:28:11.517587', 'step': 1873, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 128], 'flops': 3797092544000}, 'timestamp': '2025-09-10 02:28:11.548763', 'step': 1873, 'epoch': 1} {'type': 'loss', 'content': 0.1310974657535553, 'timestamp': '2025-09-10 02:28:11.550991', 'step': 1874, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 3322488817984}, 'timestamp': '2025-09-10 02:28:11.581878', 'step': 1874, 'epoch': 1} {'type': 'loss', 'content': 0.1731521040201187, 'timestamp': '2025-09-10 02:28:11.584431', 'step': 1875, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 2847885091968}, 'timestamp': '2025-09-10 02:28:11.614507', 'step': 1875, 'epoch': 1} {'type': 'loss', 'content': 0.1274680495262146, 'timestamp': '2025-09-10 02:28:11.638316', 'step': 1876, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 144], 'flops': 4271696270016}, 'timestamp': '2025-09-10 02:28:11.668824', 'step': 1876, 'epoch': 1} {'type': 'loss', 'content': 0.22272181510925293, 'timestamp': '2025-09-10 02:28:11.672438', 'step': 1877, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 2847885091968}, 'timestamp': '2025-09-10 02:28:11.703549', 'step': 1877, 'epoch': 1} {'type': 'loss', 'content': 0.13734084367752075, 'timestamp': '2025-09-10 02:28:11.705694', 'step': 1878, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 2847885091968}, 'timestamp': '2025-09-10 02:28:11.736525', 'step': 1878, 'epoch': 1} {'type': 'loss', 'content': 0.12462031096220016, 'timestamp': '2025-09-10 02:28:11.738573', 'step': 1879, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 2847885091968}, 'timestamp': '2025-09-10 02:28:11.768418', 'step': 1879, 'epoch': 1} {'type': 'loss', 'content': 0.19206523895263672, 'timestamp': '2025-09-10 02:28:11.791719', 'step': 1880, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 3322488817984}, 'timestamp': '2025-09-10 02:28:11.824287', 'step': 1880, 'epoch': 1} {'type': 'loss', 'content': 0.2104298621416092, 'timestamp': '2025-09-10 02:28:11.826743', 'step': 1881, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 3322488817984}, 'timestamp': '2025-09-10 02:28:11.856732', 'step': 1881, 'epoch': 1} {'type': 'loss', 'content': 0.09987413883209229, 'timestamp': '2025-09-10 02:28:11.858781', 'step': 1882, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 3322488817984}, 'timestamp': '2025-09-10 02:28:11.888525', 'step': 1882, 'epoch': 1} {'type': 'loss', 'content': 0.19610042870044708, 'timestamp': '2025-09-10 02:28:11.892009', 'step': 1883, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 3322488817984}, 'timestamp': '2025-09-10 02:28:11.924912', 'step': 1883, 'epoch': 1} {'type': 'loss', 'content': 0.17431090772151947, 'timestamp': '2025-09-10 02:28:11.948169', 'step': 1884, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 3322488817984}, 'timestamp': '2025-09-10 02:28:11.980481', 'step': 1884, 'epoch': 1} {'type': 'loss', 'content': 0.12347111105918884, 'timestamp': '2025-09-10 02:28:11.982983', 'step': 1885, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 2847885091968}, 'timestamp': '2025-09-10 02:28:12.013591', 'step': 1885, 'epoch': 1} {'type': 'loss', 'content': 0.1399887651205063, 'timestamp': '2025-09-10 02:28:12.017624', 'step': 1886, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 144], 'flops': 4271696270016}, 'timestamp': '2025-09-10 02:28:12.047884', 'step': 1886, 'epoch': 1} {'type': 'loss', 'content': 0.16566480696201324, 'timestamp': '2025-09-10 02:28:12.050362', 'step': 1887, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 144], 'flops': 4271696270016}, 'timestamp': '2025-09-10 02:28:12.082684', 'step': 1887, 'epoch': 1} {'type': 'loss', 'content': 0.21455590426921844, 'timestamp': '2025-09-10 02:28:12.108778', 'step': 1888, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 128], 'flops': 3797092544000}, 'timestamp': '2025-09-10 02:28:12.138644', 'step': 1888, 'epoch': 1} {'type': 'loss', 'content': 0.12206806987524033, 'timestamp': '2025-09-10 02:28:12.140416', 'step': 1889, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 3322488817984}, 'timestamp': '2025-09-10 02:28:12.170252', 'step': 1889, 'epoch': 1} {'type': 'loss', 'content': 0.23208500444889069, 'timestamp': '2025-09-10 02:28:12.172253', 'step': 1890, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 160], 'flops': 4746299996032}, 'timestamp': '2025-09-10 02:28:12.203445', 'step': 1890, 'epoch': 1} {'type': 'loss', 'content': 0.1849374771118164, 'timestamp': '2025-09-10 02:28:12.206391', 'step': 1891, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 3322488817984}, 'timestamp': '2025-09-10 02:28:12.236529', 'step': 1891, 'epoch': 1} {'type': 'loss', 'content': 0.23109014332294464, 'timestamp': '2025-09-10 02:28:12.259776', 'step': 1892, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 3322488817984}, 'timestamp': '2025-09-10 02:28:12.291504', 'step': 1892, 'epoch': 1} {'type': 'loss', 'content': 0.12230906635522842, 'timestamp': '2025-09-10 02:28:12.294311', 'step': 1893, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 128], 'flops': 3797092544000}, 'timestamp': '2025-09-10 02:28:12.324944', 'step': 1893, 'epoch': 1} {'type': 'loss', 'content': 0.18865303695201874, 'timestamp': '2025-09-10 02:28:12.327126', 'step': 1894, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 144], 'flops': 4271696270016}, 'timestamp': '2025-09-10 02:28:12.358245', 'step': 1894, 'epoch': 1} {'type': 'loss', 'content': 0.16286851465702057, 'timestamp': '2025-09-10 02:28:12.360275', 'step': 1895, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 2847885091968}, 'timestamp': '2025-09-10 02:28:12.390365', 'step': 1895, 'epoch': 1} {'type': 'loss', 'content': 0.22352898120880127, 'timestamp': '2025-09-10 02:28:12.415105', 'step': 1896, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 128], 'flops': 3797092544000}, 'timestamp': '2025-09-10 02:28:12.447502', 'step': 1896, 'epoch': 1} {'type': 'loss', 'content': 0.15978963673114777, 'timestamp': '2025-09-10 02:28:12.449758', 'step': 1897, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 128], 'flops': 3797092544000}, 'timestamp': '2025-09-10 02:28:12.480102', 'step': 1897, 'epoch': 1} {'type': 'loss', 'content': 0.11086338013410568, 'timestamp': '2025-09-10 02:28:12.483414', 'step': 1898, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 3322488817984}, 'timestamp': '2025-09-10 02:28:12.515174', 'step': 1898, 'epoch': 1} {'type': 'loss', 'content': 0.13270534574985504, 'timestamp': '2025-09-10 02:28:12.516991', 'step': 1899, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 128], 'flops': 3797092544000}, 'timestamp': '2025-09-10 02:28:12.547219', 'step': 1899, 'epoch': 1} {'type': 'loss', 'content': 0.18299256265163422, 'timestamp': '2025-09-10 02:28:12.570454', 'step': 1900, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 2847885091968}, 'timestamp': '2025-09-10 02:28:12.600225', 'step': 1900, 'epoch': 1} {'type': 'loss', 'content': 0.24221821129322052, 'timestamp': '2025-09-10 02:28:12.602357', 'step': 1901, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 2847885091968}, 'timestamp': '2025-09-10 02:28:12.632250', 'step': 1901, 'epoch': 1} {'type': 'loss', 'content': 0.2973771393299103, 'timestamp': '2025-09-10 02:28:12.634092', 'step': 1902, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 3322488817984}, 'timestamp': '2025-09-10 02:28:12.663877', 'step': 1902, 'epoch': 1} {'type': 'loss', 'content': 0.24293245375156403, 'timestamp': '2025-09-10 02:28:12.665652', 'step': 1903, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 128], 'flops': 3797092544000}, 'timestamp': '2025-09-10 02:28:12.696002', 'step': 1903, 'epoch': 1} {'type': 'loss', 'content': 0.27077314257621765, 'timestamp': '2025-09-10 02:28:12.719535', 'step': 1904, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 2847885091968}, 'timestamp': '2025-09-10 02:28:12.749757', 'step': 1904, 'epoch': 1} {'type': 'loss', 'content': 0.19303178787231445, 'timestamp': '2025-09-10 02:28:12.751810', 'step': 1905, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 2847885091968}, 'timestamp': '2025-09-10 02:28:12.781940', 'step': 1905, 'epoch': 1} {'type': 'loss', 'content': 0.22310888767242432, 'timestamp': '2025-09-10 02:28:12.786988', 'step': 1906, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 3322488817984}, 'timestamp': '2025-09-10 02:28:12.816969', 'step': 1906, 'epoch': 1} {'type': 'loss', 'content': 0.1890474110841751, 'timestamp': '2025-09-10 02:28:12.819246', 'step': 1907, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 3322488817984}, 'timestamp': '2025-09-10 02:28:12.849395', 'step': 1907, 'epoch': 1} {'type': 'loss', 'content': 0.23943157494068146, 'timestamp': '2025-09-10 02:28:12.872995', 'step': 1908, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 3322488817984}, 'timestamp': '2025-09-10 02:28:12.903581', 'step': 1908, 'epoch': 1} {'type': 'loss', 'content': 0.1857387274503708, 'timestamp': '2025-09-10 02:28:12.905638', 'step': 1909, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 144], 'flops': 4271696270016}, 'timestamp': '2025-09-10 02:28:12.935537', 'step': 1909, 'epoch': 1} {'type': 'loss', 'content': 0.19881470501422882, 'timestamp': '2025-09-10 02:28:12.937795', 'step': 1910, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 2847885091968}, 'timestamp': '2025-09-10 02:28:12.968347', 'step': 1910, 'epoch': 1} {'type': 'loss', 'content': 0.1716008335351944, 'timestamp': '2025-09-10 02:28:12.970608', 'step': 1911, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 2847885091968}, 'timestamp': '2025-09-10 02:28:13.000901', 'step': 1911, 'epoch': 1} {'type': 'loss', 'content': 0.2332231104373932, 'timestamp': '2025-09-10 02:28:13.024185', 'step': 1912, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 2847885091968}, 'timestamp': '2025-09-10 02:28:13.055269', 'step': 1912, 'epoch': 1} {'type': 'loss', 'content': 0.14465560019016266, 'timestamp': '2025-09-10 02:28:13.056985', 'step': 1913, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 2847885091968}, 'timestamp': '2025-09-10 02:28:13.086360', 'step': 1913, 'epoch': 1} {'type': 'loss', 'content': 0.17740489542484283, 'timestamp': '2025-09-10 02:28:13.089960', 'step': 1914, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 80], 'flops': 2373281365952}, 'timestamp': '2025-09-10 02:28:13.119580', 'step': 1914, 'epoch': 1} {'type': 'loss', 'content': 0.27308622002601624, 'timestamp': '2025-09-10 02:28:13.123338', 'step': 1915, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 144], 'flops': 4271696270016}, 'timestamp': '2025-09-10 02:28:13.168797', 'step': 1915, 'epoch': 1} {'type': 'loss', 'content': 0.1532236784696579, 'timestamp': '2025-09-10 02:28:13.191921', 'step': 1916, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 3322488817984}, 'timestamp': '2025-09-10 02:28:13.221641', 'step': 1916, 'epoch': 1} {'type': 'loss', 'content': 0.15373274683952332, 'timestamp': '2025-09-10 02:28:13.223827', 'step': 1917, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 144], 'flops': 4271696270016}, 'timestamp': '2025-09-10 02:28:13.254252', 'step': 1917, 'epoch': 1} {'type': 'loss', 'content': 0.1672838181257248, 'timestamp': '2025-09-10 02:28:13.257126', 'step': 1918, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 3322488817984}, 'timestamp': '2025-09-10 02:28:13.288295', 'step': 1918, 'epoch': 1} {'type': 'loss', 'content': 0.15839113295078278, 'timestamp': '2025-09-10 02:28:13.290421', 'step': 1919, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 2847885091968}, 'timestamp': '2025-09-10 02:28:13.322029', 'step': 1919, 'epoch': 1} {'type': 'loss', 'content': 0.18662837147712708, 'timestamp': '2025-09-10 02:28:13.345528', 'step': 1920, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 2847885091968}, 'timestamp': '2025-09-10 02:28:13.375688', 'step': 1920, 'epoch': 1} {'type': 'loss', 'content': 0.2252911776304245, 'timestamp': '2025-09-10 02:28:13.378233', 'step': 1921, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 2847885091968}, 'timestamp': '2025-09-10 02:28:13.408510', 'step': 1921, 'epoch': 1} {'type': 'loss', 'content': 0.14219199120998383, 'timestamp': '2025-09-10 02:28:13.410720', 'step': 1922, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 2847885091968}, 'timestamp': '2025-09-10 02:28:13.441401', 'step': 1922, 'epoch': 1} {'type': 'loss', 'content': 0.1556404083967209, 'timestamp': '2025-09-10 02:28:13.443766', 'step': 1923, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 128], 'flops': 3797092544000}, 'timestamp': '2025-09-10 02:28:13.474160', 'step': 1923, 'epoch': 1} {'type': 'loss', 'content': 0.21007688343524933, 'timestamp': '2025-09-10 02:28:13.497627', 'step': 1924, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 3322488817984}, 'timestamp': '2025-09-10 02:28:13.529823', 'step': 1924, 'epoch': 1} {'type': 'loss', 'content': 0.22090807557106018, 'timestamp': '2025-09-10 02:28:13.531898', 'step': 1925, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 144], 'flops': 4271696270016}, 'timestamp': '2025-09-10 02:28:13.561746', 'step': 1925, 'epoch': 1} {'type': 'loss', 'content': 0.22848224639892578, 'timestamp': '2025-09-10 02:28:13.564189', 'step': 1926, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 160], 'flops': 4746299996032}, 'timestamp': '2025-09-10 02:28:13.594537', 'step': 1926, 'epoch': 1} {'type': 'loss', 'content': 0.1182636022567749, 'timestamp': '2025-09-10 02:28:13.598312', 'step': 1927, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 3322488817984}, 'timestamp': '2025-09-10 02:28:13.628563', 'step': 1927, 'epoch': 1} {'type': 'loss', 'content': 0.2426762580871582, 'timestamp': '2025-09-10 02:28:13.652080', 'step': 1928, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 3322488817984}, 'timestamp': '2025-09-10 02:28:13.683345', 'step': 1928, 'epoch': 1} {'type': 'loss', 'content': 0.19810590147972107, 'timestamp': '2025-09-10 02:28:13.686477', 'step': 1929, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 3322488817984}, 'timestamp': '2025-09-10 02:28:13.717274', 'step': 1929, 'epoch': 1} {'type': 'loss', 'content': 0.18810871243476868, 'timestamp': '2025-09-10 02:28:13.719526', 'step': 1930, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 80], 'flops': 2373281365952}, 'timestamp': '2025-09-10 02:28:13.749463', 'step': 1930, 'epoch': 1} {'type': 'loss', 'content': 0.1924779862165451, 'timestamp': '2025-09-10 02:28:13.752565', 'step': 1931, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 3322488817984}, 'timestamp': '2025-09-10 02:28:13.785600', 'step': 1931, 'epoch': 1} {'type': 'loss', 'content': 0.12867656350135803, 'timestamp': '2025-09-10 02:28:13.810172', 'step': 1932, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 3322488817984}, 'timestamp': '2025-09-10 02:28:13.840686', 'step': 1932, 'epoch': 1} {'type': 'loss', 'content': 0.2277335524559021, 'timestamp': '2025-09-10 02:28:13.842854', 'step': 1933, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 128], 'flops': 3797092544000}, 'timestamp': '2025-09-10 02:28:13.873350', 'step': 1933, 'epoch': 1} {'type': 'loss', 'content': 0.17774514853954315, 'timestamp': '2025-09-10 02:28:13.875598', 'step': 1934, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 2847885091968}, 'timestamp': '2025-09-10 02:28:13.906348', 'step': 1934, 'epoch': 1} {'type': 'loss', 'content': 0.12361092865467072, 'timestamp': '2025-09-10 02:28:13.908583', 'step': 1935, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 128], 'flops': 3797092544000}, 'timestamp': '2025-09-10 02:28:13.938801', 'step': 1935, 'epoch': 1} {'type': 'loss', 'content': 0.21394190192222595, 'timestamp': '2025-09-10 02:28:13.962165', 'step': 1936, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 144], 'flops': 4271696270016}, 'timestamp': '2025-09-10 02:28:13.996420', 'step': 1936, 'epoch': 1} {'type': 'loss', 'content': 0.14989115297794342, 'timestamp': '2025-09-10 02:28:13.998450', 'step': 1937, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 128], 'flops': 3797092544000}, 'timestamp': '2025-09-10 02:28:14.030482', 'step': 1937, 'epoch': 1} {'type': 'loss', 'content': 0.252592533826828, 'timestamp': '2025-09-10 02:28:14.032447', 'step': 1938, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 128], 'flops': 3797092544000}, 'timestamp': '2025-09-10 02:28:14.061877', 'step': 1938, 'epoch': 1} {'type': 'loss', 'content': 0.20428062975406647, 'timestamp': '2025-09-10 02:28:14.063807', 'step': 1939, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 3322488817984}, 'timestamp': '2025-09-10 02:28:14.093369', 'step': 1939, 'epoch': 1} {'type': 'loss', 'content': 0.16946138441562653, 'timestamp': '2025-09-10 02:28:14.116642', 'step': 1940, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 3322488817984}, 'timestamp': '2025-09-10 02:28:14.147022', 'step': 1940, 'epoch': 1} {'type': 'loss', 'content': 0.27103349566459656, 'timestamp': '2025-09-10 02:28:14.149134', 'step': 1941, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 2847885091968}, 'timestamp': '2025-09-10 02:28:14.178883', 'step': 1941, 'epoch': 1} {'type': 'loss', 'content': 0.1903352290391922, 'timestamp': '2025-09-10 02:28:14.180639', 'step': 1942, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 3322488817984}, 'timestamp': '2025-09-10 02:28:14.210658', 'step': 1942, 'epoch': 1} {'type': 'loss', 'content': 0.1418127715587616, 'timestamp': '2025-09-10 02:28:14.212790', 'step': 1943, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 128], 'flops': 3797092544000}, 'timestamp': '2025-09-10 02:28:14.243168', 'step': 1943, 'epoch': 1} {'type': 'loss', 'content': 0.22472088038921356, 'timestamp': '2025-09-10 02:28:14.266789', 'step': 1944, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 128], 'flops': 3797092544000}, 'timestamp': '2025-09-10 02:28:14.297684', 'step': 1944, 'epoch': 1} {'type': 'loss', 'content': 0.2698788344860077, 'timestamp': '2025-09-10 02:28:14.299838', 'step': 1945, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 3322488817984}, 'timestamp': '2025-09-10 02:28:14.330660', 'step': 1945, 'epoch': 1} {'type': 'loss', 'content': 0.14269371330738068, 'timestamp': '2025-09-10 02:28:14.332842', 'step': 1946, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 128], 'flops': 3797092544000}, 'timestamp': '2025-09-10 02:28:14.362672', 'step': 1946, 'epoch': 1} {'type': 'loss', 'content': 0.19395586848258972, 'timestamp': '2025-09-10 02:28:14.365057', 'step': 1947, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 3322488817984}, 'timestamp': '2025-09-10 02:28:14.395824', 'step': 1947, 'epoch': 1} {'type': 'loss', 'content': 0.1720629185438156, 'timestamp': '2025-09-10 02:28:14.421504', 'step': 1948, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 2847885091968}, 'timestamp': '2025-09-10 02:28:14.452257', 'step': 1948, 'epoch': 1} {'type': 'loss', 'content': 0.31942978501319885, 'timestamp': '2025-09-10 02:28:14.454134', 'step': 1949, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 2847885091968}, 'timestamp': '2025-09-10 02:28:14.485050', 'step': 1949, 'epoch': 1} {'type': 'loss', 'content': 0.19014540314674377, 'timestamp': '2025-09-10 02:28:14.486913', 'step': 1950, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 128], 'flops': 3797092544000}, 'timestamp': '2025-09-10 02:28:14.517553', 'step': 1950, 'epoch': 1} {'type': 'loss', 'content': 0.27795735001564026, 'timestamp': '2025-09-10 02:28:14.521752', 'step': 1951, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 2847885091968}, 'timestamp': '2025-09-10 02:28:14.558976', 'step': 1951, 'epoch': 1} {'type': 'loss', 'content': 0.1361798644065857, 'timestamp': '2025-09-10 02:28:14.585623', 'step': 1952, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 160], 'flops': 4746299996032}, 'timestamp': '2025-09-10 02:28:14.616187', 'step': 1952, 'epoch': 1} {'type': 'loss', 'content': 0.1745983213186264, 'timestamp': '2025-09-10 02:28:14.618093', 'step': 1953, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 2847885091968}, 'timestamp': '2025-09-10 02:28:14.647944', 'step': 1953, 'epoch': 1} {'type': 'loss', 'content': 0.11638262867927551, 'timestamp': '2025-09-10 02:28:14.649682', 'step': 1954, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 128], 'flops': 3797092544000}, 'timestamp': '2025-09-10 02:28:14.679969', 'step': 1954, 'epoch': 1} {'type': 'loss', 'content': 0.13753972947597504, 'timestamp': '2025-09-10 02:28:14.681954', 'step': 1955, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 3322488817984}, 'timestamp': '2025-09-10 02:28:14.712416', 'step': 1955, 'epoch': 1} {'type': 'loss', 'content': 0.14862102270126343, 'timestamp': '2025-09-10 02:28:14.735492', 'step': 1956, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 2847885091968}, 'timestamp': '2025-09-10 02:28:14.765742', 'step': 1956, 'epoch': 1} {'type': 'loss', 'content': 0.2290126383304596, 'timestamp': '2025-09-10 02:28:14.767754', 'step': 1957, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 2847885091968}, 'timestamp': '2025-09-10 02:28:14.797679', 'step': 1957, 'epoch': 1} {'type': 'loss', 'content': 0.1978795975446701, 'timestamp': '2025-09-10 02:28:14.799967', 'step': 1958, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 3322488817984}, 'timestamp': '2025-09-10 02:28:14.829799', 'step': 1958, 'epoch': 1} {'type': 'loss', 'content': 0.17550013959407806, 'timestamp': '2025-09-10 02:28:14.832080', 'step': 1959, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 3322488817984}, 'timestamp': '2025-09-10 02:28:14.862329', 'step': 1959, 'epoch': 1} {'type': 'loss', 'content': 0.1844499558210373, 'timestamp': '2025-09-10 02:28:14.885575', 'step': 1960, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 2847885091968}, 'timestamp': '2025-09-10 02:28:14.916614', 'step': 1960, 'epoch': 1} {'type': 'loss', 'content': 0.1330249160528183, 'timestamp': '2025-09-10 02:28:14.919217', 'step': 1961, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 144], 'flops': 4271696270016}, 'timestamp': '2025-09-10 02:28:14.949471', 'step': 1961, 'epoch': 1} {'type': 'loss', 'content': 0.14656919240951538, 'timestamp': '2025-09-10 02:28:14.951858', 'step': 1962, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 2847885091968}, 'timestamp': '2025-09-10 02:28:14.981822', 'step': 1962, 'epoch': 1} {'type': 'loss', 'content': 0.11858045309782028, 'timestamp': '2025-09-10 02:28:14.983823', 'step': 1963, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 3322488817984}, 'timestamp': '2025-09-10 02:28:15.014286', 'step': 1963, 'epoch': 1} {'type': 'loss', 'content': 0.12393977493047714, 'timestamp': '2025-09-10 02:28:15.037414', 'step': 1964, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 128], 'flops': 3797092544000}, 'timestamp': '2025-09-10 02:28:15.067519', 'step': 1964, 'epoch': 1} {'type': 'loss', 'content': 0.1655162274837494, 'timestamp': '2025-09-10 02:28:15.069529', 'step': 1965, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 128], 'flops': 3797092544000}, 'timestamp': '2025-09-10 02:28:15.099944', 'step': 1965, 'epoch': 1} {'type': 'loss', 'content': 0.12320996820926666, 'timestamp': '2025-09-10 02:28:15.101989', 'step': 1966, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 128], 'flops': 3797092544000}, 'timestamp': '2025-09-10 02:28:15.137856', 'step': 1966, 'epoch': 1} {'type': 'loss', 'content': 0.35953617095947266, 'timestamp': '2025-09-10 02:28:15.139806', 'step': 1967, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 128], 'flops': 3797092544000}, 'timestamp': '2025-09-10 02:28:15.169827', 'step': 1967, 'epoch': 1} {'type': 'loss', 'content': 0.14375163614749908, 'timestamp': '2025-09-10 02:28:15.194138', 'step': 1968, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 160], 'flops': 4746299996032}, 'timestamp': '2025-09-10 02:28:15.224259', 'step': 1968, 'epoch': 1} {'type': 'loss', 'content': 0.16286785900592804, 'timestamp': '2025-09-10 02:28:15.226184', 'step': 1969, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 2847885091968}, 'timestamp': '2025-09-10 02:28:15.256053', 'step': 1969, 'epoch': 1} {'type': 'loss', 'content': 0.14355678856372833, 'timestamp': '2025-09-10 02:28:15.258188', 'step': 1970, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 2847885091968}, 'timestamp': '2025-09-10 02:28:15.287908', 'step': 1970, 'epoch': 1} {'type': 'loss', 'content': 0.2566851079463959, 'timestamp': '2025-09-10 02:28:15.290112', 'step': 1971, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 3322488817984}, 'timestamp': '2025-09-10 02:28:15.320497', 'step': 1971, 'epoch': 1} {'type': 'loss', 'content': 0.12263981252908707, 'timestamp': '2025-09-10 02:28:15.344309', 'step': 1972, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 3322488817984}, 'timestamp': '2025-09-10 02:28:15.374837', 'step': 1972, 'epoch': 1} {'type': 'loss', 'content': 0.14601808786392212, 'timestamp': '2025-09-10 02:28:15.376952', 'step': 1973, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 2847885091968}, 'timestamp': '2025-09-10 02:28:15.407193', 'step': 1973, 'epoch': 1} {'type': 'loss', 'content': 0.20523104071617126, 'timestamp': '2025-09-10 02:28:15.413246', 'step': 1974, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 128], 'flops': 3797092544000}, 'timestamp': '2025-09-10 02:28:15.448034', 'step': 1974, 'epoch': 1} {'type': 'loss', 'content': 0.16482950747013092, 'timestamp': '2025-09-10 02:28:15.450103', 'step': 1975, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 80], 'flops': 2373281365952}, 'timestamp': '2025-09-10 02:28:15.480540', 'step': 1975, 'epoch': 1} {'type': 'loss', 'content': 0.15440456569194794, 'timestamp': '2025-09-10 02:28:15.504225', 'step': 1976, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 128], 'flops': 3797092544000}, 'timestamp': '2025-09-10 02:28:15.533937', 'step': 1976, 'epoch': 1} {'type': 'loss', 'content': 0.17852061986923218, 'timestamp': '2025-09-10 02:28:15.535731', 'step': 1977, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 3322488817984}, 'timestamp': '2025-09-10 02:28:15.565945', 'step': 1977, 'epoch': 1} {'type': 'loss', 'content': 0.19189292192459106, 'timestamp': '2025-09-10 02:28:15.568224', 'step': 1978, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 2847885091968}, 'timestamp': '2025-09-10 02:28:15.598077', 'step': 1978, 'epoch': 1} {'type': 'loss', 'content': 0.26805388927459717, 'timestamp': '2025-09-10 02:28:15.600003', 'step': 1979, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 3322488817984}, 'timestamp': '2025-09-10 02:28:15.630515', 'step': 1979, 'epoch': 1} {'type': 'loss', 'content': 0.15028800070285797, 'timestamp': '2025-09-10 02:28:15.653791', 'step': 1980, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 3322488817984}, 'timestamp': '2025-09-10 02:28:15.684549', 'step': 1980, 'epoch': 1} {'type': 'loss', 'content': 0.19015605747699738, 'timestamp': '2025-09-10 02:28:15.696338', 'step': 1981, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 144], 'flops': 4271696270016}, 'timestamp': '2025-09-10 02:28:15.728168', 'step': 1981, 'epoch': 1} {'type': 'loss', 'content': 0.2552136778831482, 'timestamp': '2025-09-10 02:28:15.730433', 'step': 1982, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 2847885091968}, 'timestamp': '2025-09-10 02:28:15.759593', 'step': 1982, 'epoch': 1} {'type': 'loss', 'content': 0.21769046783447266, 'timestamp': '2025-09-10 02:28:15.762417', 'step': 1983, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 128], 'flops': 3797092544000}, 'timestamp': '2025-09-10 02:28:15.792770', 'step': 1983, 'epoch': 1} {'type': 'loss', 'content': 0.24611973762512207, 'timestamp': '2025-09-10 02:28:15.817462', 'step': 1984, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 3322488817984}, 'timestamp': '2025-09-10 02:28:15.849391', 'step': 1984, 'epoch': 1} {'type': 'loss', 'content': 0.13800467550754547, 'timestamp': '2025-09-10 02:28:15.851621', 'step': 1985, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 128], 'flops': 3797092544000}, 'timestamp': '2025-09-10 02:28:15.882044', 'step': 1985, 'epoch': 1} {'type': 'loss', 'content': 0.141575425863266, 'timestamp': '2025-09-10 02:28:15.884561', 'step': 1986, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 3322488817984}, 'timestamp': '2025-09-10 02:28:15.915773', 'step': 1986, 'epoch': 1} {'type': 'loss', 'content': 0.24369969964027405, 'timestamp': '2025-09-10 02:28:15.918122', 'step': 1987, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 3322488817984}, 'timestamp': '2025-09-10 02:28:15.948473', 'step': 1987, 'epoch': 1} {'type': 'loss', 'content': 0.20963731408119202, 'timestamp': '2025-09-10 02:28:15.972238', 'step': 1988, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 2847885091968}, 'timestamp': '2025-09-10 02:28:16.002915', 'step': 1988, 'epoch': 1} {'type': 'loss', 'content': 0.2615450620651245, 'timestamp': '2025-09-10 02:28:16.004630', 'step': 1989, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 128], 'flops': 3797092544000}, 'timestamp': '2025-09-10 02:28:16.034623', 'step': 1989, 'epoch': 1} {'type': 'loss', 'content': 0.14331799745559692, 'timestamp': '2025-09-10 02:28:16.036660', 'step': 1990, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 160], 'flops': 4746299996032}, 'timestamp': '2025-09-10 02:28:16.066978', 'step': 1990, 'epoch': 1} {'type': 'loss', 'content': 0.16791968047618866, 'timestamp': '2025-09-10 02:28:16.069607', 'step': 1991, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 3322488817984}, 'timestamp': '2025-09-10 02:28:16.099367', 'step': 1991, 'epoch': 1} {'type': 'loss', 'content': 0.16265517473220825, 'timestamp': '2025-09-10 02:28:16.122542', 'step': 1992, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 3322488817984}, 'timestamp': '2025-09-10 02:28:16.153665', 'step': 1992, 'epoch': 1} {'type': 'loss', 'content': 0.17377731204032898, 'timestamp': '2025-09-10 02:28:16.155681', 'step': 1993, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 128], 'flops': 3797092544000}, 'timestamp': '2025-09-10 02:28:16.184940', 'step': 1993, 'epoch': 1} {'type': 'loss', 'content': 0.19561509788036346, 'timestamp': '2025-09-10 02:28:16.186970', 'step': 1994, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 3322488817984}, 'timestamp': '2025-09-10 02:28:16.218065', 'step': 1994, 'epoch': 1} {'type': 'loss', 'content': 0.21934069693088531, 'timestamp': '2025-09-10 02:28:16.220269', 'step': 1995, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 3322488817984}, 'timestamp': '2025-09-10 02:28:16.250797', 'step': 1995, 'epoch': 1} {'type': 'loss', 'content': 0.16812334954738617, 'timestamp': '2025-09-10 02:28:16.274494', 'step': 1996, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 80], 'flops': 2373281365952}, 'timestamp': '2025-09-10 02:28:16.304888', 'step': 1996, 'epoch': 1} {'type': 'loss', 'content': 0.20925018191337585, 'timestamp': '2025-09-10 02:28:16.306721', 'step': 1997, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 128], 'flops': 3797092544000}, 'timestamp': '2025-09-10 02:28:16.338072', 'step': 1997, 'epoch': 1} {'type': 'loss', 'content': 0.2503630220890045, 'timestamp': '2025-09-10 02:28:16.340302', 'step': 1998, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 2847885091968}, 'timestamp': '2025-09-10 02:28:16.370530', 'step': 1998, 'epoch': 1} {'type': 'loss', 'content': 0.15115810930728912, 'timestamp': '2025-09-10 02:28:16.372647', 'step': 1999, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 128], 'flops': 3797092544000}, 'timestamp': '2025-09-10 02:28:16.402443', 'step': 1999, 'epoch': 1} {'type': 'loss', 'content': 0.1630813628435135, 'timestamp': '2025-09-10 02:28:16.432266', 'step': 2000, 'epoch': 1} {'type': 'info', 'content': 'Checkpoint saved at step 2000', 'timestamp': '2025-09-10 02:28:21.815134', 'step': 2000, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 2847885091968}, 'timestamp': '2025-09-10 02:28:21.885331', 'step': 2000, 'epoch': 1} {'type': 'loss', 'content': 0.15216028690338135, 'timestamp': '2025-09-10 02:28:21.905936', 'step': 2001, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 128], 'flops': 3797092544000}, 'timestamp': '2025-09-10 02:28:21.979634', 'step': 2001, 'epoch': 1} {'type': 'loss', 'content': 0.24490682780742645, 'timestamp': '2025-09-10 02:28:21.989511', 'step': 2002, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 2847885091968}, 'timestamp': '2025-09-10 02:28:22.059011', 'step': 2002, 'epoch': 1} {'type': 'loss', 'content': 0.18816663324832916, 'timestamp': '2025-09-10 02:28:22.065327', 'step': 2003, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 80], 'flops': 2373281365952}, 'timestamp': '2025-09-10 02:28:22.103768', 'step': 2003, 'epoch': 1} {'type': 'loss', 'content': 0.21531708538532257, 'timestamp': '2025-09-10 02:28:22.132918', 'step': 2004, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 3322488817984}, 'timestamp': '2025-09-10 02:28:22.203047', 'step': 2004, 'epoch': 1} {'type': 'loss', 'content': 0.1278688758611679, 'timestamp': '2025-09-10 02:28:22.215590', 'step': 2005, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 2847885091968}, 'timestamp': '2025-09-10 02:28:22.293322', 'step': 2005, 'epoch': 1} {'type': 'loss', 'content': 0.14414195716381073, 'timestamp': '2025-09-10 02:28:22.302712', 'step': 2006, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 80], 'flops': 2373281365952}, 'timestamp': '2025-09-10 02:28:22.356134', 'step': 2006, 'epoch': 1} {'type': 'loss', 'content': 0.2316093146800995, 'timestamp': '2025-09-10 02:28:22.371465', 'step': 2007, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 144], 'flops': 4271696270016}, 'timestamp': '2025-09-10 02:28:22.454590', 'step': 2007, 'epoch': 1} {'type': 'loss', 'content': 0.2040223330259323, 'timestamp': '2025-09-10 02:28:22.500275', 'step': 2008, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 2847885091968}, 'timestamp': '2025-09-10 02:28:22.628629', 'step': 2008, 'epoch': 1} {'type': 'loss', 'content': 0.1216181069612503, 'timestamp': '2025-09-10 02:28:22.652177', 'step': 2009, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 2847885091968}, 'timestamp': '2025-09-10 02:28:22.725945', 'step': 2009, 'epoch': 1} {'type': 'loss', 'content': 0.19824667274951935, 'timestamp': '2025-09-10 02:28:22.736856', 'step': 2010, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 3322488817984}, 'timestamp': '2025-09-10 02:28:22.785377', 'step': 2010, 'epoch': 1} {'type': 'loss', 'content': 0.1164061576128006, 'timestamp': '2025-09-10 02:28:22.808169', 'step': 2011, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 2847885091968}, 'timestamp': '2025-09-10 02:28:22.905366', 'step': 2011, 'epoch': 1} {'type': 'loss', 'content': 0.16871310770511627, 'timestamp': '2025-09-10 02:28:22.936969', 'step': 2012, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 160], 'flops': 4746299996032}, 'timestamp': '2025-09-10 02:28:22.985665', 'step': 2012, 'epoch': 1} {'type': 'loss', 'content': 0.3081686496734619, 'timestamp': '2025-09-10 02:28:22.996632', 'step': 2013, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 128], 'flops': 3797092544000}, 'timestamp': '2025-09-10 02:28:23.032927', 'step': 2013, 'epoch': 1} {'type': 'loss', 'content': 0.22704464197158813, 'timestamp': '2025-09-10 02:28:23.035599', 'step': 2014, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 2847885091968}, 'timestamp': '2025-09-10 02:28:23.070709', 'step': 2014, 'epoch': 1} {'type': 'loss', 'content': 0.13298898935317993, 'timestamp': '2025-09-10 02:28:23.075480', 'step': 2015, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 128], 'flops': 3797092544000}, 'timestamp': '2025-09-10 02:28:23.112508', 'step': 2015, 'epoch': 1} {'type': 'loss', 'content': 0.0983768105506897, 'timestamp': '2025-09-10 02:28:23.138382', 'step': 2016, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 128], 'flops': 3797092544000}, 'timestamp': '2025-09-10 02:28:23.183233', 'step': 2016, 'epoch': 1} {'type': 'loss', 'content': 0.11191599816083908, 'timestamp': '2025-09-10 02:28:23.189992', 'step': 2017, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 80], 'flops': 2373281365952}, 'timestamp': '2025-09-10 02:28:23.249279', 'step': 2017, 'epoch': 1} {'type': 'loss', 'content': 0.18497775495052338, 'timestamp': '2025-09-10 02:28:23.257810', 'step': 2018, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 3322488817984}, 'timestamp': '2025-09-10 02:28:23.312804', 'step': 2018, 'epoch': 1} {'type': 'loss', 'content': 0.15634317696094513, 'timestamp': '2025-09-10 02:28:23.324878', 'step': 2019, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 3322488817984}, 'timestamp': '2025-09-10 02:28:23.403356', 'step': 2019, 'epoch': 1} {'type': 'loss', 'content': 0.28492674231529236, 'timestamp': '2025-09-10 02:28:23.430061', 'step': 2020, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 3322488817984}, 'timestamp': '2025-09-10 02:28:23.471423', 'step': 2020, 'epoch': 1} {'type': 'loss', 'content': 0.18330717086791992, 'timestamp': '2025-09-10 02:28:23.473868', 'step': 2021, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 3322488817984}, 'timestamp': '2025-09-10 02:28:23.505400', 'step': 2021, 'epoch': 1} {'type': 'loss', 'content': 0.16952545940876007, 'timestamp': '2025-09-10 02:28:23.507840', 'step': 2022, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 3322488817984}, 'timestamp': '2025-09-10 02:28:23.537970', 'step': 2022, 'epoch': 1} {'type': 'loss', 'content': 0.2380812019109726, 'timestamp': '2025-09-10 02:28:23.542726', 'step': 2023, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 2847885091968}, 'timestamp': '2025-09-10 02:28:23.578691', 'step': 2023, 'epoch': 1} {'type': 'loss', 'content': 0.15592481195926666, 'timestamp': '2025-09-10 02:28:23.602694', 'step': 2024, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 160], 'flops': 4746299996032}, 'timestamp': '2025-09-10 02:28:23.637602', 'step': 2024, 'epoch': 1} {'type': 'loss', 'content': 0.23622028529644012, 'timestamp': '2025-09-10 02:28:23.645314', 'step': 2025, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 80], 'flops': 2373281365952}, 'timestamp': '2025-09-10 02:28:23.683244', 'step': 2025, 'epoch': 1} {'type': 'loss', 'content': 0.23267529904842377, 'timestamp': '2025-09-10 02:28:23.686478', 'step': 2026, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 3322488817984}, 'timestamp': '2025-09-10 02:28:23.719155', 'step': 2026, 'epoch': 1} {'type': 'loss', 'content': 0.17125366628170013, 'timestamp': '2025-09-10 02:28:23.721198', 'step': 2027, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 3322488817984}, 'timestamp': '2025-09-10 02:28:23.751698', 'step': 2027, 'epoch': 1} {'type': 'loss', 'content': 0.12863348424434662, 'timestamp': '2025-09-10 02:28:23.792179', 'step': 2028, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 3322488817984}, 'timestamp': '2025-09-10 02:28:23.852860', 'step': 2028, 'epoch': 1} {'type': 'loss', 'content': 0.15916742384433746, 'timestamp': '2025-09-10 02:28:23.859581', 'step': 2029, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 3322488817984}, 'timestamp': '2025-09-10 02:28:23.905249', 'step': 2029, 'epoch': 1} {'type': 'loss', 'content': 0.17218859493732452, 'timestamp': '2025-09-10 02:28:23.916020', 'step': 2030, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 128], 'flops': 3797092544000}, 'timestamp': '2025-09-10 02:28:23.970236', 'step': 2030, 'epoch': 1} {'type': 'loss', 'content': 0.1215573400259018, 'timestamp': '2025-09-10 02:28:23.983479', 'step': 2031, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 128], 'flops': 3797092544000}, 'timestamp': '2025-09-10 02:28:24.035069', 'step': 2031, 'epoch': 1} {'type': 'loss', 'content': 0.14217814803123474, 'timestamp': '2025-09-10 02:28:24.066629', 'step': 2032, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 3322488817984}, 'timestamp': '2025-09-10 02:28:24.119129', 'step': 2032, 'epoch': 1} {'type': 'loss', 'content': 0.2995034456253052, 'timestamp': '2025-09-10 02:28:24.124308', 'step': 2033, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 128], 'flops': 3797092544000}, 'timestamp': '2025-09-10 02:28:24.178599', 'step': 2033, 'epoch': 1} {'type': 'loss', 'content': 0.14968131482601166, 'timestamp': '2025-09-10 02:28:24.180875', 'step': 2034, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 2847885091968}, 'timestamp': '2025-09-10 02:28:24.212082', 'step': 2034, 'epoch': 1} {'type': 'loss', 'content': 0.08715160191059113, 'timestamp': '2025-09-10 02:28:24.214353', 'step': 2035, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 2847885091968}, 'timestamp': '2025-09-10 02:28:24.244914', 'step': 2035, 'epoch': 1} {'type': 'loss', 'content': 0.2605234682559967, 'timestamp': '2025-09-10 02:28:24.268415', 'step': 2036, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 3322488817984}, 'timestamp': '2025-09-10 02:28:24.312458', 'step': 2036, 'epoch': 1} {'type': 'loss', 'content': 0.15973365306854248, 'timestamp': '2025-09-10 02:28:24.315261', 'step': 2037, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 3322488817984}, 'timestamp': '2025-09-10 02:28:24.346880', 'step': 2037, 'epoch': 1} {'type': 'loss', 'content': 0.2778756320476532, 'timestamp': '2025-09-10 02:28:24.349609', 'step': 2038, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 3322488817984}, 'timestamp': '2025-09-10 02:28:24.384359', 'step': 2038, 'epoch': 1} {'type': 'loss', 'content': 0.21622471511363983, 'timestamp': '2025-09-10 02:28:24.386563', 'step': 2039, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 3322488817984}, 'timestamp': '2025-09-10 02:28:24.420870', 'step': 2039, 'epoch': 1} {'type': 'loss', 'content': 0.1393737941980362, 'timestamp': '2025-09-10 02:28:24.454864', 'step': 2040, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 128], 'flops': 3797092544000}, 'timestamp': '2025-09-10 02:28:24.486541', 'step': 2040, 'epoch': 1} {'type': 'loss', 'content': 0.18049341440200806, 'timestamp': '2025-09-10 02:28:24.490177', 'step': 2041, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 128], 'flops': 3797092544000}, 'timestamp': '2025-09-10 02:28:24.522138', 'step': 2041, 'epoch': 1} {'type': 'loss', 'content': 0.13663119077682495, 'timestamp': '2025-09-10 02:28:24.525220', 'step': 2042, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 128], 'flops': 3797092544000}, 'timestamp': '2025-09-10 02:28:24.562123', 'step': 2042, 'epoch': 1} {'type': 'loss', 'content': 0.21012458205223083, 'timestamp': '2025-09-10 02:28:24.574486', 'step': 2043, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 144], 'flops': 4271696270016}, 'timestamp': '2025-09-10 02:28:24.617499', 'step': 2043, 'epoch': 1} {'type': 'loss', 'content': 0.09832482784986496, 'timestamp': '2025-09-10 02:28:24.643184', 'step': 2044, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 144], 'flops': 4271696270016}, 'timestamp': '2025-09-10 02:28:24.688026', 'step': 2044, 'epoch': 1} {'type': 'loss', 'content': 0.20634882152080536, 'timestamp': '2025-09-10 02:28:24.698146', 'step': 2045, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 128], 'flops': 3797092544000}, 'timestamp': '2025-09-10 02:28:24.754506', 'step': 2045, 'epoch': 1} {'type': 'loss', 'content': 0.0847434476017952, 'timestamp': '2025-09-10 02:28:24.771298', 'step': 2046, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 128], 'flops': 3797092544000}, 'timestamp': '2025-09-10 02:28:24.820438', 'step': 2046, 'epoch': 1} {'type': 'loss', 'content': 0.17834530770778656, 'timestamp': '2025-09-10 02:28:24.824430', 'step': 2047, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 3322488817984}, 'timestamp': '2025-09-10 02:28:24.873459', 'step': 2047, 'epoch': 1} {'type': 'loss', 'content': 0.21130922436714172, 'timestamp': '2025-09-10 02:28:24.899059', 'step': 2048, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 128], 'flops': 3797092544000}, 'timestamp': '2025-09-10 02:28:24.950818', 'step': 2048, 'epoch': 1} {'type': 'loss', 'content': 0.11740322411060333, 'timestamp': '2025-09-10 02:28:24.955979', 'step': 2049, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 3322488817984}, 'timestamp': '2025-09-10 02:28:25.012524', 'step': 2049, 'epoch': 1} {'type': 'loss', 'content': 0.17851778864860535, 'timestamp': '2025-09-10 02:28:25.014482', 'step': 2050, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 3322488817984}, 'timestamp': '2025-09-10 02:28:25.075850', 'step': 2050, 'epoch': 1} {'type': 'loss', 'content': 0.1969224512577057, 'timestamp': '2025-09-10 02:28:25.083055', 'step': 2051, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 144], 'flops': 4271696270016}, 'timestamp': '2025-09-10 02:28:25.115823', 'step': 2051, 'epoch': 1} {'type': 'loss', 'content': 0.16419042646884918, 'timestamp': '2025-09-10 02:28:25.139791', 'step': 2052, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 3322488817984}, 'timestamp': '2025-09-10 02:28:25.176574', 'step': 2052, 'epoch': 1} {'type': 'loss', 'content': 0.24429571628570557, 'timestamp': '2025-09-10 02:28:25.179323', 'step': 2053, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 144], 'flops': 4271696270016}, 'timestamp': '2025-09-10 02:28:25.214680', 'step': 2053, 'epoch': 1} {'type': 'loss', 'content': 0.14651544392108917, 'timestamp': '2025-09-10 02:28:25.216908', 'step': 2054, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 128], 'flops': 3797092544000}, 'timestamp': '2025-09-10 02:28:25.260374', 'step': 2054, 'epoch': 1} {'type': 'loss', 'content': 0.1254822313785553, 'timestamp': '2025-09-10 02:28:25.262896', 'step': 2055, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 2847885091968}, 'timestamp': '2025-09-10 02:28:25.294511', 'step': 2055, 'epoch': 1} {'type': 'loss', 'content': 0.1317797154188156, 'timestamp': '2025-09-10 02:28:25.318886', 'step': 2056, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 128], 'flops': 3797092544000}, 'timestamp': '2025-09-10 02:28:25.354405', 'step': 2056, 'epoch': 1} {'type': 'loss', 'content': 0.1295040100812912, 'timestamp': '2025-09-10 02:28:25.357599', 'step': 2057, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 144], 'flops': 4271696270016}, 'timestamp': '2025-09-10 02:28:25.570049', 'step': 2057, 'epoch': 1} {'type': 'loss', 'content': 0.2254646122455597, 'timestamp': '2025-09-10 02:28:25.572219', 'step': 2058, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 2847885091968}, 'timestamp': '2025-09-10 02:28:25.603216', 'step': 2058, 'epoch': 1} {'type': 'loss', 'content': 0.12972398102283478, 'timestamp': '2025-09-10 02:28:25.605705', 'step': 2059, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 2847885091968}, 'timestamp': '2025-09-10 02:28:25.643038', 'step': 2059, 'epoch': 1} {'type': 'loss', 'content': 0.12336168438196182, 'timestamp': '2025-09-10 02:28:25.669424', 'step': 2060, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 3322488817984}, 'timestamp': '2025-09-10 02:28:25.703884', 'step': 2060, 'epoch': 1} {'type': 'loss', 'content': 0.14744672179222107, 'timestamp': '2025-09-10 02:28:25.706045', 'step': 2061, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 3322488817984}, 'timestamp': '2025-09-10 02:28:25.736733', 'step': 2061, 'epoch': 1} {'type': 'loss', 'content': 0.1392751932144165, 'timestamp': '2025-09-10 02:28:25.738749', 'step': 2062, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 3322488817984}, 'timestamp': '2025-09-10 02:28:25.769777', 'step': 2062, 'epoch': 1} {'type': 'loss', 'content': 0.19284160435199738, 'timestamp': '2025-09-10 02:28:25.771792', 'step': 2063, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 2847885091968}, 'timestamp': '2025-09-10 02:28:25.802143', 'step': 2063, 'epoch': 1} {'type': 'loss', 'content': 0.25116491317749023, 'timestamp': '2025-09-10 02:28:25.826193', 'step': 2064, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 160], 'flops': 4746299996032}, 'timestamp': '2025-09-10 02:28:25.864308', 'step': 2064, 'epoch': 1} {'type': 'loss', 'content': 0.16784878075122833, 'timestamp': '2025-09-10 02:28:25.866301', 'step': 2065, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 144], 'flops': 4271696270016}, 'timestamp': '2025-09-10 02:28:25.897565', 'step': 2065, 'epoch': 1} {'type': 'loss', 'content': 0.15742647647857666, 'timestamp': '2025-09-10 02:28:25.899977', 'step': 2066, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 128], 'flops': 3797092544000}, 'timestamp': '2025-09-10 02:28:25.931388', 'step': 2066, 'epoch': 1} {'type': 'loss', 'content': 0.19071127474308014, 'timestamp': '2025-09-10 02:28:25.934356', 'step': 2067, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 2847885091968}, 'timestamp': '2025-09-10 02:28:25.973475', 'step': 2067, 'epoch': 1} {'type': 'loss', 'content': 0.15307649970054626, 'timestamp': '2025-09-10 02:28:25.997025', 'step': 2068, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 2847885091968}, 'timestamp': '2025-09-10 02:28:26.064077', 'step': 2068, 'epoch': 1} {'type': 'loss', 'content': 0.2095184624195099, 'timestamp': '2025-09-10 02:28:26.294761', 'step': 2069, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 3322488817984}, 'timestamp': '2025-09-10 02:28:26.343459', 'step': 2069, 'epoch': 1} {'type': 'loss', 'content': 0.16398750245571136, 'timestamp': '2025-09-10 02:28:26.354144', 'step': 2070, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 3322488817984}, 'timestamp': '2025-09-10 02:28:26.406130', 'step': 2070, 'epoch': 1} {'type': 'loss', 'content': 0.23923704028129578, 'timestamp': '2025-09-10 02:28:26.414635', 'step': 2071, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 128], 'flops': 3797092544000}, 'timestamp': '2025-09-10 02:28:26.471646', 'step': 2071, 'epoch': 1} {'type': 'loss', 'content': 0.19992855191230774, 'timestamp': '2025-09-10 02:28:26.514355', 'step': 2072, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 128], 'flops': 3797092544000}, 'timestamp': '2025-09-10 02:28:26.579019', 'step': 2072, 'epoch': 1} {'type': 'loss', 'content': 0.15204524993896484, 'timestamp': '2025-09-10 02:28:26.591511', 'step': 2073, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 144], 'flops': 4271696270016}, 'timestamp': '2025-09-10 02:28:26.642447', 'step': 2073, 'epoch': 1} {'type': 'loss', 'content': 0.1368226259946823, 'timestamp': '2025-09-10 02:28:26.651122', 'step': 2074, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 128], 'flops': 3797092544000}, 'timestamp': '2025-09-10 02:28:26.705470', 'step': 2074, 'epoch': 1} {'type': 'loss', 'content': 0.13295233249664307, 'timestamp': '2025-09-10 02:28:26.712399', 'step': 2075, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 2847885091968}, 'timestamp': '2025-09-10 02:28:26.771800', 'step': 2075, 'epoch': 1} {'type': 'loss', 'content': 0.12923946976661682, 'timestamp': '2025-09-10 02:28:26.799467', 'step': 2076, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 3322488817984}, 'timestamp': '2025-09-10 02:28:26.852676', 'step': 2076, 'epoch': 1} {'type': 'loss', 'content': 0.2871311902999878, 'timestamp': '2025-09-10 02:28:26.858698', 'step': 2077, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 128], 'flops': 3797092544000}, 'timestamp': '2025-09-10 02:28:26.923079', 'step': 2077, 'epoch': 1} {'type': 'loss', 'content': 0.20870347321033478, 'timestamp': '2025-09-10 02:28:26.929401', 'step': 2078, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 144], 'flops': 4271696270016}, 'timestamp': '2025-09-10 02:28:26.990205', 'step': 2078, 'epoch': 1} {'type': 'loss', 'content': 0.13288265466690063, 'timestamp': '2025-09-10 02:28:26.999990', 'step': 2079, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 3322488817984}, 'timestamp': '2025-09-10 02:28:27.087471', 'step': 2079, 'epoch': 1} {'type': 'loss', 'content': 0.190845787525177, 'timestamp': '2025-09-10 02:28:27.126746', 'step': 2080, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 3322488817984}, 'timestamp': '2025-09-10 02:28:27.436394', 'step': 2080, 'epoch': 1} {'type': 'loss', 'content': 0.1790739744901657, 'timestamp': '2025-09-10 02:28:27.440874', 'step': 2081, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 3322488817984}, 'timestamp': '2025-09-10 02:28:27.492470', 'step': 2081, 'epoch': 1} {'type': 'loss', 'content': 0.12038411945104599, 'timestamp': '2025-09-10 02:28:27.500676', 'step': 2082, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 128], 'flops': 3797092544000}, 'timestamp': '2025-09-10 02:28:27.539827', 'step': 2082, 'epoch': 1} {'type': 'loss', 'content': 0.14600488543510437, 'timestamp': '2025-09-10 02:28:27.543269', 'step': 2083, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 128], 'flops': 3797092544000}, 'timestamp': '2025-09-10 02:28:27.574916', 'step': 2083, 'epoch': 1} {'type': 'loss', 'content': 0.12625983357429504, 'timestamp': '2025-09-10 02:28:27.599431', 'step': 2084, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 2847885091968}, 'timestamp': '2025-09-10 02:28:27.633386', 'step': 2084, 'epoch': 1} {'type': 'loss', 'content': 0.13877111673355103, 'timestamp': '2025-09-10 02:28:27.635891', 'step': 2085, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 3322488817984}, 'timestamp': '2025-09-10 02:28:27.669086', 'step': 2085, 'epoch': 1} {'type': 'loss', 'content': 0.12987388670444489, 'timestamp': '2025-09-10 02:28:27.673203', 'step': 2086, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 128], 'flops': 3797092544000}, 'timestamp': '2025-09-10 02:28:27.714930', 'step': 2086, 'epoch': 1} {'type': 'loss', 'content': 0.21225953102111816, 'timestamp': '2025-09-10 02:28:27.717532', 'step': 2087, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 3322488817984}, 'timestamp': '2025-09-10 02:28:27.749072', 'step': 2087, 'epoch': 1} {'type': 'loss', 'content': 0.11717372387647629, 'timestamp': '2025-09-10 02:28:27.772618', 'step': 2088, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 128], 'flops': 3797092544000}, 'timestamp': '2025-09-10 02:28:27.805470', 'step': 2088, 'epoch': 1} {'type': 'loss', 'content': 0.16417436301708221, 'timestamp': '2025-09-10 02:28:27.808127', 'step': 2089, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 128], 'flops': 3797092544000}, 'timestamp': '2025-09-10 02:28:27.841388', 'step': 2089, 'epoch': 1} {'type': 'loss', 'content': 0.12288971990346909, 'timestamp': '2025-09-10 02:28:27.844252', 'step': 2090, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 128], 'flops': 3797092544000}, 'timestamp': '2025-09-10 02:28:27.882416', 'step': 2090, 'epoch': 1} {'type': 'loss', 'content': 0.20360679924488068, 'timestamp': '2025-09-10 02:28:27.884739', 'step': 2091, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 128], 'flops': 3797092544000}, 'timestamp': '2025-09-10 02:28:27.916099', 'step': 2091, 'epoch': 1} {'type': 'loss', 'content': 0.20492655038833618, 'timestamp': '2025-09-10 02:28:27.939817', 'step': 2092, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 3322488817984}, 'timestamp': '2025-09-10 02:28:27.987452', 'step': 2092, 'epoch': 1} {'type': 'loss', 'content': 0.28362762928009033, 'timestamp': '2025-09-10 02:28:27.990132', 'step': 2093, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 128], 'flops': 3797092544000}, 'timestamp': '2025-09-10 02:28:28.030902', 'step': 2093, 'epoch': 1} {'type': 'loss', 'content': 0.1712198704481125, 'timestamp': '2025-09-10 02:28:28.033920', 'step': 2094, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 128], 'flops': 3797092544000}, 'timestamp': '2025-09-10 02:28:28.066858', 'step': 2094, 'epoch': 1} {'type': 'loss', 'content': 0.1285078078508377, 'timestamp': '2025-09-10 02:28:28.072528', 'step': 2095, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 2847885091968}, 'timestamp': '2025-09-10 02:28:28.108122', 'step': 2095, 'epoch': 1} {'type': 'loss', 'content': 0.11839515715837479, 'timestamp': '2025-09-10 02:28:28.140270', 'step': 2096, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 128], 'flops': 3797092544000}, 'timestamp': '2025-09-10 02:28:28.181721', 'step': 2096, 'epoch': 1} {'type': 'loss', 'content': 0.1892959028482437, 'timestamp': '2025-09-10 02:28:28.184136', 'step': 2097, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 128], 'flops': 3797092544000}, 'timestamp': '2025-09-10 02:28:28.219337', 'step': 2097, 'epoch': 1} {'type': 'loss', 'content': 0.2563339173793793, 'timestamp': '2025-09-10 02:28:28.223752', 'step': 2098, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 2847885091968}, 'timestamp': '2025-09-10 02:28:28.254869', 'step': 2098, 'epoch': 1} {'type': 'loss', 'content': 0.1724126935005188, 'timestamp': '2025-09-10 02:28:28.259719', 'step': 2099, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 3322488817984}, 'timestamp': '2025-09-10 02:28:28.295416', 'step': 2099, 'epoch': 1} {'type': 'loss', 'content': 0.1529167741537094, 'timestamp': '2025-09-10 02:28:28.320512', 'step': 2100, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 3322488817984}, 'timestamp': '2025-09-10 02:28:28.360951', 'step': 2100, 'epoch': 1} {'type': 'loss', 'content': 0.17594149708747864, 'timestamp': '2025-09-10 02:28:28.363415', 'step': 2101, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 3322488817984}, 'timestamp': '2025-09-10 02:28:28.394434', 'step': 2101, 'epoch': 1} {'type': 'loss', 'content': 0.22538071870803833, 'timestamp': '2025-09-10 02:28:28.396966', 'step': 2102, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 144], 'flops': 4271696270016}, 'timestamp': '2025-09-10 02:28:28.428590', 'step': 2102, 'epoch': 1} {'type': 'loss', 'content': 0.1813848316669464, 'timestamp': '2025-09-10 02:28:28.431080', 'step': 2103, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 144], 'flops': 4271696270016}, 'timestamp': '2025-09-10 02:28:28.464771', 'step': 2103, 'epoch': 1} {'type': 'loss', 'content': 0.15785816311836243, 'timestamp': '2025-09-10 02:28:28.489564', 'step': 2104, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 2847885091968}, 'timestamp': '2025-09-10 02:28:28.521750', 'step': 2104, 'epoch': 1} {'type': 'loss', 'content': 0.14450566470623016, 'timestamp': '2025-09-10 02:28:28.524598', 'step': 2105, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 2847885091968}, 'timestamp': '2025-09-10 02:28:28.555671', 'step': 2105, 'epoch': 1} {'type': 'loss', 'content': 0.19362777471542358, 'timestamp': '2025-09-10 02:28:28.559795', 'step': 2106, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 3322488817984}, 'timestamp': '2025-09-10 02:28:28.597652', 'step': 2106, 'epoch': 1} {'type': 'loss', 'content': 0.17338186502456665, 'timestamp': '2025-09-10 02:28:28.600253', 'step': 2107, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 80], 'flops': 2373281365952}, 'timestamp': '2025-09-10 02:28:28.639500', 'step': 2107, 'epoch': 1} {'type': 'loss', 'content': 0.20227278769016266, 'timestamp': '2025-09-10 02:28:28.664048', 'step': 2108, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 128], 'flops': 3797092544000}, 'timestamp': '2025-09-10 02:28:28.701213', 'step': 2108, 'epoch': 1} {'type': 'loss', 'content': 0.19312487542629242, 'timestamp': '2025-09-10 02:28:28.703511', 'step': 2109, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 2847885091968}, 'timestamp': '2025-09-10 02:28:28.734517', 'step': 2109, 'epoch': 1} {'type': 'loss', 'content': 0.1147761419415474, 'timestamp': '2025-09-10 02:28:28.739660', 'step': 2110, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 3322488817984}, 'timestamp': '2025-09-10 02:28:28.776048', 'step': 2110, 'epoch': 1} {'type': 'loss', 'content': 0.2134326696395874, 'timestamp': '2025-09-10 02:28:28.778481', 'step': 2111, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 128], 'flops': 3797092544000}, 'timestamp': '2025-09-10 02:28:28.811351', 'step': 2111, 'epoch': 1} {'type': 'loss', 'content': 0.16893760859966278, 'timestamp': '2025-09-10 02:28:28.835122', 'step': 2112, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 2847885091968}, 'timestamp': '2025-09-10 02:28:28.870745', 'step': 2112, 'epoch': 1} {'type': 'loss', 'content': 0.15010830760002136, 'timestamp': '2025-09-10 02:28:28.872848', 'step': 2113, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 2847885091968}, 'timestamp': '2025-09-10 02:28:28.911592', 'step': 2113, 'epoch': 1} {'type': 'loss', 'content': 0.16606850922107697, 'timestamp': '2025-09-10 02:28:28.913845', 'step': 2114, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 160], 'flops': 4746299996032}, 'timestamp': '2025-09-10 02:28:28.944928', 'step': 2114, 'epoch': 1} {'type': 'loss', 'content': 0.31898602843284607, 'timestamp': '2025-09-10 02:28:28.947605', 'step': 2115, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 128], 'flops': 3797092544000}, 'timestamp': '2025-09-10 02:28:28.980258', 'step': 2115, 'epoch': 1} {'type': 'loss', 'content': 0.2386389821767807, 'timestamp': '2025-09-10 02:28:29.006462', 'step': 2116, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 144], 'flops': 4271696270016}, 'timestamp': '2025-09-10 02:28:29.040188', 'step': 2116, 'epoch': 1} {'type': 'loss', 'content': 0.17421779036521912, 'timestamp': '2025-09-10 02:28:29.042303', 'step': 2117, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 3322488817984}, 'timestamp': '2025-09-10 02:28:29.073365', 'step': 2117, 'epoch': 1} {'type': 'loss', 'content': 0.14703848958015442, 'timestamp': '2025-09-10 02:28:29.075477', 'step': 2118, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 2847885091968}, 'timestamp': '2025-09-10 02:28:29.106843', 'step': 2118, 'epoch': 1} {'type': 'loss', 'content': 0.22251540422439575, 'timestamp': '2025-09-10 02:28:29.117878', 'step': 2119, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 2847885091968}, 'timestamp': '2025-09-10 02:28:29.152635', 'step': 2119, 'epoch': 1} {'type': 'loss', 'content': 0.1648731678724289, 'timestamp': '2025-09-10 02:28:29.176892', 'step': 2120, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 2847885091968}, 'timestamp': '2025-09-10 02:28:29.207561', 'step': 2120, 'epoch': 1} {'type': 'loss', 'content': 0.13855190575122833, 'timestamp': '2025-09-10 02:28:29.209837', 'step': 2121, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 80], 'flops': 2373281365952}, 'timestamp': '2025-09-10 02:28:29.242359', 'step': 2121, 'epoch': 1} {'type': 'loss', 'content': 0.17532718181610107, 'timestamp': '2025-09-10 02:28:29.244778', 'step': 2122, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 2847885091968}, 'timestamp': '2025-09-10 02:28:29.285964', 'step': 2122, 'epoch': 1} {'type': 'loss', 'content': 0.17272181808948517, 'timestamp': '2025-09-10 02:28:29.288205', 'step': 2123, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 128], 'flops': 3797092544000}, 'timestamp': '2025-09-10 02:28:29.324458', 'step': 2123, 'epoch': 1} {'type': 'loss', 'content': 0.13729384541511536, 'timestamp': '2025-09-10 02:28:29.348423', 'step': 2124, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 128], 'flops': 3797092544000}, 'timestamp': '2025-09-10 02:28:29.388660', 'step': 2124, 'epoch': 1} {'type': 'loss', 'content': 0.17601045966148376, 'timestamp': '2025-09-10 02:28:29.393124', 'step': 2125, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 3322488817984}, 'timestamp': '2025-09-10 02:28:29.425680', 'step': 2125, 'epoch': 1} {'type': 'loss', 'content': 0.16593393683433533, 'timestamp': '2025-09-10 02:28:29.428149', 'step': 2126, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 3322488817984}, 'timestamp': '2025-09-10 02:28:29.464337', 'step': 2126, 'epoch': 1} {'type': 'loss', 'content': 0.12767142057418823, 'timestamp': '2025-09-10 02:28:29.468197', 'step': 2127, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 2847885091968}, 'timestamp': '2025-09-10 02:28:29.506841', 'step': 2127, 'epoch': 1} {'type': 'loss', 'content': 0.2897540330886841, 'timestamp': '2025-09-10 02:28:29.530972', 'step': 2128, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 144], 'flops': 4271696270016}, 'timestamp': '2025-09-10 02:28:29.567732', 'step': 2128, 'epoch': 1} {'type': 'loss', 'content': 0.22887198626995087, 'timestamp': '2025-09-10 02:28:29.570867', 'step': 2129, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 2847885091968}, 'timestamp': '2025-09-10 02:28:29.614090', 'step': 2129, 'epoch': 1} {'type': 'loss', 'content': 0.14107093214988708, 'timestamp': '2025-09-10 02:28:29.625881', 'step': 2130, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 3322488817984}, 'timestamp': '2025-09-10 02:28:29.682525', 'step': 2130, 'epoch': 1} {'type': 'loss', 'content': 0.1455211043357849, 'timestamp': '2025-09-10 02:28:29.697544', 'step': 2131, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 3322488817984}, 'timestamp': '2025-09-10 02:28:29.751346', 'step': 2131, 'epoch': 1} {'type': 'loss', 'content': 0.22998763620853424, 'timestamp': '2025-09-10 02:28:29.785961', 'step': 2132, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 144], 'flops': 4271696270016}, 'timestamp': '2025-09-10 02:28:29.845400', 'step': 2132, 'epoch': 1} {'type': 'loss', 'content': 0.12464089691638947, 'timestamp': '2025-09-10 02:28:29.851575', 'step': 2133, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 3322488817984}, 'timestamp': '2025-09-10 02:28:29.903937', 'step': 2133, 'epoch': 1} {'type': 'loss', 'content': 0.11582202464342117, 'timestamp': '2025-09-10 02:28:29.917773', 'step': 2134, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 144], 'flops': 4271696270016}, 'timestamp': '2025-09-10 02:28:29.956819', 'step': 2134, 'epoch': 1} {'type': 'loss', 'content': 0.35038936138153076, 'timestamp': '2025-09-10 02:28:29.973915', 'step': 2135, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 2847885091968}, 'timestamp': '2025-09-10 02:28:30.019138', 'step': 2135, 'epoch': 1} {'type': 'loss', 'content': 0.08949153870344162, 'timestamp': '2025-09-10 02:28:30.046807', 'step': 2136, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 3322488817984}, 'timestamp': '2025-09-10 02:28:30.099399', 'step': 2136, 'epoch': 1} {'type': 'loss', 'content': 0.17133666574954987, 'timestamp': '2025-09-10 02:28:30.113377', 'step': 2137, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 128], 'flops': 3797092544000}, 'timestamp': '2025-09-10 02:28:30.160029', 'step': 2137, 'epoch': 1} {'type': 'loss', 'content': 0.20443560183048248, 'timestamp': '2025-09-10 02:28:30.163546', 'step': 2138, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 128], 'flops': 3797092544000}, 'timestamp': '2025-09-10 02:28:30.212671', 'step': 2138, 'epoch': 1} {'type': 'loss', 'content': 0.0869482085108757, 'timestamp': '2025-09-10 02:28:30.470686', 'step': 2139, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 3322488817984}, 'timestamp': '2025-09-10 02:28:30.799867', 'step': 2139, 'epoch': 1} {'type': 'loss', 'content': 0.16678142547607422, 'timestamp': '2025-09-10 02:28:30.825498', 'step': 2140, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 2847885091968}, 'timestamp': '2025-09-10 02:28:30.875517', 'step': 2140, 'epoch': 1} {'type': 'loss', 'content': 0.19328367710113525, 'timestamp': '2025-09-10 02:28:30.881310', 'step': 2141, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 3322488817984}, 'timestamp': '2025-09-10 02:28:30.924487', 'step': 2141, 'epoch': 1} {'type': 'loss', 'content': 0.12158521264791489, 'timestamp': '2025-09-10 02:28:30.928775', 'step': 2142, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 144], 'flops': 4271696270016}, 'timestamp': '2025-09-10 02:28:30.973036', 'step': 2142, 'epoch': 1} {'type': 'loss', 'content': 0.1891479194164276, 'timestamp': '2025-09-10 02:28:30.977268', 'step': 2143, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 3322488817984}, 'timestamp': '2025-09-10 02:28:31.017642', 'step': 2143, 'epoch': 1} {'type': 'loss', 'content': 0.16622024774551392, 'timestamp': '2025-09-10 02:28:31.041809', 'step': 2144, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 128], 'flops': 3797092544000}, 'timestamp': '2025-09-10 02:28:31.089811', 'step': 2144, 'epoch': 1} {'type': 'loss', 'content': 0.15217173099517822, 'timestamp': '2025-09-10 02:28:31.096384', 'step': 2145, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 2847885091968}, 'timestamp': '2025-09-10 02:28:31.147286', 'step': 2145, 'epoch': 1} {'type': 'loss', 'content': 0.15235860645771027, 'timestamp': '2025-09-10 02:28:31.157115', 'step': 2146, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 3322488817984}, 'timestamp': '2025-09-10 02:28:31.196154', 'step': 2146, 'epoch': 1} {'type': 'loss', 'content': 0.12557567656040192, 'timestamp': '2025-09-10 02:28:31.200223', 'step': 2147, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 2847885091968}, 'timestamp': '2025-09-10 02:28:31.240379', 'step': 2147, 'epoch': 1} {'type': 'loss', 'content': 0.16425113379955292, 'timestamp': '2025-09-10 02:28:31.270779', 'step': 2148, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 2847885091968}, 'timestamp': '2025-09-10 02:28:31.335562', 'step': 2148, 'epoch': 1} {'type': 'loss', 'content': 0.1442798525094986, 'timestamp': '2025-09-10 02:28:31.357661', 'step': 2149, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 2847885091968}, 'timestamp': '2025-09-10 02:28:31.421800', 'step': 2149, 'epoch': 1} {'type': 'loss', 'content': 0.12548384070396423, 'timestamp': '2025-09-10 02:28:31.427671', 'step': 2150, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 2847885091968}, 'timestamp': '2025-09-10 02:28:31.482022', 'step': 2150, 'epoch': 1} {'type': 'loss', 'content': 0.26089099049568176, 'timestamp': '2025-09-10 02:28:31.487855', 'step': 2151, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 128], 'flops': 3797092544000}, 'timestamp': '2025-09-10 02:28:31.546871', 'step': 2151, 'epoch': 1} {'type': 'loss', 'content': 0.15879899263381958, 'timestamp': '2025-09-10 02:28:31.572772', 'step': 2152, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 160], 'flops': 4746299996032}, 'timestamp': '2025-09-10 02:28:31.619392', 'step': 2152, 'epoch': 1} {'type': 'loss', 'content': 0.09777294844388962, 'timestamp': '2025-09-10 02:28:31.625160', 'step': 2153, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 128], 'flops': 3797092544000}, 'timestamp': '2025-09-10 02:28:31.674313', 'step': 2153, 'epoch': 1} {'type': 'loss', 'content': 0.15459416806697845, 'timestamp': '2025-09-10 02:28:31.679312', 'step': 2154, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 128], 'flops': 3797092544000}, 'timestamp': '2025-09-10 02:28:31.722065', 'step': 2154, 'epoch': 1} {'type': 'loss', 'content': 0.16138091683387756, 'timestamp': '2025-09-10 02:28:31.740665', 'step': 2155, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 3322488817984}, 'timestamp': '2025-09-10 02:28:31.796323', 'step': 2155, 'epoch': 1} {'type': 'loss', 'content': 0.22298423945903778, 'timestamp': '2025-09-10 02:28:31.824092', 'step': 2156, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 3322488817984}, 'timestamp': '2025-09-10 02:28:31.861927', 'step': 2156, 'epoch': 1} {'type': 'loss', 'content': 0.2096431702375412, 'timestamp': '2025-09-10 02:28:31.894330', 'step': 2157, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 144], 'flops': 4271696270016}, 'timestamp': '2025-09-10 02:28:31.935499', 'step': 2157, 'epoch': 1} {'type': 'loss', 'content': 0.1302809715270996, 'timestamp': '2025-09-10 02:28:31.939447', 'step': 2158, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 3322488817984}, 'timestamp': '2025-09-10 02:28:31.975912', 'step': 2158, 'epoch': 1} {'type': 'loss', 'content': 0.25533923506736755, 'timestamp': '2025-09-10 02:28:31.981034', 'step': 2159, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 2847885091968}, 'timestamp': '2025-09-10 02:28:32.035204', 'step': 2159, 'epoch': 1} {'type': 'loss', 'content': 0.1641375869512558, 'timestamp': '2025-09-10 02:28:32.067901', 'step': 2160, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 3322488817984}, 'timestamp': '2025-09-10 02:28:32.111981', 'step': 2160, 'epoch': 1} {'type': 'loss', 'content': 0.16524134576320648, 'timestamp': '2025-09-10 02:28:32.121123', 'step': 2161, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 2847885091968}, 'timestamp': '2025-09-10 02:28:32.174887', 'step': 2161, 'epoch': 1} {'type': 'loss', 'content': 0.2527412474155426, 'timestamp': '2025-09-10 02:28:32.185367', 'step': 2162, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 128], 'flops': 3797092544000}, 'timestamp': '2025-09-10 02:28:32.237671', 'step': 2162, 'epoch': 1} {'type': 'loss', 'content': 0.21125708520412445, 'timestamp': '2025-09-10 02:28:32.242645', 'step': 2163, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 3322488817984}, 'timestamp': '2025-09-10 02:28:32.294665', 'step': 2163, 'epoch': 1} {'type': 'loss', 'content': 0.2046569287776947, 'timestamp': '2025-09-10 02:28:32.318985', 'step': 2164, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 3322488817984}, 'timestamp': '2025-09-10 02:28:32.366504', 'step': 2164, 'epoch': 1} {'type': 'loss', 'content': 0.20035617053508759, 'timestamp': '2025-09-10 02:28:32.373903', 'step': 2165, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 128], 'flops': 3797092544000}, 'timestamp': '2025-09-10 02:28:32.442176', 'step': 2165, 'epoch': 1} {'type': 'loss', 'content': 0.11857417225837708, 'timestamp': '2025-09-10 02:28:32.446774', 'step': 2166, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 128], 'flops': 3797092544000}, 'timestamp': '2025-09-10 02:28:32.507588', 'step': 2166, 'epoch': 1} {'type': 'loss', 'content': 0.16307280957698822, 'timestamp': '2025-09-10 02:28:32.519170', 'step': 2167, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 3322488817984}, 'timestamp': '2025-09-10 02:28:32.562883', 'step': 2167, 'epoch': 1} {'type': 'loss', 'content': 0.2768018841743469, 'timestamp': '2025-09-10 02:28:32.592409', 'step': 2168, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 3322488817984}, 'timestamp': '2025-09-10 02:28:32.639213', 'step': 2168, 'epoch': 1} {'type': 'loss', 'content': 0.20001979172229767, 'timestamp': '2025-09-10 02:28:32.647663', 'step': 2169, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 144], 'flops': 4271696270016}, 'timestamp': '2025-09-10 02:28:32.700385', 'step': 2169, 'epoch': 1} {'type': 'loss', 'content': 0.15629853308200836, 'timestamp': '2025-09-10 02:28:32.707940', 'step': 2170, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 2847885091968}, 'timestamp': '2025-09-10 02:28:32.761636', 'step': 2170, 'epoch': 1} {'type': 'loss', 'content': 0.2094639241695404, 'timestamp': '2025-09-10 02:28:32.769462', 'step': 2171, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 2847885091968}, 'timestamp': '2025-09-10 02:28:32.826373', 'step': 2171, 'epoch': 1} {'type': 'loss', 'content': 0.2718113362789154, 'timestamp': '2025-09-10 02:28:32.851881', 'step': 2172, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 3322488817984}, 'timestamp': '2025-09-10 02:28:32.907953', 'step': 2172, 'epoch': 1} {'type': 'loss', 'content': 0.21296735107898712, 'timestamp': '2025-09-10 02:28:32.919077', 'step': 2173, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 2847885091968}, 'timestamp': '2025-09-10 02:28:32.963421', 'step': 2173, 'epoch': 1} {'type': 'loss', 'content': 0.19985322654247284, 'timestamp': '2025-09-10 02:28:32.971737', 'step': 2174, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 128], 'flops': 3797092544000}, 'timestamp': '2025-09-10 02:28:33.013775', 'step': 2174, 'epoch': 1} {'type': 'loss', 'content': 0.2375837117433548, 'timestamp': '2025-09-10 02:28:33.021129', 'step': 2175, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 160], 'flops': 4746299996032}, 'timestamp': '2025-09-10 02:28:33.062611', 'step': 2175, 'epoch': 1} {'type': 'loss', 'content': 0.17284898459911346, 'timestamp': '2025-09-10 02:28:33.103810', 'step': 2176, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 128], 'flops': 3797092544000}, 'timestamp': '2025-09-10 02:28:33.144438', 'step': 2176, 'epoch': 1} {'type': 'loss', 'content': 0.1389753371477127, 'timestamp': '2025-09-10 02:28:33.158034', 'step': 2177, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 3322488817984}, 'timestamp': '2025-09-10 02:28:33.205760', 'step': 2177, 'epoch': 1} {'type': 'loss', 'content': 0.11274576187133789, 'timestamp': '2025-09-10 02:28:33.215352', 'step': 2178, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 3322488817984}, 'timestamp': '2025-09-10 02:28:33.268980', 'step': 2178, 'epoch': 1} {'type': 'loss', 'content': 0.10608898103237152, 'timestamp': '2025-09-10 02:28:33.274614', 'step': 2179, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 128], 'flops': 3797092544000}, 'timestamp': '2025-09-10 02:28:33.322637', 'step': 2179, 'epoch': 1} {'type': 'loss', 'content': 0.22656595706939697, 'timestamp': '2025-09-10 02:28:33.351561', 'step': 2180, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 128], 'flops': 3797092544000}, 'timestamp': '2025-09-10 02:28:33.407944', 'step': 2180, 'epoch': 1} {'type': 'loss', 'content': 0.16387106478214264, 'timestamp': '2025-09-10 02:28:33.421224', 'step': 2181, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 3322488817984}, 'timestamp': '2025-09-10 02:28:33.479286', 'step': 2181, 'epoch': 1} {'type': 'loss', 'content': 0.2982123792171478, 'timestamp': '2025-09-10 02:28:33.485616', 'step': 2182, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 3322488817984}, 'timestamp': '2025-09-10 02:28:33.550541', 'step': 2182, 'epoch': 1} {'type': 'loss', 'content': 0.12906105816364288, 'timestamp': '2025-09-10 02:28:33.556991', 'step': 2183, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 3322488817984}, 'timestamp': '2025-09-10 02:28:33.607190', 'step': 2183, 'epoch': 1} {'type': 'loss', 'content': 0.2404470145702362, 'timestamp': '2025-09-10 02:28:33.634267', 'step': 2184, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 3322488817984}, 'timestamp': '2025-09-10 02:28:33.702451', 'step': 2184, 'epoch': 1} {'type': 'loss', 'content': 0.2684463560581207, 'timestamp': '2025-09-10 02:28:33.715913', 'step': 2185, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 2847885091968}, 'timestamp': '2025-09-10 02:28:33.785203', 'step': 2185, 'epoch': 1} {'type': 'loss', 'content': 0.20989224314689636, 'timestamp': '2025-09-10 02:28:33.795024', 'step': 2186, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 3322488817984}, 'timestamp': '2025-09-10 02:28:33.852070', 'step': 2186, 'epoch': 1} {'type': 'loss', 'content': 0.16245822608470917, 'timestamp': '2025-09-10 02:28:33.858100', 'step': 2187, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 2847885091968}, 'timestamp': '2025-09-10 02:28:33.936825', 'step': 2187, 'epoch': 1} {'type': 'loss', 'content': 0.23591797053813934, 'timestamp': '2025-09-10 02:28:33.971180', 'step': 2188, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 128], 'flops': 3797092544000}, 'timestamp': '2025-09-10 02:28:34.043196', 'step': 2188, 'epoch': 1} {'type': 'loss', 'content': 0.17766578495502472, 'timestamp': '2025-09-10 02:28:34.070871', 'step': 2189, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 3322488817984}, 'timestamp': '2025-09-10 02:28:34.130814', 'step': 2189, 'epoch': 1} {'type': 'loss', 'content': 0.15208706259727478, 'timestamp': '2025-09-10 02:28:34.137410', 'step': 2190, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 144], 'flops': 4271696270016}, 'timestamp': '2025-09-10 02:28:34.203911', 'step': 2190, 'epoch': 1} {'type': 'loss', 'content': 0.158927783370018, 'timestamp': '2025-09-10 02:28:34.223398', 'step': 2191, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 2847885091968}, 'timestamp': '2025-09-10 02:28:34.297362', 'step': 2191, 'epoch': 1} {'type': 'loss', 'content': 0.16377702355384827, 'timestamp': '2025-09-10 02:28:34.324854', 'step': 2192, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 2847885091968}, 'timestamp': '2025-09-10 02:28:34.368278', 'step': 2192, 'epoch': 1} {'type': 'loss', 'content': 0.16825014352798462, 'timestamp': '2025-09-10 02:28:34.378304', 'step': 2193, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 2847885091968}, 'timestamp': '2025-09-10 02:28:34.420292', 'step': 2193, 'epoch': 1} {'type': 'loss', 'content': 0.17882561683654785, 'timestamp': '2025-09-10 02:28:34.426482', 'step': 2194, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 2847885091968}, 'timestamp': '2025-09-10 02:28:34.473226', 'step': 2194, 'epoch': 1} {'type': 'loss', 'content': 0.20024679601192474, 'timestamp': '2025-09-10 02:28:34.479012', 'step': 2195, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 128], 'flops': 3797092544000}, 'timestamp': '2025-09-10 02:28:34.552786', 'step': 2195, 'epoch': 1} {'type': 'loss', 'content': 0.190882608294487, 'timestamp': '2025-09-10 02:28:34.580673', 'step': 2196, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 3322488817984}, 'timestamp': '2025-09-10 02:28:34.633183', 'step': 2196, 'epoch': 1} {'type': 'loss', 'content': 0.13207082450389862, 'timestamp': '2025-09-10 02:28:34.643282', 'step': 2197, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 128], 'flops': 3797092544000}, 'timestamp': '2025-09-10 02:28:34.683181', 'step': 2197, 'epoch': 1} {'type': 'loss', 'content': 0.081097811460495, 'timestamp': '2025-09-10 02:28:34.690555', 'step': 2198, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 3322488817984}, 'timestamp': '2025-09-10 02:28:34.775181', 'step': 2198, 'epoch': 1} {'type': 'loss', 'content': 0.18784460425376892, 'timestamp': '2025-09-10 02:28:34.782004', 'step': 2199, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 144], 'flops': 4271696270016}, 'timestamp': '2025-09-10 02:28:34.854820', 'step': 2199, 'epoch': 1} {'type': 'loss', 'content': 0.24343252182006836, 'timestamp': '2025-09-10 02:28:34.883684', 'step': 2200, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 2847885091968}, 'timestamp': '2025-09-10 02:28:34.939263', 'step': 2200, 'epoch': 1} {'type': 'loss', 'content': 0.11287408322095871, 'timestamp': '2025-09-10 02:28:34.954778', 'step': 2201, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 192], 'flops': 5695507448064}, 'timestamp': '2025-09-10 02:28:35.044930', 'step': 2201, 'epoch': 1} {'type': 'loss', 'content': 0.19452007114887238, 'timestamp': '2025-09-10 02:28:35.051067', 'step': 2202, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 2847885091968}, 'timestamp': '2025-09-10 02:28:35.096742', 'step': 2202, 'epoch': 1} {'type': 'loss', 'content': 0.1674569994211197, 'timestamp': '2025-09-10 02:28:35.113821', 'step': 2203, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 3322488817984}, 'timestamp': '2025-09-10 02:28:35.176302', 'step': 2203, 'epoch': 1} {'type': 'loss', 'content': 0.0898599699139595, 'timestamp': '2025-09-10 02:28:35.207457', 'step': 2204, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 3322488817984}, 'timestamp': '2025-09-10 02:28:35.255084', 'step': 2204, 'epoch': 1} {'type': 'loss', 'content': 0.11661402136087418, 'timestamp': '2025-09-10 02:28:35.263480', 'step': 2205, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 128], 'flops': 3797092544000}, 'timestamp': '2025-09-10 02:28:35.297547', 'step': 2205, 'epoch': 1} {'type': 'loss', 'content': 0.2759246528148651, 'timestamp': '2025-09-10 02:28:35.299848', 'step': 2206, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 128], 'flops': 3797092544000}, 'timestamp': '2025-09-10 02:28:35.330816', 'step': 2206, 'epoch': 1} {'type': 'loss', 'content': 0.15983107686042786, 'timestamp': '2025-09-10 02:28:35.335077', 'step': 2207, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 80], 'flops': 2373281365952}, 'timestamp': '2025-09-10 02:28:35.365473', 'step': 2207, 'epoch': 1} {'type': 'loss', 'content': 0.2520526945590973, 'timestamp': '2025-09-10 02:28:35.389246', 'step': 2208, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 2847885091968}, 'timestamp': '2025-09-10 02:28:35.421760', 'step': 2208, 'epoch': 1} {'type': 'loss', 'content': 0.12684406340122223, 'timestamp': '2025-09-10 02:28:35.425921', 'step': 2209, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 3322488817984}, 'timestamp': '2025-09-10 02:28:35.456954', 'step': 2209, 'epoch': 1} {'type': 'loss', 'content': 0.1859854906797409, 'timestamp': '2025-09-10 02:28:35.460078', 'step': 2210, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 3322488817984}, 'timestamp': '2025-09-10 02:28:35.504560', 'step': 2210, 'epoch': 1} {'type': 'loss', 'content': 0.32180461287498474, 'timestamp': '2025-09-10 02:28:35.507343', 'step': 2211, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 3322488817984}, 'timestamp': '2025-09-10 02:28:35.540666', 'step': 2211, 'epoch': 1} {'type': 'loss', 'content': 0.34884053468704224, 'timestamp': '2025-09-10 02:28:35.564655', 'step': 2212, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 128], 'flops': 3797092544000}, 'timestamp': '2025-09-10 02:28:35.596739', 'step': 2212, 'epoch': 1} {'type': 'loss', 'content': 0.08329395204782486, 'timestamp': '2025-09-10 02:28:35.599402', 'step': 2213, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 2847885091968}, 'timestamp': '2025-09-10 02:28:35.633278', 'step': 2213, 'epoch': 1} {'type': 'loss', 'content': 0.21232271194458008, 'timestamp': '2025-09-10 02:28:35.635748', 'step': 2214, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 144], 'flops': 4271696270016}, 'timestamp': '2025-09-10 02:28:35.666794', 'step': 2214, 'epoch': 1} {'type': 'loss', 'content': 0.182424396276474, 'timestamp': '2025-09-10 02:28:35.668929', 'step': 2215, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 128], 'flops': 3797092544000}, 'timestamp': '2025-09-10 02:28:35.704968', 'step': 2215, 'epoch': 1} {'type': 'loss', 'content': 0.21834400296211243, 'timestamp': '2025-09-10 02:28:35.728711', 'step': 2216, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 3322488817984}, 'timestamp': '2025-09-10 02:28:35.759726', 'step': 2216, 'epoch': 1} {'type': 'loss', 'content': 0.10305114090442657, 'timestamp': '2025-09-10 02:28:35.762553', 'step': 2217, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 128], 'flops': 3797092544000}, 'timestamp': '2025-09-10 02:28:35.793827', 'step': 2217, 'epoch': 1} {'type': 'loss', 'content': 0.2509710490703583, 'timestamp': '2025-09-10 02:28:35.797080', 'step': 2218, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 3322488817984}, 'timestamp': '2025-09-10 02:28:35.833456', 'step': 2218, 'epoch': 1} {'type': 'loss', 'content': 0.13238337635993958, 'timestamp': '2025-09-10 02:28:35.836037', 'step': 2219, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 176], 'flops': 5220903722048}, 'timestamp': '2025-09-10 02:28:35.867140', 'step': 2219, 'epoch': 1} {'type': 'loss', 'content': 0.16178229451179504, 'timestamp': '2025-09-10 02:28:35.891951', 'step': 2220, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 2847885091968}, 'timestamp': '2025-09-10 02:28:35.927172', 'step': 2220, 'epoch': 1} {'type': 'loss', 'content': 0.1762433797121048, 'timestamp': '2025-09-10 02:28:35.929883', 'step': 2221, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 128], 'flops': 3797092544000}, 'timestamp': '2025-09-10 02:28:35.961739', 'step': 2221, 'epoch': 1} {'type': 'loss', 'content': 0.12889458239078522, 'timestamp': '2025-09-10 02:28:35.964363', 'step': 2222, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 128], 'flops': 3797092544000}, 'timestamp': '2025-09-10 02:28:35.997175', 'step': 2222, 'epoch': 1} {'type': 'loss', 'content': 0.23826995491981506, 'timestamp': '2025-09-10 02:28:36.001213', 'step': 2223, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 2847885091968}, 'timestamp': '2025-09-10 02:28:36.039006', 'step': 2223, 'epoch': 1} {'type': 'loss', 'content': 0.21591730415821075, 'timestamp': '2025-09-10 02:28:36.063000', 'step': 2224, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 2847885091968}, 'timestamp': '2025-09-10 02:28:36.094378', 'step': 2224, 'epoch': 1} {'type': 'loss', 'content': 0.19113188982009888, 'timestamp': '2025-09-10 02:28:36.096639', 'step': 2225, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 128], 'flops': 3797092544000}, 'timestamp': '2025-09-10 02:28:36.131850', 'step': 2225, 'epoch': 1} {'type': 'loss', 'content': 0.21697525680065155, 'timestamp': '2025-09-10 02:28:36.137754', 'step': 2226, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 128], 'flops': 3797092544000}, 'timestamp': '2025-09-10 02:28:36.169828', 'step': 2226, 'epoch': 1} {'type': 'loss', 'content': 0.2160933017730713, 'timestamp': '2025-09-10 02:28:36.175996', 'step': 2227, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 144], 'flops': 4271696270016}, 'timestamp': '2025-09-10 02:28:36.208936', 'step': 2227, 'epoch': 1} {'type': 'loss', 'content': 0.12974846363067627, 'timestamp': '2025-09-10 02:28:36.232948', 'step': 2228, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 80], 'flops': 2373281365952}, 'timestamp': '2025-09-10 02:28:36.265394', 'step': 2228, 'epoch': 1} {'type': 'loss', 'content': 0.1772821843624115, 'timestamp': '2025-09-10 02:28:36.267901', 'step': 2229, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 3322488817984}, 'timestamp': '2025-09-10 02:28:36.305898', 'step': 2229, 'epoch': 1} {'type': 'loss', 'content': 0.2022232711315155, 'timestamp': '2025-09-10 02:28:36.308187', 'step': 2230, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 128], 'flops': 3797092544000}, 'timestamp': '2025-09-10 02:28:36.340088', 'step': 2230, 'epoch': 1} {'type': 'loss', 'content': 0.12529169023036957, 'timestamp': '2025-09-10 02:28:36.346738', 'step': 2231, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 3322488817984}, 'timestamp': '2025-09-10 02:28:36.380403', 'step': 2231, 'epoch': 1} {'type': 'loss', 'content': 0.15817013382911682, 'timestamp': '2025-09-10 02:28:36.405895', 'step': 2232, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 128], 'flops': 3797092544000}, 'timestamp': '2025-09-10 02:28:36.438662', 'step': 2232, 'epoch': 1} {'type': 'loss', 'content': 0.09020853042602539, 'timestamp': '2025-09-10 02:28:36.441286', 'step': 2233, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 3322488817984}, 'timestamp': '2025-09-10 02:28:36.473272', 'step': 2233, 'epoch': 1} {'type': 'loss', 'content': 0.18746834993362427, 'timestamp': '2025-09-10 02:28:36.476724', 'step': 2234, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 2847885091968}, 'timestamp': '2025-09-10 02:28:36.522309', 'step': 2234, 'epoch': 1} {'type': 'loss', 'content': 0.19802385568618774, 'timestamp': '2025-09-10 02:28:36.525943', 'step': 2235, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 3322488817984}, 'timestamp': '2025-09-10 02:28:36.558738', 'step': 2235, 'epoch': 1} {'type': 'loss', 'content': 0.159601628780365, 'timestamp': '2025-09-10 02:28:36.582920', 'step': 2236, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 128], 'flops': 3797092544000}, 'timestamp': '2025-09-10 02:28:36.615682', 'step': 2236, 'epoch': 1} {'type': 'loss', 'content': 0.32047170400619507, 'timestamp': '2025-09-10 02:28:36.618001', 'step': 2237, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 2847885091968}, 'timestamp': '2025-09-10 02:28:36.649024', 'step': 2237, 'epoch': 1} {'type': 'loss', 'content': 0.15212000906467438, 'timestamp': '2025-09-10 02:28:36.651602', 'step': 2238, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 128], 'flops': 3797092544000}, 'timestamp': '2025-09-10 02:28:36.682167', 'step': 2238, 'epoch': 1} {'type': 'loss', 'content': 0.1608995646238327, 'timestamp': '2025-09-10 02:28:36.684858', 'step': 2239, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 3322488817984}, 'timestamp': '2025-09-10 02:28:36.715134', 'step': 2239, 'epoch': 1} {'type': 'loss', 'content': 0.10377604514360428, 'timestamp': '2025-09-10 02:28:36.738655', 'step': 2240, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 3322488817984}, 'timestamp': '2025-09-10 02:28:36.769454', 'step': 2240, 'epoch': 1} {'type': 'loss', 'content': 0.23884934186935425, 'timestamp': '2025-09-10 02:28:36.771711', 'step': 2241, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 128], 'flops': 3797092544000}, 'timestamp': '2025-09-10 02:28:36.801521', 'step': 2241, 'epoch': 1} {'type': 'loss', 'content': 0.18301883339881897, 'timestamp': '2025-09-10 02:28:36.804045', 'step': 2242, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 80], 'flops': 2373281365952}, 'timestamp': '2025-09-10 02:28:36.834657', 'step': 2242, 'epoch': 1} {'type': 'loss', 'content': 0.23328067362308502, 'timestamp': '2025-09-10 02:28:36.837384', 'step': 2243, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 80], 'flops': 2373281365952}, 'timestamp': '2025-09-10 02:28:36.867820', 'step': 2243, 'epoch': 1} {'type': 'loss', 'content': 0.11472243070602417, 'timestamp': '2025-09-10 02:28:36.891872', 'step': 2244, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 128], 'flops': 3797092544000}, 'timestamp': '2025-09-10 02:28:36.923186', 'step': 2244, 'epoch': 1} {'type': 'loss', 'content': 0.23079487681388855, 'timestamp': '2025-09-10 02:28:36.925536', 'step': 2245, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 160], 'flops': 4746299996032}, 'timestamp': '2025-09-10 02:28:36.957025', 'step': 2245, 'epoch': 1} {'type': 'loss', 'content': 0.1599379926919937, 'timestamp': '2025-09-10 02:28:36.959861', 'step': 2246, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 144], 'flops': 4271696270016}, 'timestamp': '2025-09-10 02:28:36.990311', 'step': 2246, 'epoch': 1} {'type': 'loss', 'content': 0.23568807542324066, 'timestamp': '2025-09-10 02:28:36.993522', 'step': 2247, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 144], 'flops': 4271696270016}, 'timestamp': '2025-09-10 02:28:37.024388', 'step': 2247, 'epoch': 1} {'type': 'loss', 'content': 0.16115376353263855, 'timestamp': '2025-09-10 02:28:37.048599', 'step': 2248, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 2847885091968}, 'timestamp': '2025-09-10 02:28:37.080552', 'step': 2248, 'epoch': 1} {'type': 'loss', 'content': 0.14929679036140442, 'timestamp': '2025-09-10 02:28:37.083165', 'step': 2249, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 144], 'flops': 4271696270016}, 'timestamp': '2025-09-10 02:28:37.113736', 'step': 2249, 'epoch': 1} {'type': 'loss', 'content': 0.17776907980442047, 'timestamp': '2025-09-10 02:28:37.116148', 'step': 2250, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 160], 'flops': 4746299996032}, 'timestamp': '2025-09-10 02:28:37.148782', 'step': 2250, 'epoch': 1} {'type': 'loss', 'content': 0.1304091364145279, 'timestamp': '2025-09-10 02:28:37.151591', 'step': 2251, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 3322488817984}, 'timestamp': '2025-09-10 02:28:37.183192', 'step': 2251, 'epoch': 1} {'type': 'loss', 'content': 0.22149237990379333, 'timestamp': '2025-09-10 02:28:37.206788', 'step': 2252, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 2847885091968}, 'timestamp': '2025-09-10 02:28:37.237046', 'step': 2252, 'epoch': 1} {'type': 'loss', 'content': 0.16166771948337555, 'timestamp': '2025-09-10 02:28:37.240571', 'step': 2253, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 3322488817984}, 'timestamp': '2025-09-10 02:28:37.273029', 'step': 2253, 'epoch': 1} {'type': 'loss', 'content': 0.15543808043003082, 'timestamp': '2025-09-10 02:28:37.275882', 'step': 2254, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 176], 'flops': 5220903722048}, 'timestamp': '2025-09-10 02:28:37.307182', 'step': 2254, 'epoch': 1} {'type': 'loss', 'content': 0.1888081133365631, 'timestamp': '2025-09-10 02:28:37.311448', 'step': 2255, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 128], 'flops': 3797092544000}, 'timestamp': '2025-09-10 02:28:37.341127', 'step': 2255, 'epoch': 1} {'type': 'loss', 'content': 0.14373362064361572, 'timestamp': '2025-09-10 02:28:37.364911', 'step': 2256, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 3322488817984}, 'timestamp': '2025-09-10 02:28:37.396296', 'step': 2256, 'epoch': 1} {'type': 'loss', 'content': 0.20972725749015808, 'timestamp': '2025-09-10 02:28:37.398619', 'step': 2257, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 2847885091968}, 'timestamp': '2025-09-10 02:28:37.429390', 'step': 2257, 'epoch': 1} {'type': 'loss', 'content': 0.09957583248615265, 'timestamp': '2025-09-10 02:28:37.431975', 'step': 2258, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 128], 'flops': 3797092544000}, 'timestamp': '2025-09-10 02:28:37.463452', 'step': 2258, 'epoch': 1} {'type': 'loss', 'content': 0.2564125061035156, 'timestamp': '2025-09-10 02:28:37.465693', 'step': 2259, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 128], 'flops': 3797092544000}, 'timestamp': '2025-09-10 02:28:37.497663', 'step': 2259, 'epoch': 1} {'type': 'loss', 'content': 0.16037538647651672, 'timestamp': '2025-09-10 02:28:37.521401', 'step': 2260, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 3322488817984}, 'timestamp': '2025-09-10 02:28:37.552769', 'step': 2260, 'epoch': 1} {'type': 'loss', 'content': 0.1653321534395218, 'timestamp': '2025-09-10 02:28:37.555415', 'step': 2261, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 2847885091968}, 'timestamp': '2025-09-10 02:28:37.588362', 'step': 2261, 'epoch': 1} {'type': 'loss', 'content': 0.21672174334526062, 'timestamp': '2025-09-10 02:28:37.591398', 'step': 2262, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 128], 'flops': 3797092544000}, 'timestamp': '2025-09-10 02:28:37.622459', 'step': 2262, 'epoch': 1} {'type': 'loss', 'content': 0.157818004488945, 'timestamp': '2025-09-10 02:28:37.625061', 'step': 2263, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 2847885091968}, 'timestamp': '2025-09-10 02:28:37.655786', 'step': 2263, 'epoch': 1} {'type': 'loss', 'content': 0.13471673429012299, 'timestamp': '2025-09-10 02:28:37.680158', 'step': 2264, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 128], 'flops': 3797092544000}, 'timestamp': '2025-09-10 02:28:37.711438', 'step': 2264, 'epoch': 1} {'type': 'loss', 'content': 0.14373400807380676, 'timestamp': '2025-09-10 02:28:37.714154', 'step': 2265, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 3322488817984}, 'timestamp': '2025-09-10 02:28:37.744640', 'step': 2265, 'epoch': 1} {'type': 'loss', 'content': 0.15640702843666077, 'timestamp': '2025-09-10 02:28:37.746735', 'step': 2266, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 128], 'flops': 3797092544000}, 'timestamp': '2025-09-10 02:28:37.777032', 'step': 2266, 'epoch': 1} {'type': 'loss', 'content': 0.20302584767341614, 'timestamp': '2025-09-10 02:28:37.779305', 'step': 2267, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 3322488817984}, 'timestamp': '2025-09-10 02:28:37.809793', 'step': 2267, 'epoch': 1} {'type': 'loss', 'content': 0.12667277455329895, 'timestamp': '2025-09-10 02:28:37.833313', 'step': 2268, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 3322488817984}, 'timestamp': '2025-09-10 02:28:37.864674', 'step': 2268, 'epoch': 1} {'type': 'loss', 'content': 0.2716490924358368, 'timestamp': '2025-09-10 02:28:37.867004', 'step': 2269, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 2847885091968}, 'timestamp': '2025-09-10 02:28:37.899978', 'step': 2269, 'epoch': 1} {'type': 'loss', 'content': 0.2189035713672638, 'timestamp': '2025-09-10 02:28:37.902240', 'step': 2270, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 3322488817984}, 'timestamp': '2025-09-10 02:28:37.932220', 'step': 2270, 'epoch': 1} {'type': 'loss', 'content': 0.16362804174423218, 'timestamp': '2025-09-10 02:28:37.934742', 'step': 2271, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 3322488817984}, 'timestamp': '2025-09-10 02:28:37.965531', 'step': 2271, 'epoch': 1} {'type': 'loss', 'content': 0.1426159143447876, 'timestamp': '2025-09-10 02:28:37.989240', 'step': 2272, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 128], 'flops': 3797092544000}, 'timestamp': '2025-09-10 02:28:38.027229', 'step': 2272, 'epoch': 1} {'type': 'loss', 'content': 0.2594868838787079, 'timestamp': '2025-09-10 02:28:38.029658', 'step': 2273, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 3322488817984}, 'timestamp': '2025-09-10 02:28:38.061730', 'step': 2273, 'epoch': 1} {'type': 'loss', 'content': 0.13664013147354126, 'timestamp': '2025-09-10 02:28:38.065001', 'step': 2274, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 2847885091968}, 'timestamp': '2025-09-10 02:28:38.096024', 'step': 2274, 'epoch': 1} {'type': 'loss', 'content': 0.2201901227235794, 'timestamp': '2025-09-10 02:28:38.098448', 'step': 2275, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 2847885091968}, 'timestamp': '2025-09-10 02:28:38.130190', 'step': 2275, 'epoch': 1} {'type': 'loss', 'content': 0.20522071421146393, 'timestamp': '2025-09-10 02:28:38.153736', 'step': 2276, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 2847885091968}, 'timestamp': '2025-09-10 02:28:38.184695', 'step': 2276, 'epoch': 1} {'type': 'loss', 'content': 0.19155491888523102, 'timestamp': '2025-09-10 02:28:38.187055', 'step': 2277, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 2847885091968}, 'timestamp': '2025-09-10 02:28:38.217210', 'step': 2277, 'epoch': 1} {'type': 'loss', 'content': 0.24116840958595276, 'timestamp': '2025-09-10 02:28:38.220841', 'step': 2278, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 2847885091968}, 'timestamp': '2025-09-10 02:28:38.252925', 'step': 2278, 'epoch': 1} {'type': 'loss', 'content': 0.2457766830921173, 'timestamp': '2025-09-10 02:28:38.255721', 'step': 2279, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 128], 'flops': 3797092544000}, 'timestamp': '2025-09-10 02:28:38.286586', 'step': 2279, 'epoch': 1} {'type': 'loss', 'content': 0.19510158896446228, 'timestamp': '2025-09-10 02:28:38.309917', 'step': 2280, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 2847885091968}, 'timestamp': '2025-09-10 02:28:38.340631', 'step': 2280, 'epoch': 1} {'type': 'loss', 'content': 0.2581395208835602, 'timestamp': '2025-09-10 02:28:38.343153', 'step': 2281, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 2847885091968}, 'timestamp': '2025-09-10 02:28:38.374303', 'step': 2281, 'epoch': 1} {'type': 'loss', 'content': 0.18516093492507935, 'timestamp': '2025-09-10 02:28:38.376472', 'step': 2282, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 3322488817984}, 'timestamp': '2025-09-10 02:28:38.406401', 'step': 2282, 'epoch': 1} {'type': 'loss', 'content': 0.181737020611763, 'timestamp': '2025-09-10 02:28:38.408992', 'step': 2283, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 3322488817984}, 'timestamp': '2025-09-10 02:28:38.438903', 'step': 2283, 'epoch': 1} {'type': 'loss', 'content': 0.20057207345962524, 'timestamp': '2025-09-10 02:28:38.462480', 'step': 2284, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 2847885091968}, 'timestamp': '2025-09-10 02:28:38.496263', 'step': 2284, 'epoch': 1} {'type': 'loss', 'content': 0.11328975111246109, 'timestamp': '2025-09-10 02:28:38.504091', 'step': 2285, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 80], 'flops': 2373281365952}, 'timestamp': '2025-09-10 02:28:38.535724', 'step': 2285, 'epoch': 1} {'type': 'loss', 'content': 0.11735454201698303, 'timestamp': '2025-09-10 02:28:38.538027', 'step': 2286, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 128], 'flops': 3797092544000}, 'timestamp': '2025-09-10 02:28:38.568151', 'step': 2286, 'epoch': 1} {'type': 'loss', 'content': 0.14458923041820526, 'timestamp': '2025-09-10 02:28:38.570367', 'step': 2287, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 3322488817984}, 'timestamp': '2025-09-10 02:28:38.602025', 'step': 2287, 'epoch': 1} {'type': 'loss', 'content': 0.17131578922271729, 'timestamp': '2025-09-10 02:28:38.626862', 'step': 2288, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 80], 'flops': 2373281365952}, 'timestamp': '2025-09-10 02:28:38.657110', 'step': 2288, 'epoch': 1} {'type': 'loss', 'content': 0.2458893358707428, 'timestamp': '2025-09-10 02:28:38.659820', 'step': 2289, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 144], 'flops': 4271696270016}, 'timestamp': '2025-09-10 02:28:38.692656', 'step': 2289, 'epoch': 1} {'type': 'loss', 'content': 0.20689624547958374, 'timestamp': '2025-09-10 02:28:38.695390', 'step': 2290, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 2847885091968}, 'timestamp': '2025-09-10 02:28:38.726477', 'step': 2290, 'epoch': 1} {'type': 'loss', 'content': 0.2260909527540207, 'timestamp': '2025-09-10 02:28:38.728883', 'step': 2291, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 3322488817984}, 'timestamp': '2025-09-10 02:28:38.759664', 'step': 2291, 'epoch': 1} {'type': 'loss', 'content': 0.17267581820487976, 'timestamp': '2025-09-10 02:28:38.783724', 'step': 2292, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 2847885091968}, 'timestamp': '2025-09-10 02:28:38.815047', 'step': 2292, 'epoch': 1} {'type': 'loss', 'content': 0.19893933832645416, 'timestamp': '2025-09-10 02:28:38.817512', 'step': 2293, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 3322488817984}, 'timestamp': '2025-09-10 02:28:38.847935', 'step': 2293, 'epoch': 1} {'type': 'loss', 'content': 0.18830157816410065, 'timestamp': '2025-09-10 02:28:38.850441', 'step': 2294, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 128], 'flops': 3797092544000}, 'timestamp': '2025-09-10 02:28:38.887915', 'step': 2294, 'epoch': 1} {'type': 'loss', 'content': 0.24100685119628906, 'timestamp': '2025-09-10 02:28:38.894101', 'step': 2295, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 3322488817984}, 'timestamp': '2025-09-10 02:28:38.936527', 'step': 2295, 'epoch': 1} {'type': 'loss', 'content': 0.1413300335407257, 'timestamp': '2025-09-10 02:28:38.960349', 'step': 2296, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 3322488817984}, 'timestamp': '2025-09-10 02:28:38.991325', 'step': 2296, 'epoch': 1} {'type': 'loss', 'content': 0.20035089552402496, 'timestamp': '2025-09-10 02:28:38.993549', 'step': 2297, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 144], 'flops': 4271696270016}, 'timestamp': '2025-09-10 02:28:39.024261', 'step': 2297, 'epoch': 1} {'type': 'loss', 'content': 0.10857169330120087, 'timestamp': '2025-09-10 02:28:39.026541', 'step': 2298, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 3322488817984}, 'timestamp': '2025-09-10 02:28:39.057244', 'step': 2298, 'epoch': 1} {'type': 'loss', 'content': 0.1794838160276413, 'timestamp': '2025-09-10 02:28:39.059679', 'step': 2299, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 128], 'flops': 3797092544000}, 'timestamp': '2025-09-10 02:28:39.090039', 'step': 2299, 'epoch': 1} {'type': 'loss', 'content': 0.18201710283756256, 'timestamp': '2025-09-10 02:28:39.113605', 'step': 2300, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 2847885091968}, 'timestamp': '2025-09-10 02:28:39.146018', 'step': 2300, 'epoch': 1} {'type': 'loss', 'content': 0.15387409925460815, 'timestamp': '2025-09-10 02:28:39.148354', 'step': 2301, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 2847885091968}, 'timestamp': '2025-09-10 02:28:39.178392', 'step': 2301, 'epoch': 1} {'type': 'loss', 'content': 0.1500789374113083, 'timestamp': '2025-09-10 02:28:39.180975', 'step': 2302, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 128], 'flops': 3797092544000}, 'timestamp': '2025-09-10 02:28:39.211238', 'step': 2302, 'epoch': 1} {'type': 'loss', 'content': 0.24157707393169403, 'timestamp': '2025-09-10 02:28:39.214132', 'step': 2303, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 2847885091968}, 'timestamp': '2025-09-10 02:28:39.244609', 'step': 2303, 'epoch': 1} {'type': 'loss', 'content': 0.18798987567424774, 'timestamp': '2025-09-10 02:28:39.268135', 'step': 2304, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 128], 'flops': 3797092544000}, 'timestamp': '2025-09-10 02:28:39.300695', 'step': 2304, 'epoch': 1} {'type': 'loss', 'content': 0.17982591688632965, 'timestamp': '2025-09-10 02:28:39.303147', 'step': 2305, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 3322488817984}, 'timestamp': '2025-09-10 02:28:39.333464', 'step': 2305, 'epoch': 1} {'type': 'loss', 'content': 0.2181558609008789, 'timestamp': '2025-09-10 02:28:39.335643', 'step': 2306, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 2847885091968}, 'timestamp': '2025-09-10 02:28:39.365793', 'step': 2306, 'epoch': 1} {'type': 'loss', 'content': 0.12168923020362854, 'timestamp': '2025-09-10 02:28:39.368332', 'step': 2307, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 2847885091968}, 'timestamp': '2025-09-10 02:28:39.398872', 'step': 2307, 'epoch': 1} {'type': 'loss', 'content': 0.35137635469436646, 'timestamp': '2025-09-10 02:28:39.422680', 'step': 2308, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 144], 'flops': 4271696270016}, 'timestamp': '2025-09-10 02:28:39.453583', 'step': 2308, 'epoch': 1} {'type': 'loss', 'content': 0.22321346402168274, 'timestamp': '2025-09-10 02:28:39.456233', 'step': 2309, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 2847885091968}, 'timestamp': '2025-09-10 02:28:39.489268', 'step': 2309, 'epoch': 1} {'type': 'loss', 'content': 0.2231341004371643, 'timestamp': '2025-09-10 02:28:39.493670', 'step': 2310, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 3322488817984}, 'timestamp': '2025-09-10 02:28:39.532174', 'step': 2310, 'epoch': 1} {'type': 'loss', 'content': 0.10829925537109375, 'timestamp': '2025-09-10 02:28:39.534778', 'step': 2311, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 3322488817984}, 'timestamp': '2025-09-10 02:28:39.567639', 'step': 2311, 'epoch': 1} {'type': 'loss', 'content': 0.17225541174411774, 'timestamp': '2025-09-10 02:28:39.591731', 'step': 2312, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 128], 'flops': 3797092544000}, 'timestamp': '2025-09-10 02:28:39.624042', 'step': 2312, 'epoch': 1} {'type': 'loss', 'content': 0.14155985414981842, 'timestamp': '2025-09-10 02:28:39.626508', 'step': 2313, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 3322488817984}, 'timestamp': '2025-09-10 02:28:39.656772', 'step': 2313, 'epoch': 1} {'type': 'loss', 'content': 0.1251799762248993, 'timestamp': '2025-09-10 02:28:39.659024', 'step': 2314, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 144], 'flops': 4271696270016}, 'timestamp': '2025-09-10 02:28:39.691794', 'step': 2314, 'epoch': 1} {'type': 'loss', 'content': 0.1201777458190918, 'timestamp': '2025-09-10 02:28:39.694213', 'step': 2315, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 160], 'flops': 4746299996032}, 'timestamp': '2025-09-10 02:28:39.724311', 'step': 2315, 'epoch': 1} {'type': 'loss', 'content': 0.11138292402029037, 'timestamp': '2025-09-10 02:28:39.747851', 'step': 2316, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 144], 'flops': 4271696270016}, 'timestamp': '2025-09-10 02:28:39.780048', 'step': 2316, 'epoch': 1} {'type': 'loss', 'content': 0.12237819284200668, 'timestamp': '2025-09-10 02:28:39.782311', 'step': 2317, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 3322488817984}, 'timestamp': '2025-09-10 02:28:39.813430', 'step': 2317, 'epoch': 1} {'type': 'loss', 'content': 0.1089932918548584, 'timestamp': '2025-09-10 02:28:39.815640', 'step': 2318, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 2847885091968}, 'timestamp': '2025-09-10 02:28:39.846016', 'step': 2318, 'epoch': 1} {'type': 'loss', 'content': 0.19184952974319458, 'timestamp': '2025-09-10 02:28:39.848296', 'step': 2319, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 192], 'flops': 5695507448064}, 'timestamp': '2025-09-10 02:28:39.879620', 'step': 2319, 'epoch': 1} {'type': 'loss', 'content': 0.13583266735076904, 'timestamp': '2025-09-10 02:28:39.905028', 'step': 2320, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 2847885091968}, 'timestamp': '2025-09-10 02:28:39.935934', 'step': 2320, 'epoch': 1} {'type': 'loss', 'content': 0.255307137966156, 'timestamp': '2025-09-10 02:28:39.938336', 'step': 2321, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 3322488817984}, 'timestamp': '2025-09-10 02:28:39.968659', 'step': 2321, 'epoch': 1} {'type': 'loss', 'content': 0.19847290217876434, 'timestamp': '2025-09-10 02:28:39.973654', 'step': 2322, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 3322488817984}, 'timestamp': '2025-09-10 02:28:40.005293', 'step': 2322, 'epoch': 1} {'type': 'loss', 'content': 0.16560791432857513, 'timestamp': '2025-09-10 02:28:40.007655', 'step': 2323, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 2847885091968}, 'timestamp': '2025-09-10 02:28:40.037497', 'step': 2323, 'epoch': 1} {'type': 'loss', 'content': 0.21692964434623718, 'timestamp': '2025-09-10 02:28:40.061180', 'step': 2324, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 144], 'flops': 4271696270016}, 'timestamp': '2025-09-10 02:28:40.094075', 'step': 2324, 'epoch': 1} {'type': 'loss', 'content': 0.23841311037540436, 'timestamp': '2025-09-10 02:28:40.095970', 'step': 2325, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 2847885091968}, 'timestamp': '2025-09-10 02:28:40.126384', 'step': 2325, 'epoch': 1} {'type': 'loss', 'content': 0.12302140891551971, 'timestamp': '2025-09-10 02:28:40.128646', 'step': 2326, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 3322488817984}, 'timestamp': '2025-09-10 02:28:40.161960', 'step': 2326, 'epoch': 1} {'type': 'loss', 'content': 0.18264660239219666, 'timestamp': '2025-09-10 02:28:40.164409', 'step': 2327, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 144], 'flops': 4271696270016}, 'timestamp': '2025-09-10 02:28:40.196939', 'step': 2327, 'epoch': 1} {'type': 'loss', 'content': 0.12133675068616867, 'timestamp': '2025-09-10 02:28:40.220886', 'step': 2328, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 3322488817984}, 'timestamp': '2025-09-10 02:28:40.251219', 'step': 2328, 'epoch': 1} {'type': 'loss', 'content': 0.14228612184524536, 'timestamp': '2025-09-10 02:28:40.253629', 'step': 2329, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 2847885091968}, 'timestamp': '2025-09-10 02:28:40.285740', 'step': 2329, 'epoch': 1} {'type': 'loss', 'content': 0.14101631939411163, 'timestamp': '2025-09-10 02:28:40.288314', 'step': 2330, 'epoch': 1} {'type': 'flops', 'content': [{'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1582003754624}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1898404492032}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1898404492032}, {'type': 'perplexity', 'in_batch_dim': [8, 144], 'batch_size': 8, 'flops': 2847606704256}, {'type': 'perplexity', 'in_batch_dim': [8, 64], 'batch_size': 8, 'flops': 1265603017216}, {'type': 'perplexity', 'in_batch_dim': [8, 112], 'batch_size': 8, 'flops': 2214805229440}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1898404492032}, {'type': 'perplexity', 'in_batch_dim': [8, 112], 'batch_size': 8, 'flops': 2214805229440}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1898404492032}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1898404492032}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1898404492032}, {'type': 'perplexity', 'in_batch_dim': [8, 112], 'batch_size': 8, 'flops': 2214805229440}, {'type': 'perplexity', 'in_batch_dim': [8, 112], 'batch_size': 8, 'flops': 2214805229440}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1582003754624}, {'type': 'perplexity', 'in_batch_dim': [8, 144], 'batch_size': 8, 'flops': 2847606704256}, {'type': 'perplexity', 'in_batch_dim': [8, 112], 'batch_size': 8, 'flops': 2214805229440}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1582003754624}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1898404492032}, {'type': 'perplexity', 'in_batch_dim': [8, 64], 'batch_size': 8, 'flops': 1265603017216}, {'type': 'perplexity', 'in_batch_dim': [8, 64], 'batch_size': 8, 'flops': 1265603017216}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1898404492032}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1582003754624}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1582003754624}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1582003754624}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1582003754624}, {'type': 'perplexity', 'in_batch_dim': [8, 64], 'batch_size': 8, 'flops': 1265603017216}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1898404492032}, {'type': 'perplexity', 'in_batch_dim': [8, 64], 'batch_size': 8, 'flops': 1265603017216}, {'type': 'perplexity', 'in_batch_dim': [8, 64], 'batch_size': 8, 'flops': 1265603017216}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1582003754624}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1582003754624}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1582003754624}, {'type': 'perplexity', 'in_batch_dim': [8, 112], 'batch_size': 8, 'flops': 2214805229440}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1582003754624}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1582003754624}, {'type': 'perplexity', 'in_batch_dim': [8, 160], 'batch_size': 8, 'flops': 3164007441664}, {'type': 'perplexity', 'in_batch_dim': [8, 64], 'batch_size': 8, 'flops': 1265603017216}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1898404492032}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1898404492032}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1898404492032}, {'type': 'perplexity', 'in_batch_dim': [8, 64], 'batch_size': 8, 'flops': 1265603017216}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1582003754624}, {'type': 'perplexity', 'in_batch_dim': [8, 64], 'batch_size': 8, 'flops': 1265603017216}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1582003754624}, {'type': 'perplexity', 'in_batch_dim': [8, 64], 'batch_size': 8, 'flops': 1265603017216}, {'type': 'perplexity', 'in_batch_dim': [8, 112], 'batch_size': 8, 'flops': 2214805229440}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1582003754624}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1898404492032}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1898404492032}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1898404492032}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1898404492032}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1898404492032}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1898404492032}, {'type': 'perplexity', 'in_batch_dim': [8, 64], 'batch_size': 8, 'flops': 1265603017216}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1898404492032}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1582003754624}, {'type': 'perplexity', 'in_batch_dim': [8, 64], 'batch_size': 8, 'flops': 1265603017216}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1898404492032}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1898404492032}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1898404492032}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1898404492032}, {'type': 'perplexity', 'in_batch_dim': [8, 128], 'batch_size': 8, 'flops': 2531205966848}, {'type': 'perplexity', 'in_batch_dim': [8, 112], 'batch_size': 8, 'flops': 2214805229440}, {'type': 'perplexity', 'in_batch_dim': [8, 64], 'batch_size': 8, 'flops': 1265603017216}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1582003754624}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1898404492032}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1582003754624}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1898404492032}, {'type': 'perplexity', 'in_batch_dim': [8, 64], 'batch_size': 8, 'flops': 1265603017216}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1898404492032}, {'type': 'perplexity', 'in_batch_dim': [8, 112], 'batch_size': 8, 'flops': 2214805229440}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1898404492032}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1582003754624}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1582003754624}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1582003754624}, {'type': 'perplexity', 'in_batch_dim': [8, 64], 'batch_size': 8, 'flops': 1265603017216}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1582003754624}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1898404492032}, {'type': 'perplexity', 'in_batch_dim': [8, 64], 'batch_size': 8, 'flops': 1265603017216}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1582003754624}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1898404492032}, {'type': 'perplexity', 'in_batch_dim': [8, 64], 'batch_size': 8, 'flops': 1265603017216}, {'type': 'perplexity', 'in_batch_dim': [8, 112], 'batch_size': 8, 'flops': 2214805229440}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1582003754624}, {'type': 'perplexity', 'in_batch_dim': [8, 144], 'batch_size': 8, 'flops': 2847606704256}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1582003754624}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1582003754624}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1582003754624}, {'type': 'perplexity', 'in_batch_dim': [8, 64], 'batch_size': 8, 'flops': 1265603017216}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1898404492032}, {'type': 'perplexity', 'in_batch_dim': [8, 112], 'batch_size': 8, 'flops': 2214805229440}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1898404492032}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1582003754624}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1582003754624}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1582003754624}, {'type': 'perplexity', 'in_batch_dim': [8, 112], 'batch_size': 8, 'flops': 2214805229440}, {'type': 'perplexity', 'in_batch_dim': [8, 64], 'batch_size': 8, 'flops': 1265603017216}, {'type': 'perplexity', 'in_batch_dim': [8, 112], 'batch_size': 8, 'flops': 2214805229440}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1582003754624}, {'type': 'perplexity', 'in_batch_dim': [8, 64], 'batch_size': 8, 'flops': 1265603017216}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1582003754624}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1582003754624}, {'type': 'perplexity', 'in_batch_dim': [8, 64], 'batch_size': 8, 'flops': 1265603017216}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1582003754624}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1582003754624}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1582003754624}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1898404492032}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1582003754624}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1582003754624}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1898404492032}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1898404492032}, {'type': 'perplexity', 'in_batch_dim': [8, 112], 'batch_size': 8, 'flops': 2214805229440}, {'type': 'perplexity', 'in_batch_dim': [8, 64], 'batch_size': 8, 'flops': 1265603017216}, {'type': 'perplexity', 'in_batch_dim': [8, 112], 'batch_size': 8, 'flops': 2214805229440}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1898404492032}, {'type': 'perplexity', 'in_batch_dim': [8, 112], 'batch_size': 8, 'flops': 2214805229440}, {'type': 'perplexity', 'in_batch_dim': [8, 128], 'batch_size': 8, 'flops': 2531205966848}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1582003754624}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1898404492032}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1898404492032}, {'type': 'perplexity', 'in_batch_dim': [8, 192], 'batch_size': 8, 'flops': 3796808916480}, {'type': 'perplexity', 'in_batch_dim': [8, 64], 'batch_size': 8, 'flops': 1265603017216}, {'type': 'perplexity', 'in_batch_dim': [8, 144], 'batch_size': 8, 'flops': 2847606704256}, {'type': 'perplexity', 'in_batch_dim': [8, 64], 'batch_size': 8, 'flops': 1265603017216}, {'type': 'perplexity', 'in_batch_dim': [8, 144], 'batch_size': 8, 'flops': 2847606704256}, {'type': 'perplexity', 'in_batch_dim': [8, 64], 'batch_size': 8, 'flops': 1265603017216}, {'type': 'perplexity', 'in_batch_dim': [8, 64], 'batch_size': 8, 'flops': 1265603017216}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1898404492032}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1898404492032}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1582003754624}, {'type': 'perplexity', 'in_batch_dim': [8, 128], 'batch_size': 8, 'flops': 2531205966848}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1898404492032}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1898404492032}, {'type': 'perplexity', 'in_batch_dim': [8, 112], 'batch_size': 8, 'flops': 2214805229440}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1582003754624}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1582003754624}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1898404492032}, {'type': 'perplexity', 'in_batch_dim': [8, 64], 'batch_size': 8, 'flops': 1265603017216}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1898404492032}, {'type': 'perplexity', 'in_batch_dim': [8, 112], 'batch_size': 8, 'flops': 2214805229440}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1898404492032}, {'type': 'perplexity', 'in_batch_dim': [8, 64], 'batch_size': 8, 'flops': 1265603017216}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1582003754624}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1898404492032}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1898404492032}, {'type': 'perplexity', 'in_batch_dim': [8, 64], 'batch_size': 8, 'flops': 1265603017216}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1898404492032}, {'type': 'perplexity', 'in_batch_dim': [8, 64], 'batch_size': 8, 'flops': 1265603017216}, {'type': 'perplexity', 'in_batch_dim': [8, 112], 'batch_size': 8, 'flops': 2214805229440}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1582003754624}, {'type': 'perplexity', 'in_batch_dim': [8, 128], 'batch_size': 8, 'flops': 2531205966848}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1582003754624}, {'type': 'perplexity', 'in_batch_dim': [8, 112], 'batch_size': 8, 'flops': 2214805229440}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1898404492032}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1582003754624}, {'type': 'perplexity', 'in_batch_dim': [8, 112], 'batch_size': 8, 'flops': 2214805229440}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1582003754624}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1898404492032}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1898404492032}, {'type': 'perplexity', 'in_batch_dim': [8, 128], 'batch_size': 8, 'flops': 2531205966848}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1898404492032}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1898404492032}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1582003754624}, {'type': 'perplexity', 'in_batch_dim': [8, 112], 'batch_size': 8, 'flops': 2214805229440}, {'type': 'perplexity', 'in_batch_dim': [8, 128], 'batch_size': 8, 'flops': 2531205966848}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1898404492032}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1582003754624}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1582003754624}, {'type': 'perplexity', 'in_batch_dim': [8, 128], 'batch_size': 8, 'flops': 2531205966848}, {'type': 'perplexity', 'in_batch_dim': [8, 112], 'batch_size': 8, 'flops': 2214805229440}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1582003754624}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1582003754624}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1898404492032}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1582003754624}, {'type': 'perplexity', 'in_batch_dim': [8, 64], 'batch_size': 8, 'flops': 1265603017216}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1582003754624}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1582003754624}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1582003754624}, {'type': 'perplexity', 'in_batch_dim': [8, 112], 'batch_size': 8, 'flops': 2214805229440}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1582003754624}, {'type': 'perplexity', 'in_batch_dim': [8, 64], 'batch_size': 8, 'flops': 1265603017216}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1582003754624}, {'type': 'perplexity', 'in_batch_dim': [8, 128], 'batch_size': 8, 'flops': 2531205966848}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1898404492032}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1898404492032}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1582003754624}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1898404492032}, {'type': 'perplexity', 'in_batch_dim': [8, 64], 'batch_size': 8, 'flops': 1265603017216}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1898404492032}, {'type': 'perplexity', 'in_batch_dim': [8, 112], 'batch_size': 8, 'flops': 2214805229440}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1898404492032}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1582003754624}, {'type': 'perplexity', 'in_batch_dim': [8, 112], 'batch_size': 8, 'flops': 2214805229440}, {'type': 'perplexity', 'in_batch_dim': [8, 112], 'batch_size': 8, 'flops': 2214805229440}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1898404492032}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1898404492032}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1898404492032}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1898404492032}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1582003754624}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1582003754624}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1582003754624}, {'type': 'perplexity', 'in_batch_dim': [8, 112], 'batch_size': 8, 'flops': 2214805229440}, {'type': 'perplexity', 'in_batch_dim': [8, 64], 'batch_size': 8, 'flops': 1265603017216}, {'type': 'perplexity', 'in_batch_dim': [8, 144], 'batch_size': 8, 'flops': 2847606704256}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1898404492032}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1898404492032}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1898404492032}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1898404492032}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1898404492032}, {'type': 'perplexity', 'in_batch_dim': [8, 112], 'batch_size': 8, 'flops': 2214805229440}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1582003754624}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1582003754624}, {'type': 'perplexity', 'in_batch_dim': [8, 64], 'batch_size': 8, 'flops': 1265603017216}, {'type': 'perplexity', 'in_batch_dim': [8, 112], 'batch_size': 8, 'flops': 2214805229440}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1582003754624}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1898404492032}, {'type': 'perplexity', 'in_batch_dim': [8, 64], 'batch_size': 8, 'flops': 1265603017216}, {'type': 'perplexity', 'in_batch_dim': [8, 112], 'batch_size': 8, 'flops': 2214805229440}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1898404492032}, {'type': 'perplexity', 'in_batch_dim': [8, 64], 'batch_size': 8, 'flops': 1265603017216}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1582003754624}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1582003754624}, {'type': 'perplexity', 'in_batch_dim': [8, 64], 'batch_size': 8, 'flops': 1265603017216}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1898404492032}, {'type': 'perplexity', 'in_batch_dim': [8, 64], 'batch_size': 8, 'flops': 1265603017216}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1582003754624}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1898404492032}, {'type': 'perplexity', 'in_batch_dim': [8, 112], 'batch_size': 8, 'flops': 2214805229440}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1582003754624}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1898404492032}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1582003754624}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1898404492032}, {'type': 'perplexity', 'in_batch_dim': [8, 112], 'batch_size': 8, 'flops': 2214805229440}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1898404492032}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1898404492032}, {'type': 'perplexity', 'in_batch_dim': [8, 64], 'batch_size': 8, 'flops': 1265603017216}, {'type': 'perplexity', 'in_batch_dim': [8, 64], 'batch_size': 8, 'flops': 1265603017216}, {'type': 'perplexity', 'in_batch_dim': [8, 128], 'batch_size': 8, 'flops': 2531205966848}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1582003754624}, {'type': 'perplexity', 'in_batch_dim': [8, 64], 'batch_size': 8, 'flops': 1265603017216}, {'type': 'perplexity', 'in_batch_dim': [8, 112], 'batch_size': 8, 'flops': 2214805229440}, {'type': 'perplexity', 'in_batch_dim': [8, 112], 'batch_size': 8, 'flops': 2214805229440}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1898404492032}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1582003754624}, {'type': 'perplexity', 'in_batch_dim': [8, 128], 'batch_size': 8, 'flops': 2531205966848}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1898404492032}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1582003754624}, {'type': 'perplexity', 'in_batch_dim': [8, 112], 'batch_size': 8, 'flops': 2214805229440}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1898404492032}, {'type': 'perplexity', 'in_batch_dim': [8, 64], 'batch_size': 8, 'flops': 1265603017216}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1582003754624}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1898404492032}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1898404492032}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1582003754624}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1582003754624}, {'type': 'perplexity', 'in_batch_dim': [8, 64], 'batch_size': 8, 'flops': 1265603017216}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1898404492032}, {'type': 'perplexity', 'in_batch_dim': [8, 112], 'batch_size': 8, 'flops': 2214805229440}, {'type': 'perplexity', 'in_batch_dim': [8, 64], 'batch_size': 8, 'flops': 1265603017216}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1898404492032}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1582003754624}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1582003754624}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1898404492032}, {'type': 'perplexity', 'in_batch_dim': [8, 112], 'batch_size': 8, 'flops': 2214805229440}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1898404492032}, {'type': 'perplexity', 'in_batch_dim': [8, 112], 'batch_size': 8, 'flops': 2214805229440}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1582003754624}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1582003754624}, {'type': 'perplexity', 'in_batch_dim': [8, 64], 'batch_size': 8, 'flops': 1265603017216}, {'type': 'perplexity', 'in_batch_dim': [8, 112], 'batch_size': 8, 'flops': 2214805229440}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1582003754624}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1898404492032}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1582003754624}, {'type': 'perplexity', 'in_batch_dim': [8, 112], 'batch_size': 8, 'flops': 2214805229440}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1898404492032}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1898404492032}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1582003754624}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1898404492032}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1582003754624}, {'type': 'perplexity', 'in_batch_dim': [8, 112], 'batch_size': 8, 'flops': 2214805229440}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1582003754624}, {'type': 'perplexity', 'in_batch_dim': [8, 64], 'batch_size': 8, 'flops': 1265603017216}, {'type': 'perplexity', 'in_batch_dim': [8, 64], 'batch_size': 8, 'flops': 1265603017216}, {'type': 'perplexity', 'in_batch_dim': [8, 48], 'batch_size': 8, 'flops': 949202279808}, {'type': 'perplexity', 'in_batch_dim': [8, 112], 'batch_size': 8, 'flops': 2214805229440}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1582003754624}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1898404492032}, {'type': 'perplexity', 'in_batch_dim': [8, 128], 'batch_size': 8, 'flops': 2531205966848}, {'type': 'perplexity', 'in_batch_dim': [8, 112], 'batch_size': 8, 'flops': 2214805229440}, {'type': 'perplexity', 'in_batch_dim': [8, 128], 'batch_size': 8, 'flops': 2531205966848}, {'type': 'perplexity', 'in_batch_dim': [8, 112], 'batch_size': 8, 'flops': 2214805229440}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1582003754624}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1898404492032}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1582003754624}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1582003754624}, {'type': 'perplexity', 'in_batch_dim': [8, 112], 'batch_size': 8, 'flops': 2214805229440}, {'type': 'perplexity', 'in_batch_dim': [8, 112], 'batch_size': 8, 'flops': 2214805229440}, {'type': 'perplexity', 'in_batch_dim': [8, 64], 'batch_size': 8, 'flops': 1265603017216}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1898404492032}, {'type': 'perplexity', 'in_batch_dim': [8, 112], 'batch_size': 8, 'flops': 2214805229440}, {'type': 'perplexity', 'in_batch_dim': [8, 64], 'batch_size': 8, 'flops': 1265603017216}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1898404492032}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1582003754624}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1898404492032}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1898404492032}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1898404492032}, {'type': 'perplexity', 'in_batch_dim': [8, 48], 'batch_size': 8, 'flops': 949202279808}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1898404492032}, {'type': 'perplexity', 'in_batch_dim': [8, 112], 'batch_size': 8, 'flops': 2214805229440}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1582003754624}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1898404492032}, {'type': 'perplexity', 'in_batch_dim': [8, 64], 'batch_size': 8, 'flops': 1265603017216}, {'type': 'perplexity', 'in_batch_dim': [8, 112], 'batch_size': 8, 'flops': 2214805229440}, {'type': 'perplexity', 'in_batch_dim': [8, 64], 'batch_size': 8, 'flops': 1265603017216}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1582003754624}, {'type': 'perplexity', 'in_batch_dim': [8, 64], 'batch_size': 8, 'flops': 1265603017216}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1582003754624}, {'type': 'perplexity', 'in_batch_dim': [8, 112], 'batch_size': 8, 'flops': 2214805229440}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1582003754624}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1582003754624}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1582003754624}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1582003754624}, {'type': 'perplexity', 'in_batch_dim': [8, 112], 'batch_size': 8, 'flops': 2214805229440}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1582003754624}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1898404492032}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1898404492032}, {'type': 'perplexity', 'in_batch_dim': [8, 112], 'batch_size': 8, 'flops': 2214805229440}, {'type': 'perplexity', 'in_batch_dim': [8, 144], 'batch_size': 8, 'flops': 2847606704256}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1582003754624}, {'type': 'perplexity', 'in_batch_dim': [8, 64], 'batch_size': 8, 'flops': 1265603017216}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1898404492032}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1898404492032}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1582003754624}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1898404492032}, {'type': 'perplexity', 'in_batch_dim': [8, 128], 'batch_size': 8, 'flops': 2531205966848}, {'type': 'perplexity', 'in_batch_dim': [8, 112], 'batch_size': 8, 'flops': 2214805229440}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1898404492032}, {'type': 'perplexity', 'in_batch_dim': [8, 64], 'batch_size': 8, 'flops': 1265603017216}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1898404492032}, {'type': 'perplexity', 'in_batch_dim': [8, 160], 'batch_size': 8, 'flops': 3164007441664}, {'type': 'perplexity', 'in_batch_dim': [8, 64], 'batch_size': 8, 'flops': 1265603017216}, {'type': 'perplexity', 'in_batch_dim': [8, 112], 'batch_size': 8, 'flops': 2214805229440}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1898404492032}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1898404492032}, {'type': 'perplexity', 'in_batch_dim': [8, 144], 'batch_size': 8, 'flops': 2847606704256}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1582003754624}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1898404492032}, {'type': 'perplexity', 'in_batch_dim': [8, 64], 'batch_size': 8, 'flops': 1265603017216}, {'type': 'perplexity', 'in_batch_dim': [8, 64], 'batch_size': 8, 'flops': 1265603017216}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1898404492032}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1898404492032}, {'type': 'perplexity', 'in_batch_dim': [8, 112], 'batch_size': 8, 'flops': 2214805229440}, {'type': 'perplexity', 'in_batch_dim': [8, 112], 'batch_size': 8, 'flops': 2214805229440}, {'type': 'perplexity', 'in_batch_dim': [8, 128], 'batch_size': 8, 'flops': 2531205966848}, {'type': 'perplexity', 'in_batch_dim': [8, 112], 'batch_size': 8, 'flops': 2214805229440}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1582003754624}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1582003754624}, {'type': 'perplexity', 'in_batch_dim': [8, 112], 'batch_size': 8, 'flops': 2214805229440}, {'type': 'perplexity', 'in_batch_dim': [8, 128], 'batch_size': 8, 'flops': 2531205966848}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1582003754624}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1582003754624}, {'type': 'perplexity', 'in_batch_dim': [8, 64], 'batch_size': 8, 'flops': 1265603017216}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1898404492032}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1582003754624}, {'type': 'perplexity', 'in_batch_dim': [8, 112], 'batch_size': 8, 'flops': 2214805229440}, {'type': 'perplexity', 'in_batch_dim': [8, 64], 'batch_size': 8, 'flops': 1265603017216}, {'type': 'perplexity', 'in_batch_dim': [8, 64], 'batch_size': 8, 'flops': 1265603017216}, {'type': 'perplexity', 'in_batch_dim': [8, 128], 'batch_size': 8, 'flops': 2531205966848}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1898404492032}, {'type': 'perplexity', 'in_batch_dim': [8, 112], 'batch_size': 8, 'flops': 2214805229440}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1582003754624}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1582003754624}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1582003754624}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1582003754624}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1582003754624}, {'type': 'perplexity', 'in_batch_dim': [8, 112], 'batch_size': 8, 'flops': 2214805229440}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1898404492032}, {'type': 'perplexity', 'in_batch_dim': [8, 112], 'batch_size': 8, 'flops': 2214805229440}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1898404492032}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1582003754624}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1582003754624}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1898404492032}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1898404492032}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1582003754624}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1582003754624}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1582003754624}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1582003754624}, {'type': 'perplexity', 'in_batch_dim': [8, 48], 'batch_size': 8, 'flops': 949202279808}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1582003754624}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1898404492032}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1582003754624}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1898404492032}, {'type': 'perplexity', 'in_batch_dim': [8, 112], 'batch_size': 8, 'flops': 2214805229440}, {'type': 'perplexity', 'in_batch_dim': [8, 128], 'batch_size': 8, 'flops': 2531205966848}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1898404492032}, {'type': 'perplexity', 'in_batch_dim': [8, 64], 'batch_size': 8, 'flops': 1265603017216}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1898404492032}, {'type': 'perplexity', 'in_batch_dim': [8, 64], 'batch_size': 8, 'flops': 1265603017216}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1582003754624}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1582003754624}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1582003754624}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1582003754624}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1898404492032}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1582003754624}, {'type': 'perplexity', 'in_batch_dim': [8, 112], 'batch_size': 8, 'flops': 2214805229440}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1898404492032}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1582003754624}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1898404492032}, {'type': 'perplexity', 'in_batch_dim': [8, 64], 'batch_size': 8, 'flops': 1265603017216}, {'type': 'perplexity', 'in_batch_dim': [8, 64], 'batch_size': 8, 'flops': 1265603017216}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1898404492032}, {'type': 'perplexity', 'in_batch_dim': [8, 64], 'batch_size': 8, 'flops': 1265603017216}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1582003754624}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1582003754624}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1582003754624}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1582003754624}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1582003754624}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1582003754624}, {'type': 'perplexity', 'in_batch_dim': [8, 64], 'batch_size': 8, 'flops': 1265603017216}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1582003754624}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1582003754624}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1898404492032}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1898404492032}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1898404492032}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1898404492032}, {'type': 'perplexity', 'in_batch_dim': [8, 128], 'batch_size': 8, 'flops': 2531205966848}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1582003754624}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1898404492032}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1898404492032}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1898404492032}, {'type': 'perplexity', 'in_batch_dim': [8, 64], 'batch_size': 8, 'flops': 1265603017216}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1898404492032}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1898404492032}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1582003754624}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1898404492032}, {'type': 'perplexity', 'in_batch_dim': [8, 112], 'batch_size': 8, 'flops': 2214805229440}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1898404492032}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1582003754624}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1898404492032}, {'type': 'perplexity', 'in_batch_dim': [8, 64], 'batch_size': 8, 'flops': 1265603017216}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1898404492032}, {'type': 'perplexity', 'in_batch_dim': [8, 112], 'batch_size': 8, 'flops': 2214805229440}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1898404492032}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1898404492032}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1582003754624}, {'type': 'perplexity', 'in_batch_dim': [8, 64], 'batch_size': 8, 'flops': 1265603017216}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1582003754624}, {'type': 'perplexity', 'in_batch_dim': [8, 128], 'batch_size': 8, 'flops': 2531205966848}, {'type': 'perplexity', 'in_batch_dim': [8, 64], 'batch_size': 8, 'flops': 1265603017216}, {'type': 'perplexity', 'in_batch_dim': [8, 128], 'batch_size': 8, 'flops': 2531205966848}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1898404492032}, {'type': 'perplexity', 'in_batch_dim': [8, 112], 'batch_size': 8, 'flops': 2214805229440}, {'type': 'perplexity', 'in_batch_dim': [8, 112], 'batch_size': 8, 'flops': 2214805229440}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1898404492032}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1898404492032}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1582003754624}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1582003754624}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1582003754624}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1898404492032}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1582003754624}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1582003754624}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1898404492032}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1582003754624}, {'type': 'perplexity', 'in_batch_dim': [8, 64], 'batch_size': 8, 'flops': 1265603017216}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1898404492032}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1898404492032}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1898404492032}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1898404492032}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1582003754624}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1898404492032}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1582003754624}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1582003754624}, {'type': 'perplexity', 'in_batch_dim': [8, 112], 'batch_size': 8, 'flops': 2214805229440}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1582003754624}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1898404492032}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1582003754624}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1582003754624}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1582003754624}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1898404492032}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1582003754624}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1898404492032}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1582003754624}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1582003754624}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1898404492032}, {'type': 'perplexity', 'in_batch_dim': [8, 112], 'batch_size': 8, 'flops': 2214805229440}, {'type': 'perplexity', 'in_batch_dim': [8, 128], 'batch_size': 8, 'flops': 2531205966848}, {'type': 'perplexity', 'in_batch_dim': [8, 112], 'batch_size': 8, 'flops': 2214805229440}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1582003754624}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1898404492032}, {'type': 'perplexity', 'in_batch_dim': [8, 64], 'batch_size': 8, 'flops': 1265603017216}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1898404492032}, {'type': 'perplexity', 'in_batch_dim': [8, 64], 'batch_size': 8, 'flops': 1265603017216}, {'type': 'perplexity', 'in_batch_dim': [8, 144], 'batch_size': 8, 'flops': 2847606704256}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1582003754624}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1898404492032}, {'type': 'perplexity', 'in_batch_dim': [8, 112], 'batch_size': 8, 'flops': 2214805229440}, {'type': 'perplexity', 'in_batch_dim': [8, 144], 'batch_size': 8, 'flops': 2847606704256}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1582003754624}, {'type': 'perplexity', 'in_batch_dim': [8, 112], 'batch_size': 8, 'flops': 2214805229440}, {'type': 'perplexity', 'in_batch_dim': [8, 64], 'batch_size': 8, 'flops': 1265603017216}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1582003754624}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1898404492032}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1898404492032}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1582003754624}, {'type': 'perplexity', 'in_batch_dim': [8, 112], 'batch_size': 8, 'flops': 2214805229440}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1582003754624}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1582003754624}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1582003754624}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1582003754624}, {'type': 'perplexity', 'in_batch_dim': [8, 112], 'batch_size': 8, 'flops': 2214805229440}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1898404492032}, {'type': 'perplexity', 'in_batch_dim': [8, 128], 'batch_size': 8, 'flops': 2531205966848}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1582003754624}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1582003754624}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1898404492032}, {'type': 'perplexity', 'in_batch_dim': [8, 64], 'batch_size': 8, 'flops': 1265603017216}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1898404492032}, {'type': 'perplexity', 'in_batch_dim': [8, 64], 'batch_size': 8, 'flops': 1265603017216}, {'type': 'perplexity', 'in_batch_dim': [8, 112], 'batch_size': 8, 'flops': 2214805229440}, {'type': 'perplexity', 'in_batch_dim': [8, 112], 'batch_size': 8, 'flops': 2214805229440}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1582003754624}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1898404492032}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1582003754624}, {'type': 'perplexity', 'in_batch_dim': [8, 64], 'batch_size': 8, 'flops': 1265603017216}, {'type': 'perplexity', 'in_batch_dim': [8, 64], 'batch_size': 8, 'flops': 1265603017216}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1898404492032}, {'type': 'perplexity', 'in_batch_dim': [8, 64], 'batch_size': 8, 'flops': 1265603017216}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1898404492032}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1898404492032}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1582003754624}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1898404492032}, {'type': 'perplexity', 'in_batch_dim': [8, 144], 'batch_size': 8, 'flops': 2847606704256}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1898404492032}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1582003754624}, {'type': 'perplexity', 'in_batch_dim': [8, 128], 'batch_size': 8, 'flops': 2531205966848}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1898404492032}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1898404492032}, {'type': 'perplexity', 'in_batch_dim': [8, 112], 'batch_size': 8, 'flops': 2214805229440}, {'type': 'perplexity', 'in_batch_dim': [8, 112], 'batch_size': 8, 'flops': 2214805229440}, {'type': 'perplexity', 'in_batch_dim': [8, 64], 'batch_size': 8, 'flops': 1265603017216}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1582003754624}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1898404492032}, {'type': 'perplexity', 'in_batch_dim': [8, 128], 'batch_size': 8, 'flops': 2531205966848}, {'type': 'perplexity', 'in_batch_dim': [8, 144], 'batch_size': 8, 'flops': 2847606704256}, {'type': 'perplexity', 'in_batch_dim': [8, 112], 'batch_size': 8, 'flops': 2214805229440}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1582003754624}, {'type': 'perplexity', 'in_batch_dim': [8, 64], 'batch_size': 8, 'flops': 1265603017216}, {'type': 'perplexity', 'in_batch_dim': [8, 64], 'batch_size': 8, 'flops': 1265603017216}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1898404492032}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1582003754624}, {'type': 'perplexity', 'in_batch_dim': [8, 64], 'batch_size': 8, 'flops': 1265603017216}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1898404492032}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1582003754624}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1898404492032}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1898404492032}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1582003754624}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1582003754624}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1582003754624}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1582003754624}, {'type': 'perplexity', 'in_batch_dim': [3, 48], 'batch_size': 8, 'flops': 949202279808}], 'timestamp': '2025-09-10 02:28:52.381733', 'step': 2330, 'epoch': 1} {'type': 'pplx', 'content': 7980.803741334045, 'timestamp': '2025-09-10 02:28:52.387244', 'step': 2330, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 2847885091968}, 'timestamp': '2025-09-10 02:28:52.423498', 'step': 2330, 'epoch': 1} {'type': 'loss', 'content': 0.16842778027057648, 'timestamp': '2025-09-10 02:28:52.426940', 'step': 2331, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 128], 'flops': 3797092544000}, 'timestamp': '2025-09-10 02:28:52.463310', 'step': 2331, 'epoch': 1} {'type': 'loss', 'content': 0.23308154940605164, 'timestamp': '2025-09-10 02:28:52.487977', 'step': 2332, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 128], 'flops': 3797092544000}, 'timestamp': '2025-09-10 02:28:52.527300', 'step': 2332, 'epoch': 1} {'type': 'loss', 'content': 0.1390797644853592, 'timestamp': '2025-09-10 02:28:52.530026', 'step': 2333, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 80], 'flops': 2373281365952}, 'timestamp': '2025-09-10 02:28:52.570875', 'step': 2333, 'epoch': 1} {'type': 'loss', 'content': 0.18065233528614044, 'timestamp': '2025-09-10 02:28:52.572974', 'step': 2334, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 3322488817984}, 'timestamp': '2025-09-10 02:28:52.603863', 'step': 2334, 'epoch': 1} {'type': 'loss', 'content': 0.1449277698993683, 'timestamp': '2025-09-10 02:28:52.607419', 'step': 2335, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 2847885091968}, 'timestamp': '2025-09-10 02:28:52.642212', 'step': 2335, 'epoch': 1} {'type': 'loss', 'content': 0.17154857516288757, 'timestamp': '2025-09-10 02:28:52.669965', 'step': 2336, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 128], 'flops': 3797092544000}, 'timestamp': '2025-09-10 02:28:52.711232', 'step': 2336, 'epoch': 1} {'type': 'loss', 'content': 0.17714084684848785, 'timestamp': '2025-09-10 02:28:52.715750', 'step': 2337, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 144], 'flops': 4271696270016}, 'timestamp': '2025-09-10 02:28:52.752666', 'step': 2337, 'epoch': 1} {'type': 'loss', 'content': 0.10828541219234467, 'timestamp': '2025-09-10 02:28:52.758156', 'step': 2338, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 128], 'flops': 3797092544000}, 'timestamp': '2025-09-10 02:28:52.800015', 'step': 2338, 'epoch': 1} {'type': 'loss', 'content': 0.12317242473363876, 'timestamp': '2025-09-10 02:28:52.803452', 'step': 2339, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 3322488817984}, 'timestamp': '2025-09-10 02:28:52.844975', 'step': 2339, 'epoch': 1} {'type': 'loss', 'content': 0.19322648644447327, 'timestamp': '2025-09-10 02:28:52.872303', 'step': 2340, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 128], 'flops': 3797092544000}, 'timestamp': '2025-09-10 02:28:52.912886', 'step': 2340, 'epoch': 1} {'type': 'loss', 'content': 0.19992798566818237, 'timestamp': '2025-09-10 02:28:52.918557', 'step': 2341, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 3322488817984}, 'timestamp': '2025-09-10 02:28:52.954822', 'step': 2341, 'epoch': 1} {'type': 'loss', 'content': 0.13974623382091522, 'timestamp': '2025-09-10 02:28:52.957214', 'step': 2342, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 3322488817984}, 'timestamp': '2025-09-10 02:28:52.988384', 'step': 2342, 'epoch': 1} {'type': 'loss', 'content': 0.18276001513004303, 'timestamp': '2025-09-10 02:28:52.997678', 'step': 2343, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 2847885091968}, 'timestamp': '2025-09-10 02:28:53.034026', 'step': 2343, 'epoch': 1} {'type': 'loss', 'content': 0.14344489574432373, 'timestamp': '2025-09-10 02:28:53.057909', 'step': 2344, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 3322488817984}, 'timestamp': '2025-09-10 02:28:53.093757', 'step': 2344, 'epoch': 1} {'type': 'loss', 'content': 0.21268954873085022, 'timestamp': '2025-09-10 02:28:53.099038', 'step': 2345, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 128], 'flops': 3797092544000}, 'timestamp': '2025-09-10 02:28:53.132144', 'step': 2345, 'epoch': 1} {'type': 'loss', 'content': 0.15692219138145447, 'timestamp': '2025-09-10 02:28:53.135850', 'step': 2346, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 3322488817984}, 'timestamp': '2025-09-10 02:28:53.168560', 'step': 2346, 'epoch': 1} {'type': 'loss', 'content': 0.1566828489303589, 'timestamp': '2025-09-10 02:28:53.171635', 'step': 2347, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 3322488817984}, 'timestamp': '2025-09-10 02:28:53.204934', 'step': 2347, 'epoch': 1} {'type': 'loss', 'content': 0.15237507224082947, 'timestamp': '2025-09-10 02:28:53.229799', 'step': 2348, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 2847885091968}, 'timestamp': '2025-09-10 02:28:53.266180', 'step': 2348, 'epoch': 1} {'type': 'loss', 'content': 0.18446169793605804, 'timestamp': '2025-09-10 02:28:53.269882', 'step': 2349, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 128], 'flops': 3797092544000}, 'timestamp': '2025-09-10 02:28:53.303120', 'step': 2349, 'epoch': 1} {'type': 'loss', 'content': 0.15089552104473114, 'timestamp': '2025-09-10 02:28:53.305842', 'step': 2350, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 2847885091968}, 'timestamp': '2025-09-10 02:28:53.343435', 'step': 2350, 'epoch': 1} {'type': 'loss', 'content': 0.25243762135505676, 'timestamp': '2025-09-10 02:28:53.346079', 'step': 2351, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 2847885091968}, 'timestamp': '2025-09-10 02:28:53.386168', 'step': 2351, 'epoch': 1} {'type': 'loss', 'content': 0.07404229789972305, 'timestamp': '2025-09-10 02:28:53.413236', 'step': 2352, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 2847885091968}, 'timestamp': '2025-09-10 02:28:53.448356', 'step': 2352, 'epoch': 1} {'type': 'loss', 'content': 0.1255185455083847, 'timestamp': '2025-09-10 02:28:53.453447', 'step': 2353, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 80], 'flops': 2373281365952}, 'timestamp': '2025-09-10 02:28:53.486768', 'step': 2353, 'epoch': 1} {'type': 'loss', 'content': 0.08110582083463669, 'timestamp': '2025-09-10 02:28:53.490469', 'step': 2354, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 3322488817984}, 'timestamp': '2025-09-10 02:28:53.529948', 'step': 2354, 'epoch': 1} {'type': 'loss', 'content': 0.2376258373260498, 'timestamp': '2025-09-10 02:28:53.533051', 'step': 2355, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 3322488817984}, 'timestamp': '2025-09-10 02:28:53.568943', 'step': 2355, 'epoch': 1} {'type': 'loss', 'content': 0.15725119411945343, 'timestamp': '2025-09-10 02:28:53.595342', 'step': 2356, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 3322488817984}, 'timestamp': '2025-09-10 02:28:53.628934', 'step': 2356, 'epoch': 1} {'type': 'loss', 'content': 0.16040346026420593, 'timestamp': '2025-09-10 02:28:53.631624', 'step': 2357, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 128], 'flops': 3797092544000}, 'timestamp': '2025-09-10 02:28:53.663443', 'step': 2357, 'epoch': 1} {'type': 'loss', 'content': 0.12106496840715408, 'timestamp': '2025-09-10 02:28:53.666109', 'step': 2358, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 80], 'flops': 2373281365952}, 'timestamp': '2025-09-10 02:28:53.697372', 'step': 2358, 'epoch': 1} {'type': 'loss', 'content': 0.17550450563430786, 'timestamp': '2025-09-10 02:28:53.701556', 'step': 2359, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 3322488817984}, 'timestamp': '2025-09-10 02:28:53.738488', 'step': 2359, 'epoch': 1} {'type': 'loss', 'content': 0.20676539838314056, 'timestamp': '2025-09-10 02:28:53.762692', 'step': 2360, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 2847885091968}, 'timestamp': '2025-09-10 02:28:53.796775', 'step': 2360, 'epoch': 1} {'type': 'loss', 'content': 0.2653042674064636, 'timestamp': '2025-09-10 02:28:53.804661', 'step': 2361, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 2847885091968}, 'timestamp': '2025-09-10 02:28:53.840915', 'step': 2361, 'epoch': 1} {'type': 'loss', 'content': 0.19401563704013824, 'timestamp': '2025-09-10 02:28:53.843620', 'step': 2362, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 128], 'flops': 3797092544000}, 'timestamp': '2025-09-10 02:28:53.875021', 'step': 2362, 'epoch': 1} {'type': 'loss', 'content': 0.14848454296588898, 'timestamp': '2025-09-10 02:28:53.877975', 'step': 2363, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 176], 'flops': 5220903722048}, 'timestamp': '2025-09-10 02:28:53.909227', 'step': 2363, 'epoch': 1} {'type': 'loss', 'content': 0.3631865084171295, 'timestamp': '2025-09-10 02:28:53.934124', 'step': 2364, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 3322488817984}, 'timestamp': '2025-09-10 02:28:53.965627', 'step': 2364, 'epoch': 1} {'type': 'loss', 'content': 0.21870213747024536, 'timestamp': '2025-09-10 02:28:53.967736', 'step': 2365, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 128], 'flops': 3797092544000}, 'timestamp': '2025-09-10 02:28:53.998496', 'step': 2365, 'epoch': 1} {'type': 'loss', 'content': 0.2675988972187042, 'timestamp': '2025-09-10 02:28:54.001211', 'step': 2366, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 128], 'flops': 3797092544000}, 'timestamp': '2025-09-10 02:28:54.031953', 'step': 2366, 'epoch': 1} {'type': 'loss', 'content': 0.1512768417596817, 'timestamp': '2025-09-10 02:28:54.034242', 'step': 2367, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 2847885091968}, 'timestamp': '2025-09-10 02:28:54.064670', 'step': 2367, 'epoch': 1} {'type': 'loss', 'content': 0.20439527928829193, 'timestamp': '2025-09-10 02:28:54.088600', 'step': 2368, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 3322488817984}, 'timestamp': '2025-09-10 02:28:54.120269', 'step': 2368, 'epoch': 1} {'type': 'loss', 'content': 0.25345274806022644, 'timestamp': '2025-09-10 02:28:54.122697', 'step': 2369, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 128], 'flops': 3797092544000}, 'timestamp': '2025-09-10 02:28:54.153340', 'step': 2369, 'epoch': 1} {'type': 'loss', 'content': 0.15253323316574097, 'timestamp': '2025-09-10 02:28:54.155833', 'step': 2370, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 3322488817984}, 'timestamp': '2025-09-10 02:28:54.186211', 'step': 2370, 'epoch': 1} {'type': 'loss', 'content': 0.14246895909309387, 'timestamp': '2025-09-10 02:28:54.188735', 'step': 2371, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 3322488817984}, 'timestamp': '2025-09-10 02:28:54.219518', 'step': 2371, 'epoch': 1} {'type': 'loss', 'content': 0.12134551256895065, 'timestamp': '2025-09-10 02:28:54.243332', 'step': 2372, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 2847885091968}, 'timestamp': '2025-09-10 02:28:54.273927', 'step': 2372, 'epoch': 1} {'type': 'loss', 'content': 0.20772267878055573, 'timestamp': '2025-09-10 02:28:54.276480', 'step': 2373, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 2847885091968}, 'timestamp': '2025-09-10 02:28:54.307207', 'step': 2373, 'epoch': 1} {'type': 'loss', 'content': 0.21620091795921326, 'timestamp': '2025-09-10 02:28:54.310964', 'step': 2374, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 3322488817984}, 'timestamp': '2025-09-10 02:28:54.342213', 'step': 2374, 'epoch': 1} {'type': 'loss', 'content': 0.24651625752449036, 'timestamp': '2025-09-10 02:28:54.344927', 'step': 2375, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 128], 'flops': 3797092544000}, 'timestamp': '2025-09-10 02:28:54.375472', 'step': 2375, 'epoch': 1} {'type': 'loss', 'content': 0.18346095085144043, 'timestamp': '2025-09-10 02:28:54.399252', 'step': 2376, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 2847885091968}, 'timestamp': '2025-09-10 02:28:54.430221', 'step': 2376, 'epoch': 1} {'type': 'loss', 'content': 0.27622920274734497, 'timestamp': '2025-09-10 02:28:54.432821', 'step': 2377, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 2847885091968}, 'timestamp': '2025-09-10 02:28:54.465811', 'step': 2377, 'epoch': 1} {'type': 'loss', 'content': 0.2394731640815735, 'timestamp': '2025-09-10 02:28:54.468740', 'step': 2378, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 3322488817984}, 'timestamp': '2025-09-10 02:28:54.500045', 'step': 2378, 'epoch': 1} {'type': 'loss', 'content': 0.1504616141319275, 'timestamp': '2025-09-10 02:28:54.504250', 'step': 2379, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 2847885091968}, 'timestamp': '2025-09-10 02:28:54.539135', 'step': 2379, 'epoch': 1} {'type': 'loss', 'content': 0.1460249274969101, 'timestamp': '2025-09-10 02:28:54.569847', 'step': 2380, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 3322488817984}, 'timestamp': '2025-09-10 02:28:54.602159', 'step': 2380, 'epoch': 1} {'type': 'loss', 'content': 0.11500847339630127, 'timestamp': '2025-09-10 02:28:54.605249', 'step': 2381, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 80], 'flops': 2373281365952}, 'timestamp': '2025-09-10 02:28:54.636757', 'step': 2381, 'epoch': 1} {'type': 'loss', 'content': 0.21732336282730103, 'timestamp': '2025-09-10 02:28:54.639457', 'step': 2382, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 3322488817984}, 'timestamp': '2025-09-10 02:28:54.670674', 'step': 2382, 'epoch': 1} {'type': 'loss', 'content': 0.1918066442012787, 'timestamp': '2025-09-10 02:28:54.673389', 'step': 2383, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 80], 'flops': 2373281365952}, 'timestamp': '2025-09-10 02:28:54.704249', 'step': 2383, 'epoch': 1} {'type': 'loss', 'content': 0.30018094182014465, 'timestamp': '2025-09-10 02:28:54.728797', 'step': 2384, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 128], 'flops': 3797092544000}, 'timestamp': '2025-09-10 02:28:54.760789', 'step': 2384, 'epoch': 1} {'type': 'loss', 'content': 0.20210109651088715, 'timestamp': '2025-09-10 02:28:54.762881', 'step': 2385, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 144], 'flops': 4271696270016}, 'timestamp': '2025-09-10 02:28:54.794600', 'step': 2385, 'epoch': 1} {'type': 'loss', 'content': 0.21186134219169617, 'timestamp': '2025-09-10 02:28:54.797194', 'step': 2386, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 2847885091968}, 'timestamp': '2025-09-10 02:28:54.827958', 'step': 2386, 'epoch': 1} {'type': 'loss', 'content': 0.17243152856826782, 'timestamp': '2025-09-10 02:28:54.830389', 'step': 2387, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 128], 'flops': 3797092544000}, 'timestamp': '2025-09-10 02:28:54.861156', 'step': 2387, 'epoch': 1} {'type': 'loss', 'content': 0.178103968501091, 'timestamp': '2025-09-10 02:28:54.884915', 'step': 2388, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 3322488817984}, 'timestamp': '2025-09-10 02:28:54.916089', 'step': 2388, 'epoch': 1} {'type': 'loss', 'content': 0.19076506793498993, 'timestamp': '2025-09-10 02:28:54.920905', 'step': 2389, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 3322488817984}, 'timestamp': '2025-09-10 02:28:54.950965', 'step': 2389, 'epoch': 1} {'type': 'loss', 'content': 0.2113124132156372, 'timestamp': '2025-09-10 02:28:54.953327', 'step': 2390, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 80], 'flops': 2373281365952}, 'timestamp': '2025-09-10 02:28:54.983512', 'step': 2390, 'epoch': 1} {'type': 'loss', 'content': 0.264361709356308, 'timestamp': '2025-09-10 02:28:54.986227', 'step': 2391, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 2847885091968}, 'timestamp': '2025-09-10 02:28:55.016459', 'step': 2391, 'epoch': 1} {'type': 'loss', 'content': 0.21243585646152496, 'timestamp': '2025-09-10 02:28:55.041887', 'step': 2392, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 2847885091968}, 'timestamp': '2025-09-10 02:28:55.072512', 'step': 2392, 'epoch': 1} {'type': 'loss', 'content': 0.2531963288784027, 'timestamp': '2025-09-10 02:28:55.074993', 'step': 2393, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 3322488817984}, 'timestamp': '2025-09-10 02:28:55.105508', 'step': 2393, 'epoch': 1} {'type': 'loss', 'content': 0.11845430731773376, 'timestamp': '2025-09-10 02:28:55.108067', 'step': 2394, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 2847885091968}, 'timestamp': '2025-09-10 02:28:55.137786', 'step': 2394, 'epoch': 1} {'type': 'loss', 'content': 0.20406384766101837, 'timestamp': '2025-09-10 02:28:55.140016', 'step': 2395, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 2847885091968}, 'timestamp': '2025-09-10 02:28:55.169675', 'step': 2395, 'epoch': 1} {'type': 'loss', 'content': 0.2111683338880539, 'timestamp': '2025-09-10 02:28:55.193285', 'step': 2396, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 3322488817984}, 'timestamp': '2025-09-10 02:28:55.223765', 'step': 2396, 'epoch': 1} {'type': 'loss', 'content': 0.11610736697912216, 'timestamp': '2025-09-10 02:28:55.226015', 'step': 2397, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 128], 'flops': 3797092544000}, 'timestamp': '2025-09-10 02:28:55.256117', 'step': 2397, 'epoch': 1} {'type': 'loss', 'content': 0.13011988997459412, 'timestamp': '2025-09-10 02:28:55.258637', 'step': 2398, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 2847885091968}, 'timestamp': '2025-09-10 02:28:55.289425', 'step': 2398, 'epoch': 1} {'type': 'loss', 'content': 0.18994653224945068, 'timestamp': '2025-09-10 02:28:55.291935', 'step': 2399, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 80], 'flops': 2373281365952}, 'timestamp': '2025-09-10 02:28:55.321793', 'step': 2399, 'epoch': 1} {'type': 'loss', 'content': 0.16139428317546844, 'timestamp': '2025-09-10 02:28:55.347160', 'step': 2400, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 3322488817984}, 'timestamp': '2025-09-10 02:28:55.377503', 'step': 2400, 'epoch': 1} {'type': 'loss', 'content': 0.1754145324230194, 'timestamp': '2025-09-10 02:28:55.380498', 'step': 2401, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 2847885091968}, 'timestamp': '2025-09-10 02:28:55.410765', 'step': 2401, 'epoch': 1} {'type': 'loss', 'content': 0.1374313235282898, 'timestamp': '2025-09-10 02:28:55.413126', 'step': 2402, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 3322488817984}, 'timestamp': '2025-09-10 02:28:55.444291', 'step': 2402, 'epoch': 1} {'type': 'loss', 'content': 0.1828121393918991, 'timestamp': '2025-09-10 02:28:55.447406', 'step': 2403, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 3322488817984}, 'timestamp': '2025-09-10 02:28:55.479929', 'step': 2403, 'epoch': 1} {'type': 'loss', 'content': 0.12485120445489883, 'timestamp': '2025-09-10 02:28:55.504574', 'step': 2404, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 2847885091968}, 'timestamp': '2025-09-10 02:28:55.537143', 'step': 2404, 'epoch': 1} {'type': 'loss', 'content': 0.13909944891929626, 'timestamp': '2025-09-10 02:28:55.542826', 'step': 2405, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 3322488817984}, 'timestamp': '2025-09-10 02:28:55.587330', 'step': 2405, 'epoch': 1} {'type': 'loss', 'content': 0.16957315802574158, 'timestamp': '2025-09-10 02:28:55.590537', 'step': 2406, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 3322488817984}, 'timestamp': '2025-09-10 02:28:55.624755', 'step': 2406, 'epoch': 1} {'type': 'loss', 'content': 0.15963710844516754, 'timestamp': '2025-09-10 02:28:55.628787', 'step': 2407, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 144], 'flops': 4271696270016}, 'timestamp': '2025-09-10 02:28:55.658942', 'step': 2407, 'epoch': 1} {'type': 'loss', 'content': 0.16074219346046448, 'timestamp': '2025-09-10 02:28:55.682606', 'step': 2408, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 128], 'flops': 3797092544000}, 'timestamp': '2025-09-10 02:28:55.713030', 'step': 2408, 'epoch': 1} {'type': 'loss', 'content': 0.21035243570804596, 'timestamp': '2025-09-10 02:28:55.715104', 'step': 2409, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 144], 'flops': 4271696270016}, 'timestamp': '2025-09-10 02:28:55.745313', 'step': 2409, 'epoch': 1} {'type': 'loss', 'content': 0.2387278974056244, 'timestamp': '2025-09-10 02:28:55.747611', 'step': 2410, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 2847885091968}, 'timestamp': '2025-09-10 02:28:55.778027', 'step': 2410, 'epoch': 1} {'type': 'loss', 'content': 0.3019252419471741, 'timestamp': '2025-09-10 02:28:55.780553', 'step': 2411, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 128], 'flops': 3797092544000}, 'timestamp': '2025-09-10 02:28:55.810699', 'step': 2411, 'epoch': 1} {'type': 'loss', 'content': 0.23175355792045593, 'timestamp': '2025-09-10 02:28:55.834542', 'step': 2412, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 3322488817984}, 'timestamp': '2025-09-10 02:28:55.865573', 'step': 2412, 'epoch': 1} {'type': 'loss', 'content': 0.21075919270515442, 'timestamp': '2025-09-10 02:28:55.868301', 'step': 2413, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 128], 'flops': 3797092544000}, 'timestamp': '2025-09-10 02:28:55.898334', 'step': 2413, 'epoch': 1} {'type': 'loss', 'content': 0.20217271149158478, 'timestamp': '2025-09-10 02:28:55.900779', 'step': 2414, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 2847885091968}, 'timestamp': '2025-09-10 02:28:55.931358', 'step': 2414, 'epoch': 1} {'type': 'loss', 'content': 0.2511752247810364, 'timestamp': '2025-09-10 02:28:55.935294', 'step': 2415, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 80], 'flops': 2373281365952}, 'timestamp': '2025-09-10 02:28:55.970030', 'step': 2415, 'epoch': 1} {'type': 'loss', 'content': 0.2577601671218872, 'timestamp': '2025-09-10 02:28:55.994072', 'step': 2416, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 80], 'flops': 2373281365952}, 'timestamp': '2025-09-10 02:28:56.024304', 'step': 2416, 'epoch': 1} {'type': 'loss', 'content': 0.16077908873558044, 'timestamp': '2025-09-10 02:28:56.026885', 'step': 2417, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 2847885091968}, 'timestamp': '2025-09-10 02:28:56.058281', 'step': 2417, 'epoch': 1} {'type': 'loss', 'content': 0.2781776785850525, 'timestamp': '2025-09-10 02:28:56.060785', 'step': 2418, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 3322488817984}, 'timestamp': '2025-09-10 02:28:56.091362', 'step': 2418, 'epoch': 1} {'type': 'loss', 'content': 0.16533055901527405, 'timestamp': '2025-09-10 02:28:56.093744', 'step': 2419, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 3322488817984}, 'timestamp': '2025-09-10 02:28:56.123845', 'step': 2419, 'epoch': 1} {'type': 'loss', 'content': 0.17136281728744507, 'timestamp': '2025-09-10 02:28:56.147342', 'step': 2420, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 144], 'flops': 4271696270016}, 'timestamp': '2025-09-10 02:28:56.178118', 'step': 2420, 'epoch': 1} {'type': 'loss', 'content': 0.16322824358940125, 'timestamp': '2025-09-10 02:28:56.180516', 'step': 2421, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 2847885091968}, 'timestamp': '2025-09-10 02:28:56.210368', 'step': 2421, 'epoch': 1} {'type': 'loss', 'content': 0.2400502860546112, 'timestamp': '2025-09-10 02:28:56.213205', 'step': 2422, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 144], 'flops': 4271696270016}, 'timestamp': '2025-09-10 02:28:56.244276', 'step': 2422, 'epoch': 1} {'type': 'loss', 'content': 0.20255891978740692, 'timestamp': '2025-09-10 02:28:56.246543', 'step': 2423, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 2847885091968}, 'timestamp': '2025-09-10 02:28:56.277007', 'step': 2423, 'epoch': 1} {'type': 'loss', 'content': 0.12646512687206268, 'timestamp': '2025-09-10 02:28:56.300739', 'step': 2424, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 3322488817984}, 'timestamp': '2025-09-10 02:28:56.332213', 'step': 2424, 'epoch': 1} {'type': 'loss', 'content': 0.18155227601528168, 'timestamp': '2025-09-10 02:28:56.337159', 'step': 2425, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 3322488817984}, 'timestamp': '2025-09-10 02:28:56.367206', 'step': 2425, 'epoch': 1} {'type': 'loss', 'content': 0.17337767779827118, 'timestamp': '2025-09-10 02:28:56.369746', 'step': 2426, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 3322488817984}, 'timestamp': '2025-09-10 02:28:56.400023', 'step': 2426, 'epoch': 1} {'type': 'loss', 'content': 0.1470542699098587, 'timestamp': '2025-09-10 02:28:56.402382', 'step': 2427, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 128], 'flops': 3797092544000}, 'timestamp': '2025-09-10 02:28:56.432328', 'step': 2427, 'epoch': 1} {'type': 'loss', 'content': 0.25692543387413025, 'timestamp': '2025-09-10 02:28:56.455928', 'step': 2428, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 3322488817984}, 'timestamp': '2025-09-10 02:28:56.485873', 'step': 2428, 'epoch': 1} {'type': 'loss', 'content': 0.1345549076795578, 'timestamp': '2025-09-10 02:28:56.488100', 'step': 2429, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 80], 'flops': 2373281365952}, 'timestamp': '2025-09-10 02:28:56.517862', 'step': 2429, 'epoch': 1} {'type': 'loss', 'content': 0.11984024196863174, 'timestamp': '2025-09-10 02:28:56.520511', 'step': 2430, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 3322488817984}, 'timestamp': '2025-09-10 02:28:56.550877', 'step': 2430, 'epoch': 1} {'type': 'loss', 'content': 0.20300604403018951, 'timestamp': '2025-09-10 02:28:56.553393', 'step': 2431, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 3322488817984}, 'timestamp': '2025-09-10 02:28:56.583458', 'step': 2431, 'epoch': 1} {'type': 'loss', 'content': 0.17617389559745789, 'timestamp': '2025-09-10 02:28:56.607401', 'step': 2432, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 3322488817984}, 'timestamp': '2025-09-10 02:28:56.637849', 'step': 2432, 'epoch': 1} {'type': 'loss', 'content': 0.18274429440498352, 'timestamp': '2025-09-10 02:28:56.641792', 'step': 2433, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 128], 'flops': 3797092544000}, 'timestamp': '2025-09-10 02:28:56.672902', 'step': 2433, 'epoch': 1} {'type': 'loss', 'content': 0.12812405824661255, 'timestamp': '2025-09-10 02:28:56.675303', 'step': 2434, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 3322488817984}, 'timestamp': '2025-09-10 02:28:56.705158', 'step': 2434, 'epoch': 1} {'type': 'loss', 'content': 0.24520976841449738, 'timestamp': '2025-09-10 02:28:56.707616', 'step': 2435, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 3322488817984}, 'timestamp': '2025-09-10 02:28:56.739220', 'step': 2435, 'epoch': 1} {'type': 'loss', 'content': 0.11619998514652252, 'timestamp': '2025-09-10 02:28:56.764614', 'step': 2436, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 3322488817984}, 'timestamp': '2025-09-10 02:28:56.795240', 'step': 2436, 'epoch': 1} {'type': 'loss', 'content': 0.1291448175907135, 'timestamp': '2025-09-10 02:28:56.798079', 'step': 2437, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 3322488817984}, 'timestamp': '2025-09-10 02:28:56.829157', 'step': 2437, 'epoch': 1} {'type': 'loss', 'content': 0.15183904767036438, 'timestamp': '2025-09-10 02:28:56.831732', 'step': 2438, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 2847885091968}, 'timestamp': '2025-09-10 02:28:56.862183', 'step': 2438, 'epoch': 1} {'type': 'loss', 'content': 0.1617838740348816, 'timestamp': '2025-09-10 02:28:56.864310', 'step': 2439, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 2847885091968}, 'timestamp': '2025-09-10 02:28:56.895114', 'step': 2439, 'epoch': 1} {'type': 'loss', 'content': 0.17677268385887146, 'timestamp': '2025-09-10 02:28:56.919274', 'step': 2440, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 3322488817984}, 'timestamp': '2025-09-10 02:28:56.951144', 'step': 2440, 'epoch': 1} {'type': 'loss', 'content': 0.3013598322868347, 'timestamp': '2025-09-10 02:28:56.954426', 'step': 2441, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 3322488817984}, 'timestamp': '2025-09-10 02:28:56.987595', 'step': 2441, 'epoch': 1} {'type': 'loss', 'content': 0.2061314880847931, 'timestamp': '2025-09-10 02:28:56.990110', 'step': 2442, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 80], 'flops': 2373281365952}, 'timestamp': '2025-09-10 02:28:57.020572', 'step': 2442, 'epoch': 1} {'type': 'loss', 'content': 0.19576114416122437, 'timestamp': '2025-09-10 02:28:57.022881', 'step': 2443, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 2847885091968}, 'timestamp': '2025-09-10 02:28:57.054284', 'step': 2443, 'epoch': 1} {'type': 'loss', 'content': 0.22997836768627167, 'timestamp': '2025-09-10 02:28:57.078269', 'step': 2444, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 80], 'flops': 2373281365952}, 'timestamp': '2025-09-10 02:28:57.109893', 'step': 2444, 'epoch': 1} {'type': 'loss', 'content': 0.07301321625709534, 'timestamp': '2025-09-10 02:28:57.112503', 'step': 2445, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 2847885091968}, 'timestamp': '2025-09-10 02:28:57.142618', 'step': 2445, 'epoch': 1} {'type': 'loss', 'content': 0.17106151580810547, 'timestamp': '2025-09-10 02:28:57.145227', 'step': 2446, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 128], 'flops': 3797092544000}, 'timestamp': '2025-09-10 02:28:57.177464', 'step': 2446, 'epoch': 1} {'type': 'loss', 'content': 0.12625949084758759, 'timestamp': '2025-09-10 02:28:57.180063', 'step': 2447, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 3322488817984}, 'timestamp': '2025-09-10 02:28:57.211116', 'step': 2447, 'epoch': 1} {'type': 'loss', 'content': 0.17556846141815186, 'timestamp': '2025-09-10 02:28:57.234885', 'step': 2448, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 2847885091968}, 'timestamp': '2025-09-10 02:28:57.265763', 'step': 2448, 'epoch': 1} {'type': 'loss', 'content': 0.20627827942371368, 'timestamp': '2025-09-10 02:28:57.267929', 'step': 2449, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 128], 'flops': 3797092544000}, 'timestamp': '2025-09-10 02:28:57.298488', 'step': 2449, 'epoch': 1} {'type': 'loss', 'content': 0.17197363078594208, 'timestamp': '2025-09-10 02:28:57.301124', 'step': 2450, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 2847885091968}, 'timestamp': '2025-09-10 02:28:57.332252', 'step': 2450, 'epoch': 1} {'type': 'loss', 'content': 0.12195522338151932, 'timestamp': '2025-09-10 02:28:57.335101', 'step': 2451, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 3322488817984}, 'timestamp': '2025-09-10 02:28:57.365357', 'step': 2451, 'epoch': 1} {'type': 'loss', 'content': 0.23994967341423035, 'timestamp': '2025-09-10 02:28:57.389245', 'step': 2452, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 3322488817984}, 'timestamp': '2025-09-10 02:28:57.419962', 'step': 2452, 'epoch': 1} {'type': 'loss', 'content': 0.15987379848957062, 'timestamp': '2025-09-10 02:28:57.422725', 'step': 2453, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 2847885091968}, 'timestamp': '2025-09-10 02:28:57.452635', 'step': 2453, 'epoch': 1} {'type': 'loss', 'content': 0.1506168246269226, 'timestamp': '2025-09-10 02:28:57.455264', 'step': 2454, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 3322488817984}, 'timestamp': '2025-09-10 02:28:57.486516', 'step': 2454, 'epoch': 1} {'type': 'loss', 'content': 0.15380261838436127, 'timestamp': '2025-09-10 02:28:57.489073', 'step': 2455, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 144], 'flops': 4271696270016}, 'timestamp': '2025-09-10 02:28:57.520029', 'step': 2455, 'epoch': 1} {'type': 'loss', 'content': 0.16792207956314087, 'timestamp': '2025-09-10 02:28:57.547336', 'step': 2456, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 3322488817984}, 'timestamp': '2025-09-10 02:28:57.583544', 'step': 2456, 'epoch': 1} {'type': 'loss', 'content': 0.17591917514801025, 'timestamp': '2025-09-10 02:28:57.586196', 'step': 2457, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 3322488817984}, 'timestamp': '2025-09-10 02:28:57.616093', 'step': 2457, 'epoch': 1} {'type': 'loss', 'content': 0.15553371608257294, 'timestamp': '2025-09-10 02:28:57.618688', 'step': 2458, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 3322488817984}, 'timestamp': '2025-09-10 02:28:57.649811', 'step': 2458, 'epoch': 1} {'type': 'loss', 'content': 0.236186683177948, 'timestamp': '2025-09-10 02:28:57.653775', 'step': 2459, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 3322488817984}, 'timestamp': '2025-09-10 02:28:57.684777', 'step': 2459, 'epoch': 1} {'type': 'loss', 'content': 0.1530204713344574, 'timestamp': '2025-09-10 02:28:57.708842', 'step': 2460, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 3322488817984}, 'timestamp': '2025-09-10 02:28:57.741574', 'step': 2460, 'epoch': 1} {'type': 'loss', 'content': 0.19255110621452332, 'timestamp': '2025-09-10 02:28:57.743955', 'step': 2461, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 2847885091968}, 'timestamp': '2025-09-10 02:28:57.773777', 'step': 2461, 'epoch': 1} {'type': 'loss', 'content': 0.1755537986755371, 'timestamp': '2025-09-10 02:28:57.776159', 'step': 2462, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 2847885091968}, 'timestamp': '2025-09-10 02:28:57.806714', 'step': 2462, 'epoch': 1} {'type': 'loss', 'content': 0.25686657428741455, 'timestamp': '2025-09-10 02:28:57.809257', 'step': 2463, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 2847885091968}, 'timestamp': '2025-09-10 02:28:57.840619', 'step': 2463, 'epoch': 1} {'type': 'loss', 'content': 0.2144172489643097, 'timestamp': '2025-09-10 02:28:57.864909', 'step': 2464, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 80], 'flops': 2373281365952}, 'timestamp': '2025-09-10 02:28:57.895436', 'step': 2464, 'epoch': 1} {'type': 'loss', 'content': 0.2370755672454834, 'timestamp': '2025-09-10 02:28:57.898979', 'step': 2465, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 2847885091968}, 'timestamp': '2025-09-10 02:28:57.929638', 'step': 2465, 'epoch': 1} {'type': 'loss', 'content': 0.1796196848154068, 'timestamp': '2025-09-10 02:28:57.932014', 'step': 2466, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 3322488817984}, 'timestamp': '2025-09-10 02:28:57.962348', 'step': 2466, 'epoch': 1} {'type': 'loss', 'content': 0.12459004670381546, 'timestamp': '2025-09-10 02:28:57.965245', 'step': 2467, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 144], 'flops': 4271696270016}, 'timestamp': '2025-09-10 02:28:57.995632', 'step': 2467, 'epoch': 1} {'type': 'loss', 'content': 0.20039530098438263, 'timestamp': '2025-09-10 02:28:58.019827', 'step': 2468, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 128], 'flops': 3797092544000}, 'timestamp': '2025-09-10 02:28:58.051306', 'step': 2468, 'epoch': 1} {'type': 'loss', 'content': 0.11698941886425018, 'timestamp': '2025-09-10 02:28:58.053747', 'step': 2469, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 2847885091968}, 'timestamp': '2025-09-10 02:28:58.086399', 'step': 2469, 'epoch': 1} {'type': 'loss', 'content': 0.3271934390068054, 'timestamp': '2025-09-10 02:28:58.088980', 'step': 2470, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 3322488817984}, 'timestamp': '2025-09-10 02:28:58.120115', 'step': 2470, 'epoch': 1} {'type': 'loss', 'content': 0.13991641998291016, 'timestamp': '2025-09-10 02:28:58.122853', 'step': 2471, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 2847885091968}, 'timestamp': '2025-09-10 02:28:58.153774', 'step': 2471, 'epoch': 1} {'type': 'loss', 'content': 0.1714220643043518, 'timestamp': '2025-09-10 02:28:58.177376', 'step': 2472, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 3322488817984}, 'timestamp': '2025-09-10 02:28:58.208099', 'step': 2472, 'epoch': 1} {'type': 'loss', 'content': 0.18970033526420593, 'timestamp': '2025-09-10 02:28:58.210578', 'step': 2473, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 3322488817984}, 'timestamp': '2025-09-10 02:28:58.240931', 'step': 2473, 'epoch': 1} {'type': 'loss', 'content': 0.20573054254055023, 'timestamp': '2025-09-10 02:28:58.243085', 'step': 2474, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 80], 'flops': 2373281365952}, 'timestamp': '2025-09-10 02:28:58.273775', 'step': 2474, 'epoch': 1} {'type': 'loss', 'content': 0.11335407942533493, 'timestamp': '2025-09-10 02:28:58.276478', 'step': 2475, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 128], 'flops': 3797092544000}, 'timestamp': '2025-09-10 02:28:58.306589', 'step': 2475, 'epoch': 1} {'type': 'loss', 'content': 0.18174605071544647, 'timestamp': '2025-09-10 02:28:58.330564', 'step': 2476, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 128], 'flops': 3797092544000}, 'timestamp': '2025-09-10 02:28:58.361589', 'step': 2476, 'epoch': 1} {'type': 'loss', 'content': 0.24432317912578583, 'timestamp': '2025-09-10 02:28:58.365055', 'step': 2477, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 128], 'flops': 3797092544000}, 'timestamp': '2025-09-10 02:28:58.395814', 'step': 2477, 'epoch': 1} {'type': 'loss', 'content': 0.08087807893753052, 'timestamp': '2025-09-10 02:28:58.398354', 'step': 2478, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 3322488817984}, 'timestamp': '2025-09-10 02:28:58.428412', 'step': 2478, 'epoch': 1} {'type': 'loss', 'content': 0.1617082804441452, 'timestamp': '2025-09-10 02:28:58.430726', 'step': 2479, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 2847885091968}, 'timestamp': '2025-09-10 02:28:58.460996', 'step': 2479, 'epoch': 1} {'type': 'loss', 'content': 0.1341724991798401, 'timestamp': '2025-09-10 02:28:58.486223', 'step': 2480, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 144], 'flops': 4271696270016}, 'timestamp': '2025-09-10 02:28:58.519077', 'step': 2480, 'epoch': 1} {'type': 'loss', 'content': 0.2622028887271881, 'timestamp': '2025-09-10 02:28:58.521359', 'step': 2481, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 80], 'flops': 2373281365952}, 'timestamp': '2025-09-10 02:28:58.551695', 'step': 2481, 'epoch': 1} {'type': 'loss', 'content': 0.14705991744995117, 'timestamp': '2025-09-10 02:28:58.556702', 'step': 2482, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 2847885091968}, 'timestamp': '2025-09-10 02:28:58.589656', 'step': 2482, 'epoch': 1} {'type': 'loss', 'content': 0.23134253919124603, 'timestamp': '2025-09-10 02:28:58.592591', 'step': 2483, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 3322488817984}, 'timestamp': '2025-09-10 02:28:58.623145', 'step': 2483, 'epoch': 1} {'type': 'loss', 'content': 0.1881987452507019, 'timestamp': '2025-09-10 02:28:58.646994', 'step': 2484, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 128], 'flops': 3797092544000}, 'timestamp': '2025-09-10 02:28:58.677023', 'step': 2484, 'epoch': 1} {'type': 'loss', 'content': 0.22995613515377045, 'timestamp': '2025-09-10 02:28:58.679760', 'step': 2485, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 2847885091968}, 'timestamp': '2025-09-10 02:28:58.711723', 'step': 2485, 'epoch': 1} {'type': 'loss', 'content': 0.23707029223442078, 'timestamp': '2025-09-10 02:28:58.714399', 'step': 2486, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 2847885091968}, 'timestamp': '2025-09-10 02:28:58.745264', 'step': 2486, 'epoch': 1} {'type': 'loss', 'content': 0.11644862592220306, 'timestamp': '2025-09-10 02:28:58.747662', 'step': 2487, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 144], 'flops': 4271696270016}, 'timestamp': '2025-09-10 02:28:58.778928', 'step': 2487, 'epoch': 1} {'type': 'loss', 'content': 0.2887480854988098, 'timestamp': '2025-09-10 02:28:58.802736', 'step': 2488, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 3322488817984}, 'timestamp': '2025-09-10 02:28:58.833772', 'step': 2488, 'epoch': 1} {'type': 'loss', 'content': 0.18933722376823425, 'timestamp': '2025-09-10 02:28:58.836120', 'step': 2489, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 128], 'flops': 3797092544000}, 'timestamp': '2025-09-10 02:28:58.867572', 'step': 2489, 'epoch': 1} {'type': 'loss', 'content': 0.14526967704296112, 'timestamp': '2025-09-10 02:28:58.871178', 'step': 2490, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 128], 'flops': 3797092544000}, 'timestamp': '2025-09-10 02:28:58.902251', 'step': 2490, 'epoch': 1} {'type': 'loss', 'content': 0.20854799449443817, 'timestamp': '2025-09-10 02:28:58.905725', 'step': 2491, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 2847885091968}, 'timestamp': '2025-09-10 02:28:58.937504', 'step': 2491, 'epoch': 1} {'type': 'loss', 'content': 0.14797650277614594, 'timestamp': '2025-09-10 02:28:58.961321', 'step': 2492, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 128], 'flops': 3797092544000}, 'timestamp': '2025-09-10 02:28:58.992295', 'step': 2492, 'epoch': 1} {'type': 'loss', 'content': 0.133588507771492, 'timestamp': '2025-09-10 02:28:58.995603', 'step': 2493, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 3322488817984}, 'timestamp': '2025-09-10 02:28:59.026625', 'step': 2493, 'epoch': 1} {'type': 'loss', 'content': 0.2946397662162781, 'timestamp': '2025-09-10 02:28:59.029524', 'step': 2494, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 3322488817984}, 'timestamp': '2025-09-10 02:28:59.060468', 'step': 2494, 'epoch': 1} {'type': 'loss', 'content': 0.25926443934440613, 'timestamp': '2025-09-10 02:28:59.063238', 'step': 2495, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 128], 'flops': 3797092544000}, 'timestamp': '2025-09-10 02:28:59.095660', 'step': 2495, 'epoch': 1} {'type': 'loss', 'content': 0.14614318311214447, 'timestamp': '2025-09-10 02:28:59.119870', 'step': 2496, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 3322488817984}, 'timestamp': '2025-09-10 02:28:59.151545', 'step': 2496, 'epoch': 1} {'type': 'loss', 'content': 0.13192030787467957, 'timestamp': '2025-09-10 02:28:59.154086', 'step': 2497, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 160], 'flops': 4746299996032}, 'timestamp': '2025-09-10 02:28:59.184868', 'step': 2497, 'epoch': 1} {'type': 'loss', 'content': 0.19357073307037354, 'timestamp': '2025-09-10 02:28:59.187881', 'step': 2498, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 3322488817984}, 'timestamp': '2025-09-10 02:28:59.218428', 'step': 2498, 'epoch': 1} {'type': 'loss', 'content': 0.11230704188346863, 'timestamp': '2025-09-10 02:28:59.222473', 'step': 2499, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 128], 'flops': 3797092544000}, 'timestamp': '2025-09-10 02:28:59.254294', 'step': 2499, 'epoch': 1} {'type': 'loss', 'content': 0.20284801721572876, 'timestamp': '2025-09-10 02:28:59.277878', 'step': 2500, 'epoch': 1} {'type': 'info', 'content': 'Checkpoint saved at step 2500', 'timestamp': '2025-09-10 02:29:04.104035', 'step': 2500, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 3322488817984}, 'timestamp': '2025-09-10 02:29:04.150514', 'step': 2500, 'epoch': 1} {'type': 'loss', 'content': 0.1758245825767517, 'timestamp': '2025-09-10 02:29:04.153772', 'step': 2501, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 3322488817984}, 'timestamp': '2025-09-10 02:29:04.189461', 'step': 2501, 'epoch': 1} {'type': 'loss', 'content': 0.20345553755760193, 'timestamp': '2025-09-10 02:29:04.193167', 'step': 2502, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 3322488817984}, 'timestamp': '2025-09-10 02:29:04.223487', 'step': 2502, 'epoch': 1} {'type': 'loss', 'content': 0.10887161642313004, 'timestamp': '2025-09-10 02:29:04.228178', 'step': 2503, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 144], 'flops': 4271696270016}, 'timestamp': '2025-09-10 02:29:04.259347', 'step': 2503, 'epoch': 1} {'type': 'loss', 'content': 0.19377204775810242, 'timestamp': '2025-09-10 02:29:04.284952', 'step': 2504, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 128], 'flops': 3797092544000}, 'timestamp': '2025-09-10 02:29:04.317208', 'step': 2504, 'epoch': 1} {'type': 'loss', 'content': 0.21601375937461853, 'timestamp': '2025-09-10 02:29:04.320222', 'step': 2505, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 3322488817984}, 'timestamp': '2025-09-10 02:29:04.352645', 'step': 2505, 'epoch': 1} {'type': 'loss', 'content': 0.14417816698551178, 'timestamp': '2025-09-10 02:29:04.355914', 'step': 2506, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 2847885091968}, 'timestamp': '2025-09-10 02:29:04.399204', 'step': 2506, 'epoch': 1} {'type': 'loss', 'content': 0.1556844413280487, 'timestamp': '2025-09-10 02:29:04.401578', 'step': 2507, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 3322488817984}, 'timestamp': '2025-09-10 02:29:04.439628', 'step': 2507, 'epoch': 1} {'type': 'loss', 'content': 0.29271432757377625, 'timestamp': '2025-09-10 02:29:04.466326', 'step': 2508, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 128], 'flops': 3797092544000}, 'timestamp': '2025-09-10 02:29:04.507032', 'step': 2508, 'epoch': 1} {'type': 'loss', 'content': 0.10718739032745361, 'timestamp': '2025-09-10 02:29:04.511561', 'step': 2509, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 2847885091968}, 'timestamp': '2025-09-10 02:29:04.542489', 'step': 2509, 'epoch': 1} {'type': 'loss', 'content': 0.1549156904220581, 'timestamp': '2025-09-10 02:29:04.546986', 'step': 2510, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 3322488817984}, 'timestamp': '2025-09-10 02:29:04.580395', 'step': 2510, 'epoch': 1} {'type': 'loss', 'content': 0.17969505488872528, 'timestamp': '2025-09-10 02:29:04.585622', 'step': 2511, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 128], 'flops': 3797092544000}, 'timestamp': '2025-09-10 02:29:04.637620', 'step': 2511, 'epoch': 1} {'type': 'loss', 'content': 0.15963158011436462, 'timestamp': '2025-09-10 02:29:04.662392', 'step': 2512, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 2847885091968}, 'timestamp': '2025-09-10 02:29:04.698512', 'step': 2512, 'epoch': 1} {'type': 'loss', 'content': 0.29851827025413513, 'timestamp': '2025-09-10 02:29:04.700806', 'step': 2513, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 2847885091968}, 'timestamp': '2025-09-10 02:29:04.730919', 'step': 2513, 'epoch': 1} {'type': 'loss', 'content': 0.15238814055919647, 'timestamp': '2025-09-10 02:29:04.733260', 'step': 2514, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 3322488817984}, 'timestamp': '2025-09-10 02:29:04.763347', 'step': 2514, 'epoch': 1} {'type': 'loss', 'content': 0.1703653335571289, 'timestamp': '2025-09-10 02:29:04.765929', 'step': 2515, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 3322488817984}, 'timestamp': '2025-09-10 02:29:04.795944', 'step': 2515, 'epoch': 1} {'type': 'loss', 'content': 0.20641960203647614, 'timestamp': '2025-09-10 02:29:04.819599', 'step': 2516, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 2847885091968}, 'timestamp': '2025-09-10 02:29:04.850814', 'step': 2516, 'epoch': 1} {'type': 'loss', 'content': 0.19807486236095428, 'timestamp': '2025-09-10 02:29:04.853273', 'step': 2517, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 2847885091968}, 'timestamp': '2025-09-10 02:29:04.883288', 'step': 2517, 'epoch': 1} {'type': 'loss', 'content': 0.11277609318494797, 'timestamp': '2025-09-10 02:29:04.885736', 'step': 2518, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 144], 'flops': 4271696270016}, 'timestamp': '2025-09-10 02:29:04.916464', 'step': 2518, 'epoch': 1} {'type': 'loss', 'content': 0.15290617942810059, 'timestamp': '2025-09-10 02:29:04.919320', 'step': 2519, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 2847885091968}, 'timestamp': '2025-09-10 02:29:04.950733', 'step': 2519, 'epoch': 1} {'type': 'loss', 'content': 0.17381733655929565, 'timestamp': '2025-09-10 02:29:04.975139', 'step': 2520, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 128], 'flops': 3797092544000}, 'timestamp': '2025-09-10 02:29:05.006509', 'step': 2520, 'epoch': 1} {'type': 'loss', 'content': 0.3082517087459564, 'timestamp': '2025-09-10 02:29:05.008766', 'step': 2521, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 3322488817984}, 'timestamp': '2025-09-10 02:29:05.044051', 'step': 2521, 'epoch': 1} {'type': 'loss', 'content': 0.08430135995149612, 'timestamp': '2025-09-10 02:29:05.051452', 'step': 2522, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 128], 'flops': 3797092544000}, 'timestamp': '2025-09-10 02:29:05.084300', 'step': 2522, 'epoch': 1} {'type': 'loss', 'content': 0.13247017562389374, 'timestamp': '2025-09-10 02:29:05.086803', 'step': 2523, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 2847885091968}, 'timestamp': '2025-09-10 02:29:05.116858', 'step': 2523, 'epoch': 1} {'type': 'loss', 'content': 0.1427409052848816, 'timestamp': '2025-09-10 02:29:05.140452', 'step': 2524, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 3322488817984}, 'timestamp': '2025-09-10 02:29:05.172521', 'step': 2524, 'epoch': 1} {'type': 'loss', 'content': 0.23928110301494598, 'timestamp': '2025-09-10 02:29:05.175018', 'step': 2525, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 2847885091968}, 'timestamp': '2025-09-10 02:29:05.205888', 'step': 2525, 'epoch': 1} {'type': 'loss', 'content': 0.12197444587945938, 'timestamp': '2025-09-10 02:29:05.208459', 'step': 2526, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 3322488817984}, 'timestamp': '2025-09-10 02:29:05.238134', 'step': 2526, 'epoch': 1} {'type': 'loss', 'content': 0.25056928396224976, 'timestamp': '2025-09-10 02:29:05.240414', 'step': 2527, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 128], 'flops': 3797092544000}, 'timestamp': '2025-09-10 02:29:05.270023', 'step': 2527, 'epoch': 1} {'type': 'loss', 'content': 0.2672160565853119, 'timestamp': '2025-09-10 02:29:05.293617', 'step': 2528, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 144], 'flops': 4271696270016}, 'timestamp': '2025-09-10 02:29:05.325124', 'step': 2528, 'epoch': 1} {'type': 'loss', 'content': 0.1860681176185608, 'timestamp': '2025-09-10 02:29:05.327942', 'step': 2529, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 2847885091968}, 'timestamp': '2025-09-10 02:29:05.360433', 'step': 2529, 'epoch': 1} {'type': 'loss', 'content': 0.20995952188968658, 'timestamp': '2025-09-10 02:29:05.362716', 'step': 2530, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 144], 'flops': 4271696270016}, 'timestamp': '2025-09-10 02:29:05.395875', 'step': 2530, 'epoch': 1} {'type': 'loss', 'content': 0.20078465342521667, 'timestamp': '2025-09-10 02:29:05.399310', 'step': 2531, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 128], 'flops': 3797092544000}, 'timestamp': '2025-09-10 02:29:05.430122', 'step': 2531, 'epoch': 1} {'type': 'loss', 'content': 0.18274222314357758, 'timestamp': '2025-09-10 02:29:05.454056', 'step': 2532, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 128], 'flops': 3797092544000}, 'timestamp': '2025-09-10 02:29:05.484560', 'step': 2532, 'epoch': 1} {'type': 'loss', 'content': 0.18443655967712402, 'timestamp': '2025-09-10 02:29:05.489025', 'step': 2533, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 128], 'flops': 3797092544000}, 'timestamp': '2025-09-10 02:29:05.528544', 'step': 2533, 'epoch': 1} {'type': 'loss', 'content': 0.1360720545053482, 'timestamp': '2025-09-10 02:29:05.531478', 'step': 2534, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 2847885091968}, 'timestamp': '2025-09-10 02:29:05.565212', 'step': 2534, 'epoch': 1} {'type': 'loss', 'content': 0.17437171936035156, 'timestamp': '2025-09-10 02:29:05.570943', 'step': 2535, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 2847885091968}, 'timestamp': '2025-09-10 02:29:05.624322', 'step': 2535, 'epoch': 1} {'type': 'loss', 'content': 0.22308939695358276, 'timestamp': '2025-09-10 02:29:05.648107', 'step': 2536, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 2847885091968}, 'timestamp': '2025-09-10 02:29:05.678948', 'step': 2536, 'epoch': 1} {'type': 'loss', 'content': 0.09668607264757156, 'timestamp': '2025-09-10 02:29:05.681266', 'step': 2537, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 128], 'flops': 3797092544000}, 'timestamp': '2025-09-10 02:29:05.712713', 'step': 2537, 'epoch': 1} {'type': 'loss', 'content': 0.10703276097774506, 'timestamp': '2025-09-10 02:29:05.716484', 'step': 2538, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 2847885091968}, 'timestamp': '2025-09-10 02:29:05.761403', 'step': 2538, 'epoch': 1} {'type': 'loss', 'content': 0.18169960379600525, 'timestamp': '2025-09-10 02:29:05.766112', 'step': 2539, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 128], 'flops': 3797092544000}, 'timestamp': '2025-09-10 02:29:05.799315', 'step': 2539, 'epoch': 1} {'type': 'loss', 'content': 0.18719260394573212, 'timestamp': '2025-09-10 02:29:05.824193', 'step': 2540, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 144], 'flops': 4271696270016}, 'timestamp': '2025-09-10 02:29:05.858770', 'step': 2540, 'epoch': 1} {'type': 'loss', 'content': 0.19732403755187988, 'timestamp': '2025-09-10 02:29:05.861714', 'step': 2541, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 128], 'flops': 3797092544000}, 'timestamp': '2025-09-10 02:29:05.895029', 'step': 2541, 'epoch': 1} {'type': 'loss', 'content': 0.10995238274335861, 'timestamp': '2025-09-10 02:29:05.897819', 'step': 2542, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 2847885091968}, 'timestamp': '2025-09-10 02:29:05.941726', 'step': 2542, 'epoch': 1} {'type': 'loss', 'content': 0.11855259537696838, 'timestamp': '2025-09-10 02:29:05.961448', 'step': 2543, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 2847885091968}, 'timestamp': '2025-09-10 02:29:06.036044', 'step': 2543, 'epoch': 1} {'type': 'loss', 'content': 0.18063904345035553, 'timestamp': '2025-09-10 02:29:06.076382', 'step': 2544, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 2847885091968}, 'timestamp': '2025-09-10 02:29:06.153254', 'step': 2544, 'epoch': 1} {'type': 'loss', 'content': 0.17579048871994019, 'timestamp': '2025-09-10 02:29:06.173027', 'step': 2545, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 3322488817984}, 'timestamp': '2025-09-10 02:29:06.244738', 'step': 2545, 'epoch': 1} {'type': 'loss', 'content': 0.14706186950206757, 'timestamp': '2025-09-10 02:29:06.260430', 'step': 2546, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 128], 'flops': 3797092544000}, 'timestamp': '2025-09-10 02:29:06.329971', 'step': 2546, 'epoch': 1} {'type': 'loss', 'content': 0.1973029524087906, 'timestamp': '2025-09-10 02:29:06.350243', 'step': 2547, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 3322488817984}, 'timestamp': '2025-09-10 02:29:06.419900', 'step': 2547, 'epoch': 1} {'type': 'loss', 'content': 0.2525072693824768, 'timestamp': '2025-09-10 02:29:06.458837', 'step': 2548, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 3322488817984}, 'timestamp': '2025-09-10 02:29:06.522465', 'step': 2548, 'epoch': 1} {'type': 'loss', 'content': 0.22745400667190552, 'timestamp': '2025-09-10 02:29:06.538247', 'step': 2549, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 144], 'flops': 4271696270016}, 'timestamp': '2025-09-10 02:29:06.623012', 'step': 2549, 'epoch': 1} {'type': 'loss', 'content': 0.11257676780223846, 'timestamp': '2025-09-10 02:29:06.640685', 'step': 2550, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 2847885091968}, 'timestamp': '2025-09-10 02:29:06.709035', 'step': 2550, 'epoch': 1} {'type': 'loss', 'content': 0.21380089223384857, 'timestamp': '2025-09-10 02:29:06.725072', 'step': 2551, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 2847885091968}, 'timestamp': '2025-09-10 02:29:06.794861', 'step': 2551, 'epoch': 1} {'type': 'loss', 'content': 0.1153918132185936, 'timestamp': '2025-09-10 02:29:06.833470', 'step': 2552, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 128], 'flops': 3797092544000}, 'timestamp': '2025-09-10 02:29:06.915381', 'step': 2552, 'epoch': 1} {'type': 'loss', 'content': 0.21339376270771027, 'timestamp': '2025-09-10 02:29:06.931259', 'step': 2553, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 160], 'flops': 4746299996032}, 'timestamp': '2025-09-10 02:29:06.992244', 'step': 2553, 'epoch': 1} {'type': 'loss', 'content': 0.16899581253528595, 'timestamp': '2025-09-10 02:29:06.997515', 'step': 2554, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 3322488817984}, 'timestamp': '2025-09-10 02:29:07.029885', 'step': 2554, 'epoch': 1} {'type': 'loss', 'content': 0.19768358767032623, 'timestamp': '2025-09-10 02:29:07.033374', 'step': 2555, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 2847885091968}, 'timestamp': '2025-09-10 02:29:07.064367', 'step': 2555, 'epoch': 1} {'type': 'loss', 'content': 0.1488928645849228, 'timestamp': '2025-09-10 02:29:07.089261', 'step': 2556, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 128], 'flops': 3797092544000}, 'timestamp': '2025-09-10 02:29:07.123185', 'step': 2556, 'epoch': 1} {'type': 'loss', 'content': 0.1372831165790558, 'timestamp': '2025-09-10 02:29:07.125968', 'step': 2557, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 80], 'flops': 2373281365952}, 'timestamp': '2025-09-10 02:29:07.161774', 'step': 2557, 'epoch': 1} {'type': 'loss', 'content': 0.13676294684410095, 'timestamp': '2025-09-10 02:29:07.166543', 'step': 2558, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 3322488817984}, 'timestamp': '2025-09-10 02:29:07.197218', 'step': 2558, 'epoch': 1} {'type': 'loss', 'content': 0.0949331596493721, 'timestamp': '2025-09-10 02:29:07.202885', 'step': 2559, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 3322488817984}, 'timestamp': '2025-09-10 02:29:07.235566', 'step': 2559, 'epoch': 1} {'type': 'loss', 'content': 0.22161230444908142, 'timestamp': '2025-09-10 02:29:07.261095', 'step': 2560, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 128], 'flops': 3797092544000}, 'timestamp': '2025-09-10 02:29:07.291685', 'step': 2560, 'epoch': 1} {'type': 'loss', 'content': 0.12660059332847595, 'timestamp': '2025-09-10 02:29:07.294647', 'step': 2561, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 80], 'flops': 2373281365952}, 'timestamp': '2025-09-10 02:29:07.326417', 'step': 2561, 'epoch': 1} {'type': 'loss', 'content': 0.06589655578136444, 'timestamp': '2025-09-10 02:29:07.329132', 'step': 2562, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 3322488817984}, 'timestamp': '2025-09-10 02:29:07.361850', 'step': 2562, 'epoch': 1} {'type': 'loss', 'content': 0.09102779626846313, 'timestamp': '2025-09-10 02:29:07.364473', 'step': 2563, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 80], 'flops': 2373281365952}, 'timestamp': '2025-09-10 02:29:07.394407', 'step': 2563, 'epoch': 1} {'type': 'loss', 'content': 0.08218231797218323, 'timestamp': '2025-09-10 02:29:07.418334', 'step': 2564, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 128], 'flops': 3797092544000}, 'timestamp': '2025-09-10 02:29:07.449050', 'step': 2564, 'epoch': 1} {'type': 'loss', 'content': 0.16115565598011017, 'timestamp': '2025-09-10 02:29:07.451875', 'step': 2565, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 144], 'flops': 4271696270016}, 'timestamp': '2025-09-10 02:29:07.483091', 'step': 2565, 'epoch': 1} {'type': 'loss', 'content': 0.14032311737537384, 'timestamp': '2025-09-10 02:29:07.485970', 'step': 2566, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 2847885091968}, 'timestamp': '2025-09-10 02:29:07.517640', 'step': 2566, 'epoch': 1} {'type': 'loss', 'content': 0.1477043181657791, 'timestamp': '2025-09-10 02:29:07.521678', 'step': 2567, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 128], 'flops': 3797092544000}, 'timestamp': '2025-09-10 02:29:07.554303', 'step': 2567, 'epoch': 1} {'type': 'loss', 'content': 0.14502424001693726, 'timestamp': '2025-09-10 02:29:07.578447', 'step': 2568, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 128], 'flops': 3797092544000}, 'timestamp': '2025-09-10 02:29:07.616995', 'step': 2568, 'epoch': 1} {'type': 'loss', 'content': 0.11147982627153397, 'timestamp': '2025-09-10 02:29:07.619749', 'step': 2569, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 128], 'flops': 3797092544000}, 'timestamp': '2025-09-10 02:29:07.649719', 'step': 2569, 'epoch': 1} {'type': 'loss', 'content': 0.16898052394390106, 'timestamp': '2025-09-10 02:29:07.653272', 'step': 2570, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 144], 'flops': 4271696270016}, 'timestamp': '2025-09-10 02:29:07.684140', 'step': 2570, 'epoch': 1} {'type': 'loss', 'content': 0.25251469016075134, 'timestamp': '2025-09-10 02:29:07.686635', 'step': 2571, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 2847885091968}, 'timestamp': '2025-09-10 02:29:07.717439', 'step': 2571, 'epoch': 1} {'type': 'loss', 'content': 0.1464998573064804, 'timestamp': '2025-09-10 02:29:07.742225', 'step': 2572, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 3322488817984}, 'timestamp': '2025-09-10 02:29:07.773376', 'step': 2572, 'epoch': 1} {'type': 'loss', 'content': 0.09964415431022644, 'timestamp': '2025-09-10 02:29:07.775638', 'step': 2573, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 2847885091968}, 'timestamp': '2025-09-10 02:29:07.805840', 'step': 2573, 'epoch': 1} {'type': 'loss', 'content': 0.13140757381916046, 'timestamp': '2025-09-10 02:29:07.808154', 'step': 2574, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 176], 'flops': 5220903722048}, 'timestamp': '2025-09-10 02:29:07.840546', 'step': 2574, 'epoch': 1} {'type': 'loss', 'content': 0.1540285050868988, 'timestamp': '2025-09-10 02:29:07.844542', 'step': 2575, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 3322488817984}, 'timestamp': '2025-09-10 02:29:07.875996', 'step': 2575, 'epoch': 1} {'type': 'loss', 'content': 0.08478930592536926, 'timestamp': '2025-09-10 02:29:07.899723', 'step': 2576, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 3322488817984}, 'timestamp': '2025-09-10 02:29:07.931036', 'step': 2576, 'epoch': 1} {'type': 'loss', 'content': 0.11007839441299438, 'timestamp': '2025-09-10 02:29:07.933732', 'step': 2577, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 80], 'flops': 2373281365952}, 'timestamp': '2025-09-10 02:29:07.965003', 'step': 2577, 'epoch': 1} {'type': 'loss', 'content': 0.24835443496704102, 'timestamp': '2025-09-10 02:29:07.967657', 'step': 2578, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 2847885091968}, 'timestamp': '2025-09-10 02:29:07.999444', 'step': 2578, 'epoch': 1} {'type': 'loss', 'content': 0.2203468680381775, 'timestamp': '2025-09-10 02:29:08.002094', 'step': 2579, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 128], 'flops': 3797092544000}, 'timestamp': '2025-09-10 02:29:08.032426', 'step': 2579, 'epoch': 1} {'type': 'loss', 'content': 0.08321608603000641, 'timestamp': '2025-09-10 02:29:08.056257', 'step': 2580, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 128], 'flops': 3797092544000}, 'timestamp': '2025-09-10 02:29:08.087336', 'step': 2580, 'epoch': 1} {'type': 'loss', 'content': 0.18291229009628296, 'timestamp': '2025-09-10 02:29:08.090579', 'step': 2581, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 128], 'flops': 3797092544000}, 'timestamp': '2025-09-10 02:29:08.122532', 'step': 2581, 'epoch': 1} {'type': 'loss', 'content': 0.18857809901237488, 'timestamp': '2025-09-10 02:29:08.125516', 'step': 2582, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 144], 'flops': 4271696270016}, 'timestamp': '2025-09-10 02:29:08.157496', 'step': 2582, 'epoch': 1} {'type': 'loss', 'content': 0.1571415811777115, 'timestamp': '2025-09-10 02:29:08.159840', 'step': 2583, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 3322488817984}, 'timestamp': '2025-09-10 02:29:08.190307', 'step': 2583, 'epoch': 1} {'type': 'loss', 'content': 0.18051812052726746, 'timestamp': '2025-09-10 02:29:08.214604', 'step': 2584, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 144], 'flops': 4271696270016}, 'timestamp': '2025-09-10 02:29:08.246618', 'step': 2584, 'epoch': 1} {'type': 'loss', 'content': 0.1563502699136734, 'timestamp': '2025-09-10 02:29:08.248983', 'step': 2585, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 2847885091968}, 'timestamp': '2025-09-10 02:29:08.279907', 'step': 2585, 'epoch': 1} {'type': 'loss', 'content': 0.13886462152004242, 'timestamp': '2025-09-10 02:29:08.282678', 'step': 2586, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 3322488817984}, 'timestamp': '2025-09-10 02:29:08.312931', 'step': 2586, 'epoch': 1} {'type': 'loss', 'content': 0.16433186829090118, 'timestamp': '2025-09-10 02:29:08.315633', 'step': 2587, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 3322488817984}, 'timestamp': '2025-09-10 02:29:08.346681', 'step': 2587, 'epoch': 1} {'type': 'loss', 'content': 0.18111003935337067, 'timestamp': '2025-09-10 02:29:08.370199', 'step': 2588, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 3322488817984}, 'timestamp': '2025-09-10 02:29:08.401432', 'step': 2588, 'epoch': 1} {'type': 'loss', 'content': 0.15658795833587646, 'timestamp': '2025-09-10 02:29:08.403705', 'step': 2589, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 128], 'flops': 3797092544000}, 'timestamp': '2025-09-10 02:29:08.437227', 'step': 2589, 'epoch': 1} {'type': 'loss', 'content': 0.11907454580068588, 'timestamp': '2025-09-10 02:29:08.439835', 'step': 2590, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 128], 'flops': 3797092544000}, 'timestamp': '2025-09-10 02:29:08.478416', 'step': 2590, 'epoch': 1} {'type': 'loss', 'content': 0.18467536568641663, 'timestamp': '2025-09-10 02:29:08.480377', 'step': 2591, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 3322488817984}, 'timestamp': '2025-09-10 02:29:08.510458', 'step': 2591, 'epoch': 1} {'type': 'loss', 'content': 0.14444439113140106, 'timestamp': '2025-09-10 02:29:08.533796', 'step': 2592, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 128], 'flops': 3797092544000}, 'timestamp': '2025-09-10 02:29:08.566701', 'step': 2592, 'epoch': 1} {'type': 'loss', 'content': 0.17315492033958435, 'timestamp': '2025-09-10 02:29:08.569093', 'step': 2593, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 3322488817984}, 'timestamp': '2025-09-10 02:29:08.599894', 'step': 2593, 'epoch': 1} {'type': 'loss', 'content': 0.16090141236782074, 'timestamp': '2025-09-10 02:29:08.602918', 'step': 2594, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 3322488817984}, 'timestamp': '2025-09-10 02:29:08.637681', 'step': 2594, 'epoch': 1} {'type': 'loss', 'content': 0.06501688063144684, 'timestamp': '2025-09-10 02:29:08.641785', 'step': 2595, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 3322488817984}, 'timestamp': '2025-09-10 02:29:08.677347', 'step': 2595, 'epoch': 1} {'type': 'loss', 'content': 0.2973721921443939, 'timestamp': '2025-09-10 02:29:08.701210', 'step': 2596, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 3322488817984}, 'timestamp': '2025-09-10 02:29:08.733357', 'step': 2596, 'epoch': 1} {'type': 'loss', 'content': 0.08351250737905502, 'timestamp': '2025-09-10 02:29:08.735351', 'step': 2597, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 2847885091968}, 'timestamp': '2025-09-10 02:29:08.771525', 'step': 2597, 'epoch': 1} {'type': 'loss', 'content': 0.11214742809534073, 'timestamp': '2025-09-10 02:29:08.773889', 'step': 2598, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 3322488817984}, 'timestamp': '2025-09-10 02:29:08.806419', 'step': 2598, 'epoch': 1} {'type': 'loss', 'content': 0.17815400660037994, 'timestamp': '2025-09-10 02:29:08.811021', 'step': 2599, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 128], 'flops': 3797092544000}, 'timestamp': '2025-09-10 02:29:08.846529', 'step': 2599, 'epoch': 1} {'type': 'loss', 'content': 0.21446877717971802, 'timestamp': '2025-09-10 02:29:08.871999', 'step': 2600, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 3322488817984}, 'timestamp': '2025-09-10 02:29:08.903205', 'step': 2600, 'epoch': 1} {'type': 'loss', 'content': 0.13085974752902985, 'timestamp': '2025-09-10 02:29:08.905619', 'step': 2601, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 128], 'flops': 3797092544000}, 'timestamp': '2025-09-10 02:29:08.938712', 'step': 2601, 'epoch': 1} {'type': 'loss', 'content': 0.19422678649425507, 'timestamp': '2025-09-10 02:29:08.940838', 'step': 2602, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 2847885091968}, 'timestamp': '2025-09-10 02:29:08.971883', 'step': 2602, 'epoch': 1} {'type': 'loss', 'content': 0.1320684552192688, 'timestamp': '2025-09-10 02:29:08.974151', 'step': 2603, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 128], 'flops': 3797092544000}, 'timestamp': '2025-09-10 02:29:09.007682', 'step': 2603, 'epoch': 1} {'type': 'loss', 'content': 0.13177061080932617, 'timestamp': '2025-09-10 02:29:09.033827', 'step': 2604, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 160], 'flops': 4746299996032}, 'timestamp': '2025-09-10 02:29:09.064207', 'step': 2604, 'epoch': 1} {'type': 'loss', 'content': 0.1760057657957077, 'timestamp': '2025-09-10 02:29:09.067091', 'step': 2605, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 144], 'flops': 4271696270016}, 'timestamp': '2025-09-10 02:29:09.100255', 'step': 2605, 'epoch': 1} {'type': 'loss', 'content': 0.24287335574626923, 'timestamp': '2025-09-10 02:29:09.102509', 'step': 2606, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 3322488817984}, 'timestamp': '2025-09-10 02:29:09.134940', 'step': 2606, 'epoch': 1} {'type': 'loss', 'content': 0.2076103687286377, 'timestamp': '2025-09-10 02:29:09.137526', 'step': 2607, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 3322488817984}, 'timestamp': '2025-09-10 02:29:09.168863', 'step': 2607, 'epoch': 1} {'type': 'loss', 'content': 0.10104767978191376, 'timestamp': '2025-09-10 02:29:09.192425', 'step': 2608, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 128], 'flops': 3797092544000}, 'timestamp': '2025-09-10 02:29:09.222906', 'step': 2608, 'epoch': 1} {'type': 'loss', 'content': 0.20857520401477814, 'timestamp': '2025-09-10 02:29:09.225356', 'step': 2609, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 2847885091968}, 'timestamp': '2025-09-10 02:29:09.255928', 'step': 2609, 'epoch': 1} {'type': 'loss', 'content': 0.17813663184642792, 'timestamp': '2025-09-10 02:29:09.260491', 'step': 2610, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 128], 'flops': 3797092544000}, 'timestamp': '2025-09-10 02:29:09.291955', 'step': 2610, 'epoch': 1} {'type': 'loss', 'content': 0.14483560621738434, 'timestamp': '2025-09-10 02:29:09.294421', 'step': 2611, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 128], 'flops': 3797092544000}, 'timestamp': '2025-09-10 02:29:09.325484', 'step': 2611, 'epoch': 1} {'type': 'loss', 'content': 0.13625957071781158, 'timestamp': '2025-09-10 02:29:09.349172', 'step': 2612, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 144], 'flops': 4271696270016}, 'timestamp': '2025-09-10 02:29:09.380827', 'step': 2612, 'epoch': 1} {'type': 'loss', 'content': 0.18499760329723358, 'timestamp': '2025-09-10 02:29:09.383883', 'step': 2613, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 128], 'flops': 3797092544000}, 'timestamp': '2025-09-10 02:29:09.414206', 'step': 2613, 'epoch': 1} {'type': 'loss', 'content': 0.11427085846662521, 'timestamp': '2025-09-10 02:29:09.416928', 'step': 2614, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 2847885091968}, 'timestamp': '2025-09-10 02:29:09.447978', 'step': 2614, 'epoch': 1} {'type': 'loss', 'content': 0.13258850574493408, 'timestamp': '2025-09-10 02:29:09.451144', 'step': 2615, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 2847885091968}, 'timestamp': '2025-09-10 02:29:09.481497', 'step': 2615, 'epoch': 1} {'type': 'loss', 'content': 0.29051196575164795, 'timestamp': '2025-09-10 02:29:09.505282', 'step': 2616, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 144], 'flops': 4271696270016}, 'timestamp': '2025-09-10 02:29:09.537412', 'step': 2616, 'epoch': 1} {'type': 'loss', 'content': 0.3098452091217041, 'timestamp': '2025-09-10 02:29:09.539924', 'step': 2617, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 128], 'flops': 3797092544000}, 'timestamp': '2025-09-10 02:29:09.570235', 'step': 2617, 'epoch': 1} {'type': 'loss', 'content': 0.13164564967155457, 'timestamp': '2025-09-10 02:29:09.572394', 'step': 2618, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 2847885091968}, 'timestamp': '2025-09-10 02:29:09.604527', 'step': 2618, 'epoch': 1} {'type': 'loss', 'content': 0.20270568132400513, 'timestamp': '2025-09-10 02:29:09.607389', 'step': 2619, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 80], 'flops': 2373281365952}, 'timestamp': '2025-09-10 02:29:09.641588', 'step': 2619, 'epoch': 1} {'type': 'loss', 'content': 0.19052745401859283, 'timestamp': '2025-09-10 02:29:09.665546', 'step': 2620, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 2847885091968}, 'timestamp': '2025-09-10 02:29:09.696941', 'step': 2620, 'epoch': 1} {'type': 'loss', 'content': 0.11827556043863297, 'timestamp': '2025-09-10 02:29:09.699884', 'step': 2621, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 128], 'flops': 3797092544000}, 'timestamp': '2025-09-10 02:29:09.735418', 'step': 2621, 'epoch': 1} {'type': 'loss', 'content': 0.10365267843008041, 'timestamp': '2025-09-10 02:29:09.739965', 'step': 2622, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 2847885091968}, 'timestamp': '2025-09-10 02:29:09.773508', 'step': 2622, 'epoch': 1} {'type': 'loss', 'content': 0.14921388030052185, 'timestamp': '2025-09-10 02:29:09.777144', 'step': 2623, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 128], 'flops': 3797092544000}, 'timestamp': '2025-09-10 02:29:09.823931', 'step': 2623, 'epoch': 1} {'type': 'loss', 'content': 0.22818425297737122, 'timestamp': '2025-09-10 02:29:09.847798', 'step': 2624, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 2847885091968}, 'timestamp': '2025-09-10 02:29:09.878859', 'step': 2624, 'epoch': 1} {'type': 'loss', 'content': 0.13090042769908905, 'timestamp': '2025-09-10 02:29:09.881255', 'step': 2625, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 144], 'flops': 4271696270016}, 'timestamp': '2025-09-10 02:29:09.911625', 'step': 2625, 'epoch': 1} {'type': 'loss', 'content': 0.11297999322414398, 'timestamp': '2025-09-10 02:29:09.915483', 'step': 2626, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 128], 'flops': 3797092544000}, 'timestamp': '2025-09-10 02:29:09.947928', 'step': 2626, 'epoch': 1} {'type': 'loss', 'content': 0.16988657414913177, 'timestamp': '2025-09-10 02:29:09.950739', 'step': 2627, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 3322488817984}, 'timestamp': '2025-09-10 02:29:09.981230', 'step': 2627, 'epoch': 1} {'type': 'loss', 'content': 0.16449230909347534, 'timestamp': '2025-09-10 02:29:10.006937', 'step': 2628, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 80], 'flops': 2373281365952}, 'timestamp': '2025-09-10 02:29:10.037403', 'step': 2628, 'epoch': 1} {'type': 'loss', 'content': 0.185740128159523, 'timestamp': '2025-09-10 02:29:10.040118', 'step': 2629, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 128], 'flops': 3797092544000}, 'timestamp': '2025-09-10 02:29:10.070721', 'step': 2629, 'epoch': 1} {'type': 'loss', 'content': 0.08524608612060547, 'timestamp': '2025-09-10 02:29:10.073320', 'step': 2630, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 2847885091968}, 'timestamp': '2025-09-10 02:29:10.104852', 'step': 2630, 'epoch': 1} {'type': 'loss', 'content': 0.21275420486927032, 'timestamp': '2025-09-10 02:29:10.107337', 'step': 2631, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 128], 'flops': 3797092544000}, 'timestamp': '2025-09-10 02:29:10.137431', 'step': 2631, 'epoch': 1} {'type': 'loss', 'content': 0.13557972013950348, 'timestamp': '2025-09-10 02:29:10.161756', 'step': 2632, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 3322488817984}, 'timestamp': '2025-09-10 02:29:10.193200', 'step': 2632, 'epoch': 1} {'type': 'loss', 'content': 0.2501484155654907, 'timestamp': '2025-09-10 02:29:10.196174', 'step': 2633, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 128], 'flops': 3797092544000}, 'timestamp': '2025-09-10 02:29:10.227381', 'step': 2633, 'epoch': 1} {'type': 'loss', 'content': 0.16448721289634705, 'timestamp': '2025-09-10 02:29:10.229682', 'step': 2634, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 3322488817984}, 'timestamp': '2025-09-10 02:29:10.259590', 'step': 2634, 'epoch': 1} {'type': 'loss', 'content': 0.16422943770885468, 'timestamp': '2025-09-10 02:29:10.262682', 'step': 2635, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 3322488817984}, 'timestamp': '2025-09-10 02:29:10.293813', 'step': 2635, 'epoch': 1} {'type': 'loss', 'content': 0.18627215921878815, 'timestamp': '2025-09-10 02:29:10.317997', 'step': 2636, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 2847885091968}, 'timestamp': '2025-09-10 02:29:10.350899', 'step': 2636, 'epoch': 1} {'type': 'loss', 'content': 0.12077018618583679, 'timestamp': '2025-09-10 02:29:10.353700', 'step': 2637, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 128], 'flops': 3797092544000}, 'timestamp': '2025-09-10 02:29:10.384488', 'step': 2637, 'epoch': 1} {'type': 'loss', 'content': 0.15335333347320557, 'timestamp': '2025-09-10 02:29:10.387642', 'step': 2638, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 3322488817984}, 'timestamp': '2025-09-10 02:29:10.419258', 'step': 2638, 'epoch': 1} {'type': 'loss', 'content': 0.14041922986507416, 'timestamp': '2025-09-10 02:29:10.421656', 'step': 2639, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 128], 'flops': 3797092544000}, 'timestamp': '2025-09-10 02:29:10.452374', 'step': 2639, 'epoch': 1} {'type': 'loss', 'content': 0.12905840575695038, 'timestamp': '2025-09-10 02:29:10.476225', 'step': 2640, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 2847885091968}, 'timestamp': '2025-09-10 02:29:10.507206', 'step': 2640, 'epoch': 1} {'type': 'loss', 'content': 0.24643860757350922, 'timestamp': '2025-09-10 02:29:10.509691', 'step': 2641, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 2847885091968}, 'timestamp': '2025-09-10 02:29:10.540694', 'step': 2641, 'epoch': 1} {'type': 'loss', 'content': 0.19758227467536926, 'timestamp': '2025-09-10 02:29:10.544221', 'step': 2642, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 144], 'flops': 4271696270016}, 'timestamp': '2025-09-10 02:29:10.574471', 'step': 2642, 'epoch': 1} {'type': 'loss', 'content': 0.14137983322143555, 'timestamp': '2025-09-10 02:29:10.577109', 'step': 2643, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 3322488817984}, 'timestamp': '2025-09-10 02:29:10.608673', 'step': 2643, 'epoch': 1} {'type': 'loss', 'content': 0.08086734265089035, 'timestamp': '2025-09-10 02:29:10.636030', 'step': 2644, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 128], 'flops': 3797092544000}, 'timestamp': '2025-09-10 02:29:10.667634', 'step': 2644, 'epoch': 1} {'type': 'loss', 'content': 0.12471944838762283, 'timestamp': '2025-09-10 02:29:10.670600', 'step': 2645, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 128], 'flops': 3797092544000}, 'timestamp': '2025-09-10 02:29:10.700765', 'step': 2645, 'epoch': 1} {'type': 'loss', 'content': 0.1362348198890686, 'timestamp': '2025-09-10 02:29:10.703190', 'step': 2646, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 3322488817984}, 'timestamp': '2025-09-10 02:29:10.735277', 'step': 2646, 'epoch': 1} {'type': 'loss', 'content': 0.12426245212554932, 'timestamp': '2025-09-10 02:29:10.737764', 'step': 2647, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 128], 'flops': 3797092544000}, 'timestamp': '2025-09-10 02:29:10.770177', 'step': 2647, 'epoch': 1} {'type': 'loss', 'content': 0.14116890728473663, 'timestamp': '2025-09-10 02:29:10.794086', 'step': 2648, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 2847885091968}, 'timestamp': '2025-09-10 02:29:10.824837', 'step': 2648, 'epoch': 1} {'type': 'loss', 'content': 0.1208072230219841, 'timestamp': '2025-09-10 02:29:10.827637', 'step': 2649, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 2847885091968}, 'timestamp': '2025-09-10 02:29:10.861121', 'step': 2649, 'epoch': 1} {'type': 'loss', 'content': 0.21842773258686066, 'timestamp': '2025-09-10 02:29:10.863555', 'step': 2650, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 2847885091968}, 'timestamp': '2025-09-10 02:29:10.895112', 'step': 2650, 'epoch': 1} {'type': 'loss', 'content': 0.26105713844299316, 'timestamp': '2025-09-10 02:29:10.897642', 'step': 2651, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 128], 'flops': 3797092544000}, 'timestamp': '2025-09-10 02:29:10.928524', 'step': 2651, 'epoch': 1} {'type': 'loss', 'content': 0.21852290630340576, 'timestamp': '2025-09-10 02:29:10.952719', 'step': 2652, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 3322488817984}, 'timestamp': '2025-09-10 02:29:10.983507', 'step': 2652, 'epoch': 1} {'type': 'loss', 'content': 0.133620023727417, 'timestamp': '2025-09-10 02:29:10.985641', 'step': 2653, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 2847885091968}, 'timestamp': '2025-09-10 02:29:11.018570', 'step': 2653, 'epoch': 1} {'type': 'loss', 'content': 0.07820887118577957, 'timestamp': '2025-09-10 02:29:11.021232', 'step': 2654, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 144], 'flops': 4271696270016}, 'timestamp': '2025-09-10 02:29:11.051880', 'step': 2654, 'epoch': 1} {'type': 'loss', 'content': 0.2563129961490631, 'timestamp': '2025-09-10 02:29:11.054495', 'step': 2655, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 2847885091968}, 'timestamp': '2025-09-10 02:29:11.084558', 'step': 2655, 'epoch': 1} {'type': 'loss', 'content': 0.1307261884212494, 'timestamp': '2025-09-10 02:29:11.109610', 'step': 2656, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 176], 'flops': 5220903722048}, 'timestamp': '2025-09-10 02:29:11.140239', 'step': 2656, 'epoch': 1} {'type': 'loss', 'content': 0.12286408990621567, 'timestamp': '2025-09-10 02:29:11.142944', 'step': 2657, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 3322488817984}, 'timestamp': '2025-09-10 02:29:11.173344', 'step': 2657, 'epoch': 1} {'type': 'loss', 'content': 0.14156439900398254, 'timestamp': '2025-09-10 02:29:11.175718', 'step': 2658, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 2847885091968}, 'timestamp': '2025-09-10 02:29:11.206474', 'step': 2658, 'epoch': 1} {'type': 'loss', 'content': 0.132701575756073, 'timestamp': '2025-09-10 02:29:11.208686', 'step': 2659, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 128], 'flops': 3797092544000}, 'timestamp': '2025-09-10 02:29:11.239641', 'step': 2659, 'epoch': 1} {'type': 'loss', 'content': 0.20332929491996765, 'timestamp': '2025-09-10 02:29:11.265267', 'step': 2660, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 3322488817984}, 'timestamp': '2025-09-10 02:29:11.296519', 'step': 2660, 'epoch': 1} {'type': 'loss', 'content': 0.29332399368286133, 'timestamp': '2025-09-10 02:29:11.299005', 'step': 2661, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 2847885091968}, 'timestamp': '2025-09-10 02:29:11.329194', 'step': 2661, 'epoch': 1} {'type': 'loss', 'content': 0.1745585799217224, 'timestamp': '2025-09-10 02:29:11.331527', 'step': 2662, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 144], 'flops': 4271696270016}, 'timestamp': '2025-09-10 02:29:11.363534', 'step': 2662, 'epoch': 1} {'type': 'loss', 'content': 0.14220911264419556, 'timestamp': '2025-09-10 02:29:11.366331', 'step': 2663, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 2847885091968}, 'timestamp': '2025-09-10 02:29:11.397793', 'step': 2663, 'epoch': 1} {'type': 'loss', 'content': 0.16468003392219543, 'timestamp': '2025-09-10 02:29:11.422198', 'step': 2664, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 2847885091968}, 'timestamp': '2025-09-10 02:29:11.452845', 'step': 2664, 'epoch': 1} {'type': 'loss', 'content': 0.18845073878765106, 'timestamp': '2025-09-10 02:29:11.455399', 'step': 2665, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 2847885091968}, 'timestamp': '2025-09-10 02:29:11.485614', 'step': 2665, 'epoch': 1} {'type': 'loss', 'content': 0.11531680077314377, 'timestamp': '2025-09-10 02:29:11.488091', 'step': 2666, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 144], 'flops': 4271696270016}, 'timestamp': '2025-09-10 02:29:11.518785', 'step': 2666, 'epoch': 1} {'type': 'loss', 'content': 0.19715946912765503, 'timestamp': '2025-09-10 02:29:11.521219', 'step': 2667, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 128], 'flops': 3797092544000}, 'timestamp': '2025-09-10 02:29:11.551621', 'step': 2667, 'epoch': 1} {'type': 'loss', 'content': 0.1643209010362625, 'timestamp': '2025-09-10 02:29:11.575335', 'step': 2668, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 144], 'flops': 4271696270016}, 'timestamp': '2025-09-10 02:29:11.866885', 'step': 2668, 'epoch': 1} {'type': 'loss', 'content': 0.2170131802558899, 'timestamp': '2025-09-10 02:29:11.869590', 'step': 2669, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 128], 'flops': 3797092544000}, 'timestamp': '2025-09-10 02:29:11.902038', 'step': 2669, 'epoch': 1} {'type': 'loss', 'content': 0.1068413257598877, 'timestamp': '2025-09-10 02:29:11.904460', 'step': 2670, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 3322488817984}, 'timestamp': '2025-09-10 02:29:11.936750', 'step': 2670, 'epoch': 1} {'type': 'loss', 'content': 0.1615072637796402, 'timestamp': '2025-09-10 02:29:11.939403', 'step': 2671, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 2847885091968}, 'timestamp': '2025-09-10 02:29:11.972946', 'step': 2671, 'epoch': 1} {'type': 'loss', 'content': 0.1142156720161438, 'timestamp': '2025-09-10 02:29:11.996943', 'step': 2672, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 3322488817984}, 'timestamp': '2025-09-10 02:29:12.028637', 'step': 2672, 'epoch': 1} {'type': 'loss', 'content': 0.13359098136425018, 'timestamp': '2025-09-10 02:29:12.035375', 'step': 2673, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 3322488817984}, 'timestamp': '2025-09-10 02:29:12.068476', 'step': 2673, 'epoch': 1} {'type': 'loss', 'content': 0.12861774861812592, 'timestamp': '2025-09-10 02:29:12.071123', 'step': 2674, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 2847885091968}, 'timestamp': '2025-09-10 02:29:12.109872', 'step': 2674, 'epoch': 1} {'type': 'loss', 'content': 0.18326593935489655, 'timestamp': '2025-09-10 02:29:12.113597', 'step': 2675, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 192], 'flops': 5695507448064}, 'timestamp': '2025-09-10 02:29:12.144402', 'step': 2675, 'epoch': 1} {'type': 'loss', 'content': 0.12140163034200668, 'timestamp': '2025-09-10 02:29:12.171465', 'step': 2676, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 128], 'flops': 3797092544000}, 'timestamp': '2025-09-10 02:29:12.202486', 'step': 2676, 'epoch': 1} {'type': 'loss', 'content': 0.14553895592689514, 'timestamp': '2025-09-10 02:29:12.205326', 'step': 2677, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 2847885091968}, 'timestamp': '2025-09-10 02:29:12.235723', 'step': 2677, 'epoch': 1} {'type': 'loss', 'content': 0.12216582149267197, 'timestamp': '2025-09-10 02:29:12.240977', 'step': 2678, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 128], 'flops': 3797092544000}, 'timestamp': '2025-09-10 02:29:12.276373', 'step': 2678, 'epoch': 1} {'type': 'loss', 'content': 0.17029723525047302, 'timestamp': '2025-09-10 02:29:12.279029', 'step': 2679, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 3322488817984}, 'timestamp': '2025-09-10 02:29:12.308887', 'step': 2679, 'epoch': 1} {'type': 'loss', 'content': 0.19378742575645447, 'timestamp': '2025-09-10 02:29:12.332640', 'step': 2680, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 3322488817984}, 'timestamp': '2025-09-10 02:29:12.367391', 'step': 2680, 'epoch': 1} {'type': 'loss', 'content': 0.08975417166948318, 'timestamp': '2025-09-10 02:29:12.370502', 'step': 2681, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 144], 'flops': 4271696270016}, 'timestamp': '2025-09-10 02:29:12.402927', 'step': 2681, 'epoch': 1} {'type': 'loss', 'content': 0.17274105548858643, 'timestamp': '2025-09-10 02:29:12.405562', 'step': 2682, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 2847885091968}, 'timestamp': '2025-09-10 02:29:12.440406', 'step': 2682, 'epoch': 1} {'type': 'loss', 'content': 0.16264842450618744, 'timestamp': '2025-09-10 02:29:12.443570', 'step': 2683, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 160], 'flops': 4746299996032}, 'timestamp': '2025-09-10 02:29:12.479281', 'step': 2683, 'epoch': 1} {'type': 'loss', 'content': 0.17548762261867523, 'timestamp': '2025-09-10 02:29:12.503445', 'step': 2684, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 3322488817984}, 'timestamp': '2025-09-10 02:29:12.534605', 'step': 2684, 'epoch': 1} {'type': 'loss', 'content': 0.11880664527416229, 'timestamp': '2025-09-10 02:29:12.537586', 'step': 2685, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 2847885091968}, 'timestamp': '2025-09-10 02:29:12.568487', 'step': 2685, 'epoch': 1} {'type': 'loss', 'content': 0.182473286986351, 'timestamp': '2025-09-10 02:29:12.570955', 'step': 2686, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 2847885091968}, 'timestamp': '2025-09-10 02:29:12.601135', 'step': 2686, 'epoch': 1} {'type': 'loss', 'content': 0.1542688012123108, 'timestamp': '2025-09-10 02:29:12.610864', 'step': 2687, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 3322488817984}, 'timestamp': '2025-09-10 02:29:12.645391', 'step': 2687, 'epoch': 1} {'type': 'loss', 'content': 0.13376836478710175, 'timestamp': '2025-09-10 02:29:12.670048', 'step': 2688, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 3322488817984}, 'timestamp': '2025-09-10 02:29:12.701486', 'step': 2688, 'epoch': 1} {'type': 'loss', 'content': 0.12617775797843933, 'timestamp': '2025-09-10 02:29:12.704389', 'step': 2689, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 3322488817984}, 'timestamp': '2025-09-10 02:29:12.739179', 'step': 2689, 'epoch': 1} {'type': 'loss', 'content': 0.17655536532402039, 'timestamp': '2025-09-10 02:29:12.743665', 'step': 2690, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 2847885091968}, 'timestamp': '2025-09-10 02:29:12.776155', 'step': 2690, 'epoch': 1} {'type': 'loss', 'content': 0.16531866788864136, 'timestamp': '2025-09-10 02:29:12.778585', 'step': 2691, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 2847885091968}, 'timestamp': '2025-09-10 02:29:12.809579', 'step': 2691, 'epoch': 1} {'type': 'loss', 'content': 0.22301068902015686, 'timestamp': '2025-09-10 02:29:12.834018', 'step': 2692, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 3322488817984}, 'timestamp': '2025-09-10 02:29:12.864720', 'step': 2692, 'epoch': 1} {'type': 'loss', 'content': 0.15800832211971283, 'timestamp': '2025-09-10 02:29:12.867162', 'step': 2693, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 144], 'flops': 4271696270016}, 'timestamp': '2025-09-10 02:29:12.897878', 'step': 2693, 'epoch': 1} {'type': 'loss', 'content': 0.17157818377017975, 'timestamp': '2025-09-10 02:29:12.900450', 'step': 2694, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 3322488817984}, 'timestamp': '2025-09-10 02:29:12.931957', 'step': 2694, 'epoch': 1} {'type': 'loss', 'content': 0.1662951409816742, 'timestamp': '2025-09-10 02:29:12.934689', 'step': 2695, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 2847885091968}, 'timestamp': '2025-09-10 02:29:12.967305', 'step': 2695, 'epoch': 1} {'type': 'loss', 'content': 0.13482385873794556, 'timestamp': '2025-09-10 02:29:12.991172', 'step': 2696, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 2847885091968}, 'timestamp': '2025-09-10 02:29:13.026469', 'step': 2696, 'epoch': 1} {'type': 'loss', 'content': 0.13791577517986298, 'timestamp': '2025-09-10 02:29:13.031816', 'step': 2697, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 144], 'flops': 4271696270016}, 'timestamp': '2025-09-10 02:29:13.076265', 'step': 2697, 'epoch': 1} {'type': 'loss', 'content': 0.16650491952896118, 'timestamp': '2025-09-10 02:29:13.079142', 'step': 2698, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 160], 'flops': 4746299996032}, 'timestamp': '2025-09-10 02:29:13.110684', 'step': 2698, 'epoch': 1} {'type': 'loss', 'content': 0.18652407824993134, 'timestamp': '2025-09-10 02:29:13.113526', 'step': 2699, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 3322488817984}, 'timestamp': '2025-09-10 02:29:13.143996', 'step': 2699, 'epoch': 1} {'type': 'loss', 'content': 0.184004545211792, 'timestamp': '2025-09-10 02:29:13.172743', 'step': 2700, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 128], 'flops': 3797092544000}, 'timestamp': '2025-09-10 02:29:13.215328', 'step': 2700, 'epoch': 1} {'type': 'loss', 'content': 0.11115620285272598, 'timestamp': '2025-09-10 02:29:13.218132', 'step': 2701, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 2847885091968}, 'timestamp': '2025-09-10 02:29:13.248773', 'step': 2701, 'epoch': 1} {'type': 'loss', 'content': 0.16857397556304932, 'timestamp': '2025-09-10 02:29:13.254665', 'step': 2702, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 3322488817984}, 'timestamp': '2025-09-10 02:29:13.289082', 'step': 2702, 'epoch': 1} {'type': 'loss', 'content': 0.27158883213996887, 'timestamp': '2025-09-10 02:29:13.292607', 'step': 2703, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 128], 'flops': 3797092544000}, 'timestamp': '2025-09-10 02:29:13.327476', 'step': 2703, 'epoch': 1} {'type': 'loss', 'content': 0.17748412489891052, 'timestamp': '2025-09-10 02:29:13.352693', 'step': 2704, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 128], 'flops': 3797092544000}, 'timestamp': '2025-09-10 02:29:13.383540', 'step': 2704, 'epoch': 1} {'type': 'loss', 'content': 0.20288455486297607, 'timestamp': '2025-09-10 02:29:13.386061', 'step': 2705, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 3322488817984}, 'timestamp': '2025-09-10 02:29:13.416850', 'step': 2705, 'epoch': 1} {'type': 'loss', 'content': 0.1528743952512741, 'timestamp': '2025-09-10 02:29:13.420588', 'step': 2706, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 3322488817984}, 'timestamp': '2025-09-10 02:29:13.461285', 'step': 2706, 'epoch': 1} {'type': 'loss', 'content': 0.2620070278644562, 'timestamp': '2025-09-10 02:29:13.467442', 'step': 2707, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 80], 'flops': 2373281365952}, 'timestamp': '2025-09-10 02:29:13.503881', 'step': 2707, 'epoch': 1} {'type': 'loss', 'content': 0.20562659204006195, 'timestamp': '2025-09-10 02:29:13.527874', 'step': 2708, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 144], 'flops': 4271696270016}, 'timestamp': '2025-09-10 02:29:13.564861', 'step': 2708, 'epoch': 1} {'type': 'loss', 'content': 0.22910092771053314, 'timestamp': '2025-09-10 02:29:13.569761', 'step': 2709, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 2847885091968}, 'timestamp': '2025-09-10 02:29:13.603868', 'step': 2709, 'epoch': 1} {'type': 'loss', 'content': 0.23444342613220215, 'timestamp': '2025-09-10 02:29:13.613532', 'step': 2710, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 3322488817984}, 'timestamp': '2025-09-10 02:29:13.650289', 'step': 2710, 'epoch': 1} {'type': 'loss', 'content': 0.17452724277973175, 'timestamp': '2025-09-10 02:29:13.653649', 'step': 2711, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 2847885091968}, 'timestamp': '2025-09-10 02:29:13.691650', 'step': 2711, 'epoch': 1} {'type': 'loss', 'content': 0.21719662845134735, 'timestamp': '2025-09-10 02:29:13.717312', 'step': 2712, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 144], 'flops': 4271696270016}, 'timestamp': '2025-09-10 02:29:13.751742', 'step': 2712, 'epoch': 1} {'type': 'loss', 'content': 0.22609886527061462, 'timestamp': '2025-09-10 02:29:13.754402', 'step': 2713, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 128], 'flops': 3797092544000}, 'timestamp': '2025-09-10 02:29:13.785498', 'step': 2713, 'epoch': 1} {'type': 'loss', 'content': 0.12202678620815277, 'timestamp': '2025-09-10 02:29:13.788089', 'step': 2714, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 128], 'flops': 3797092544000}, 'timestamp': '2025-09-10 02:29:13.824599', 'step': 2714, 'epoch': 1} {'type': 'loss', 'content': 0.19181162118911743, 'timestamp': '2025-09-10 02:29:13.826967', 'step': 2715, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 144], 'flops': 4271696270016}, 'timestamp': '2025-09-10 02:29:13.859735', 'step': 2715, 'epoch': 1} {'type': 'loss', 'content': 0.12190113961696625, 'timestamp': '2025-09-10 02:29:13.884719', 'step': 2716, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 3322488817984}, 'timestamp': '2025-09-10 02:29:13.915213', 'step': 2716, 'epoch': 1} {'type': 'loss', 'content': 0.1273532509803772, 'timestamp': '2025-09-10 02:29:13.917557', 'step': 2717, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 2847885091968}, 'timestamp': '2025-09-10 02:29:13.948571', 'step': 2717, 'epoch': 1} {'type': 'loss', 'content': 0.18298614025115967, 'timestamp': '2025-09-10 02:29:13.953237', 'step': 2718, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 128], 'flops': 3797092544000}, 'timestamp': '2025-09-10 02:29:13.984433', 'step': 2718, 'epoch': 1} {'type': 'loss', 'content': 0.12998750805854797, 'timestamp': '2025-09-10 02:29:13.987403', 'step': 2719, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 3322488817984}, 'timestamp': '2025-09-10 02:29:14.017939', 'step': 2719, 'epoch': 1} {'type': 'loss', 'content': 0.30143317580223083, 'timestamp': '2025-09-10 02:29:14.041813', 'step': 2720, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 160], 'flops': 4746299996032}, 'timestamp': '2025-09-10 02:29:14.074302', 'step': 2720, 'epoch': 1} {'type': 'loss', 'content': 0.23312652111053467, 'timestamp': '2025-09-10 02:29:14.077159', 'step': 2721, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 3322488817984}, 'timestamp': '2025-09-10 02:29:14.108298', 'step': 2721, 'epoch': 1} {'type': 'loss', 'content': 0.1575908213853836, 'timestamp': '2025-09-10 02:29:14.111193', 'step': 2722, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 128], 'flops': 3797092544000}, 'timestamp': '2025-09-10 02:29:14.143752', 'step': 2722, 'epoch': 1} {'type': 'loss', 'content': 0.18460987508296967, 'timestamp': '2025-09-10 02:29:14.146883', 'step': 2723, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 2847885091968}, 'timestamp': '2025-09-10 02:29:14.178597', 'step': 2723, 'epoch': 1} {'type': 'loss', 'content': 0.17983600497245789, 'timestamp': '2025-09-10 02:29:14.202567', 'step': 2724, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 2847885091968}, 'timestamp': '2025-09-10 02:29:14.234974', 'step': 2724, 'epoch': 1} {'type': 'loss', 'content': 0.21065455675125122, 'timestamp': '2025-09-10 02:29:14.239776', 'step': 2725, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 3322488817984}, 'timestamp': '2025-09-10 02:29:14.271978', 'step': 2725, 'epoch': 1} {'type': 'loss', 'content': 0.12267100065946579, 'timestamp': '2025-09-10 02:29:14.274671', 'step': 2726, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 128], 'flops': 3797092544000}, 'timestamp': '2025-09-10 02:29:14.307785', 'step': 2726, 'epoch': 1} {'type': 'loss', 'content': 0.1751479208469391, 'timestamp': '2025-09-10 02:29:14.310818', 'step': 2727, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 3322488817984}, 'timestamp': '2025-09-10 02:29:14.341340', 'step': 2727, 'epoch': 1} {'type': 'loss', 'content': 0.2381720393896103, 'timestamp': '2025-09-10 02:29:14.365261', 'step': 2728, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 2847885091968}, 'timestamp': '2025-09-10 02:29:14.396215', 'step': 2728, 'epoch': 1} {'type': 'loss', 'content': 0.14057466387748718, 'timestamp': '2025-09-10 02:29:14.398587', 'step': 2729, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 3322488817984}, 'timestamp': '2025-09-10 02:29:14.428919', 'step': 2729, 'epoch': 1} {'type': 'loss', 'content': 0.13827607035636902, 'timestamp': '2025-09-10 02:29:14.433113', 'step': 2730, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 3322488817984}, 'timestamp': '2025-09-10 02:29:14.468264', 'step': 2730, 'epoch': 1} {'type': 'loss', 'content': 0.2417091429233551, 'timestamp': '2025-09-10 02:29:14.474872', 'step': 2731, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 3322488817984}, 'timestamp': '2025-09-10 02:29:14.513716', 'step': 2731, 'epoch': 1} {'type': 'loss', 'content': 0.1467190831899643, 'timestamp': '2025-09-10 02:29:14.537239', 'step': 2732, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 144], 'flops': 4271696270016}, 'timestamp': '2025-09-10 02:29:14.568541', 'step': 2732, 'epoch': 1} {'type': 'loss', 'content': 0.1375432312488556, 'timestamp': '2025-09-10 02:29:14.571700', 'step': 2733, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 128], 'flops': 3797092544000}, 'timestamp': '2025-09-10 02:29:14.602804', 'step': 2733, 'epoch': 1} {'type': 'loss', 'content': 0.19756078720092773, 'timestamp': '2025-09-10 02:29:14.605294', 'step': 2734, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 3322488817984}, 'timestamp': '2025-09-10 02:29:14.649049', 'step': 2734, 'epoch': 1} {'type': 'loss', 'content': 0.20112761855125427, 'timestamp': '2025-09-10 02:29:14.653166', 'step': 2735, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 128], 'flops': 3797092544000}, 'timestamp': '2025-09-10 02:29:14.683460', 'step': 2735, 'epoch': 1} {'type': 'loss', 'content': 0.1779717355966568, 'timestamp': '2025-09-10 02:29:14.707404', 'step': 2736, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 160], 'flops': 4746299996032}, 'timestamp': '2025-09-10 02:29:14.738363', 'step': 2736, 'epoch': 1} {'type': 'loss', 'content': 0.11025390028953552, 'timestamp': '2025-09-10 02:29:14.741589', 'step': 2737, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 128], 'flops': 3797092544000}, 'timestamp': '2025-09-10 02:29:14.772754', 'step': 2737, 'epoch': 1} {'type': 'loss', 'content': 0.19173581898212433, 'timestamp': '2025-09-10 02:29:14.775539', 'step': 2738, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 128], 'flops': 3797092544000}, 'timestamp': '2025-09-10 02:29:14.807862', 'step': 2738, 'epoch': 1} {'type': 'loss', 'content': 0.12817148864269257, 'timestamp': '2025-09-10 02:29:14.810285', 'step': 2739, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 3322488817984}, 'timestamp': '2025-09-10 02:29:14.845189', 'step': 2739, 'epoch': 1} {'type': 'loss', 'content': 0.17852690815925598, 'timestamp': '2025-09-10 02:29:14.869136', 'step': 2740, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 144], 'flops': 4271696270016}, 'timestamp': '2025-09-10 02:29:14.900535', 'step': 2740, 'epoch': 1} {'type': 'loss', 'content': 0.1643090546131134, 'timestamp': '2025-09-10 02:29:14.903253', 'step': 2741, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 128], 'flops': 3797092544000}, 'timestamp': '2025-09-10 02:29:14.934123', 'step': 2741, 'epoch': 1} {'type': 'loss', 'content': 0.1174311637878418, 'timestamp': '2025-09-10 02:29:14.936356', 'step': 2742, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 3322488817984}, 'timestamp': '2025-09-10 02:29:14.967092', 'step': 2742, 'epoch': 1} {'type': 'loss', 'content': 0.13613273203372955, 'timestamp': '2025-09-10 02:29:14.969961', 'step': 2743, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 80], 'flops': 2373281365952}, 'timestamp': '2025-09-10 02:29:15.000411', 'step': 2743, 'epoch': 1} {'type': 'loss', 'content': 0.15690666437149048, 'timestamp': '2025-09-10 02:29:15.024767', 'step': 2744, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 128], 'flops': 3797092544000}, 'timestamp': '2025-09-10 02:29:15.055037', 'step': 2744, 'epoch': 1} {'type': 'loss', 'content': 0.2219475507736206, 'timestamp': '2025-09-10 02:29:15.057802', 'step': 2745, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 2847885091968}, 'timestamp': '2025-09-10 02:29:15.089765', 'step': 2745, 'epoch': 1} {'type': 'loss', 'content': 0.14570830762386322, 'timestamp': '2025-09-10 02:29:15.092258', 'step': 2746, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 144], 'flops': 4271696270016}, 'timestamp': '2025-09-10 02:29:15.127304', 'step': 2746, 'epoch': 1} {'type': 'loss', 'content': 0.13860753178596497, 'timestamp': '2025-09-10 02:29:15.130553', 'step': 2747, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 2847885091968}, 'timestamp': '2025-09-10 02:29:15.165134', 'step': 2747, 'epoch': 1} {'type': 'loss', 'content': 0.15188241004943848, 'timestamp': '2025-09-10 02:29:15.188855', 'step': 2748, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 208], 'flops': 6170111174080}, 'timestamp': '2025-09-10 02:29:15.220186', 'step': 2748, 'epoch': 1} {'type': 'loss', 'content': 0.19181573390960693, 'timestamp': '2025-09-10 02:29:15.224566', 'step': 2749, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 2847885091968}, 'timestamp': '2025-09-10 02:29:15.255867', 'step': 2749, 'epoch': 1} {'type': 'loss', 'content': 0.11680833995342255, 'timestamp': '2025-09-10 02:29:15.258194', 'step': 2750, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 3322488817984}, 'timestamp': '2025-09-10 02:29:15.291030', 'step': 2750, 'epoch': 1} {'type': 'loss', 'content': 0.2303098738193512, 'timestamp': '2025-09-10 02:29:15.293732', 'step': 2751, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 144], 'flops': 4271696270016}, 'timestamp': '2025-09-10 02:29:15.328217', 'step': 2751, 'epoch': 1} {'type': 'loss', 'content': 0.1259118616580963, 'timestamp': '2025-09-10 02:29:15.352412', 'step': 2752, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 80], 'flops': 2373281365952}, 'timestamp': '2025-09-10 02:29:15.391242', 'step': 2752, 'epoch': 1} {'type': 'loss', 'content': 0.20886167883872986, 'timestamp': '2025-09-10 02:29:15.394311', 'step': 2753, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 3322488817984}, 'timestamp': '2025-09-10 02:29:15.437716', 'step': 2753, 'epoch': 1} {'type': 'loss', 'content': 0.24389998614788055, 'timestamp': '2025-09-10 02:29:15.440207', 'step': 2754, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 128], 'flops': 3797092544000}, 'timestamp': '2025-09-10 02:29:15.471300', 'step': 2754, 'epoch': 1} {'type': 'loss', 'content': 0.21852710843086243, 'timestamp': '2025-09-10 02:29:15.474456', 'step': 2755, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 2847885091968}, 'timestamp': '2025-09-10 02:29:15.506915', 'step': 2755, 'epoch': 1} {'type': 'loss', 'content': 0.12989956140518188, 'timestamp': '2025-09-10 02:29:15.530893', 'step': 2756, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 128], 'flops': 3797092544000}, 'timestamp': '2025-09-10 02:29:15.561395', 'step': 2756, 'epoch': 1} {'type': 'loss', 'content': 0.17526330053806305, 'timestamp': '2025-09-10 02:29:15.563717', 'step': 2757, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 3322488817984}, 'timestamp': '2025-09-10 02:29:15.594951', 'step': 2757, 'epoch': 1} {'type': 'loss', 'content': 0.1991039514541626, 'timestamp': '2025-09-10 02:29:15.599723', 'step': 2758, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 144], 'flops': 4271696270016}, 'timestamp': '2025-09-10 02:29:15.636375', 'step': 2758, 'epoch': 1} {'type': 'loss', 'content': 0.16304543614387512, 'timestamp': '2025-09-10 02:29:15.641417', 'step': 2759, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 144], 'flops': 4271696270016}, 'timestamp': '2025-09-10 02:29:15.673513', 'step': 2759, 'epoch': 1} {'type': 'loss', 'content': 0.09515581279993057, 'timestamp': '2025-09-10 02:29:15.697566', 'step': 2760, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 2847885091968}, 'timestamp': '2025-09-10 02:29:15.728118', 'step': 2760, 'epoch': 1} {'type': 'loss', 'content': 0.15753212571144104, 'timestamp': '2025-09-10 02:29:15.730618', 'step': 2761, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 3322488817984}, 'timestamp': '2025-09-10 02:29:15.760971', 'step': 2761, 'epoch': 1} {'type': 'loss', 'content': 0.14156487584114075, 'timestamp': '2025-09-10 02:29:15.764079', 'step': 2762, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 80], 'flops': 2373281365952}, 'timestamp': '2025-09-10 02:29:15.796196', 'step': 2762, 'epoch': 1} {'type': 'loss', 'content': 0.16358165442943573, 'timestamp': '2025-09-10 02:29:15.798517', 'step': 2763, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 128], 'flops': 3797092544000}, 'timestamp': '2025-09-10 02:29:15.828604', 'step': 2763, 'epoch': 1} {'type': 'loss', 'content': 0.200249582529068, 'timestamp': '2025-09-10 02:29:15.853029', 'step': 2764, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 3322488817984}, 'timestamp': '2025-09-10 02:29:15.884855', 'step': 2764, 'epoch': 1} {'type': 'loss', 'content': 0.16641315817832947, 'timestamp': '2025-09-10 02:29:15.887123', 'step': 2765, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 160], 'flops': 4746299996032}, 'timestamp': '2025-09-10 02:29:15.918789', 'step': 2765, 'epoch': 1} {'type': 'loss', 'content': 0.19953499734401703, 'timestamp': '2025-09-10 02:29:15.921667', 'step': 2766, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 160], 'flops': 4746299996032}, 'timestamp': '2025-09-10 02:29:15.952785', 'step': 2766, 'epoch': 1} {'type': 'loss', 'content': 0.17370854318141937, 'timestamp': '2025-09-10 02:29:15.955582', 'step': 2767, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 3322488817984}, 'timestamp': '2025-09-10 02:29:15.986849', 'step': 2767, 'epoch': 1} {'type': 'loss', 'content': 0.16923241317272186, 'timestamp': '2025-09-10 02:29:16.011433', 'step': 2768, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 3322488817984}, 'timestamp': '2025-09-10 02:29:16.042097', 'step': 2768, 'epoch': 1} {'type': 'loss', 'content': 0.14778076112270355, 'timestamp': '2025-09-10 02:29:16.044840', 'step': 2769, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 2847885091968}, 'timestamp': '2025-09-10 02:29:16.075605', 'step': 2769, 'epoch': 1} {'type': 'loss', 'content': 0.21455997228622437, 'timestamp': '2025-09-10 02:29:16.078878', 'step': 2770, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 3322488817984}, 'timestamp': '2025-09-10 02:29:16.110812', 'step': 2770, 'epoch': 1} {'type': 'loss', 'content': 0.11059718579053879, 'timestamp': '2025-09-10 02:29:16.113470', 'step': 2771, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 3322488817984}, 'timestamp': '2025-09-10 02:29:16.144727', 'step': 2771, 'epoch': 1} {'type': 'loss', 'content': 0.22909922897815704, 'timestamp': '2025-09-10 02:29:16.168729', 'step': 2772, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 128], 'flops': 3797092544000}, 'timestamp': '2025-09-10 02:29:16.199750', 'step': 2772, 'epoch': 1} {'type': 'loss', 'content': 0.07628633826971054, 'timestamp': '2025-09-10 02:29:16.203913', 'step': 2773, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 2847885091968}, 'timestamp': '2025-09-10 02:29:16.234472', 'step': 2773, 'epoch': 1} {'type': 'loss', 'content': 0.2221471071243286, 'timestamp': '2025-09-10 02:29:16.236875', 'step': 2774, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 160], 'flops': 4746299996032}, 'timestamp': '2025-09-10 02:29:16.267678', 'step': 2774, 'epoch': 1} {'type': 'loss', 'content': 0.16291239857673645, 'timestamp': '2025-09-10 02:29:16.270374', 'step': 2775, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 2847885091968}, 'timestamp': '2025-09-10 02:29:16.302115', 'step': 2775, 'epoch': 1} {'type': 'loss', 'content': 0.1810550093650818, 'timestamp': '2025-09-10 02:29:16.326236', 'step': 2776, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 144], 'flops': 4271696270016}, 'timestamp': '2025-09-10 02:29:16.358478', 'step': 2776, 'epoch': 1} {'type': 'loss', 'content': 0.1525914967060089, 'timestamp': '2025-09-10 02:29:16.360967', 'step': 2777, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 3322488817984}, 'timestamp': '2025-09-10 02:29:16.391989', 'step': 2777, 'epoch': 1} {'type': 'loss', 'content': 0.13488416373729706, 'timestamp': '2025-09-10 02:29:16.394462', 'step': 2778, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 3322488817984}, 'timestamp': '2025-09-10 02:29:16.424866', 'step': 2778, 'epoch': 1} {'type': 'loss', 'content': 0.10877993702888489, 'timestamp': '2025-09-10 02:29:16.427698', 'step': 2779, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 3322488817984}, 'timestamp': '2025-09-10 02:29:16.459262', 'step': 2779, 'epoch': 1} {'type': 'loss', 'content': 0.23721492290496826, 'timestamp': '2025-09-10 02:29:16.483049', 'step': 2780, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 2847885091968}, 'timestamp': '2025-09-10 02:29:16.514772', 'step': 2780, 'epoch': 1} {'type': 'loss', 'content': 0.1815914660692215, 'timestamp': '2025-09-10 02:29:16.517321', 'step': 2781, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 2847885091968}, 'timestamp': '2025-09-10 02:29:16.547677', 'step': 2781, 'epoch': 1} {'type': 'loss', 'content': 0.15623030066490173, 'timestamp': '2025-09-10 02:29:16.550219', 'step': 2782, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 2847885091968}, 'timestamp': '2025-09-10 02:29:16.583086', 'step': 2782, 'epoch': 1} {'type': 'loss', 'content': 0.198830708861351, 'timestamp': '2025-09-10 02:29:16.585475', 'step': 2783, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 2847885091968}, 'timestamp': '2025-09-10 02:29:16.617725', 'step': 2783, 'epoch': 1} {'type': 'loss', 'content': 0.214021235704422, 'timestamp': '2025-09-10 02:29:16.641997', 'step': 2784, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 3322488817984}, 'timestamp': '2025-09-10 02:29:16.673493', 'step': 2784, 'epoch': 1} {'type': 'loss', 'content': 0.11038295924663544, 'timestamp': '2025-09-10 02:29:16.676440', 'step': 2785, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 3322488817984}, 'timestamp': '2025-09-10 02:29:16.707461', 'step': 2785, 'epoch': 1} {'type': 'loss', 'content': 0.24517780542373657, 'timestamp': '2025-09-10 02:29:16.709922', 'step': 2786, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 3322488817984}, 'timestamp': '2025-09-10 02:29:16.740662', 'step': 2786, 'epoch': 1} {'type': 'loss', 'content': 0.167384073138237, 'timestamp': '2025-09-10 02:29:16.743083', 'step': 2787, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 3322488817984}, 'timestamp': '2025-09-10 02:29:16.773577', 'step': 2787, 'epoch': 1} {'type': 'loss', 'content': 0.1691025048494339, 'timestamp': '2025-09-10 02:29:16.797270', 'step': 2788, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 144], 'flops': 4271696270016}, 'timestamp': '2025-09-10 02:29:16.828003', 'step': 2788, 'epoch': 1} {'type': 'loss', 'content': 0.1488812416791916, 'timestamp': '2025-09-10 02:29:16.830227', 'step': 2789, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 3322488817984}, 'timestamp': '2025-09-10 02:29:16.860889', 'step': 2789, 'epoch': 1} {'type': 'loss', 'content': 0.20651711523532867, 'timestamp': '2025-09-10 02:29:16.863291', 'step': 2790, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 3322488817984}, 'timestamp': '2025-09-10 02:29:16.894434', 'step': 2790, 'epoch': 1} {'type': 'loss', 'content': 0.13472303748130798, 'timestamp': '2025-09-10 02:29:16.897070', 'step': 2791, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 144], 'flops': 4271696270016}, 'timestamp': '2025-09-10 02:29:16.927304', 'step': 2791, 'epoch': 1} {'type': 'loss', 'content': 0.35608017444610596, 'timestamp': '2025-09-10 02:29:16.951177', 'step': 2792, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 128], 'flops': 3797092544000}, 'timestamp': '2025-09-10 02:29:16.983294', 'step': 2792, 'epoch': 1} {'type': 'loss', 'content': 0.15308666229248047, 'timestamp': '2025-09-10 02:29:16.985434', 'step': 2793, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 144], 'flops': 4271696270016}, 'timestamp': '2025-09-10 02:29:17.016355', 'step': 2793, 'epoch': 1} {'type': 'loss', 'content': 0.17256581783294678, 'timestamp': '2025-09-10 02:29:17.018848', 'step': 2794, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 2847885091968}, 'timestamp': '2025-09-10 02:29:17.049519', 'step': 2794, 'epoch': 1} {'type': 'loss', 'content': 0.16293171048164368, 'timestamp': '2025-09-10 02:29:17.052112', 'step': 2795, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 2847885091968}, 'timestamp': '2025-09-10 02:29:17.082325', 'step': 2795, 'epoch': 1} {'type': 'loss', 'content': 0.14042644202709198, 'timestamp': '2025-09-10 02:29:17.106270', 'step': 2796, 'epoch': 1} {'type': 'flops', 'content': [{'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1582003754624}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1898404492032}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1898404492032}, {'type': 'perplexity', 'in_batch_dim': [8, 144], 'batch_size': 8, 'flops': 2847606704256}, {'type': 'perplexity', 'in_batch_dim': [8, 64], 'batch_size': 8, 'flops': 1265603017216}, {'type': 'perplexity', 'in_batch_dim': [8, 112], 'batch_size': 8, 'flops': 2214805229440}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1898404492032}, {'type': 'perplexity', 'in_batch_dim': [8, 112], 'batch_size': 8, 'flops': 2214805229440}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1898404492032}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1898404492032}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1898404492032}, {'type': 'perplexity', 'in_batch_dim': [8, 112], 'batch_size': 8, 'flops': 2214805229440}, {'type': 'perplexity', 'in_batch_dim': [8, 112], 'batch_size': 8, 'flops': 2214805229440}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1582003754624}, {'type': 'perplexity', 'in_batch_dim': [8, 144], 'batch_size': 8, 'flops': 2847606704256}, {'type': 'perplexity', 'in_batch_dim': [8, 112], 'batch_size': 8, 'flops': 2214805229440}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1582003754624}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1898404492032}, {'type': 'perplexity', 'in_batch_dim': [8, 64], 'batch_size': 8, 'flops': 1265603017216}, {'type': 'perplexity', 'in_batch_dim': [8, 64], 'batch_size': 8, 'flops': 1265603017216}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1898404492032}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1582003754624}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1582003754624}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1582003754624}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1582003754624}, {'type': 'perplexity', 'in_batch_dim': [8, 64], 'batch_size': 8, 'flops': 1265603017216}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1898404492032}, {'type': 'perplexity', 'in_batch_dim': [8, 64], 'batch_size': 8, 'flops': 1265603017216}, {'type': 'perplexity', 'in_batch_dim': [8, 64], 'batch_size': 8, 'flops': 1265603017216}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1582003754624}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1582003754624}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1582003754624}, {'type': 'perplexity', 'in_batch_dim': [8, 112], 'batch_size': 8, 'flops': 2214805229440}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1582003754624}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1582003754624}, {'type': 'perplexity', 'in_batch_dim': [8, 160], 'batch_size': 8, 'flops': 3164007441664}, {'type': 'perplexity', 'in_batch_dim': [8, 64], 'batch_size': 8, 'flops': 1265603017216}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1898404492032}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1898404492032}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1898404492032}, {'type': 'perplexity', 'in_batch_dim': [8, 64], 'batch_size': 8, 'flops': 1265603017216}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1582003754624}, {'type': 'perplexity', 'in_batch_dim': [8, 64], 'batch_size': 8, 'flops': 1265603017216}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1582003754624}, {'type': 'perplexity', 'in_batch_dim': [8, 64], 'batch_size': 8, 'flops': 1265603017216}, {'type': 'perplexity', 'in_batch_dim': [8, 112], 'batch_size': 8, 'flops': 2214805229440}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1582003754624}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1898404492032}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1898404492032}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1898404492032}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1898404492032}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1898404492032}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1898404492032}, {'type': 'perplexity', 'in_batch_dim': [8, 64], 'batch_size': 8, 'flops': 1265603017216}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1898404492032}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1582003754624}, {'type': 'perplexity', 'in_batch_dim': [8, 64], 'batch_size': 8, 'flops': 1265603017216}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1898404492032}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1898404492032}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1898404492032}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1898404492032}, {'type': 'perplexity', 'in_batch_dim': [8, 128], 'batch_size': 8, 'flops': 2531205966848}, {'type': 'perplexity', 'in_batch_dim': [8, 112], 'batch_size': 8, 'flops': 2214805229440}, {'type': 'perplexity', 'in_batch_dim': [8, 64], 'batch_size': 8, 'flops': 1265603017216}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1582003754624}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1898404492032}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1582003754624}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1898404492032}, {'type': 'perplexity', 'in_batch_dim': [8, 64], 'batch_size': 8, 'flops': 1265603017216}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1898404492032}, {'type': 'perplexity', 'in_batch_dim': [8, 112], 'batch_size': 8, 'flops': 2214805229440}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1898404492032}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1582003754624}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1582003754624}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1582003754624}, {'type': 'perplexity', 'in_batch_dim': [8, 64], 'batch_size': 8, 'flops': 1265603017216}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1582003754624}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1898404492032}, {'type': 'perplexity', 'in_batch_dim': [8, 64], 'batch_size': 8, 'flops': 1265603017216}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1582003754624}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1898404492032}, {'type': 'perplexity', 'in_batch_dim': [8, 64], 'batch_size': 8, 'flops': 1265603017216}, {'type': 'perplexity', 'in_batch_dim': [8, 112], 'batch_size': 8, 'flops': 2214805229440}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1582003754624}, {'type': 'perplexity', 'in_batch_dim': [8, 144], 'batch_size': 8, 'flops': 2847606704256}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1582003754624}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1582003754624}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1582003754624}, {'type': 'perplexity', 'in_batch_dim': [8, 64], 'batch_size': 8, 'flops': 1265603017216}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1898404492032}, {'type': 'perplexity', 'in_batch_dim': [8, 112], 'batch_size': 8, 'flops': 2214805229440}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1898404492032}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1582003754624}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1582003754624}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1582003754624}, {'type': 'perplexity', 'in_batch_dim': [8, 112], 'batch_size': 8, 'flops': 2214805229440}, {'type': 'perplexity', 'in_batch_dim': [8, 64], 'batch_size': 8, 'flops': 1265603017216}, {'type': 'perplexity', 'in_batch_dim': [8, 112], 'batch_size': 8, 'flops': 2214805229440}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1582003754624}, {'type': 'perplexity', 'in_batch_dim': [8, 64], 'batch_size': 8, 'flops': 1265603017216}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1582003754624}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1582003754624}, {'type': 'perplexity', 'in_batch_dim': [8, 64], 'batch_size': 8, 'flops': 1265603017216}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1582003754624}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1582003754624}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1582003754624}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1898404492032}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1582003754624}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1582003754624}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1898404492032}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1898404492032}, {'type': 'perplexity', 'in_batch_dim': [8, 112], 'batch_size': 8, 'flops': 2214805229440}, {'type': 'perplexity', 'in_batch_dim': [8, 64], 'batch_size': 8, 'flops': 1265603017216}, {'type': 'perplexity', 'in_batch_dim': [8, 112], 'batch_size': 8, 'flops': 2214805229440}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1898404492032}, {'type': 'perplexity', 'in_batch_dim': [8, 112], 'batch_size': 8, 'flops': 2214805229440}, {'type': 'perplexity', 'in_batch_dim': [8, 128], 'batch_size': 8, 'flops': 2531205966848}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1582003754624}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1898404492032}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1898404492032}, {'type': 'perplexity', 'in_batch_dim': [8, 192], 'batch_size': 8, 'flops': 3796808916480}, {'type': 'perplexity', 'in_batch_dim': [8, 64], 'batch_size': 8, 'flops': 1265603017216}, {'type': 'perplexity', 'in_batch_dim': [8, 144], 'batch_size': 8, 'flops': 2847606704256}, {'type': 'perplexity', 'in_batch_dim': [8, 64], 'batch_size': 8, 'flops': 1265603017216}, {'type': 'perplexity', 'in_batch_dim': [8, 144], 'batch_size': 8, 'flops': 2847606704256}, {'type': 'perplexity', 'in_batch_dim': [8, 64], 'batch_size': 8, 'flops': 1265603017216}, {'type': 'perplexity', 'in_batch_dim': [8, 64], 'batch_size': 8, 'flops': 1265603017216}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1898404492032}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1898404492032}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1582003754624}, {'type': 'perplexity', 'in_batch_dim': [8, 128], 'batch_size': 8, 'flops': 2531205966848}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1898404492032}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1898404492032}, {'type': 'perplexity', 'in_batch_dim': [8, 112], 'batch_size': 8, 'flops': 2214805229440}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1582003754624}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1582003754624}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1898404492032}, {'type': 'perplexity', 'in_batch_dim': [8, 64], 'batch_size': 8, 'flops': 1265603017216}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1898404492032}, {'type': 'perplexity', 'in_batch_dim': [8, 112], 'batch_size': 8, 'flops': 2214805229440}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1898404492032}, {'type': 'perplexity', 'in_batch_dim': [8, 64], 'batch_size': 8, 'flops': 1265603017216}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1582003754624}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1898404492032}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1898404492032}, {'type': 'perplexity', 'in_batch_dim': [8, 64], 'batch_size': 8, 'flops': 1265603017216}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1898404492032}, {'type': 'perplexity', 'in_batch_dim': [8, 64], 'batch_size': 8, 'flops': 1265603017216}, {'type': 'perplexity', 'in_batch_dim': [8, 112], 'batch_size': 8, 'flops': 2214805229440}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1582003754624}, {'type': 'perplexity', 'in_batch_dim': [8, 128], 'batch_size': 8, 'flops': 2531205966848}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1582003754624}, {'type': 'perplexity', 'in_batch_dim': [8, 112], 'batch_size': 8, 'flops': 2214805229440}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1898404492032}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1582003754624}, {'type': 'perplexity', 'in_batch_dim': [8, 112], 'batch_size': 8, 'flops': 2214805229440}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1582003754624}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1898404492032}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1898404492032}, {'type': 'perplexity', 'in_batch_dim': [8, 128], 'batch_size': 8, 'flops': 2531205966848}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1898404492032}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1898404492032}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1582003754624}, {'type': 'perplexity', 'in_batch_dim': [8, 112], 'batch_size': 8, 'flops': 2214805229440}, {'type': 'perplexity', 'in_batch_dim': [8, 128], 'batch_size': 8, 'flops': 2531205966848}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1898404492032}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1582003754624}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1582003754624}, {'type': 'perplexity', 'in_batch_dim': [8, 128], 'batch_size': 8, 'flops': 2531205966848}, {'type': 'perplexity', 'in_batch_dim': [8, 112], 'batch_size': 8, 'flops': 2214805229440}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1582003754624}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1582003754624}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1898404492032}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1582003754624}, {'type': 'perplexity', 'in_batch_dim': [8, 64], 'batch_size': 8, 'flops': 1265603017216}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1582003754624}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1582003754624}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1582003754624}, {'type': 'perplexity', 'in_batch_dim': [8, 112], 'batch_size': 8, 'flops': 2214805229440}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1582003754624}, {'type': 'perplexity', 'in_batch_dim': [8, 64], 'batch_size': 8, 'flops': 1265603017216}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1582003754624}, {'type': 'perplexity', 'in_batch_dim': [8, 128], 'batch_size': 8, 'flops': 2531205966848}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1898404492032}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1898404492032}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1582003754624}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1898404492032}, {'type': 'perplexity', 'in_batch_dim': [8, 64], 'batch_size': 8, 'flops': 1265603017216}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1898404492032}, {'type': 'perplexity', 'in_batch_dim': [8, 112], 'batch_size': 8, 'flops': 2214805229440}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1898404492032}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1582003754624}, {'type': 'perplexity', 'in_batch_dim': [8, 112], 'batch_size': 8, 'flops': 2214805229440}, {'type': 'perplexity', 'in_batch_dim': [8, 112], 'batch_size': 8, 'flops': 2214805229440}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1898404492032}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1898404492032}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1898404492032}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1898404492032}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1582003754624}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1582003754624}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1582003754624}, {'type': 'perplexity', 'in_batch_dim': [8, 112], 'batch_size': 8, 'flops': 2214805229440}, {'type': 'perplexity', 'in_batch_dim': [8, 64], 'batch_size': 8, 'flops': 1265603017216}, {'type': 'perplexity', 'in_batch_dim': [8, 144], 'batch_size': 8, 'flops': 2847606704256}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1898404492032}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1898404492032}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1898404492032}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1898404492032}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1898404492032}, {'type': 'perplexity', 'in_batch_dim': [8, 112], 'batch_size': 8, 'flops': 2214805229440}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1582003754624}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1582003754624}, {'type': 'perplexity', 'in_batch_dim': [8, 64], 'batch_size': 8, 'flops': 1265603017216}, {'type': 'perplexity', 'in_batch_dim': [8, 112], 'batch_size': 8, 'flops': 2214805229440}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1582003754624}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1898404492032}, {'type': 'perplexity', 'in_batch_dim': [8, 64], 'batch_size': 8, 'flops': 1265603017216}, {'type': 'perplexity', 'in_batch_dim': [8, 112], 'batch_size': 8, 'flops': 2214805229440}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1898404492032}, {'type': 'perplexity', 'in_batch_dim': [8, 64], 'batch_size': 8, 'flops': 1265603017216}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1582003754624}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1582003754624}, {'type': 'perplexity', 'in_batch_dim': [8, 64], 'batch_size': 8, 'flops': 1265603017216}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1898404492032}, {'type': 'perplexity', 'in_batch_dim': [8, 64], 'batch_size': 8, 'flops': 1265603017216}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1582003754624}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1898404492032}, {'type': 'perplexity', 'in_batch_dim': [8, 112], 'batch_size': 8, 'flops': 2214805229440}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1582003754624}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1898404492032}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1582003754624}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1898404492032}, {'type': 'perplexity', 'in_batch_dim': [8, 112], 'batch_size': 8, 'flops': 2214805229440}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1898404492032}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1898404492032}, {'type': 'perplexity', 'in_batch_dim': [8, 64], 'batch_size': 8, 'flops': 1265603017216}, {'type': 'perplexity', 'in_batch_dim': [8, 64], 'batch_size': 8, 'flops': 1265603017216}, {'type': 'perplexity', 'in_batch_dim': [8, 128], 'batch_size': 8, 'flops': 2531205966848}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1582003754624}, {'type': 'perplexity', 'in_batch_dim': [8, 64], 'batch_size': 8, 'flops': 1265603017216}, {'type': 'perplexity', 'in_batch_dim': [8, 112], 'batch_size': 8, 'flops': 2214805229440}, {'type': 'perplexity', 'in_batch_dim': [8, 112], 'batch_size': 8, 'flops': 2214805229440}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1898404492032}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1582003754624}, {'type': 'perplexity', 'in_batch_dim': [8, 128], 'batch_size': 8, 'flops': 2531205966848}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1898404492032}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1582003754624}, {'type': 'perplexity', 'in_batch_dim': [8, 112], 'batch_size': 8, 'flops': 2214805229440}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1898404492032}, {'type': 'perplexity', 'in_batch_dim': [8, 64], 'batch_size': 8, 'flops': 1265603017216}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1582003754624}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1898404492032}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1898404492032}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1582003754624}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1582003754624}, {'type': 'perplexity', 'in_batch_dim': [8, 64], 'batch_size': 8, 'flops': 1265603017216}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1898404492032}, {'type': 'perplexity', 'in_batch_dim': [8, 112], 'batch_size': 8, 'flops': 2214805229440}, {'type': 'perplexity', 'in_batch_dim': [8, 64], 'batch_size': 8, 'flops': 1265603017216}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1898404492032}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1582003754624}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1582003754624}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1898404492032}, {'type': 'perplexity', 'in_batch_dim': [8, 112], 'batch_size': 8, 'flops': 2214805229440}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1898404492032}, {'type': 'perplexity', 'in_batch_dim': [8, 112], 'batch_size': 8, 'flops': 2214805229440}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1582003754624}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1582003754624}, {'type': 'perplexity', 'in_batch_dim': [8, 64], 'batch_size': 8, 'flops': 1265603017216}, {'type': 'perplexity', 'in_batch_dim': [8, 112], 'batch_size': 8, 'flops': 2214805229440}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1582003754624}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1898404492032}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1582003754624}, {'type': 'perplexity', 'in_batch_dim': [8, 112], 'batch_size': 8, 'flops': 2214805229440}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1898404492032}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1898404492032}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1582003754624}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1898404492032}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1582003754624}, {'type': 'perplexity', 'in_batch_dim': [8, 112], 'batch_size': 8, 'flops': 2214805229440}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1582003754624}, {'type': 'perplexity', 'in_batch_dim': [8, 64], 'batch_size': 8, 'flops': 1265603017216}, {'type': 'perplexity', 'in_batch_dim': [8, 64], 'batch_size': 8, 'flops': 1265603017216}, {'type': 'perplexity', 'in_batch_dim': [8, 48], 'batch_size': 8, 'flops': 949202279808}, {'type': 'perplexity', 'in_batch_dim': [8, 112], 'batch_size': 8, 'flops': 2214805229440}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1582003754624}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1898404492032}, {'type': 'perplexity', 'in_batch_dim': [8, 128], 'batch_size': 8, 'flops': 2531205966848}, {'type': 'perplexity', 'in_batch_dim': [8, 112], 'batch_size': 8, 'flops': 2214805229440}, {'type': 'perplexity', 'in_batch_dim': [8, 128], 'batch_size': 8, 'flops': 2531205966848}, {'type': 'perplexity', 'in_batch_dim': [8, 112], 'batch_size': 8, 'flops': 2214805229440}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1582003754624}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1898404492032}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1582003754624}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1582003754624}, {'type': 'perplexity', 'in_batch_dim': [8, 112], 'batch_size': 8, 'flops': 2214805229440}, {'type': 'perplexity', 'in_batch_dim': [8, 112], 'batch_size': 8, 'flops': 2214805229440}, {'type': 'perplexity', 'in_batch_dim': [8, 64], 'batch_size': 8, 'flops': 1265603017216}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1898404492032}, {'type': 'perplexity', 'in_batch_dim': [8, 112], 'batch_size': 8, 'flops': 2214805229440}, {'type': 'perplexity', 'in_batch_dim': [8, 64], 'batch_size': 8, 'flops': 1265603017216}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1898404492032}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1582003754624}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1898404492032}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1898404492032}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1898404492032}, {'type': 'perplexity', 'in_batch_dim': [8, 48], 'batch_size': 8, 'flops': 949202279808}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1898404492032}, {'type': 'perplexity', 'in_batch_dim': [8, 112], 'batch_size': 8, 'flops': 2214805229440}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1582003754624}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1898404492032}, {'type': 'perplexity', 'in_batch_dim': [8, 64], 'batch_size': 8, 'flops': 1265603017216}, {'type': 'perplexity', 'in_batch_dim': [8, 112], 'batch_size': 8, 'flops': 2214805229440}, {'type': 'perplexity', 'in_batch_dim': [8, 64], 'batch_size': 8, 'flops': 1265603017216}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1582003754624}, {'type': 'perplexity', 'in_batch_dim': [8, 64], 'batch_size': 8, 'flops': 1265603017216}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1582003754624}, {'type': 'perplexity', 'in_batch_dim': [8, 112], 'batch_size': 8, 'flops': 2214805229440}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1582003754624}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1582003754624}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1582003754624}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1582003754624}, {'type': 'perplexity', 'in_batch_dim': [8, 112], 'batch_size': 8, 'flops': 2214805229440}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1582003754624}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1898404492032}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1898404492032}, {'type': 'perplexity', 'in_batch_dim': [8, 112], 'batch_size': 8, 'flops': 2214805229440}, {'type': 'perplexity', 'in_batch_dim': [8, 144], 'batch_size': 8, 'flops': 2847606704256}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1582003754624}, {'type': 'perplexity', 'in_batch_dim': [8, 64], 'batch_size': 8, 'flops': 1265603017216}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1898404492032}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1898404492032}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1582003754624}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1898404492032}, {'type': 'perplexity', 'in_batch_dim': [8, 128], 'batch_size': 8, 'flops': 2531205966848}, {'type': 'perplexity', 'in_batch_dim': [8, 112], 'batch_size': 8, 'flops': 2214805229440}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1898404492032}, {'type': 'perplexity', 'in_batch_dim': [8, 64], 'batch_size': 8, 'flops': 1265603017216}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1898404492032}, {'type': 'perplexity', 'in_batch_dim': [8, 160], 'batch_size': 8, 'flops': 3164007441664}, {'type': 'perplexity', 'in_batch_dim': [8, 64], 'batch_size': 8, 'flops': 1265603017216}, {'type': 'perplexity', 'in_batch_dim': [8, 112], 'batch_size': 8, 'flops': 2214805229440}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1898404492032}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1898404492032}, {'type': 'perplexity', 'in_batch_dim': [8, 144], 'batch_size': 8, 'flops': 2847606704256}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1582003754624}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1898404492032}, {'type': 'perplexity', 'in_batch_dim': [8, 64], 'batch_size': 8, 'flops': 1265603017216}, {'type': 'perplexity', 'in_batch_dim': [8, 64], 'batch_size': 8, 'flops': 1265603017216}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1898404492032}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1898404492032}, {'type': 'perplexity', 'in_batch_dim': [8, 112], 'batch_size': 8, 'flops': 2214805229440}, {'type': 'perplexity', 'in_batch_dim': [8, 112], 'batch_size': 8, 'flops': 2214805229440}, {'type': 'perplexity', 'in_batch_dim': [8, 128], 'batch_size': 8, 'flops': 2531205966848}, {'type': 'perplexity', 'in_batch_dim': [8, 112], 'batch_size': 8, 'flops': 2214805229440}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1582003754624}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1582003754624}, {'type': 'perplexity', 'in_batch_dim': [8, 112], 'batch_size': 8, 'flops': 2214805229440}, {'type': 'perplexity', 'in_batch_dim': [8, 128], 'batch_size': 8, 'flops': 2531205966848}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1582003754624}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1582003754624}, {'type': 'perplexity', 'in_batch_dim': [8, 64], 'batch_size': 8, 'flops': 1265603017216}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1898404492032}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1582003754624}, {'type': 'perplexity', 'in_batch_dim': [8, 112], 'batch_size': 8, 'flops': 2214805229440}, {'type': 'perplexity', 'in_batch_dim': [8, 64], 'batch_size': 8, 'flops': 1265603017216}, {'type': 'perplexity', 'in_batch_dim': [8, 64], 'batch_size': 8, 'flops': 1265603017216}, {'type': 'perplexity', 'in_batch_dim': [8, 128], 'batch_size': 8, 'flops': 2531205966848}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1898404492032}, {'type': 'perplexity', 'in_batch_dim': [8, 112], 'batch_size': 8, 'flops': 2214805229440}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1582003754624}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1582003754624}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1582003754624}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1582003754624}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1582003754624}, {'type': 'perplexity', 'in_batch_dim': [8, 112], 'batch_size': 8, 'flops': 2214805229440}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1898404492032}, {'type': 'perplexity', 'in_batch_dim': [8, 112], 'batch_size': 8, 'flops': 2214805229440}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1898404492032}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1582003754624}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1582003754624}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1898404492032}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1898404492032}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1582003754624}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1582003754624}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1582003754624}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1582003754624}, {'type': 'perplexity', 'in_batch_dim': [8, 48], 'batch_size': 8, 'flops': 949202279808}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1582003754624}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1898404492032}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1582003754624}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1898404492032}, {'type': 'perplexity', 'in_batch_dim': [8, 112], 'batch_size': 8, 'flops': 2214805229440}, {'type': 'perplexity', 'in_batch_dim': [8, 128], 'batch_size': 8, 'flops': 2531205966848}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1898404492032}, {'type': 'perplexity', 'in_batch_dim': [8, 64], 'batch_size': 8, 'flops': 1265603017216}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1898404492032}, {'type': 'perplexity', 'in_batch_dim': [8, 64], 'batch_size': 8, 'flops': 1265603017216}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1582003754624}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1582003754624}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1582003754624}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1582003754624}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1898404492032}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1582003754624}, {'type': 'perplexity', 'in_batch_dim': [8, 112], 'batch_size': 8, 'flops': 2214805229440}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1898404492032}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1582003754624}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1898404492032}, {'type': 'perplexity', 'in_batch_dim': [8, 64], 'batch_size': 8, 'flops': 1265603017216}, {'type': 'perplexity', 'in_batch_dim': [8, 64], 'batch_size': 8, 'flops': 1265603017216}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1898404492032}, {'type': 'perplexity', 'in_batch_dim': [8, 64], 'batch_size': 8, 'flops': 1265603017216}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1582003754624}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1582003754624}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1582003754624}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1582003754624}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1582003754624}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1582003754624}, {'type': 'perplexity', 'in_batch_dim': [8, 64], 'batch_size': 8, 'flops': 1265603017216}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1582003754624}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1582003754624}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1898404492032}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1898404492032}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1898404492032}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1898404492032}, {'type': 'perplexity', 'in_batch_dim': [8, 128], 'batch_size': 8, 'flops': 2531205966848}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1582003754624}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1898404492032}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1898404492032}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1898404492032}, {'type': 'perplexity', 'in_batch_dim': [8, 64], 'batch_size': 8, 'flops': 1265603017216}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1898404492032}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1898404492032}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1582003754624}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1898404492032}, {'type': 'perplexity', 'in_batch_dim': [8, 112], 'batch_size': 8, 'flops': 2214805229440}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1898404492032}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1582003754624}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1898404492032}, {'type': 'perplexity', 'in_batch_dim': [8, 64], 'batch_size': 8, 'flops': 1265603017216}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1898404492032}, {'type': 'perplexity', 'in_batch_dim': [8, 112], 'batch_size': 8, 'flops': 2214805229440}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1898404492032}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1898404492032}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1582003754624}, {'type': 'perplexity', 'in_batch_dim': [8, 64], 'batch_size': 8, 'flops': 1265603017216}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1582003754624}, {'type': 'perplexity', 'in_batch_dim': [8, 128], 'batch_size': 8, 'flops': 2531205966848}, {'type': 'perplexity', 'in_batch_dim': [8, 64], 'batch_size': 8, 'flops': 1265603017216}, {'type': 'perplexity', 'in_batch_dim': [8, 128], 'batch_size': 8, 'flops': 2531205966848}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1898404492032}, {'type': 'perplexity', 'in_batch_dim': [8, 112], 'batch_size': 8, 'flops': 2214805229440}, {'type': 'perplexity', 'in_batch_dim': [8, 112], 'batch_size': 8, 'flops': 2214805229440}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1898404492032}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1898404492032}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1582003754624}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1582003754624}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1582003754624}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1898404492032}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1582003754624}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1582003754624}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1898404492032}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1582003754624}, {'type': 'perplexity', 'in_batch_dim': [8, 64], 'batch_size': 8, 'flops': 1265603017216}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1898404492032}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1898404492032}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1898404492032}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1898404492032}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1582003754624}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1898404492032}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1582003754624}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1582003754624}, {'type': 'perplexity', 'in_batch_dim': [8, 112], 'batch_size': 8, 'flops': 2214805229440}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1582003754624}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1898404492032}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1582003754624}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1582003754624}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1582003754624}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1898404492032}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1582003754624}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1898404492032}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1582003754624}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1582003754624}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1898404492032}, {'type': 'perplexity', 'in_batch_dim': [8, 112], 'batch_size': 8, 'flops': 2214805229440}, {'type': 'perplexity', 'in_batch_dim': [8, 128], 'batch_size': 8, 'flops': 2531205966848}, {'type': 'perplexity', 'in_batch_dim': [8, 112], 'batch_size': 8, 'flops': 2214805229440}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1582003754624}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1898404492032}, {'type': 'perplexity', 'in_batch_dim': [8, 64], 'batch_size': 8, 'flops': 1265603017216}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1898404492032}, {'type': 'perplexity', 'in_batch_dim': [8, 64], 'batch_size': 8, 'flops': 1265603017216}, {'type': 'perplexity', 'in_batch_dim': [8, 144], 'batch_size': 8, 'flops': 2847606704256}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1582003754624}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1898404492032}, {'type': 'perplexity', 'in_batch_dim': [8, 112], 'batch_size': 8, 'flops': 2214805229440}, {'type': 'perplexity', 'in_batch_dim': [8, 144], 'batch_size': 8, 'flops': 2847606704256}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1582003754624}, {'type': 'perplexity', 'in_batch_dim': [8, 112], 'batch_size': 8, 'flops': 2214805229440}, {'type': 'perplexity', 'in_batch_dim': [8, 64], 'batch_size': 8, 'flops': 1265603017216}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1582003754624}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1898404492032}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1898404492032}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1582003754624}, {'type': 'perplexity', 'in_batch_dim': [8, 112], 'batch_size': 8, 'flops': 2214805229440}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1582003754624}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1582003754624}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1582003754624}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1582003754624}, {'type': 'perplexity', 'in_batch_dim': [8, 112], 'batch_size': 8, 'flops': 2214805229440}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1898404492032}, {'type': 'perplexity', 'in_batch_dim': [8, 128], 'batch_size': 8, 'flops': 2531205966848}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1582003754624}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1582003754624}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1898404492032}, {'type': 'perplexity', 'in_batch_dim': [8, 64], 'batch_size': 8, 'flops': 1265603017216}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1898404492032}, {'type': 'perplexity', 'in_batch_dim': [8, 64], 'batch_size': 8, 'flops': 1265603017216}, {'type': 'perplexity', 'in_batch_dim': [8, 112], 'batch_size': 8, 'flops': 2214805229440}, {'type': 'perplexity', 'in_batch_dim': [8, 112], 'batch_size': 8, 'flops': 2214805229440}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1582003754624}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1898404492032}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1582003754624}, {'type': 'perplexity', 'in_batch_dim': [8, 64], 'batch_size': 8, 'flops': 1265603017216}, {'type': 'perplexity', 'in_batch_dim': [8, 64], 'batch_size': 8, 'flops': 1265603017216}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1898404492032}, {'type': 'perplexity', 'in_batch_dim': [8, 64], 'batch_size': 8, 'flops': 1265603017216}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1898404492032}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1898404492032}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1582003754624}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1898404492032}, {'type': 'perplexity', 'in_batch_dim': [8, 144], 'batch_size': 8, 'flops': 2847606704256}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1898404492032}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1582003754624}, {'type': 'perplexity', 'in_batch_dim': [8, 128], 'batch_size': 8, 'flops': 2531205966848}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1898404492032}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1898404492032}, {'type': 'perplexity', 'in_batch_dim': [8, 112], 'batch_size': 8, 'flops': 2214805229440}, {'type': 'perplexity', 'in_batch_dim': [8, 112], 'batch_size': 8, 'flops': 2214805229440}, {'type': 'perplexity', 'in_batch_dim': [8, 64], 'batch_size': 8, 'flops': 1265603017216}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1582003754624}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1898404492032}, {'type': 'perplexity', 'in_batch_dim': [8, 128], 'batch_size': 8, 'flops': 2531205966848}, {'type': 'perplexity', 'in_batch_dim': [8, 144], 'batch_size': 8, 'flops': 2847606704256}, {'type': 'perplexity', 'in_batch_dim': [8, 112], 'batch_size': 8, 'flops': 2214805229440}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1582003754624}, {'type': 'perplexity', 'in_batch_dim': [8, 64], 'batch_size': 8, 'flops': 1265603017216}, {'type': 'perplexity', 'in_batch_dim': [8, 64], 'batch_size': 8, 'flops': 1265603017216}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1898404492032}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1582003754624}, {'type': 'perplexity', 'in_batch_dim': [8, 64], 'batch_size': 8, 'flops': 1265603017216}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1898404492032}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1582003754624}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1898404492032}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1898404492032}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1582003754624}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1582003754624}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1582003754624}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1582003754624}, {'type': 'perplexity', 'in_batch_dim': [3, 48], 'batch_size': 8, 'flops': 949202279808}], 'timestamp': '2025-09-10 02:29:25.210018', 'step': 2796, 'epoch': 1} {'type': 'pplx', 'content': 9567.84648330977, 'timestamp': '2025-09-10 02:29:25.212691', 'step': 2796, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 128], 'flops': 3797092544000}, 'timestamp': '2025-09-10 02:29:25.244497', 'step': 2796, 'epoch': 1} {'type': 'loss', 'content': 0.26458579301834106, 'timestamp': '2025-09-10 02:29:25.246916', 'step': 2797, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 176], 'flops': 5220903722048}, 'timestamp': '2025-09-10 02:29:25.278093', 'step': 2797, 'epoch': 1} {'type': 'loss', 'content': 0.19322262704372406, 'timestamp': '2025-09-10 02:29:25.281973', 'step': 2798, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 128], 'flops': 3797092544000}, 'timestamp': '2025-09-10 02:29:25.315036', 'step': 2798, 'epoch': 1} {'type': 'loss', 'content': 0.09826415777206421, 'timestamp': '2025-09-10 02:29:25.317403', 'step': 2799, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 3322488817984}, 'timestamp': '2025-09-10 02:29:25.350073', 'step': 2799, 'epoch': 1} {'type': 'loss', 'content': 0.17090921103954315, 'timestamp': '2025-09-10 02:29:25.373747', 'step': 2800, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 80], 'flops': 2373281365952}, 'timestamp': '2025-09-10 02:29:25.405165', 'step': 2800, 'epoch': 1} {'type': 'loss', 'content': 0.11597608774900436, 'timestamp': '2025-09-10 02:29:25.407628', 'step': 2801, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 3322488817984}, 'timestamp': '2025-09-10 02:29:25.438649', 'step': 2801, 'epoch': 1} {'type': 'loss', 'content': 0.146453857421875, 'timestamp': '2025-09-10 02:29:25.441089', 'step': 2802, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 80], 'flops': 2373281365952}, 'timestamp': '2025-09-10 02:29:25.471502', 'step': 2802, 'epoch': 1} {'type': 'loss', 'content': 0.10620591789484024, 'timestamp': '2025-09-10 02:29:25.474357', 'step': 2803, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 3322488817984}, 'timestamp': '2025-09-10 02:29:25.508001', 'step': 2803, 'epoch': 1} {'type': 'loss', 'content': 0.18532676994800568, 'timestamp': '2025-09-10 02:29:25.532032', 'step': 2804, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 2847885091968}, 'timestamp': '2025-09-10 02:29:25.562904', 'step': 2804, 'epoch': 1} {'type': 'loss', 'content': 0.16897840797901154, 'timestamp': '2025-09-10 02:29:25.565634', 'step': 2805, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 3322488817984}, 'timestamp': '2025-09-10 02:29:25.596934', 'step': 2805, 'epoch': 1} {'type': 'loss', 'content': 0.12289096415042877, 'timestamp': '2025-09-10 02:29:25.601016', 'step': 2806, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 144], 'flops': 4271696270016}, 'timestamp': '2025-09-10 02:29:25.634100', 'step': 2806, 'epoch': 1} {'type': 'loss', 'content': 0.24491168558597565, 'timestamp': '2025-09-10 02:29:25.637200', 'step': 2807, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 3322488817984}, 'timestamp': '2025-09-10 02:29:25.673860', 'step': 2807, 'epoch': 1} {'type': 'loss', 'content': 0.20566780865192413, 'timestamp': '2025-09-10 02:29:25.697985', 'step': 2808, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 128], 'flops': 3797092544000}, 'timestamp': '2025-09-10 02:29:25.735594', 'step': 2808, 'epoch': 1} {'type': 'loss', 'content': 0.3830515146255493, 'timestamp': '2025-09-10 02:29:25.738375', 'step': 2809, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 2847885091968}, 'timestamp': '2025-09-10 02:29:25.770395', 'step': 2809, 'epoch': 1} {'type': 'loss', 'content': 0.1558404415845871, 'timestamp': '2025-09-10 02:29:25.773151', 'step': 2810, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 2847885091968}, 'timestamp': '2025-09-10 02:29:25.806604', 'step': 2810, 'epoch': 1} {'type': 'loss', 'content': 0.11620591580867767, 'timestamp': '2025-09-10 02:29:25.809866', 'step': 2811, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 3322488817984}, 'timestamp': '2025-09-10 02:29:25.843874', 'step': 2811, 'epoch': 1} {'type': 'loss', 'content': 0.1193157434463501, 'timestamp': '2025-09-10 02:29:25.868147', 'step': 2812, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 160], 'flops': 4746299996032}, 'timestamp': '2025-09-10 02:29:25.910644', 'step': 2812, 'epoch': 1} {'type': 'loss', 'content': 0.16429656744003296, 'timestamp': '2025-09-10 02:29:25.913989', 'step': 2813, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 2847885091968}, 'timestamp': '2025-09-10 02:29:25.946221', 'step': 2813, 'epoch': 1} {'type': 'loss', 'content': 0.14961227774620056, 'timestamp': '2025-09-10 02:29:25.949436', 'step': 2814, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 2847885091968}, 'timestamp': '2025-09-10 02:29:25.981658', 'step': 2814, 'epoch': 1} {'type': 'loss', 'content': 0.19784125685691833, 'timestamp': '2025-09-10 02:29:25.984134', 'step': 2815, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 3322488817984}, 'timestamp': '2025-09-10 02:29:26.015121', 'step': 2815, 'epoch': 1} {'type': 'loss', 'content': 0.1886449158191681, 'timestamp': '2025-09-10 02:29:26.038375', 'step': 2816, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 128], 'flops': 3797092544000}, 'timestamp': '2025-09-10 02:29:26.069817', 'step': 2816, 'epoch': 1} {'type': 'loss', 'content': 0.12617678940296173, 'timestamp': '2025-09-10 02:29:26.072735', 'step': 2817, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 2847885091968}, 'timestamp': '2025-09-10 02:29:26.105809', 'step': 2817, 'epoch': 1} {'type': 'loss', 'content': 0.142006978392601, 'timestamp': '2025-09-10 02:29:26.108243', 'step': 2818, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 144], 'flops': 4271696270016}, 'timestamp': '2025-09-10 02:29:26.139356', 'step': 2818, 'epoch': 1} {'type': 'loss', 'content': 0.29610922932624817, 'timestamp': '2025-09-10 02:29:26.141659', 'step': 2819, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 3322488817984}, 'timestamp': '2025-09-10 02:29:26.172413', 'step': 2819, 'epoch': 1} {'type': 'loss', 'content': 0.2138553112745285, 'timestamp': '2025-09-10 02:29:26.195905', 'step': 2820, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 2847885091968}, 'timestamp': '2025-09-10 02:29:26.227107', 'step': 2820, 'epoch': 1} {'type': 'loss', 'content': 0.1709800809621811, 'timestamp': '2025-09-10 02:29:26.229323', 'step': 2821, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 2847885091968}, 'timestamp': '2025-09-10 02:29:26.273937', 'step': 2821, 'epoch': 1} {'type': 'loss', 'content': 0.24992775917053223, 'timestamp': '2025-09-10 02:29:26.277600', 'step': 2822, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 128], 'flops': 3797092544000}, 'timestamp': '2025-09-10 02:29:26.326853', 'step': 2822, 'epoch': 1} {'type': 'loss', 'content': 0.1732897013425827, 'timestamp': '2025-09-10 02:29:26.329016', 'step': 2823, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 2847885091968}, 'timestamp': '2025-09-10 02:29:26.361045', 'step': 2823, 'epoch': 1} {'type': 'loss', 'content': 0.08862721920013428, 'timestamp': '2025-09-10 02:29:26.384721', 'step': 2824, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 3322488817984}, 'timestamp': '2025-09-10 02:29:26.415439', 'step': 2824, 'epoch': 1} {'type': 'loss', 'content': 0.13528327643871307, 'timestamp': '2025-09-10 02:29:26.417966', 'step': 2825, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 144], 'flops': 4271696270016}, 'timestamp': '2025-09-10 02:29:26.448542', 'step': 2825, 'epoch': 1} {'type': 'loss', 'content': 0.17010782659053802, 'timestamp': '2025-09-10 02:29:26.451051', 'step': 2826, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 2847885091968}, 'timestamp': '2025-09-10 02:29:26.481406', 'step': 2826, 'epoch': 1} {'type': 'loss', 'content': 0.17072226107120514, 'timestamp': '2025-09-10 02:29:26.483523', 'step': 2827, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 144], 'flops': 4271696270016}, 'timestamp': '2025-09-10 02:29:26.514602', 'step': 2827, 'epoch': 1} {'type': 'loss', 'content': 0.08859883248806, 'timestamp': '2025-09-10 02:29:26.538312', 'step': 2828, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 2847885091968}, 'timestamp': '2025-09-10 02:29:26.569042', 'step': 2828, 'epoch': 1} {'type': 'loss', 'content': 0.12069286406040192, 'timestamp': '2025-09-10 02:29:26.571273', 'step': 2829, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 2847885091968}, 'timestamp': '2025-09-10 02:29:26.602431', 'step': 2829, 'epoch': 1} {'type': 'loss', 'content': 0.27785056829452515, 'timestamp': '2025-09-10 02:29:26.604813', 'step': 2830, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 2847885091968}, 'timestamp': '2025-09-10 02:29:26.635633', 'step': 2830, 'epoch': 1} {'type': 'loss', 'content': 0.1856326311826706, 'timestamp': '2025-09-10 02:29:26.639147', 'step': 2831, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 2847885091968}, 'timestamp': '2025-09-10 02:29:26.669778', 'step': 2831, 'epoch': 1} {'type': 'loss', 'content': 0.15604168176651, 'timestamp': '2025-09-10 02:29:26.693704', 'step': 2832, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 2847885091968}, 'timestamp': '2025-09-10 02:29:26.726432', 'step': 2832, 'epoch': 1} {'type': 'loss', 'content': 0.12007616460323334, 'timestamp': '2025-09-10 02:29:26.728960', 'step': 2833, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 3322488817984}, 'timestamp': '2025-09-10 02:29:26.760971', 'step': 2833, 'epoch': 1} {'type': 'loss', 'content': 0.10782086849212646, 'timestamp': '2025-09-10 02:29:26.763130', 'step': 2834, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 144], 'flops': 4271696270016}, 'timestamp': '2025-09-10 02:29:26.793953', 'step': 2834, 'epoch': 1} {'type': 'loss', 'content': 0.11217661947011948, 'timestamp': '2025-09-10 02:29:26.796153', 'step': 2835, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 3322488817984}, 'timestamp': '2025-09-10 02:29:26.827665', 'step': 2835, 'epoch': 1} {'type': 'loss', 'content': 0.16456176340579987, 'timestamp': '2025-09-10 02:29:26.851499', 'step': 2836, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 3322488817984}, 'timestamp': '2025-09-10 02:29:26.883794', 'step': 2836, 'epoch': 1} {'type': 'loss', 'content': 0.21714359521865845, 'timestamp': '2025-09-10 02:29:26.886113', 'step': 2837, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 3322488817984}, 'timestamp': '2025-09-10 02:29:26.917564', 'step': 2837, 'epoch': 1} {'type': 'loss', 'content': 0.2032214105129242, 'timestamp': '2025-09-10 02:29:26.919819', 'step': 2838, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 2847885091968}, 'timestamp': '2025-09-10 02:29:26.949658', 'step': 2838, 'epoch': 1} {'type': 'loss', 'content': 0.13336579501628876, 'timestamp': '2025-09-10 02:29:26.952286', 'step': 2839, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 2847885091968}, 'timestamp': '2025-09-10 02:29:26.984506', 'step': 2839, 'epoch': 1} {'type': 'loss', 'content': 0.1727626472711563, 'timestamp': '2025-09-10 02:29:27.008365', 'step': 2840, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 128], 'flops': 3797092544000}, 'timestamp': '2025-09-10 02:29:27.039159', 'step': 2840, 'epoch': 1} {'type': 'loss', 'content': 0.3258360028266907, 'timestamp': '2025-09-10 02:29:27.043952', 'step': 2841, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 2847885091968}, 'timestamp': '2025-09-10 02:29:27.075035', 'step': 2841, 'epoch': 1} {'type': 'loss', 'content': 0.15126824378967285, 'timestamp': '2025-09-10 02:29:27.077288', 'step': 2842, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 144], 'flops': 4271696270016}, 'timestamp': '2025-09-10 02:29:27.108672', 'step': 2842, 'epoch': 1} {'type': 'loss', 'content': 0.1458577662706375, 'timestamp': '2025-09-10 02:29:27.111169', 'step': 2843, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 128], 'flops': 3797092544000}, 'timestamp': '2025-09-10 02:29:27.141826', 'step': 2843, 'epoch': 1} {'type': 'loss', 'content': 0.16880454123020172, 'timestamp': '2025-09-10 02:29:27.166866', 'step': 2844, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 3322488817984}, 'timestamp': '2025-09-10 02:29:27.198172', 'step': 2844, 'epoch': 1} {'type': 'loss', 'content': 0.17666451632976532, 'timestamp': '2025-09-10 02:29:27.201489', 'step': 2845, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 3322488817984}, 'timestamp': '2025-09-10 02:29:27.232976', 'step': 2845, 'epoch': 1} {'type': 'loss', 'content': 0.15793681144714355, 'timestamp': '2025-09-10 02:29:27.235511', 'step': 2846, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 3322488817984}, 'timestamp': '2025-09-10 02:29:27.265516', 'step': 2846, 'epoch': 1} {'type': 'loss', 'content': 0.16697260737419128, 'timestamp': '2025-09-10 02:29:27.267648', 'step': 2847, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 2847885091968}, 'timestamp': '2025-09-10 02:29:27.299072', 'step': 2847, 'epoch': 1} {'type': 'loss', 'content': 0.14557138085365295, 'timestamp': '2025-09-10 02:29:27.323915', 'step': 2848, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 3322488817984}, 'timestamp': '2025-09-10 02:29:27.354635', 'step': 2848, 'epoch': 1} {'type': 'loss', 'content': 0.11799270659685135, 'timestamp': '2025-09-10 02:29:27.357264', 'step': 2849, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 3322488817984}, 'timestamp': '2025-09-10 02:29:27.388653', 'step': 2849, 'epoch': 1} {'type': 'loss', 'content': 0.13604408502578735, 'timestamp': '2025-09-10 02:29:27.391247', 'step': 2850, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 3322488817984}, 'timestamp': '2025-09-10 02:29:27.423286', 'step': 2850, 'epoch': 1} {'type': 'loss', 'content': 0.12315557897090912, 'timestamp': '2025-09-10 02:29:27.425308', 'step': 2851, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 128], 'flops': 3797092544000}, 'timestamp': '2025-09-10 02:29:27.455558', 'step': 2851, 'epoch': 1} {'type': 'loss', 'content': 0.13808919489383698, 'timestamp': '2025-09-10 02:29:27.479377', 'step': 2852, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 128], 'flops': 3797092544000}, 'timestamp': '2025-09-10 02:29:27.513324', 'step': 2852, 'epoch': 1} {'type': 'loss', 'content': 0.1487739533185959, 'timestamp': '2025-09-10 02:29:27.516009', 'step': 2853, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 2847885091968}, 'timestamp': '2025-09-10 02:29:27.546784', 'step': 2853, 'epoch': 1} {'type': 'loss', 'content': 0.19262492656707764, 'timestamp': '2025-09-10 02:29:27.550898', 'step': 2854, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 2847885091968}, 'timestamp': '2025-09-10 02:29:27.581794', 'step': 2854, 'epoch': 1} {'type': 'loss', 'content': 0.20221833884716034, 'timestamp': '2025-09-10 02:29:27.589094', 'step': 2855, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 3322488817984}, 'timestamp': '2025-09-10 02:29:27.624748', 'step': 2855, 'epoch': 1} {'type': 'loss', 'content': 0.15250101685523987, 'timestamp': '2025-09-10 02:29:27.650276', 'step': 2856, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 128], 'flops': 3797092544000}, 'timestamp': '2025-09-10 02:29:27.690200', 'step': 2856, 'epoch': 1} {'type': 'loss', 'content': 0.25012925267219543, 'timestamp': '2025-09-10 02:29:27.694259', 'step': 2857, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 128], 'flops': 3797092544000}, 'timestamp': '2025-09-10 02:29:27.732662', 'step': 2857, 'epoch': 1} {'type': 'loss', 'content': 0.1423090249300003, 'timestamp': '2025-09-10 02:29:27.736982', 'step': 2858, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 128], 'flops': 3797092544000}, 'timestamp': '2025-09-10 02:29:27.775898', 'step': 2858, 'epoch': 1} {'type': 'loss', 'content': 0.27607467770576477, 'timestamp': '2025-09-10 02:29:27.780168', 'step': 2859, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 3322488817984}, 'timestamp': '2025-09-10 02:29:27.812647', 'step': 2859, 'epoch': 1} {'type': 'loss', 'content': 0.19521023333072662, 'timestamp': '2025-09-10 02:29:27.836389', 'step': 2860, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 3322488817984}, 'timestamp': '2025-09-10 02:29:27.868370', 'step': 2860, 'epoch': 1} {'type': 'loss', 'content': 0.15947160124778748, 'timestamp': '2025-09-10 02:29:27.871085', 'step': 2861, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 3322488817984}, 'timestamp': '2025-09-10 02:29:27.903243', 'step': 2861, 'epoch': 1} {'type': 'loss', 'content': 0.18882665038108826, 'timestamp': '2025-09-10 02:29:27.906102', 'step': 2862, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 2847885091968}, 'timestamp': '2025-09-10 02:29:27.960298', 'step': 2862, 'epoch': 1} {'type': 'loss', 'content': 0.0703602135181427, 'timestamp': '2025-09-10 02:29:27.962897', 'step': 2863, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 2847885091968}, 'timestamp': '2025-09-10 02:29:28.002000', 'step': 2863, 'epoch': 1} {'type': 'loss', 'content': 0.11983750760555267, 'timestamp': '2025-09-10 02:29:28.026896', 'step': 2864, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 3322488817984}, 'timestamp': '2025-09-10 02:29:28.059690', 'step': 2864, 'epoch': 1} {'type': 'loss', 'content': 0.202344611287117, 'timestamp': '2025-09-10 02:29:28.067422', 'step': 2865, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 128], 'flops': 3797092544000}, 'timestamp': '2025-09-10 02:29:28.105115', 'step': 2865, 'epoch': 1} {'type': 'loss', 'content': 0.1724071204662323, 'timestamp': '2025-09-10 02:29:28.108154', 'step': 2866, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 2847885091968}, 'timestamp': '2025-09-10 02:29:28.145446', 'step': 2866, 'epoch': 1} {'type': 'loss', 'content': 0.19818684458732605, 'timestamp': '2025-09-10 02:29:28.148129', 'step': 2867, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 2847885091968}, 'timestamp': '2025-09-10 02:29:28.182457', 'step': 2867, 'epoch': 1} {'type': 'loss', 'content': 0.18585510551929474, 'timestamp': '2025-09-10 02:29:28.206917', 'step': 2868, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 176], 'flops': 5220903722048}, 'timestamp': '2025-09-10 02:29:28.241221', 'step': 2868, 'epoch': 1} {'type': 'loss', 'content': 0.18979506194591522, 'timestamp': '2025-09-10 02:29:28.243770', 'step': 2869, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 3322488817984}, 'timestamp': '2025-09-10 02:29:28.276590', 'step': 2869, 'epoch': 1} {'type': 'loss', 'content': 0.11815610527992249, 'timestamp': '2025-09-10 02:29:28.279393', 'step': 2870, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 2847885091968}, 'timestamp': '2025-09-10 02:29:28.311204', 'step': 2870, 'epoch': 1} {'type': 'loss', 'content': 0.1549367606639862, 'timestamp': '2025-09-10 02:29:28.314663', 'step': 2871, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 2847885091968}, 'timestamp': '2025-09-10 02:29:28.346076', 'step': 2871, 'epoch': 1} {'type': 'loss', 'content': 0.0985003337264061, 'timestamp': '2025-09-10 02:29:28.384694', 'step': 2872, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 2847885091968}, 'timestamp': '2025-09-10 02:29:28.417069', 'step': 2872, 'epoch': 1} {'type': 'loss', 'content': 0.2179889976978302, 'timestamp': '2025-09-10 02:29:28.419399', 'step': 2873, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 3322488817984}, 'timestamp': '2025-09-10 02:29:28.451093', 'step': 2873, 'epoch': 1} {'type': 'loss', 'content': 0.09237606823444366, 'timestamp': '2025-09-10 02:29:28.453202', 'step': 2874, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 80], 'flops': 2373281365952}, 'timestamp': '2025-09-10 02:29:28.485038', 'step': 2874, 'epoch': 1} {'type': 'loss', 'content': 0.14012961089611053, 'timestamp': '2025-09-10 02:29:28.489573', 'step': 2875, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 128], 'flops': 3797092544000}, 'timestamp': '2025-09-10 02:29:28.536043', 'step': 2875, 'epoch': 1} {'type': 'loss', 'content': 0.2328987866640091, 'timestamp': '2025-09-10 02:29:28.561181', 'step': 2876, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 3322488817984}, 'timestamp': '2025-09-10 02:29:28.593059', 'step': 2876, 'epoch': 1} {'type': 'loss', 'content': 0.23068265616893768, 'timestamp': '2025-09-10 02:29:28.596217', 'step': 2877, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 128], 'flops': 3797092544000}, 'timestamp': '2025-09-10 02:29:28.628712', 'step': 2877, 'epoch': 1} {'type': 'loss', 'content': 0.1780923753976822, 'timestamp': '2025-09-10 02:29:28.630828', 'step': 2878, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 144], 'flops': 4271696270016}, 'timestamp': '2025-09-10 02:29:28.661564', 'step': 2878, 'epoch': 1} {'type': 'loss', 'content': 0.16345719993114471, 'timestamp': '2025-09-10 02:29:28.666194', 'step': 2879, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 128], 'flops': 3797092544000}, 'timestamp': '2025-09-10 02:29:28.697214', 'step': 2879, 'epoch': 1} {'type': 'loss', 'content': 0.1851814091205597, 'timestamp': '2025-09-10 02:29:28.721227', 'step': 2880, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 80], 'flops': 2373281365952}, 'timestamp': '2025-09-10 02:29:28.754377', 'step': 2880, 'epoch': 1} {'type': 'loss', 'content': 0.1907264143228531, 'timestamp': '2025-09-10 02:29:28.756741', 'step': 2881, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 3322488817984}, 'timestamp': '2025-09-10 02:29:28.791438', 'step': 2881, 'epoch': 1} {'type': 'loss', 'content': 0.21767763793468475, 'timestamp': '2025-09-10 02:29:28.793933', 'step': 2882, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 128], 'flops': 3797092544000}, 'timestamp': '2025-09-10 02:29:28.825088', 'step': 2882, 'epoch': 1} {'type': 'loss', 'content': 0.14668884873390198, 'timestamp': '2025-09-10 02:29:28.827511', 'step': 2883, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 144], 'flops': 4271696270016}, 'timestamp': '2025-09-10 02:29:28.858071', 'step': 2883, 'epoch': 1} {'type': 'loss', 'content': 0.24510742723941803, 'timestamp': '2025-09-10 02:29:28.881747', 'step': 2884, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 144], 'flops': 4271696270016}, 'timestamp': '2025-09-10 02:29:28.914197', 'step': 2884, 'epoch': 1} {'type': 'loss', 'content': 0.12693023681640625, 'timestamp': '2025-09-10 02:29:28.917116', 'step': 2885, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 3322488817984}, 'timestamp': '2025-09-10 02:29:28.947605', 'step': 2885, 'epoch': 1} {'type': 'loss', 'content': 0.13093890249729156, 'timestamp': '2025-09-10 02:29:28.950035', 'step': 2886, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 3322488817984}, 'timestamp': '2025-09-10 02:29:28.980967', 'step': 2886, 'epoch': 1} {'type': 'loss', 'content': 0.15460403263568878, 'timestamp': '2025-09-10 02:29:28.983673', 'step': 2887, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 2847885091968}, 'timestamp': '2025-09-10 02:29:29.015555', 'step': 2887, 'epoch': 1} {'type': 'loss', 'content': 0.12076568603515625, 'timestamp': '2025-09-10 02:29:29.039297', 'step': 2888, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 128], 'flops': 3797092544000}, 'timestamp': '2025-09-10 02:29:29.070569', 'step': 2888, 'epoch': 1} {'type': 'loss', 'content': 0.18870121240615845, 'timestamp': '2025-09-10 02:29:29.073240', 'step': 2889, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 2847885091968}, 'timestamp': '2025-09-10 02:29:29.104318', 'step': 2889, 'epoch': 1} {'type': 'loss', 'content': 0.14477860927581787, 'timestamp': '2025-09-10 02:29:29.107386', 'step': 2890, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 128], 'flops': 3797092544000}, 'timestamp': '2025-09-10 02:29:29.137786', 'step': 2890, 'epoch': 1} {'type': 'loss', 'content': 0.1030610203742981, 'timestamp': '2025-09-10 02:29:29.140695', 'step': 2891, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 80], 'flops': 2373281365952}, 'timestamp': '2025-09-10 02:29:29.172566', 'step': 2891, 'epoch': 1} {'type': 'loss', 'content': 0.1217774972319603, 'timestamp': '2025-09-10 02:29:29.196528', 'step': 2892, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 2847885091968}, 'timestamp': '2025-09-10 02:29:29.227453', 'step': 2892, 'epoch': 1} {'type': 'loss', 'content': 0.11690647900104523, 'timestamp': '2025-09-10 02:29:29.230037', 'step': 2893, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 3322488817984}, 'timestamp': '2025-09-10 02:29:29.262068', 'step': 2893, 'epoch': 1} {'type': 'loss', 'content': 0.15832731127738953, 'timestamp': '2025-09-10 02:29:29.264530', 'step': 2894, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 3322488817984}, 'timestamp': '2025-09-10 02:29:29.297352', 'step': 2894, 'epoch': 1} {'type': 'loss', 'content': 0.10935714095830917, 'timestamp': '2025-09-10 02:29:29.299919', 'step': 2895, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 192], 'flops': 5695507448064}, 'timestamp': '2025-09-10 02:29:29.333646', 'step': 2895, 'epoch': 1} {'type': 'loss', 'content': 0.18573574721813202, 'timestamp': '2025-09-10 02:29:29.358999', 'step': 2896, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 3322488817984}, 'timestamp': '2025-09-10 02:29:29.391749', 'step': 2896, 'epoch': 1} {'type': 'loss', 'content': 0.05963965132832527, 'timestamp': '2025-09-10 02:29:29.394587', 'step': 2897, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 3322488817984}, 'timestamp': '2025-09-10 02:29:29.425837', 'step': 2897, 'epoch': 1} {'type': 'loss', 'content': 0.17243359982967377, 'timestamp': '2025-09-10 02:29:29.428323', 'step': 2898, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 3322488817984}, 'timestamp': '2025-09-10 02:29:29.459859', 'step': 2898, 'epoch': 1} {'type': 'loss', 'content': 0.21847692131996155, 'timestamp': '2025-09-10 02:29:29.462577', 'step': 2899, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 128], 'flops': 3797092544000}, 'timestamp': '2025-09-10 02:29:29.493990', 'step': 2899, 'epoch': 1} {'type': 'loss', 'content': 0.17138715088367462, 'timestamp': '2025-09-10 02:29:29.518040', 'step': 2900, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 3322488817984}, 'timestamp': '2025-09-10 02:29:29.549366', 'step': 2900, 'epoch': 1} {'type': 'loss', 'content': 0.13820765912532806, 'timestamp': '2025-09-10 02:29:29.551771', 'step': 2901, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 3322488817984}, 'timestamp': '2025-09-10 02:29:29.582997', 'step': 2901, 'epoch': 1} {'type': 'loss', 'content': 0.14217431843280792, 'timestamp': '2025-09-10 02:29:29.585755', 'step': 2902, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 3322488817984}, 'timestamp': '2025-09-10 02:29:29.618260', 'step': 2902, 'epoch': 1} {'type': 'loss', 'content': 0.21859285235404968, 'timestamp': '2025-09-10 02:29:29.620412', 'step': 2903, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 128], 'flops': 3797092544000}, 'timestamp': '2025-09-10 02:29:29.652035', 'step': 2903, 'epoch': 1} {'type': 'loss', 'content': 0.08993946015834808, 'timestamp': '2025-09-10 02:29:29.675496', 'step': 2904, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 144], 'flops': 4271696270016}, 'timestamp': '2025-09-10 02:29:29.709672', 'step': 2904, 'epoch': 1} {'type': 'loss', 'content': 0.2028021514415741, 'timestamp': '2025-09-10 02:29:29.712282', 'step': 2905, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 3322488817984}, 'timestamp': '2025-09-10 02:29:29.742950', 'step': 2905, 'epoch': 1} {'type': 'loss', 'content': 0.1702849268913269, 'timestamp': '2025-09-10 02:29:29.744953', 'step': 2906, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 128], 'flops': 3797092544000}, 'timestamp': '2025-09-10 02:29:29.776793', 'step': 2906, 'epoch': 1} {'type': 'loss', 'content': 0.11424145847558975, 'timestamp': '2025-09-10 02:29:29.780133', 'step': 2907, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 128], 'flops': 3797092544000}, 'timestamp': '2025-09-10 02:29:29.813120', 'step': 2907, 'epoch': 1} {'type': 'loss', 'content': 0.19923122227191925, 'timestamp': '2025-09-10 02:29:29.837126', 'step': 2908, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 3322488817984}, 'timestamp': '2025-09-10 02:29:29.872810', 'step': 2908, 'epoch': 1} {'type': 'loss', 'content': 0.2076682150363922, 'timestamp': '2025-09-10 02:29:29.876582', 'step': 2909, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 128], 'flops': 3797092544000}, 'timestamp': '2025-09-10 02:29:29.920035', 'step': 2909, 'epoch': 1} {'type': 'loss', 'content': 0.12507633864879608, 'timestamp': '2025-09-10 02:29:29.926492', 'step': 2910, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 128], 'flops': 3797092544000}, 'timestamp': '2025-09-10 02:29:29.972870', 'step': 2910, 'epoch': 1} {'type': 'loss', 'content': 0.17529086768627167, 'timestamp': '2025-09-10 02:29:29.980708', 'step': 2911, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 2847885091968}, 'timestamp': '2025-09-10 02:29:30.014393', 'step': 2911, 'epoch': 1} {'type': 'loss', 'content': 0.19962140917778015, 'timestamp': '2025-09-10 02:29:30.038289', 'step': 2912, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 3322488817984}, 'timestamp': '2025-09-10 02:29:30.069837', 'step': 2912, 'epoch': 1} {'type': 'loss', 'content': 0.1488262563943863, 'timestamp': '2025-09-10 02:29:30.072276', 'step': 2913, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 3322488817984}, 'timestamp': '2025-09-10 02:29:30.103050', 'step': 2913, 'epoch': 1} {'type': 'loss', 'content': 0.18740664422512054, 'timestamp': '2025-09-10 02:29:30.105585', 'step': 2914, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 128], 'flops': 3797092544000}, 'timestamp': '2025-09-10 02:29:30.136891', 'step': 2914, 'epoch': 1} {'type': 'loss', 'content': 0.1576729714870453, 'timestamp': '2025-09-10 02:29:30.142087', 'step': 2915, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 2847885091968}, 'timestamp': '2025-09-10 02:29:30.178607', 'step': 2915, 'epoch': 1} {'type': 'loss', 'content': 0.17281392216682434, 'timestamp': '2025-09-10 02:29:30.211821', 'step': 2916, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 128], 'flops': 3797092544000}, 'timestamp': '2025-09-10 02:29:30.243081', 'step': 2916, 'epoch': 1} {'type': 'loss', 'content': 0.12820297479629517, 'timestamp': '2025-09-10 02:29:30.245502', 'step': 2917, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 2847885091968}, 'timestamp': '2025-09-10 02:29:30.277558', 'step': 2917, 'epoch': 1} {'type': 'loss', 'content': 0.1955871284008026, 'timestamp': '2025-09-10 02:29:30.279895', 'step': 2918, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 3322488817984}, 'timestamp': '2025-09-10 02:29:30.313083', 'step': 2918, 'epoch': 1} {'type': 'loss', 'content': 0.17101788520812988, 'timestamp': '2025-09-10 02:29:30.316721', 'step': 2919, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 128], 'flops': 3797092544000}, 'timestamp': '2025-09-10 02:29:30.347986', 'step': 2919, 'epoch': 1} {'type': 'loss', 'content': 0.20508919656276703, 'timestamp': '2025-09-10 02:29:30.372868', 'step': 2920, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 2847885091968}, 'timestamp': '2025-09-10 02:29:30.406535', 'step': 2920, 'epoch': 1} {'type': 'loss', 'content': 0.17623472213745117, 'timestamp': '2025-09-10 02:29:30.408909', 'step': 2921, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 2847885091968}, 'timestamp': '2025-09-10 02:29:30.438909', 'step': 2921, 'epoch': 1} {'type': 'loss', 'content': 0.15475255250930786, 'timestamp': '2025-09-10 02:29:30.442523', 'step': 2922, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 128], 'flops': 3797092544000}, 'timestamp': '2025-09-10 02:29:30.475364', 'step': 2922, 'epoch': 1} {'type': 'loss', 'content': 0.19076234102249146, 'timestamp': '2025-09-10 02:29:30.477872', 'step': 2923, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 80], 'flops': 2373281365952}, 'timestamp': '2025-09-10 02:29:30.507696', 'step': 2923, 'epoch': 1} {'type': 'loss', 'content': 0.18647529184818268, 'timestamp': '2025-09-10 02:29:30.531281', 'step': 2924, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 144], 'flops': 4271696270016}, 'timestamp': '2025-09-10 02:29:30.564662', 'step': 2924, 'epoch': 1} {'type': 'loss', 'content': 0.2242709845304489, 'timestamp': '2025-09-10 02:29:30.568269', 'step': 2925, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 160], 'flops': 4746299996032}, 'timestamp': '2025-09-10 02:29:30.607086', 'step': 2925, 'epoch': 1} {'type': 'loss', 'content': 0.1870044320821762, 'timestamp': '2025-09-10 02:29:30.609635', 'step': 2926, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 2847885091968}, 'timestamp': '2025-09-10 02:29:30.644603', 'step': 2926, 'epoch': 1} {'type': 'loss', 'content': 0.09590485692024231, 'timestamp': '2025-09-10 02:29:30.646919', 'step': 2927, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 3322488817984}, 'timestamp': '2025-09-10 02:29:30.681732', 'step': 2927, 'epoch': 1} {'type': 'loss', 'content': 0.1738225817680359, 'timestamp': '2025-09-10 02:29:30.706200', 'step': 2928, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 2847885091968}, 'timestamp': '2025-09-10 02:29:30.740559', 'step': 2928, 'epoch': 1} {'type': 'loss', 'content': 0.21689961850643158, 'timestamp': '2025-09-10 02:29:30.742783', 'step': 2929, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 2847885091968}, 'timestamp': '2025-09-10 02:29:30.781853', 'step': 2929, 'epoch': 1} {'type': 'loss', 'content': 0.16797477006912231, 'timestamp': '2025-09-10 02:29:30.790550', 'step': 2930, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 3322488817984}, 'timestamp': '2025-09-10 02:29:30.829093', 'step': 2930, 'epoch': 1} {'type': 'loss', 'content': 0.15986496210098267, 'timestamp': '2025-09-10 02:29:30.831665', 'step': 2931, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 2847885091968}, 'timestamp': '2025-09-10 02:29:30.862428', 'step': 2931, 'epoch': 1} {'type': 'loss', 'content': 0.20310552418231964, 'timestamp': '2025-09-10 02:29:30.885938', 'step': 2932, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 3322488817984}, 'timestamp': '2025-09-10 02:29:30.920069', 'step': 2932, 'epoch': 1} {'type': 'loss', 'content': 0.1734294295310974, 'timestamp': '2025-09-10 02:29:30.922536', 'step': 2933, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 3322488817984}, 'timestamp': '2025-09-10 02:29:30.955694', 'step': 2933, 'epoch': 1} {'type': 'loss', 'content': 0.24777251482009888, 'timestamp': '2025-09-10 02:29:30.958558', 'step': 2934, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 3322488817984}, 'timestamp': '2025-09-10 02:29:30.990640', 'step': 2934, 'epoch': 1} {'type': 'loss', 'content': 0.14859893918037415, 'timestamp': '2025-09-10 02:29:30.994555', 'step': 2935, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 192], 'flops': 5695507448064}, 'timestamp': '2025-09-10 02:29:31.026146', 'step': 2935, 'epoch': 1} {'type': 'loss', 'content': 0.22739675641059875, 'timestamp': '2025-09-10 02:29:31.051556', 'step': 2936, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 128], 'flops': 3797092544000}, 'timestamp': '2025-09-10 02:29:31.086361', 'step': 2936, 'epoch': 1} {'type': 'loss', 'content': 0.19373366236686707, 'timestamp': '2025-09-10 02:29:31.090263', 'step': 2937, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 144], 'flops': 4271696270016}, 'timestamp': '2025-09-10 02:29:31.130276', 'step': 2937, 'epoch': 1} {'type': 'loss', 'content': 0.23258177936077118, 'timestamp': '2025-09-10 02:29:31.133323', 'step': 2938, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 3322488817984}, 'timestamp': '2025-09-10 02:29:31.163783', 'step': 2938, 'epoch': 1} {'type': 'loss', 'content': 0.19768975675106049, 'timestamp': '2025-09-10 02:29:31.166820', 'step': 2939, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 2847885091968}, 'timestamp': '2025-09-10 02:29:31.199504', 'step': 2939, 'epoch': 1} {'type': 'loss', 'content': 0.2377227246761322, 'timestamp': '2025-09-10 02:29:31.223194', 'step': 2940, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 3322488817984}, 'timestamp': '2025-09-10 02:29:31.256398', 'step': 2940, 'epoch': 1} {'type': 'loss', 'content': 0.19690659642219543, 'timestamp': '2025-09-10 02:29:31.263444', 'step': 2941, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 3322488817984}, 'timestamp': '2025-09-10 02:29:31.297957', 'step': 2941, 'epoch': 1} {'type': 'loss', 'content': 0.12275376915931702, 'timestamp': '2025-09-10 02:29:31.300602', 'step': 2942, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 3322488817984}, 'timestamp': '2025-09-10 02:29:31.334377', 'step': 2942, 'epoch': 1} {'type': 'loss', 'content': 0.13810113072395325, 'timestamp': '2025-09-10 02:29:31.340458', 'step': 2943, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 2847885091968}, 'timestamp': '2025-09-10 02:29:31.371702', 'step': 2943, 'epoch': 1} {'type': 'loss', 'content': 0.08929838985204697, 'timestamp': '2025-09-10 02:29:31.395522', 'step': 2944, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 3322488817984}, 'timestamp': '2025-09-10 02:29:31.429871', 'step': 2944, 'epoch': 1} {'type': 'loss', 'content': 0.1359989494085312, 'timestamp': '2025-09-10 02:29:31.432190', 'step': 2945, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 128], 'flops': 3797092544000}, 'timestamp': '2025-09-10 02:29:31.473476', 'step': 2945, 'epoch': 1} {'type': 'loss', 'content': 0.141062393784523, 'timestamp': '2025-09-10 02:29:31.475826', 'step': 2946, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 144], 'flops': 4271696270016}, 'timestamp': '2025-09-10 02:29:31.511178', 'step': 2946, 'epoch': 1} {'type': 'loss', 'content': 0.1711694896221161, 'timestamp': '2025-09-10 02:29:31.513528', 'step': 2947, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 3322488817984}, 'timestamp': '2025-09-10 02:29:31.544546', 'step': 2947, 'epoch': 1} {'type': 'loss', 'content': 0.1381046324968338, 'timestamp': '2025-09-10 02:29:31.568371', 'step': 2948, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 2847885091968}, 'timestamp': '2025-09-10 02:29:31.602439', 'step': 2948, 'epoch': 1} {'type': 'loss', 'content': 0.0843718945980072, 'timestamp': '2025-09-10 02:29:31.604912', 'step': 2949, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 3322488817984}, 'timestamp': '2025-09-10 02:29:31.636919', 'step': 2949, 'epoch': 1} {'type': 'loss', 'content': 0.12221649289131165, 'timestamp': '2025-09-10 02:29:31.639168', 'step': 2950, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 3322488817984}, 'timestamp': '2025-09-10 02:29:31.679000', 'step': 2950, 'epoch': 1} {'type': 'loss', 'content': 0.18113288283348083, 'timestamp': '2025-09-10 02:29:31.681976', 'step': 2951, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 144], 'flops': 4271696270016}, 'timestamp': '2025-09-10 02:29:31.714908', 'step': 2951, 'epoch': 1} {'type': 'loss', 'content': 0.2231016308069229, 'timestamp': '2025-09-10 02:29:31.740713', 'step': 2952, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 3322488817984}, 'timestamp': '2025-09-10 02:29:31.772733', 'step': 2952, 'epoch': 1} {'type': 'loss', 'content': 0.1835748851299286, 'timestamp': '2025-09-10 02:29:31.777510', 'step': 2953, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 3322488817984}, 'timestamp': '2025-09-10 02:29:31.812628', 'step': 2953, 'epoch': 1} {'type': 'loss', 'content': 0.15512286126613617, 'timestamp': '2025-09-10 02:29:31.815158', 'step': 2954, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 128], 'flops': 3797092544000}, 'timestamp': '2025-09-10 02:29:31.847432', 'step': 2954, 'epoch': 1} {'type': 'loss', 'content': 0.1856973022222519, 'timestamp': '2025-09-10 02:29:31.850304', 'step': 2955, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 2847885091968}, 'timestamp': '2025-09-10 02:29:31.885745', 'step': 2955, 'epoch': 1} {'type': 'loss', 'content': 0.11114994436502457, 'timestamp': '2025-09-10 02:29:31.911181', 'step': 2956, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 3322488817984}, 'timestamp': '2025-09-10 02:29:31.943228', 'step': 2956, 'epoch': 1} {'type': 'loss', 'content': 0.199825257062912, 'timestamp': '2025-09-10 02:29:31.945300', 'step': 2957, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 128], 'flops': 3797092544000}, 'timestamp': '2025-09-10 02:29:31.975736', 'step': 2957, 'epoch': 1} {'type': 'loss', 'content': 0.24443155527114868, 'timestamp': '2025-09-10 02:29:31.978138', 'step': 2958, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 3322488817984}, 'timestamp': '2025-09-10 02:29:32.009251', 'step': 2958, 'epoch': 1} {'type': 'loss', 'content': 0.19398701190948486, 'timestamp': '2025-09-10 02:29:32.011440', 'step': 2959, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 2847885091968}, 'timestamp': '2025-09-10 02:29:32.041994', 'step': 2959, 'epoch': 1} {'type': 'loss', 'content': 0.18236133456230164, 'timestamp': '2025-09-10 02:29:32.065419', 'step': 2960, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 2847885091968}, 'timestamp': '2025-09-10 02:29:32.098121', 'step': 2960, 'epoch': 1} {'type': 'loss', 'content': 0.17902226746082306, 'timestamp': '2025-09-10 02:29:32.100426', 'step': 2961, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 2847885091968}, 'timestamp': '2025-09-10 02:29:32.132788', 'step': 2961, 'epoch': 1} {'type': 'loss', 'content': 0.1753825545310974, 'timestamp': '2025-09-10 02:29:32.135907', 'step': 2962, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 128], 'flops': 3797092544000}, 'timestamp': '2025-09-10 02:29:32.167262', 'step': 2962, 'epoch': 1} {'type': 'loss', 'content': 0.16142985224723816, 'timestamp': '2025-09-10 02:29:32.169751', 'step': 2963, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 3322488817984}, 'timestamp': '2025-09-10 02:29:32.200876', 'step': 2963, 'epoch': 1} {'type': 'loss', 'content': 0.13567090034484863, 'timestamp': '2025-09-10 02:29:32.224550', 'step': 2964, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 3322488817984}, 'timestamp': '2025-09-10 02:29:32.256412', 'step': 2964, 'epoch': 1} {'type': 'loss', 'content': 0.18153055012226105, 'timestamp': '2025-09-10 02:29:32.258786', 'step': 2965, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 128], 'flops': 3797092544000}, 'timestamp': '2025-09-10 02:29:32.290942', 'step': 2965, 'epoch': 1} {'type': 'loss', 'content': 0.22256530821323395, 'timestamp': '2025-09-10 02:29:32.293754', 'step': 2966, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 3322488817984}, 'timestamp': '2025-09-10 02:29:32.325882', 'step': 2966, 'epoch': 1} {'type': 'loss', 'content': 0.14982792735099792, 'timestamp': '2025-09-10 02:29:32.328882', 'step': 2967, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 2847885091968}, 'timestamp': '2025-09-10 02:29:32.360149', 'step': 2967, 'epoch': 1} {'type': 'loss', 'content': 0.11853358149528503, 'timestamp': '2025-09-10 02:29:32.384429', 'step': 2968, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 3322488817984}, 'timestamp': '2025-09-10 02:29:32.417565', 'step': 2968, 'epoch': 1} {'type': 'loss', 'content': 0.1415366679430008, 'timestamp': '2025-09-10 02:29:32.420095', 'step': 2969, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 2847885091968}, 'timestamp': '2025-09-10 02:29:32.451519', 'step': 2969, 'epoch': 1} {'type': 'loss', 'content': 0.12709547579288483, 'timestamp': '2025-09-10 02:29:32.453893', 'step': 2970, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 2847885091968}, 'timestamp': '2025-09-10 02:29:32.484551', 'step': 2970, 'epoch': 1} {'type': 'loss', 'content': 0.23012617230415344, 'timestamp': '2025-09-10 02:29:32.487029', 'step': 2971, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 3322488817984}, 'timestamp': '2025-09-10 02:29:32.518028', 'step': 2971, 'epoch': 1} {'type': 'loss', 'content': 0.1765979379415512, 'timestamp': '2025-09-10 02:29:32.541581', 'step': 2972, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 128], 'flops': 3797092544000}, 'timestamp': '2025-09-10 02:29:32.572685', 'step': 2972, 'epoch': 1} {'type': 'loss', 'content': 0.15480723977088928, 'timestamp': '2025-09-10 02:29:32.575250', 'step': 2973, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 160], 'flops': 4746299996032}, 'timestamp': '2025-09-10 02:29:32.606334', 'step': 2973, 'epoch': 1} {'type': 'loss', 'content': 0.13345202803611755, 'timestamp': '2025-09-10 02:29:32.609134', 'step': 2974, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 3322488817984}, 'timestamp': '2025-09-10 02:29:32.642757', 'step': 2974, 'epoch': 1} {'type': 'loss', 'content': 0.16493023931980133, 'timestamp': '2025-09-10 02:29:32.645757', 'step': 2975, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 144], 'flops': 4271696270016}, 'timestamp': '2025-09-10 02:29:32.676982', 'step': 2975, 'epoch': 1} {'type': 'loss', 'content': 0.12158387899398804, 'timestamp': '2025-09-10 02:29:32.704161', 'step': 2976, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 208], 'flops': 6170111174080}, 'timestamp': '2025-09-10 02:29:32.735214', 'step': 2976, 'epoch': 1} {'type': 'loss', 'content': 0.1863318234682083, 'timestamp': '2025-09-10 02:29:32.739545', 'step': 2977, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 2847885091968}, 'timestamp': '2025-09-10 02:29:32.769731', 'step': 2977, 'epoch': 1} {'type': 'loss', 'content': 0.1236381009221077, 'timestamp': '2025-09-10 02:29:32.772421', 'step': 2978, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 3322488817984}, 'timestamp': '2025-09-10 02:29:32.804073', 'step': 2978, 'epoch': 1} {'type': 'loss', 'content': 0.09494079649448395, 'timestamp': '2025-09-10 02:29:32.806752', 'step': 2979, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 128], 'flops': 3797092544000}, 'timestamp': '2025-09-10 02:29:32.837897', 'step': 2979, 'epoch': 1} {'type': 'loss', 'content': 0.20098215341567993, 'timestamp': '2025-09-10 02:29:32.861814', 'step': 2980, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 144], 'flops': 4271696270016}, 'timestamp': '2025-09-10 02:29:32.893878', 'step': 2980, 'epoch': 1} {'type': 'loss', 'content': 0.11261029541492462, 'timestamp': '2025-09-10 02:29:32.896994', 'step': 2981, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 128], 'flops': 3797092544000}, 'timestamp': '2025-09-10 02:29:32.928043', 'step': 2981, 'epoch': 1} {'type': 'loss', 'content': 0.17822463810443878, 'timestamp': '2025-09-10 02:29:32.930568', 'step': 2982, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 3322488817984}, 'timestamp': '2025-09-10 02:29:32.961986', 'step': 2982, 'epoch': 1} {'type': 'loss', 'content': 0.1464541107416153, 'timestamp': '2025-09-10 02:29:32.964355', 'step': 2983, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 128], 'flops': 3797092544000}, 'timestamp': '2025-09-10 02:29:32.995365', 'step': 2983, 'epoch': 1} {'type': 'loss', 'content': 0.2552075684070587, 'timestamp': '2025-09-10 02:29:33.019712', 'step': 2984, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 3322488817984}, 'timestamp': '2025-09-10 02:29:33.050612', 'step': 2984, 'epoch': 1} {'type': 'loss', 'content': 0.11346741020679474, 'timestamp': '2025-09-10 02:29:33.053467', 'step': 2985, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 160], 'flops': 4746299996032}, 'timestamp': '2025-09-10 02:29:33.084323', 'step': 2985, 'epoch': 1} {'type': 'loss', 'content': 0.17293532192707062, 'timestamp': '2025-09-10 02:29:33.087025', 'step': 2986, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 144], 'flops': 4271696270016}, 'timestamp': '2025-09-10 02:29:33.117899', 'step': 2986, 'epoch': 1} {'type': 'loss', 'content': 0.0853479653596878, 'timestamp': '2025-09-10 02:29:33.120980', 'step': 2987, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 176], 'flops': 5220903722048}, 'timestamp': '2025-09-10 02:29:33.152119', 'step': 2987, 'epoch': 1} {'type': 'loss', 'content': 0.1905423253774643, 'timestamp': '2025-09-10 02:29:33.177068', 'step': 2988, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 2847885091968}, 'timestamp': '2025-09-10 02:29:33.208620', 'step': 2988, 'epoch': 1} {'type': 'loss', 'content': 0.256778746843338, 'timestamp': '2025-09-10 02:29:33.212658', 'step': 2989, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 144], 'flops': 4271696270016}, 'timestamp': '2025-09-10 02:29:33.248178', 'step': 2989, 'epoch': 1} {'type': 'loss', 'content': 0.178077831864357, 'timestamp': '2025-09-10 02:29:33.250755', 'step': 2990, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 128], 'flops': 3797092544000}, 'timestamp': '2025-09-10 02:29:33.280797', 'step': 2990, 'epoch': 1} {'type': 'loss', 'content': 0.14499321579933167, 'timestamp': '2025-09-10 02:29:33.284107', 'step': 2991, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 3322488817984}, 'timestamp': '2025-09-10 02:29:33.317474', 'step': 2991, 'epoch': 1} {'type': 'loss', 'content': 0.22869838774204254, 'timestamp': '2025-09-10 02:29:33.344755', 'step': 2992, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 2847885091968}, 'timestamp': '2025-09-10 02:29:33.378316', 'step': 2992, 'epoch': 1} {'type': 'loss', 'content': 0.27366286516189575, 'timestamp': '2025-09-10 02:29:33.381889', 'step': 2993, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 3322488817984}, 'timestamp': '2025-09-10 02:29:33.414721', 'step': 2993, 'epoch': 1} {'type': 'loss', 'content': 0.20973792672157288, 'timestamp': '2025-09-10 02:29:33.417943', 'step': 2994, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 3322488817984}, 'timestamp': '2025-09-10 02:29:33.452469', 'step': 2994, 'epoch': 1} {'type': 'loss', 'content': 0.21041731536388397, 'timestamp': '2025-09-10 02:29:33.455216', 'step': 2995, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 2847885091968}, 'timestamp': '2025-09-10 02:29:33.487316', 'step': 2995, 'epoch': 1} {'type': 'loss', 'content': 0.1693945676088333, 'timestamp': '2025-09-10 02:29:33.511166', 'step': 2996, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 3322488817984}, 'timestamp': '2025-09-10 02:29:33.542755', 'step': 2996, 'epoch': 1} {'type': 'loss', 'content': 0.15124866366386414, 'timestamp': '2025-09-10 02:29:33.545088', 'step': 2997, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 128], 'flops': 3797092544000}, 'timestamp': '2025-09-10 02:29:33.575487', 'step': 2997, 'epoch': 1} {'type': 'loss', 'content': 0.15263979136943817, 'timestamp': '2025-09-10 02:29:33.577893', 'step': 2998, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 176], 'flops': 5220903722048}, 'timestamp': '2025-09-10 02:29:33.609082', 'step': 2998, 'epoch': 1} {'type': 'loss', 'content': 0.13643337786197662, 'timestamp': '2025-09-10 02:29:33.613275', 'step': 2999, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 2847885091968}, 'timestamp': '2025-09-10 02:29:33.645072', 'step': 2999, 'epoch': 1} {'type': 'loss', 'content': 0.1401355117559433, 'timestamp': '2025-09-10 02:29:33.669064', 'step': 3000, 'epoch': 1} {'type': 'info', 'content': 'Checkpoint saved at step 3000', 'timestamp': '2025-09-10 02:29:38.505491', 'step': 3000, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 3322488817984}, 'timestamp': '2025-09-10 02:29:38.551836', 'step': 3000, 'epoch': 1} {'type': 'loss', 'content': 0.10234826058149338, 'timestamp': '2025-09-10 02:29:38.553614', 'step': 3001, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 3322488817984}, 'timestamp': '2025-09-10 02:29:38.585058', 'step': 3001, 'epoch': 1} {'type': 'loss', 'content': 0.1610599309206009, 'timestamp': '2025-09-10 02:29:38.587401', 'step': 3002, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 3322488817984}, 'timestamp': '2025-09-10 02:29:38.619644', 'step': 3002, 'epoch': 1} {'type': 'loss', 'content': 0.1418621689081192, 'timestamp': '2025-09-10 02:29:38.621939', 'step': 3003, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 3322488817984}, 'timestamp': '2025-09-10 02:29:38.651821', 'step': 3003, 'epoch': 1} {'type': 'loss', 'content': 0.1342315375804901, 'timestamp': '2025-09-10 02:29:38.675078', 'step': 3004, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 2847885091968}, 'timestamp': '2025-09-10 02:29:38.706277', 'step': 3004, 'epoch': 1} {'type': 'loss', 'content': 0.09252428263425827, 'timestamp': '2025-09-10 02:29:38.708860', 'step': 3005, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 3322488817984}, 'timestamp': '2025-09-10 02:29:38.740265', 'step': 3005, 'epoch': 1} {'type': 'loss', 'content': 0.16106194257736206, 'timestamp': '2025-09-10 02:29:38.742529', 'step': 3006, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 144], 'flops': 4271696270016}, 'timestamp': '2025-09-10 02:29:38.774077', 'step': 3006, 'epoch': 1} {'type': 'loss', 'content': 0.19501852989196777, 'timestamp': '2025-09-10 02:29:38.776227', 'step': 3007, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 3322488817984}, 'timestamp': '2025-09-10 02:29:38.806769', 'step': 3007, 'epoch': 1} {'type': 'loss', 'content': 0.0727355107665062, 'timestamp': '2025-09-10 02:29:38.830460', 'step': 3008, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 2847885091968}, 'timestamp': '2025-09-10 02:29:38.861365', 'step': 3008, 'epoch': 1} {'type': 'loss', 'content': 0.20133076608181, 'timestamp': '2025-09-10 02:29:38.863251', 'step': 3009, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 160], 'flops': 4746299996032}, 'timestamp': '2025-09-10 02:29:38.893028', 'step': 3009, 'epoch': 1} {'type': 'loss', 'content': 0.26895320415496826, 'timestamp': '2025-09-10 02:29:38.896890', 'step': 3010, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 128], 'flops': 3797092544000}, 'timestamp': '2025-09-10 02:29:38.928096', 'step': 3010, 'epoch': 1} {'type': 'loss', 'content': 0.14434930682182312, 'timestamp': '2025-09-10 02:29:38.930464', 'step': 3011, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 3322488817984}, 'timestamp': '2025-09-10 02:29:38.961565', 'step': 3011, 'epoch': 1} {'type': 'loss', 'content': 0.07360302656888962, 'timestamp': '2025-09-10 02:29:38.985145', 'step': 3012, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 3322488817984}, 'timestamp': '2025-09-10 02:29:39.015687', 'step': 3012, 'epoch': 1} {'type': 'loss', 'content': 0.1565740406513214, 'timestamp': '2025-09-10 02:29:39.018182', 'step': 3013, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 2847885091968}, 'timestamp': '2025-09-10 02:29:39.049338', 'step': 3013, 'epoch': 1} {'type': 'loss', 'content': 0.11427225172519684, 'timestamp': '2025-09-10 02:29:39.051508', 'step': 3014, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 3322488817984}, 'timestamp': '2025-09-10 02:29:39.081129', 'step': 3014, 'epoch': 1} {'type': 'loss', 'content': 0.13325907289981842, 'timestamp': '2025-09-10 02:29:39.084095', 'step': 3015, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 3322488817984}, 'timestamp': '2025-09-10 02:29:39.114810', 'step': 3015, 'epoch': 1} {'type': 'loss', 'content': 0.23750090599060059, 'timestamp': '2025-09-10 02:29:39.138424', 'step': 3016, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 128], 'flops': 3797092544000}, 'timestamp': '2025-09-10 02:29:39.169566', 'step': 3016, 'epoch': 1} {'type': 'loss', 'content': 0.1920505315065384, 'timestamp': '2025-09-10 02:29:39.171503', 'step': 3017, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 144], 'flops': 4271696270016}, 'timestamp': '2025-09-10 02:29:39.203037', 'step': 3017, 'epoch': 1} {'type': 'loss', 'content': 0.21338030695915222, 'timestamp': '2025-09-10 02:29:39.205395', 'step': 3018, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 3322488817984}, 'timestamp': '2025-09-10 02:29:39.235775', 'step': 3018, 'epoch': 1} {'type': 'loss', 'content': 0.10180147737264633, 'timestamp': '2025-09-10 02:29:39.238092', 'step': 3019, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 144], 'flops': 4271696270016}, 'timestamp': '2025-09-10 02:29:39.269071', 'step': 3019, 'epoch': 1} {'type': 'loss', 'content': 0.15260940790176392, 'timestamp': '2025-09-10 02:29:39.292606', 'step': 3020, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 3322488817984}, 'timestamp': '2025-09-10 02:29:39.325068', 'step': 3020, 'epoch': 1} {'type': 'loss', 'content': 0.1524389237165451, 'timestamp': '2025-09-10 02:29:39.328739', 'step': 3021, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 144], 'flops': 4271696270016}, 'timestamp': '2025-09-10 02:29:39.362276', 'step': 3021, 'epoch': 1} {'type': 'loss', 'content': 0.13227087259292603, 'timestamp': '2025-09-10 02:29:39.365006', 'step': 3022, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 3322488817984}, 'timestamp': '2025-09-10 02:29:39.394902', 'step': 3022, 'epoch': 1} {'type': 'loss', 'content': 0.21414516866207123, 'timestamp': '2025-09-10 02:29:39.404690', 'step': 3023, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 2847885091968}, 'timestamp': '2025-09-10 02:29:39.440377', 'step': 3023, 'epoch': 1} {'type': 'loss', 'content': 0.21696577966213226, 'timestamp': '2025-09-10 02:29:39.464201', 'step': 3024, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 144], 'flops': 4271696270016}, 'timestamp': '2025-09-10 02:29:39.495780', 'step': 3024, 'epoch': 1} {'type': 'loss', 'content': 0.18717463314533234, 'timestamp': '2025-09-10 02:29:39.497887', 'step': 3025, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 3322488817984}, 'timestamp': '2025-09-10 02:29:39.529777', 'step': 3025, 'epoch': 1} {'type': 'loss', 'content': 0.08869902044534683, 'timestamp': '2025-09-10 02:29:39.532203', 'step': 3026, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 128], 'flops': 3797092544000}, 'timestamp': '2025-09-10 02:29:39.562943', 'step': 3026, 'epoch': 1} {'type': 'loss', 'content': 0.20763398706912994, 'timestamp': '2025-09-10 02:29:39.565249', 'step': 3027, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 128], 'flops': 3797092544000}, 'timestamp': '2025-09-10 02:29:39.597488', 'step': 3027, 'epoch': 1} {'type': 'loss', 'content': 0.218856081366539, 'timestamp': '2025-09-10 02:29:39.621032', 'step': 3028, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 3322488817984}, 'timestamp': '2025-09-10 02:29:39.651485', 'step': 3028, 'epoch': 1} {'type': 'loss', 'content': 0.11300124228000641, 'timestamp': '2025-09-10 02:29:39.653682', 'step': 3029, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 3322488817984}, 'timestamp': '2025-09-10 02:29:39.684071', 'step': 3029, 'epoch': 1} {'type': 'loss', 'content': 0.20648010075092316, 'timestamp': '2025-09-10 02:29:39.686458', 'step': 3030, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 3322488817984}, 'timestamp': '2025-09-10 02:29:39.717286', 'step': 3030, 'epoch': 1} {'type': 'loss', 'content': 0.1611136943101883, 'timestamp': '2025-09-10 02:29:39.719466', 'step': 3031, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 3322488817984}, 'timestamp': '2025-09-10 02:29:39.749761', 'step': 3031, 'epoch': 1} {'type': 'loss', 'content': 0.14830254018306732, 'timestamp': '2025-09-10 02:29:39.773547', 'step': 3032, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 128], 'flops': 3797092544000}, 'timestamp': '2025-09-10 02:29:39.809445', 'step': 3032, 'epoch': 1} {'type': 'loss', 'content': 0.17117483913898468, 'timestamp': '2025-09-10 02:29:39.812983', 'step': 3033, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 3322488817984}, 'timestamp': '2025-09-10 02:29:39.853423', 'step': 3033, 'epoch': 1} {'type': 'loss', 'content': 0.17632947862148285, 'timestamp': '2025-09-10 02:29:39.858268', 'step': 3034, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 144], 'flops': 4271696270016}, 'timestamp': '2025-09-10 02:29:39.895178', 'step': 3034, 'epoch': 1} {'type': 'loss', 'content': 0.14254656434059143, 'timestamp': '2025-09-10 02:29:39.899119', 'step': 3035, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 2847885091968}, 'timestamp': '2025-09-10 02:29:39.939494', 'step': 3035, 'epoch': 1} {'type': 'loss', 'content': 0.19462169706821442, 'timestamp': '2025-09-10 02:29:39.965874', 'step': 3036, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 2847885091968}, 'timestamp': '2025-09-10 02:29:40.001466', 'step': 3036, 'epoch': 1} {'type': 'loss', 'content': 0.13903844356536865, 'timestamp': '2025-09-10 02:29:40.004562', 'step': 3037, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 2847885091968}, 'timestamp': '2025-09-10 02:29:40.042279', 'step': 3037, 'epoch': 1} {'type': 'loss', 'content': 0.2517993450164795, 'timestamp': '2025-09-10 02:29:40.045761', 'step': 3038, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 3322488817984}, 'timestamp': '2025-09-10 02:29:40.080869', 'step': 3038, 'epoch': 1} {'type': 'loss', 'content': 0.12403962016105652, 'timestamp': '2025-09-10 02:29:40.083180', 'step': 3039, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 80], 'flops': 2373281365952}, 'timestamp': '2025-09-10 02:29:40.114652', 'step': 3039, 'epoch': 1} {'type': 'loss', 'content': 0.23866741359233856, 'timestamp': '2025-09-10 02:29:40.138157', 'step': 3040, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 128], 'flops': 3797092544000}, 'timestamp': '2025-09-10 02:29:40.168767', 'step': 3040, 'epoch': 1} {'type': 'loss', 'content': 0.09454205632209778, 'timestamp': '2025-09-10 02:29:40.170654', 'step': 3041, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 3322488817984}, 'timestamp': '2025-09-10 02:29:40.201519', 'step': 3041, 'epoch': 1} {'type': 'loss', 'content': 0.15113820135593414, 'timestamp': '2025-09-10 02:29:40.203636', 'step': 3042, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 3322488817984}, 'timestamp': '2025-09-10 02:29:40.235763', 'step': 3042, 'epoch': 1} {'type': 'loss', 'content': 0.198605015873909, 'timestamp': '2025-09-10 02:29:40.238038', 'step': 3043, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 2847885091968}, 'timestamp': '2025-09-10 02:29:40.269068', 'step': 3043, 'epoch': 1} {'type': 'loss', 'content': 0.21094807982444763, 'timestamp': '2025-09-10 02:29:40.292610', 'step': 3044, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 128], 'flops': 3797092544000}, 'timestamp': '2025-09-10 02:29:40.323414', 'step': 3044, 'epoch': 1} {'type': 'loss', 'content': 0.2674022614955902, 'timestamp': '2025-09-10 02:29:40.325473', 'step': 3045, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 3322488817984}, 'timestamp': '2025-09-10 02:29:40.359077', 'step': 3045, 'epoch': 1} {'type': 'loss', 'content': 0.11601927876472473, 'timestamp': '2025-09-10 02:29:40.361252', 'step': 3046, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 128], 'flops': 3797092544000}, 'timestamp': '2025-09-10 02:29:40.391332', 'step': 3046, 'epoch': 1} {'type': 'loss', 'content': 0.2472119778394699, 'timestamp': '2025-09-10 02:29:40.393789', 'step': 3047, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 80], 'flops': 2373281365952}, 'timestamp': '2025-09-10 02:29:40.424566', 'step': 3047, 'epoch': 1} {'type': 'loss', 'content': 0.12666283547878265, 'timestamp': '2025-09-10 02:29:40.448334', 'step': 3048, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 128], 'flops': 3797092544000}, 'timestamp': '2025-09-10 02:29:40.478932', 'step': 3048, 'epoch': 1} {'type': 'loss', 'content': 0.1376998871564865, 'timestamp': '2025-09-10 02:29:40.481163', 'step': 3049, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 2847885091968}, 'timestamp': '2025-09-10 02:29:40.511453', 'step': 3049, 'epoch': 1} {'type': 'loss', 'content': 0.10772465169429779, 'timestamp': '2025-09-10 02:29:40.513520', 'step': 3050, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 3322488817984}, 'timestamp': '2025-09-10 02:29:40.546290', 'step': 3050, 'epoch': 1} {'type': 'loss', 'content': 0.17832013964653015, 'timestamp': '2025-09-10 02:29:40.549203', 'step': 3051, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 3322488817984}, 'timestamp': '2025-09-10 02:29:40.579908', 'step': 3051, 'epoch': 1} {'type': 'loss', 'content': 0.1811629682779312, 'timestamp': '2025-09-10 02:29:40.603513', 'step': 3052, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 3322488817984}, 'timestamp': '2025-09-10 02:29:40.639370', 'step': 3052, 'epoch': 1} {'type': 'loss', 'content': 0.22584685683250427, 'timestamp': '2025-09-10 02:29:40.641812', 'step': 3053, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 128], 'flops': 3797092544000}, 'timestamp': '2025-09-10 02:29:40.672802', 'step': 3053, 'epoch': 1} {'type': 'loss', 'content': 0.20133748650550842, 'timestamp': '2025-09-10 02:29:40.674468', 'step': 3054, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 3322488817984}, 'timestamp': '2025-09-10 02:29:40.712981', 'step': 3054, 'epoch': 1} {'type': 'loss', 'content': 0.1276191622018814, 'timestamp': '2025-09-10 02:29:40.718406', 'step': 3055, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 2847885091968}, 'timestamp': '2025-09-10 02:29:40.759958', 'step': 3055, 'epoch': 1} {'type': 'loss', 'content': 0.19198447465896606, 'timestamp': '2025-09-10 02:29:40.783352', 'step': 3056, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 2847885091968}, 'timestamp': '2025-09-10 02:29:40.821564', 'step': 3056, 'epoch': 1} {'type': 'loss', 'content': 0.09162623435258865, 'timestamp': '2025-09-10 02:29:40.826196', 'step': 3057, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 2847885091968}, 'timestamp': '2025-09-10 02:29:40.858003', 'step': 3057, 'epoch': 1} {'type': 'loss', 'content': 0.3021068871021271, 'timestamp': '2025-09-10 02:29:40.876610', 'step': 3058, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 128], 'flops': 3797092544000}, 'timestamp': '2025-09-10 02:29:40.926920', 'step': 3058, 'epoch': 1} {'type': 'loss', 'content': 0.13720501959323883, 'timestamp': '2025-09-10 02:29:40.942205', 'step': 3059, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 2847885091968}, 'timestamp': '2025-09-10 02:29:41.032520', 'step': 3059, 'epoch': 1} {'type': 'loss', 'content': 0.2708348333835602, 'timestamp': '2025-09-10 02:29:41.063216', 'step': 3060, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 144], 'flops': 4271696270016}, 'timestamp': '2025-09-10 02:29:41.114720', 'step': 3060, 'epoch': 1} {'type': 'loss', 'content': 0.07282231748104095, 'timestamp': '2025-09-10 02:29:41.122745', 'step': 3061, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 2847885091968}, 'timestamp': '2025-09-10 02:29:41.187914', 'step': 3061, 'epoch': 1} {'type': 'loss', 'content': 0.13018596172332764, 'timestamp': '2025-09-10 02:29:41.196911', 'step': 3062, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 3322488817984}, 'timestamp': '2025-09-10 02:29:41.263294', 'step': 3062, 'epoch': 1} {'type': 'loss', 'content': 0.2447517216205597, 'timestamp': '2025-09-10 02:29:41.282784', 'step': 3063, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 2847885091968}, 'timestamp': '2025-09-10 02:29:41.374571', 'step': 3063, 'epoch': 1} {'type': 'loss', 'content': 0.1475885957479477, 'timestamp': '2025-09-10 02:29:41.401242', 'step': 3064, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 3322488817984}, 'timestamp': '2025-09-10 02:29:41.460685', 'step': 3064, 'epoch': 1} {'type': 'loss', 'content': 0.19194477796554565, 'timestamp': '2025-09-10 02:29:41.472570', 'step': 3065, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 3322488817984}, 'timestamp': '2025-09-10 02:29:41.565103', 'step': 3065, 'epoch': 1} {'type': 'loss', 'content': 0.10557301342487335, 'timestamp': '2025-09-10 02:29:41.579932', 'step': 3066, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 128], 'flops': 3797092544000}, 'timestamp': '2025-09-10 02:29:41.646041', 'step': 3066, 'epoch': 1} {'type': 'loss', 'content': 0.16873693466186523, 'timestamp': '2025-09-10 02:29:41.667105', 'step': 3067, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 3322488817984}, 'timestamp': '2025-09-10 02:29:41.728919', 'step': 3067, 'epoch': 1} {'type': 'loss', 'content': 0.13005845248699188, 'timestamp': '2025-09-10 02:29:41.765032', 'step': 3068, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 3322488817984}, 'timestamp': '2025-09-10 02:29:41.828001', 'step': 3068, 'epoch': 1} {'type': 'loss', 'content': 0.2467397302389145, 'timestamp': '2025-09-10 02:29:41.836273', 'step': 3069, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 2847885091968}, 'timestamp': '2025-09-10 02:29:41.900156', 'step': 3069, 'epoch': 1} {'type': 'loss', 'content': 0.22058424353599548, 'timestamp': '2025-09-10 02:29:41.904119', 'step': 3070, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 2847885091968}, 'timestamp': '2025-09-10 02:29:41.964665', 'step': 3070, 'epoch': 1} {'type': 'loss', 'content': 0.214870885014534, 'timestamp': '2025-09-10 02:29:41.979150', 'step': 3071, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 2847885091968}, 'timestamp': '2025-09-10 02:29:42.064909', 'step': 3071, 'epoch': 1} {'type': 'loss', 'content': 0.23971354961395264, 'timestamp': '2025-09-10 02:29:42.094036', 'step': 3072, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 3322488817984}, 'timestamp': '2025-09-10 02:29:42.128235', 'step': 3072, 'epoch': 1} {'type': 'loss', 'content': 0.22317859530448914, 'timestamp': '2025-09-10 02:29:42.131316', 'step': 3073, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 2847885091968}, 'timestamp': '2025-09-10 02:29:42.162254', 'step': 3073, 'epoch': 1} {'type': 'loss', 'content': 0.11940674483776093, 'timestamp': '2025-09-10 02:29:42.164628', 'step': 3074, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 144], 'flops': 4271696270016}, 'timestamp': '2025-09-10 02:29:42.199848', 'step': 3074, 'epoch': 1} {'type': 'loss', 'content': 0.13793089985847473, 'timestamp': '2025-09-10 02:29:42.212194', 'step': 3075, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 2847885091968}, 'timestamp': '2025-09-10 02:29:42.251998', 'step': 3075, 'epoch': 1} {'type': 'loss', 'content': 0.10505596548318863, 'timestamp': '2025-09-10 02:29:42.275611', 'step': 3076, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 128], 'flops': 3797092544000}, 'timestamp': '2025-09-10 02:29:42.309564', 'step': 3076, 'epoch': 1} {'type': 'loss', 'content': 0.16055001318454742, 'timestamp': '2025-09-10 02:29:42.311930', 'step': 3077, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 2847885091968}, 'timestamp': '2025-09-10 02:29:42.346971', 'step': 3077, 'epoch': 1} {'type': 'loss', 'content': 0.15222027897834778, 'timestamp': '2025-09-10 02:29:42.349591', 'step': 3078, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 3322488817984}, 'timestamp': '2025-09-10 02:29:42.388075', 'step': 3078, 'epoch': 1} {'type': 'loss', 'content': 0.12100105732679367, 'timestamp': '2025-09-10 02:29:42.393845', 'step': 3079, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 3322488817984}, 'timestamp': '2025-09-10 02:29:42.431662', 'step': 3079, 'epoch': 1} {'type': 'loss', 'content': 0.2642877697944641, 'timestamp': '2025-09-10 02:29:42.461258', 'step': 3080, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 2847885091968}, 'timestamp': '2025-09-10 02:29:42.495545', 'step': 3080, 'epoch': 1} {'type': 'loss', 'content': 0.23365212976932526, 'timestamp': '2025-09-10 02:29:42.497613', 'step': 3081, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 3322488817984}, 'timestamp': '2025-09-10 02:29:42.532106', 'step': 3081, 'epoch': 1} {'type': 'loss', 'content': 0.2083313912153244, 'timestamp': '2025-09-10 02:29:42.535490', 'step': 3082, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 2847885091968}, 'timestamp': '2025-09-10 02:29:42.566687', 'step': 3082, 'epoch': 1} {'type': 'loss', 'content': 0.17008234560489655, 'timestamp': '2025-09-10 02:29:42.569525', 'step': 3083, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 3322488817984}, 'timestamp': '2025-09-10 02:29:42.600395', 'step': 3083, 'epoch': 1} {'type': 'loss', 'content': 0.1465248018503189, 'timestamp': '2025-09-10 02:29:42.625104', 'step': 3084, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 160], 'flops': 4746299996032}, 'timestamp': '2025-09-10 02:29:42.658915', 'step': 3084, 'epoch': 1} {'type': 'loss', 'content': 0.07526419311761856, 'timestamp': '2025-09-10 02:29:42.661237', 'step': 3085, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 144], 'flops': 4271696270016}, 'timestamp': '2025-09-10 02:29:42.694200', 'step': 3085, 'epoch': 1} {'type': 'loss', 'content': 0.20646882057189941, 'timestamp': '2025-09-10 02:29:42.701293', 'step': 3086, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 3322488817984}, 'timestamp': '2025-09-10 02:29:42.747519', 'step': 3086, 'epoch': 1} {'type': 'loss', 'content': 0.15557220578193665, 'timestamp': '2025-09-10 02:29:42.750568', 'step': 3087, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 144], 'flops': 4271696270016}, 'timestamp': '2025-09-10 02:29:42.782955', 'step': 3087, 'epoch': 1} {'type': 'loss', 'content': 0.21963033080101013, 'timestamp': '2025-09-10 02:29:42.808103', 'step': 3088, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 2847885091968}, 'timestamp': '2025-09-10 02:29:42.855483', 'step': 3088, 'epoch': 1} {'type': 'loss', 'content': 0.27285170555114746, 'timestamp': '2025-09-10 02:29:42.858622', 'step': 3089, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 128], 'flops': 3797092544000}, 'timestamp': '2025-09-10 02:29:42.892078', 'step': 3089, 'epoch': 1} {'type': 'loss', 'content': 0.1114504337310791, 'timestamp': '2025-09-10 02:29:42.894684', 'step': 3090, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 3322488817984}, 'timestamp': '2025-09-10 02:29:42.944143', 'step': 3090, 'epoch': 1} {'type': 'loss', 'content': 0.20656152069568634, 'timestamp': '2025-09-10 02:29:42.948362', 'step': 3091, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 128], 'flops': 3797092544000}, 'timestamp': '2025-09-10 02:29:42.983652', 'step': 3091, 'epoch': 1} {'type': 'loss', 'content': 0.1611335426568985, 'timestamp': '2025-09-10 02:29:43.007330', 'step': 3092, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 2847885091968}, 'timestamp': '2025-09-10 02:29:43.042536', 'step': 3092, 'epoch': 1} {'type': 'loss', 'content': 0.20605701208114624, 'timestamp': '2025-09-10 02:29:43.044732', 'step': 3093, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 2847885091968}, 'timestamp': '2025-09-10 02:29:43.079379', 'step': 3093, 'epoch': 1} {'type': 'loss', 'content': 0.11084610223770142, 'timestamp': '2025-09-10 02:29:43.082297', 'step': 3094, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 3322488817984}, 'timestamp': '2025-09-10 02:29:43.116941', 'step': 3094, 'epoch': 1} {'type': 'loss', 'content': 0.12562383711338043, 'timestamp': '2025-09-10 02:29:43.120452', 'step': 3095, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 3322488817984}, 'timestamp': '2025-09-10 02:29:43.152806', 'step': 3095, 'epoch': 1} {'type': 'loss', 'content': 0.1729806512594223, 'timestamp': '2025-09-10 02:29:43.180587', 'step': 3096, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 3322488817984}, 'timestamp': '2025-09-10 02:29:43.214345', 'step': 3096, 'epoch': 1} {'type': 'loss', 'content': 0.09073489159345627, 'timestamp': '2025-09-10 02:29:43.219098', 'step': 3097, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 2847885091968}, 'timestamp': '2025-09-10 02:29:43.252293', 'step': 3097, 'epoch': 1} {'type': 'loss', 'content': 0.22624172270298004, 'timestamp': '2025-09-10 02:29:43.257513', 'step': 3098, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 2847885091968}, 'timestamp': '2025-09-10 02:29:43.292192', 'step': 3098, 'epoch': 1} {'type': 'loss', 'content': 0.15442177653312683, 'timestamp': '2025-09-10 02:29:43.294714', 'step': 3099, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 128], 'flops': 3797092544000}, 'timestamp': '2025-09-10 02:29:43.326061', 'step': 3099, 'epoch': 1} {'type': 'loss', 'content': 0.20034478604793549, 'timestamp': '2025-09-10 02:29:43.349660', 'step': 3100, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 144], 'flops': 4271696270016}, 'timestamp': '2025-09-10 02:29:43.385794', 'step': 3100, 'epoch': 1} {'type': 'loss', 'content': 0.22012580931186676, 'timestamp': '2025-09-10 02:29:43.388213', 'step': 3101, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 2847885091968}, 'timestamp': '2025-09-10 02:29:43.419498', 'step': 3101, 'epoch': 1} {'type': 'loss', 'content': 0.10181260108947754, 'timestamp': '2025-09-10 02:29:43.421877', 'step': 3102, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 3322488817984}, 'timestamp': '2025-09-10 02:29:43.452958', 'step': 3102, 'epoch': 1} {'type': 'loss', 'content': 0.20532160997390747, 'timestamp': '2025-09-10 02:29:43.455490', 'step': 3103, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 2847885091968}, 'timestamp': '2025-09-10 02:29:43.488711', 'step': 3103, 'epoch': 1} {'type': 'loss', 'content': 0.09641300141811371, 'timestamp': '2025-09-10 02:29:43.512328', 'step': 3104, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 128], 'flops': 3797092544000}, 'timestamp': '2025-09-10 02:29:43.543398', 'step': 3104, 'epoch': 1} {'type': 'loss', 'content': 0.2199927121400833, 'timestamp': '2025-09-10 02:29:43.545536', 'step': 3105, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 144], 'flops': 4271696270016}, 'timestamp': '2025-09-10 02:29:43.576757', 'step': 3105, 'epoch': 1} {'type': 'loss', 'content': 0.1676185578107834, 'timestamp': '2025-09-10 02:29:43.579263', 'step': 3106, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 128], 'flops': 3797092544000}, 'timestamp': '2025-09-10 02:29:43.614303', 'step': 3106, 'epoch': 1} {'type': 'loss', 'content': 0.12249243259429932, 'timestamp': '2025-09-10 02:29:43.617015', 'step': 3107, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 3322488817984}, 'timestamp': '2025-09-10 02:29:43.648732', 'step': 3107, 'epoch': 1} {'type': 'loss', 'content': 0.11330673098564148, 'timestamp': '2025-09-10 02:29:43.672223', 'step': 3108, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 2847885091968}, 'timestamp': '2025-09-10 02:29:43.702476', 'step': 3108, 'epoch': 1} {'type': 'loss', 'content': 0.14746089279651642, 'timestamp': '2025-09-10 02:29:43.711414', 'step': 3109, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 3322488817984}, 'timestamp': '2025-09-10 02:29:43.744752', 'step': 3109, 'epoch': 1} {'type': 'loss', 'content': 0.15180107951164246, 'timestamp': '2025-09-10 02:29:43.746842', 'step': 3110, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 3322488817984}, 'timestamp': '2025-09-10 02:29:43.779856', 'step': 3110, 'epoch': 1} {'type': 'loss', 'content': 0.12148897349834442, 'timestamp': '2025-09-10 02:29:43.782298', 'step': 3111, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 2847885091968}, 'timestamp': '2025-09-10 02:29:43.815484', 'step': 3111, 'epoch': 1} {'type': 'loss', 'content': 0.1997445970773697, 'timestamp': '2025-09-10 02:29:43.841283', 'step': 3112, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 2847885091968}, 'timestamp': '2025-09-10 02:29:43.872497', 'step': 3112, 'epoch': 1} {'type': 'loss', 'content': 0.127357617020607, 'timestamp': '2025-09-10 02:29:43.874635', 'step': 3113, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 128], 'flops': 3797092544000}, 'timestamp': '2025-09-10 02:29:43.905777', 'step': 3113, 'epoch': 1} {'type': 'loss', 'content': 0.1743139624595642, 'timestamp': '2025-09-10 02:29:43.908030', 'step': 3114, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 128], 'flops': 3797092544000}, 'timestamp': '2025-09-10 02:29:43.938698', 'step': 3114, 'epoch': 1} {'type': 'loss', 'content': 0.13870657980442047, 'timestamp': '2025-09-10 02:29:43.941393', 'step': 3115, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 160], 'flops': 4746299996032}, 'timestamp': '2025-09-10 02:29:43.972244', 'step': 3115, 'epoch': 1} {'type': 'loss', 'content': 0.29971247911453247, 'timestamp': '2025-09-10 02:29:43.995698', 'step': 3116, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 128], 'flops': 3797092544000}, 'timestamp': '2025-09-10 02:29:44.026696', 'step': 3116, 'epoch': 1} {'type': 'loss', 'content': 0.18124380707740784, 'timestamp': '2025-09-10 02:29:44.029056', 'step': 3117, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 2847885091968}, 'timestamp': '2025-09-10 02:29:44.059849', 'step': 3117, 'epoch': 1} {'type': 'loss', 'content': 0.12584087252616882, 'timestamp': '2025-09-10 02:29:44.062476', 'step': 3118, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 144], 'flops': 4271696270016}, 'timestamp': '2025-09-10 02:29:44.094971', 'step': 3118, 'epoch': 1} {'type': 'loss', 'content': 0.15191565454006195, 'timestamp': '2025-09-10 02:29:44.097427', 'step': 3119, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 3322488817984}, 'timestamp': '2025-09-10 02:29:44.132991', 'step': 3119, 'epoch': 1} {'type': 'loss', 'content': 0.11588723212480545, 'timestamp': '2025-09-10 02:29:44.156957', 'step': 3120, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 2847885091968}, 'timestamp': '2025-09-10 02:29:44.193960', 'step': 3120, 'epoch': 1} {'type': 'loss', 'content': 0.13556843996047974, 'timestamp': '2025-09-10 02:29:44.196463', 'step': 3121, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 144], 'flops': 4271696270016}, 'timestamp': '2025-09-10 02:29:44.227507', 'step': 3121, 'epoch': 1} {'type': 'loss', 'content': 0.1870545744895935, 'timestamp': '2025-09-10 02:29:44.230030', 'step': 3122, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 128], 'flops': 3797092544000}, 'timestamp': '2025-09-10 02:29:44.261653', 'step': 3122, 'epoch': 1} {'type': 'loss', 'content': 0.11874767392873764, 'timestamp': '2025-09-10 02:29:44.264113', 'step': 3123, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 128], 'flops': 3797092544000}, 'timestamp': '2025-09-10 02:29:44.295949', 'step': 3123, 'epoch': 1} {'type': 'loss', 'content': 0.13794012367725372, 'timestamp': '2025-09-10 02:29:44.319719', 'step': 3124, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 3322488817984}, 'timestamp': '2025-09-10 02:29:44.352309', 'step': 3124, 'epoch': 1} {'type': 'loss', 'content': 0.13911239802837372, 'timestamp': '2025-09-10 02:29:44.356058', 'step': 3125, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 2847885091968}, 'timestamp': '2025-09-10 02:29:44.386697', 'step': 3125, 'epoch': 1} {'type': 'loss', 'content': 0.1787731796503067, 'timestamp': '2025-09-10 02:29:44.389111', 'step': 3126, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 160], 'flops': 4746299996032}, 'timestamp': '2025-09-10 02:29:44.420759', 'step': 3126, 'epoch': 1} {'type': 'loss', 'content': 0.16531552374362946, 'timestamp': '2025-09-10 02:29:44.423573', 'step': 3127, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 176], 'flops': 5220903722048}, 'timestamp': '2025-09-10 02:29:44.455058', 'step': 3127, 'epoch': 1} {'type': 'loss', 'content': 0.12693865597248077, 'timestamp': '2025-09-10 02:29:44.480120', 'step': 3128, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 144], 'flops': 4271696270016}, 'timestamp': '2025-09-10 02:29:44.511065', 'step': 3128, 'epoch': 1} {'type': 'loss', 'content': 0.15283045172691345, 'timestamp': '2025-09-10 02:29:44.513200', 'step': 3129, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 128], 'flops': 3797092544000}, 'timestamp': '2025-09-10 02:29:44.545068', 'step': 3129, 'epoch': 1} {'type': 'loss', 'content': 0.25866350531578064, 'timestamp': '2025-09-10 02:29:44.548508', 'step': 3130, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 128], 'flops': 3797092544000}, 'timestamp': '2025-09-10 02:29:44.581998', 'step': 3130, 'epoch': 1} {'type': 'loss', 'content': 0.13968247175216675, 'timestamp': '2025-09-10 02:29:44.584735', 'step': 3131, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 3322488817984}, 'timestamp': '2025-09-10 02:29:44.618044', 'step': 3131, 'epoch': 1} {'type': 'loss', 'content': 0.1302327811717987, 'timestamp': '2025-09-10 02:29:44.642256', 'step': 3132, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 128], 'flops': 3797092544000}, 'timestamp': '2025-09-10 02:29:44.673439', 'step': 3132, 'epoch': 1} {'type': 'loss', 'content': 0.2137773334980011, 'timestamp': '2025-09-10 02:29:44.677222', 'step': 3133, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 2847885091968}, 'timestamp': '2025-09-10 02:29:44.750976', 'step': 3133, 'epoch': 1} {'type': 'loss', 'content': 0.15226830542087555, 'timestamp': '2025-09-10 02:29:44.769404', 'step': 3134, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 128], 'flops': 3797092544000}, 'timestamp': '2025-09-10 02:29:44.833943', 'step': 3134, 'epoch': 1} {'type': 'loss', 'content': 0.19696307182312012, 'timestamp': '2025-09-10 02:29:44.836762', 'step': 3135, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 128], 'flops': 3797092544000}, 'timestamp': '2025-09-10 02:29:44.912993', 'step': 3135, 'epoch': 1} {'type': 'loss', 'content': 0.183464914560318, 'timestamp': '2025-09-10 02:29:44.953904', 'step': 3136, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 144], 'flops': 4271696270016}, 'timestamp': '2025-09-10 02:29:45.031234', 'step': 3136, 'epoch': 1} {'type': 'loss', 'content': 0.1562694013118744, 'timestamp': '2025-09-10 02:29:45.036182', 'step': 3137, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 2847885091968}, 'timestamp': '2025-09-10 02:29:45.073123', 'step': 3137, 'epoch': 1} {'type': 'loss', 'content': 0.11024736613035202, 'timestamp': '2025-09-10 02:29:45.077280', 'step': 3138, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 128], 'flops': 3797092544000}, 'timestamp': '2025-09-10 02:29:45.133946', 'step': 3138, 'epoch': 1} {'type': 'loss', 'content': 0.12203805893659592, 'timestamp': '2025-09-10 02:29:45.139994', 'step': 3139, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 3322488817984}, 'timestamp': '2025-09-10 02:29:45.184261', 'step': 3139, 'epoch': 1} {'type': 'loss', 'content': 0.21185575425624847, 'timestamp': '2025-09-10 02:29:45.209588', 'step': 3140, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 192], 'flops': 5695507448064}, 'timestamp': '2025-09-10 02:29:45.242333', 'step': 3140, 'epoch': 1} {'type': 'loss', 'content': 0.22572927176952362, 'timestamp': '2025-09-10 02:29:45.244801', 'step': 3141, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 2847885091968}, 'timestamp': '2025-09-10 02:29:45.277828', 'step': 3141, 'epoch': 1} {'type': 'loss', 'content': 0.13861066102981567, 'timestamp': '2025-09-10 02:29:45.280265', 'step': 3142, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 128], 'flops': 3797092544000}, 'timestamp': '2025-09-10 02:29:45.318004', 'step': 3142, 'epoch': 1} {'type': 'loss', 'content': 0.30941319465637207, 'timestamp': '2025-09-10 02:29:45.321110', 'step': 3143, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 3322488817984}, 'timestamp': '2025-09-10 02:29:45.354429', 'step': 3143, 'epoch': 1} {'type': 'loss', 'content': 0.18556030094623566, 'timestamp': '2025-09-10 02:29:45.381371', 'step': 3144, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 128], 'flops': 3797092544000}, 'timestamp': '2025-09-10 02:29:45.415428', 'step': 3144, 'epoch': 1} {'type': 'loss', 'content': 0.1649017035961151, 'timestamp': '2025-09-10 02:29:45.418095', 'step': 3145, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 2847885091968}, 'timestamp': '2025-09-10 02:29:45.449768', 'step': 3145, 'epoch': 1} {'type': 'loss', 'content': 0.22192740440368652, 'timestamp': '2025-09-10 02:29:45.452409', 'step': 3146, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 2847885091968}, 'timestamp': '2025-09-10 02:29:45.483325', 'step': 3146, 'epoch': 1} {'type': 'loss', 'content': 0.17082823812961578, 'timestamp': '2025-09-10 02:29:45.487083', 'step': 3147, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 3322488817984}, 'timestamp': '2025-09-10 02:29:45.524428', 'step': 3147, 'epoch': 1} {'type': 'loss', 'content': 0.2640916407108307, 'timestamp': '2025-09-10 02:29:45.549528', 'step': 3148, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 3322488817984}, 'timestamp': '2025-09-10 02:29:45.581951', 'step': 3148, 'epoch': 1} {'type': 'loss', 'content': 0.20492371916770935, 'timestamp': '2025-09-10 02:29:45.584345', 'step': 3149, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 2847885091968}, 'timestamp': '2025-09-10 02:29:45.617098', 'step': 3149, 'epoch': 1} {'type': 'loss', 'content': 0.18813610076904297, 'timestamp': '2025-09-10 02:29:45.619399', 'step': 3150, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 2847885091968}, 'timestamp': '2025-09-10 02:29:45.651053', 'step': 3150, 'epoch': 1} {'type': 'loss', 'content': 0.27534356713294983, 'timestamp': '2025-09-10 02:29:45.653410', 'step': 3151, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 3322488817984}, 'timestamp': '2025-09-10 02:29:45.684339', 'step': 3151, 'epoch': 1} {'type': 'loss', 'content': 0.13025328516960144, 'timestamp': '2025-09-10 02:29:45.708480', 'step': 3152, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 3322488817984}, 'timestamp': '2025-09-10 02:29:45.755962', 'step': 3152, 'epoch': 1} {'type': 'loss', 'content': 0.166564479470253, 'timestamp': '2025-09-10 02:29:45.763862', 'step': 3153, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 2847885091968}, 'timestamp': '2025-09-10 02:29:45.808969', 'step': 3153, 'epoch': 1} {'type': 'loss', 'content': 0.11821554601192474, 'timestamp': '2025-09-10 02:29:45.813861', 'step': 3154, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 3322488817984}, 'timestamp': '2025-09-10 02:29:45.852855', 'step': 3154, 'epoch': 1} {'type': 'loss', 'content': 0.19290471076965332, 'timestamp': '2025-09-10 02:29:45.856943', 'step': 3155, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 2847885091968}, 'timestamp': '2025-09-10 02:29:45.888912', 'step': 3155, 'epoch': 1} {'type': 'loss', 'content': 0.1890704482793808, 'timestamp': '2025-09-10 02:29:45.912345', 'step': 3156, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 144], 'flops': 4271696270016}, 'timestamp': '2025-09-10 02:29:45.944272', 'step': 3156, 'epoch': 1} {'type': 'loss', 'content': 0.20846669375896454, 'timestamp': '2025-09-10 02:29:45.946715', 'step': 3157, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 2847885091968}, 'timestamp': '2025-09-10 02:29:45.978556', 'step': 3157, 'epoch': 1} {'type': 'loss', 'content': 0.20890039205551147, 'timestamp': '2025-09-10 02:29:45.980621', 'step': 3158, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 3322488817984}, 'timestamp': '2025-09-10 02:29:46.010787', 'step': 3158, 'epoch': 1} {'type': 'loss', 'content': 0.13525094091892242, 'timestamp': '2025-09-10 02:29:46.013141', 'step': 3159, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 2847885091968}, 'timestamp': '2025-09-10 02:29:46.043686', 'step': 3159, 'epoch': 1} {'type': 'loss', 'content': 0.19218198955059052, 'timestamp': '2025-09-10 02:29:46.070229', 'step': 3160, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 160], 'flops': 4746299996032}, 'timestamp': '2025-09-10 02:29:46.102510', 'step': 3160, 'epoch': 1} {'type': 'loss', 'content': 0.17032407224178314, 'timestamp': '2025-09-10 02:29:46.105022', 'step': 3161, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 2847885091968}, 'timestamp': '2025-09-10 02:29:46.137159', 'step': 3161, 'epoch': 1} {'type': 'loss', 'content': 0.13605937361717224, 'timestamp': '2025-09-10 02:29:46.139499', 'step': 3162, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 3322488817984}, 'timestamp': '2025-09-10 02:29:46.171248', 'step': 3162, 'epoch': 1} {'type': 'loss', 'content': 0.24155426025390625, 'timestamp': '2025-09-10 02:29:46.174082', 'step': 3163, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 128], 'flops': 3797092544000}, 'timestamp': '2025-09-10 02:29:46.204975', 'step': 3163, 'epoch': 1} {'type': 'loss', 'content': 0.23094679415225983, 'timestamp': '2025-09-10 02:29:46.228955', 'step': 3164, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 144], 'flops': 4271696270016}, 'timestamp': '2025-09-10 02:29:46.261542', 'step': 3164, 'epoch': 1} {'type': 'loss', 'content': 0.28855058550834656, 'timestamp': '2025-09-10 02:29:46.263915', 'step': 3165, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 3322488817984}, 'timestamp': '2025-09-10 02:29:46.295600', 'step': 3165, 'epoch': 1} {'type': 'loss', 'content': 0.20440827310085297, 'timestamp': '2025-09-10 02:29:46.297990', 'step': 3166, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 2847885091968}, 'timestamp': '2025-09-10 02:29:46.330754', 'step': 3166, 'epoch': 1} {'type': 'loss', 'content': 0.22427229583263397, 'timestamp': '2025-09-10 02:29:46.332864', 'step': 3167, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 144], 'flops': 4271696270016}, 'timestamp': '2025-09-10 02:29:46.363877', 'step': 3167, 'epoch': 1} {'type': 'loss', 'content': 0.21844825148582458, 'timestamp': '2025-09-10 02:29:46.388482', 'step': 3168, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 128], 'flops': 3797092544000}, 'timestamp': '2025-09-10 02:29:46.420688', 'step': 3168, 'epoch': 1} {'type': 'loss', 'content': 0.0911698192358017, 'timestamp': '2025-09-10 02:29:46.423013', 'step': 3169, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 2847885091968}, 'timestamp': '2025-09-10 02:29:46.454213', 'step': 3169, 'epoch': 1} {'type': 'loss', 'content': 0.11353575438261032, 'timestamp': '2025-09-10 02:29:46.456514', 'step': 3170, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 2847885091968}, 'timestamp': '2025-09-10 02:29:46.487923', 'step': 3170, 'epoch': 1} {'type': 'loss', 'content': 0.1638893336057663, 'timestamp': '2025-09-10 02:29:46.489905', 'step': 3171, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 128], 'flops': 3797092544000}, 'timestamp': '2025-09-10 02:29:46.521329', 'step': 3171, 'epoch': 1} {'type': 'loss', 'content': 0.20354409515857697, 'timestamp': '2025-09-10 02:29:46.545117', 'step': 3172, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 2847885091968}, 'timestamp': '2025-09-10 02:29:46.577506', 'step': 3172, 'epoch': 1} {'type': 'loss', 'content': 0.17418043315410614, 'timestamp': '2025-09-10 02:29:46.579373', 'step': 3173, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 3322488817984}, 'timestamp': '2025-09-10 02:29:46.610236', 'step': 3173, 'epoch': 1} {'type': 'loss', 'content': 0.15899142622947693, 'timestamp': '2025-09-10 02:29:46.612449', 'step': 3174, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 160], 'flops': 4746299996032}, 'timestamp': '2025-09-10 02:29:46.644138', 'step': 3174, 'epoch': 1} {'type': 'loss', 'content': 0.1717604249715805, 'timestamp': '2025-09-10 02:29:46.646716', 'step': 3175, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 2847885091968}, 'timestamp': '2025-09-10 02:29:46.677319', 'step': 3175, 'epoch': 1} {'type': 'loss', 'content': 0.23327931761741638, 'timestamp': '2025-09-10 02:29:46.705177', 'step': 3176, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 2847885091968}, 'timestamp': '2025-09-10 02:29:46.754875', 'step': 3176, 'epoch': 1} {'type': 'loss', 'content': 0.1874702274799347, 'timestamp': '2025-09-10 02:29:46.764200', 'step': 3177, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 3322488817984}, 'timestamp': '2025-09-10 02:29:46.805100', 'step': 3177, 'epoch': 1} {'type': 'loss', 'content': 0.22760827839374542, 'timestamp': '2025-09-10 02:29:46.814094', 'step': 3178, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 3322488817984}, 'timestamp': '2025-09-10 02:29:46.856607', 'step': 3178, 'epoch': 1} {'type': 'loss', 'content': 0.1856229156255722, 'timestamp': '2025-09-10 02:29:46.866752', 'step': 3179, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 3322488817984}, 'timestamp': '2025-09-10 02:29:46.909413', 'step': 3179, 'epoch': 1} {'type': 'loss', 'content': 0.1376888006925583, 'timestamp': '2025-09-10 02:29:46.938976', 'step': 3180, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 128], 'flops': 3797092544000}, 'timestamp': '2025-09-10 02:29:46.976733', 'step': 3180, 'epoch': 1} {'type': 'loss', 'content': 0.1519991010427475, 'timestamp': '2025-09-10 02:29:46.982206', 'step': 3181, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 3322488817984}, 'timestamp': '2025-09-10 02:29:47.020056', 'step': 3181, 'epoch': 1} {'type': 'loss', 'content': 0.12729378044605255, 'timestamp': '2025-09-10 02:29:47.023256', 'step': 3182, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 2847885091968}, 'timestamp': '2025-09-10 02:29:47.070692', 'step': 3182, 'epoch': 1} {'type': 'loss', 'content': 0.1551191210746765, 'timestamp': '2025-09-10 02:29:47.072839', 'step': 3183, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 3322488817984}, 'timestamp': '2025-09-10 02:29:47.103844', 'step': 3183, 'epoch': 1} {'type': 'loss', 'content': 0.16706323623657227, 'timestamp': '2025-09-10 02:29:47.127245', 'step': 3184, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 128], 'flops': 3797092544000}, 'timestamp': '2025-09-10 02:29:47.158046', 'step': 3184, 'epoch': 1} {'type': 'loss', 'content': 0.2582113444805145, 'timestamp': '2025-09-10 02:29:47.160582', 'step': 3185, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 2847885091968}, 'timestamp': '2025-09-10 02:29:47.192189', 'step': 3185, 'epoch': 1} {'type': 'loss', 'content': 0.19390274584293365, 'timestamp': '2025-09-10 02:29:47.194251', 'step': 3186, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 3322488817984}, 'timestamp': '2025-09-10 02:29:47.224846', 'step': 3186, 'epoch': 1} {'type': 'loss', 'content': 0.16466833651065826, 'timestamp': '2025-09-10 02:29:47.227246', 'step': 3187, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 128], 'flops': 3797092544000}, 'timestamp': '2025-09-10 02:29:47.258523', 'step': 3187, 'epoch': 1} {'type': 'loss', 'content': 0.22869078814983368, 'timestamp': '2025-09-10 02:29:47.281865', 'step': 3188, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 2847885091968}, 'timestamp': '2025-09-10 02:29:47.312341', 'step': 3188, 'epoch': 1} {'type': 'loss', 'content': 0.14457029104232788, 'timestamp': '2025-09-10 02:29:47.314655', 'step': 3189, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 2847885091968}, 'timestamp': '2025-09-10 02:29:47.344705', 'step': 3189, 'epoch': 1} {'type': 'loss', 'content': 0.1187521368265152, 'timestamp': '2025-09-10 02:29:47.346821', 'step': 3190, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 2847885091968}, 'timestamp': '2025-09-10 02:29:47.377064', 'step': 3190, 'epoch': 1} {'type': 'loss', 'content': 0.13558492064476013, 'timestamp': '2025-09-10 02:29:47.379176', 'step': 3191, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 2847885091968}, 'timestamp': '2025-09-10 02:29:47.410857', 'step': 3191, 'epoch': 1} {'type': 'loss', 'content': 0.12127508223056793, 'timestamp': '2025-09-10 02:29:47.437897', 'step': 3192, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 128], 'flops': 3797092544000}, 'timestamp': '2025-09-10 02:29:47.467980', 'step': 3192, 'epoch': 1} {'type': 'loss', 'content': 0.13135597109794617, 'timestamp': '2025-09-10 02:29:47.470109', 'step': 3193, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 80], 'flops': 2373281365952}, 'timestamp': '2025-09-10 02:29:47.500001', 'step': 3193, 'epoch': 1} {'type': 'loss', 'content': 0.19972795248031616, 'timestamp': '2025-09-10 02:29:47.501882', 'step': 3194, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 3322488817984}, 'timestamp': '2025-09-10 02:29:47.532206', 'step': 3194, 'epoch': 1} {'type': 'loss', 'content': 0.15240386128425598, 'timestamp': '2025-09-10 02:29:47.534175', 'step': 3195, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 3322488817984}, 'timestamp': '2025-09-10 02:29:47.563619', 'step': 3195, 'epoch': 1} {'type': 'loss', 'content': 0.10739652812480927, 'timestamp': '2025-09-10 02:29:47.586985', 'step': 3196, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 3322488817984}, 'timestamp': '2025-09-10 02:29:47.616670', 'step': 3196, 'epoch': 1} {'type': 'loss', 'content': 0.0808168575167656, 'timestamp': '2025-09-10 02:29:47.618749', 'step': 3197, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 80], 'flops': 2373281365952}, 'timestamp': '2025-09-10 02:29:47.648981', 'step': 3197, 'epoch': 1} {'type': 'loss', 'content': 0.10105276852846146, 'timestamp': '2025-09-10 02:29:47.651055', 'step': 3198, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 3322488817984}, 'timestamp': '2025-09-10 02:29:47.680806', 'step': 3198, 'epoch': 1} {'type': 'loss', 'content': 0.18991586565971375, 'timestamp': '2025-09-10 02:29:47.682913', 'step': 3199, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 2847885091968}, 'timestamp': '2025-09-10 02:29:47.713215', 'step': 3199, 'epoch': 1} {'type': 'loss', 'content': 0.12076035887002945, 'timestamp': '2025-09-10 02:29:47.736757', 'step': 3200, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 3322488817984}, 'timestamp': '2025-09-10 02:29:47.767604', 'step': 3200, 'epoch': 1} {'type': 'loss', 'content': 0.19043570756912231, 'timestamp': '2025-09-10 02:29:47.769665', 'step': 3201, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 3322488817984}, 'timestamp': '2025-09-10 02:29:47.800594', 'step': 3201, 'epoch': 1} {'type': 'loss', 'content': 0.2631802260875702, 'timestamp': '2025-09-10 02:29:47.802923', 'step': 3202, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 2847885091968}, 'timestamp': '2025-09-10 02:29:47.833670', 'step': 3202, 'epoch': 1} {'type': 'loss', 'content': 0.19179078936576843, 'timestamp': '2025-09-10 02:29:47.836369', 'step': 3203, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 2847885091968}, 'timestamp': '2025-09-10 02:29:47.866711', 'step': 3203, 'epoch': 1} {'type': 'loss', 'content': 0.20799939334392548, 'timestamp': '2025-09-10 02:29:47.890198', 'step': 3204, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 2847885091968}, 'timestamp': '2025-09-10 02:29:47.919867', 'step': 3204, 'epoch': 1} {'type': 'loss', 'content': 0.2033817321062088, 'timestamp': '2025-09-10 02:29:47.923456', 'step': 3205, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 3322488817984}, 'timestamp': '2025-09-10 02:29:47.954547', 'step': 3205, 'epoch': 1} {'type': 'loss', 'content': 0.15132306516170502, 'timestamp': '2025-09-10 02:29:47.956504', 'step': 3206, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 160], 'flops': 4746299996032}, 'timestamp': '2025-09-10 02:29:47.986918', 'step': 3206, 'epoch': 1} {'type': 'loss', 'content': 0.12427970767021179, 'timestamp': '2025-09-10 02:29:47.989714', 'step': 3207, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 144], 'flops': 4271696270016}, 'timestamp': '2025-09-10 02:29:48.018783', 'step': 3207, 'epoch': 1} {'type': 'loss', 'content': 0.13920234143733978, 'timestamp': '2025-09-10 02:29:48.043794', 'step': 3208, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 2847885091968}, 'timestamp': '2025-09-10 02:29:48.074109', 'step': 3208, 'epoch': 1} {'type': 'loss', 'content': 0.08861689269542694, 'timestamp': '2025-09-10 02:29:48.076812', 'step': 3209, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 3322488817984}, 'timestamp': '2025-09-10 02:29:48.107468', 'step': 3209, 'epoch': 1} {'type': 'loss', 'content': 0.2204521894454956, 'timestamp': '2025-09-10 02:29:48.109669', 'step': 3210, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 2847885091968}, 'timestamp': '2025-09-10 02:29:48.141014', 'step': 3210, 'epoch': 1} {'type': 'loss', 'content': 0.1966957300901413, 'timestamp': '2025-09-10 02:29:48.143180', 'step': 3211, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 128], 'flops': 3797092544000}, 'timestamp': '2025-09-10 02:29:48.174619', 'step': 3211, 'epoch': 1} {'type': 'loss', 'content': 0.1314495950937271, 'timestamp': '2025-09-10 02:29:48.197943', 'step': 3212, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 3322488817984}, 'timestamp': '2025-09-10 02:29:48.227937', 'step': 3212, 'epoch': 1} {'type': 'loss', 'content': 0.13904930651187897, 'timestamp': '2025-09-10 02:29:48.230345', 'step': 3213, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 3322488817984}, 'timestamp': '2025-09-10 02:29:48.260028', 'step': 3213, 'epoch': 1} {'type': 'loss', 'content': 0.15378588438034058, 'timestamp': '2025-09-10 02:29:48.262135', 'step': 3214, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 2847885091968}, 'timestamp': '2025-09-10 02:29:48.292687', 'step': 3214, 'epoch': 1} {'type': 'loss', 'content': 0.17015264928340912, 'timestamp': '2025-09-10 02:29:48.294565', 'step': 3215, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 3322488817984}, 'timestamp': '2025-09-10 02:29:48.323749', 'step': 3215, 'epoch': 1} {'type': 'loss', 'content': 0.130588561296463, 'timestamp': '2025-09-10 02:29:48.348455', 'step': 3216, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 3322488817984}, 'timestamp': '2025-09-10 02:29:48.379199', 'step': 3216, 'epoch': 1} {'type': 'loss', 'content': 0.2334803193807602, 'timestamp': '2025-09-10 02:29:48.381440', 'step': 3217, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 144], 'flops': 4271696270016}, 'timestamp': '2025-09-10 02:29:48.412014', 'step': 3217, 'epoch': 1} {'type': 'loss', 'content': 0.1429053694009781, 'timestamp': '2025-09-10 02:29:48.414467', 'step': 3218, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 2847885091968}, 'timestamp': '2025-09-10 02:29:48.450311', 'step': 3218, 'epoch': 1} {'type': 'loss', 'content': 0.16020743548870087, 'timestamp': '2025-09-10 02:29:48.452878', 'step': 3219, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 128], 'flops': 3797092544000}, 'timestamp': '2025-09-10 02:29:48.484392', 'step': 3219, 'epoch': 1} {'type': 'loss', 'content': 0.12728609144687653, 'timestamp': '2025-09-10 02:29:48.507373', 'step': 3220, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 128], 'flops': 3797092544000}, 'timestamp': '2025-09-10 02:29:48.538802', 'step': 3220, 'epoch': 1} {'type': 'loss', 'content': 0.21096792817115784, 'timestamp': '2025-09-10 02:29:48.541456', 'step': 3221, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 3322488817984}, 'timestamp': '2025-09-10 02:29:48.573561', 'step': 3221, 'epoch': 1} {'type': 'loss', 'content': 0.16329577565193176, 'timestamp': '2025-09-10 02:29:48.575915', 'step': 3222, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 128], 'flops': 3797092544000}, 'timestamp': '2025-09-10 02:29:48.609331', 'step': 3222, 'epoch': 1} {'type': 'loss', 'content': 0.2501237988471985, 'timestamp': '2025-09-10 02:29:48.611375', 'step': 3223, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 192], 'flops': 5695507448064}, 'timestamp': '2025-09-10 02:29:48.643704', 'step': 3223, 'epoch': 1} {'type': 'loss', 'content': 0.14179940521717072, 'timestamp': '2025-09-10 02:29:48.669064', 'step': 3224, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 144], 'flops': 4271696270016}, 'timestamp': '2025-09-10 02:29:48.702558', 'step': 3224, 'epoch': 1} {'type': 'loss', 'content': 0.169158473610878, 'timestamp': '2025-09-10 02:29:48.705134', 'step': 3225, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 3322488817984}, 'timestamp': '2025-09-10 02:29:48.735240', 'step': 3225, 'epoch': 1} {'type': 'loss', 'content': 0.13072516024112701, 'timestamp': '2025-09-10 02:29:48.737323', 'step': 3226, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 2847885091968}, 'timestamp': '2025-09-10 02:29:48.767940', 'step': 3226, 'epoch': 1} {'type': 'loss', 'content': 0.1942732185125351, 'timestamp': '2025-09-10 02:29:48.770194', 'step': 3227, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 3322488817984}, 'timestamp': '2025-09-10 02:29:48.800734', 'step': 3227, 'epoch': 1} {'type': 'loss', 'content': 0.2898584306240082, 'timestamp': '2025-09-10 02:29:48.824254', 'step': 3228, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 128], 'flops': 3797092544000}, 'timestamp': '2025-09-10 02:29:48.854726', 'step': 3228, 'epoch': 1} {'type': 'loss', 'content': 0.1091361790895462, 'timestamp': '2025-09-10 02:29:48.856750', 'step': 3229, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 3322488817984}, 'timestamp': '2025-09-10 02:29:48.889345', 'step': 3229, 'epoch': 1} {'type': 'loss', 'content': 0.1646888554096222, 'timestamp': '2025-09-10 02:29:48.893840', 'step': 3230, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 176], 'flops': 5220903722048}, 'timestamp': '2025-09-10 02:29:48.930565', 'step': 3230, 'epoch': 1} {'type': 'loss', 'content': 0.1307302713394165, 'timestamp': '2025-09-10 02:29:48.939555', 'step': 3231, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 128], 'flops': 3797092544000}, 'timestamp': '2025-09-10 02:29:48.975781', 'step': 3231, 'epoch': 1} {'type': 'loss', 'content': 0.21625906229019165, 'timestamp': '2025-09-10 02:29:49.000677', 'step': 3232, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 160], 'flops': 4746299996032}, 'timestamp': '2025-09-10 02:29:49.034646', 'step': 3232, 'epoch': 1} {'type': 'loss', 'content': 0.11553147435188293, 'timestamp': '2025-09-10 02:29:49.038866', 'step': 3233, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 2847885091968}, 'timestamp': '2025-09-10 02:29:49.075328', 'step': 3233, 'epoch': 1} {'type': 'loss', 'content': 0.11062749475240707, 'timestamp': '2025-09-10 02:29:49.079425', 'step': 3234, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 80], 'flops': 2373281365952}, 'timestamp': '2025-09-10 02:29:49.114650', 'step': 3234, 'epoch': 1} {'type': 'loss', 'content': 0.2084757387638092, 'timestamp': '2025-09-10 02:29:49.116748', 'step': 3235, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 2847885091968}, 'timestamp': '2025-09-10 02:29:49.147788', 'step': 3235, 'epoch': 1} {'type': 'loss', 'content': 0.19103442132472992, 'timestamp': '2025-09-10 02:29:49.171454', 'step': 3236, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 128], 'flops': 3797092544000}, 'timestamp': '2025-09-10 02:29:49.202351', 'step': 3236, 'epoch': 1} {'type': 'loss', 'content': 0.22498786449432373, 'timestamp': '2025-09-10 02:29:49.204099', 'step': 3237, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 2847885091968}, 'timestamp': '2025-09-10 02:29:49.234378', 'step': 3237, 'epoch': 1} {'type': 'loss', 'content': 0.19200465083122253, 'timestamp': '2025-09-10 02:29:49.237724', 'step': 3238, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 128], 'flops': 3797092544000}, 'timestamp': '2025-09-10 02:29:49.272795', 'step': 3238, 'epoch': 1} {'type': 'loss', 'content': 0.1264246553182602, 'timestamp': '2025-09-10 02:29:49.274793', 'step': 3239, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 3322488817984}, 'timestamp': '2025-09-10 02:29:49.304656', 'step': 3239, 'epoch': 1} {'type': 'loss', 'content': 0.1921885460615158, 'timestamp': '2025-09-10 02:29:49.328105', 'step': 3240, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 2847885091968}, 'timestamp': '2025-09-10 02:29:49.361384', 'step': 3240, 'epoch': 1} {'type': 'loss', 'content': 0.15952298045158386, 'timestamp': '2025-09-10 02:29:49.363765', 'step': 3241, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 3322488817984}, 'timestamp': '2025-09-10 02:29:49.397659', 'step': 3241, 'epoch': 1} {'type': 'loss', 'content': 0.23554793000221252, 'timestamp': '2025-09-10 02:29:49.399633', 'step': 3242, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 3322488817984}, 'timestamp': '2025-09-10 02:29:49.429404', 'step': 3242, 'epoch': 1} {'type': 'loss', 'content': 0.1407623440027237, 'timestamp': '2025-09-10 02:29:49.431501', 'step': 3243, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 2847885091968}, 'timestamp': '2025-09-10 02:29:49.464603', 'step': 3243, 'epoch': 1} {'type': 'loss', 'content': 0.2284650355577469, 'timestamp': '2025-09-10 02:29:49.488544', 'step': 3244, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 3322488817984}, 'timestamp': '2025-09-10 02:29:49.518908', 'step': 3244, 'epoch': 1} {'type': 'loss', 'content': 0.07911677658557892, 'timestamp': '2025-09-10 02:29:49.521245', 'step': 3245, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 2847885091968}, 'timestamp': '2025-09-10 02:29:49.551931', 'step': 3245, 'epoch': 1} {'type': 'loss', 'content': 0.21451783180236816, 'timestamp': '2025-09-10 02:29:49.553848', 'step': 3246, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 2847885091968}, 'timestamp': '2025-09-10 02:29:49.584935', 'step': 3246, 'epoch': 1} {'type': 'loss', 'content': 0.0941600576043129, 'timestamp': '2025-09-10 02:29:49.587349', 'step': 3247, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 128], 'flops': 3797092544000}, 'timestamp': '2025-09-10 02:29:49.617308', 'step': 3247, 'epoch': 1} {'type': 'loss', 'content': 0.21451489627361298, 'timestamp': '2025-09-10 02:29:49.640482', 'step': 3248, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 2847885091968}, 'timestamp': '2025-09-10 02:29:49.671543', 'step': 3248, 'epoch': 1} {'type': 'loss', 'content': 0.24667753279209137, 'timestamp': '2025-09-10 02:29:49.674027', 'step': 3249, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 3322488817984}, 'timestamp': '2025-09-10 02:29:49.704994', 'step': 3249, 'epoch': 1} {'type': 'loss', 'content': 0.20759734511375427, 'timestamp': '2025-09-10 02:29:49.707125', 'step': 3250, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 2847885091968}, 'timestamp': '2025-09-10 02:29:49.737600', 'step': 3250, 'epoch': 1} {'type': 'loss', 'content': 0.16339319944381714, 'timestamp': '2025-09-10 02:29:49.740220', 'step': 3251, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 128], 'flops': 3797092544000}, 'timestamp': '2025-09-10 02:29:49.772187', 'step': 3251, 'epoch': 1} {'type': 'loss', 'content': 0.1396058052778244, 'timestamp': '2025-09-10 02:29:49.795772', 'step': 3252, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 144], 'flops': 4271696270016}, 'timestamp': '2025-09-10 02:29:49.826107', 'step': 3252, 'epoch': 1} {'type': 'loss', 'content': 0.12659728527069092, 'timestamp': '2025-09-10 02:29:49.828207', 'step': 3253, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 3322488817984}, 'timestamp': '2025-09-10 02:29:49.859250', 'step': 3253, 'epoch': 1} {'type': 'loss', 'content': 0.09373846650123596, 'timestamp': '2025-09-10 02:29:49.861625', 'step': 3254, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 3322488817984}, 'timestamp': '2025-09-10 02:29:49.892714', 'step': 3254, 'epoch': 1} {'type': 'loss', 'content': 0.1285163164138794, 'timestamp': '2025-09-10 02:29:49.895267', 'step': 3255, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 128], 'flops': 3797092544000}, 'timestamp': '2025-09-10 02:29:49.925551', 'step': 3255, 'epoch': 1} {'type': 'loss', 'content': 0.07875893265008926, 'timestamp': '2025-09-10 02:29:49.948680', 'step': 3256, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 128], 'flops': 3797092544000}, 'timestamp': '2025-09-10 02:29:49.980342', 'step': 3256, 'epoch': 1} {'type': 'loss', 'content': 0.1626293957233429, 'timestamp': '2025-09-10 02:29:49.982708', 'step': 3257, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 128], 'flops': 3797092544000}, 'timestamp': '2025-09-10 02:29:50.013711', 'step': 3257, 'epoch': 1} {'type': 'loss', 'content': 0.17858019471168518, 'timestamp': '2025-09-10 02:29:50.015785', 'step': 3258, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 3322488817984}, 'timestamp': '2025-09-10 02:29:50.046841', 'step': 3258, 'epoch': 1} {'type': 'loss', 'content': 0.20572276413440704, 'timestamp': '2025-09-10 02:29:50.049089', 'step': 3259, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 2847885091968}, 'timestamp': '2025-09-10 02:29:50.080539', 'step': 3259, 'epoch': 1} {'type': 'loss', 'content': 0.16380999982357025, 'timestamp': '2025-09-10 02:29:50.104182', 'step': 3260, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 2847885091968}, 'timestamp': '2025-09-10 02:29:50.135319', 'step': 3260, 'epoch': 1} {'type': 'loss', 'content': 0.19626685976982117, 'timestamp': '2025-09-10 02:29:50.137818', 'step': 3261, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 3322488817984}, 'timestamp': '2025-09-10 02:29:50.168684', 'step': 3261, 'epoch': 1} {'type': 'loss', 'content': 0.11382898688316345, 'timestamp': '2025-09-10 02:29:50.171125', 'step': 3262, 'epoch': 1} {'type': 'flops', 'content': [{'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1582003754624}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1898404492032}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1898404492032}, {'type': 'perplexity', 'in_batch_dim': [8, 144], 'batch_size': 8, 'flops': 2847606704256}, {'type': 'perplexity', 'in_batch_dim': [8, 64], 'batch_size': 8, 'flops': 1265603017216}, {'type': 'perplexity', 'in_batch_dim': [8, 112], 'batch_size': 8, 'flops': 2214805229440}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1898404492032}, {'type': 'perplexity', 'in_batch_dim': [8, 112], 'batch_size': 8, 'flops': 2214805229440}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1898404492032}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1898404492032}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1898404492032}, {'type': 'perplexity', 'in_batch_dim': [8, 112], 'batch_size': 8, 'flops': 2214805229440}, {'type': 'perplexity', 'in_batch_dim': [8, 112], 'batch_size': 8, 'flops': 2214805229440}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1582003754624}, {'type': 'perplexity', 'in_batch_dim': [8, 144], 'batch_size': 8, 'flops': 2847606704256}, {'type': 'perplexity', 'in_batch_dim': [8, 112], 'batch_size': 8, 'flops': 2214805229440}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1582003754624}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1898404492032}, {'type': 'perplexity', 'in_batch_dim': [8, 64], 'batch_size': 8, 'flops': 1265603017216}, {'type': 'perplexity', 'in_batch_dim': [8, 64], 'batch_size': 8, 'flops': 1265603017216}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1898404492032}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1582003754624}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1582003754624}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1582003754624}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1582003754624}, {'type': 'perplexity', 'in_batch_dim': [8, 64], 'batch_size': 8, 'flops': 1265603017216}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1898404492032}, {'type': 'perplexity', 'in_batch_dim': [8, 64], 'batch_size': 8, 'flops': 1265603017216}, {'type': 'perplexity', 'in_batch_dim': [8, 64], 'batch_size': 8, 'flops': 1265603017216}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1582003754624}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1582003754624}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1582003754624}, {'type': 'perplexity', 'in_batch_dim': [8, 112], 'batch_size': 8, 'flops': 2214805229440}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1582003754624}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1582003754624}, {'type': 'perplexity', 'in_batch_dim': [8, 160], 'batch_size': 8, 'flops': 3164007441664}, {'type': 'perplexity', 'in_batch_dim': [8, 64], 'batch_size': 8, 'flops': 1265603017216}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1898404492032}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1898404492032}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1898404492032}, {'type': 'perplexity', 'in_batch_dim': [8, 64], 'batch_size': 8, 'flops': 1265603017216}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1582003754624}, {'type': 'perplexity', 'in_batch_dim': [8, 64], 'batch_size': 8, 'flops': 1265603017216}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1582003754624}, {'type': 'perplexity', 'in_batch_dim': [8, 64], 'batch_size': 8, 'flops': 1265603017216}, {'type': 'perplexity', 'in_batch_dim': [8, 112], 'batch_size': 8, 'flops': 2214805229440}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1582003754624}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1898404492032}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1898404492032}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1898404492032}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1898404492032}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1898404492032}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1898404492032}, {'type': 'perplexity', 'in_batch_dim': [8, 64], 'batch_size': 8, 'flops': 1265603017216}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1898404492032}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1582003754624}, {'type': 'perplexity', 'in_batch_dim': [8, 64], 'batch_size': 8, 'flops': 1265603017216}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1898404492032}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1898404492032}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1898404492032}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1898404492032}, {'type': 'perplexity', 'in_batch_dim': [8, 128], 'batch_size': 8, 'flops': 2531205966848}, {'type': 'perplexity', 'in_batch_dim': [8, 112], 'batch_size': 8, 'flops': 2214805229440}, {'type': 'perplexity', 'in_batch_dim': [8, 64], 'batch_size': 8, 'flops': 1265603017216}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1582003754624}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1898404492032}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1582003754624}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1898404492032}, {'type': 'perplexity', 'in_batch_dim': [8, 64], 'batch_size': 8, 'flops': 1265603017216}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1898404492032}, {'type': 'perplexity', 'in_batch_dim': [8, 112], 'batch_size': 8, 'flops': 2214805229440}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1898404492032}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1582003754624}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1582003754624}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1582003754624}, {'type': 'perplexity', 'in_batch_dim': [8, 64], 'batch_size': 8, 'flops': 1265603017216}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1582003754624}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1898404492032}, {'type': 'perplexity', 'in_batch_dim': [8, 64], 'batch_size': 8, 'flops': 1265603017216}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1582003754624}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1898404492032}, {'type': 'perplexity', 'in_batch_dim': [8, 64], 'batch_size': 8, 'flops': 1265603017216}, {'type': 'perplexity', 'in_batch_dim': [8, 112], 'batch_size': 8, 'flops': 2214805229440}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1582003754624}, {'type': 'perplexity', 'in_batch_dim': [8, 144], 'batch_size': 8, 'flops': 2847606704256}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1582003754624}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1582003754624}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1582003754624}, {'type': 'perplexity', 'in_batch_dim': [8, 64], 'batch_size': 8, 'flops': 1265603017216}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1898404492032}, {'type': 'perplexity', 'in_batch_dim': [8, 112], 'batch_size': 8, 'flops': 2214805229440}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1898404492032}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1582003754624}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1582003754624}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1582003754624}, {'type': 'perplexity', 'in_batch_dim': [8, 112], 'batch_size': 8, 'flops': 2214805229440}, {'type': 'perplexity', 'in_batch_dim': [8, 64], 'batch_size': 8, 'flops': 1265603017216}, {'type': 'perplexity', 'in_batch_dim': [8, 112], 'batch_size': 8, 'flops': 2214805229440}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1582003754624}, {'type': 'perplexity', 'in_batch_dim': [8, 64], 'batch_size': 8, 'flops': 1265603017216}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1582003754624}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1582003754624}, {'type': 'perplexity', 'in_batch_dim': [8, 64], 'batch_size': 8, 'flops': 1265603017216}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1582003754624}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1582003754624}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1582003754624}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1898404492032}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1582003754624}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1582003754624}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1898404492032}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1898404492032}, {'type': 'perplexity', 'in_batch_dim': [8, 112], 'batch_size': 8, 'flops': 2214805229440}, {'type': 'perplexity', 'in_batch_dim': [8, 64], 'batch_size': 8, 'flops': 1265603017216}, {'type': 'perplexity', 'in_batch_dim': [8, 112], 'batch_size': 8, 'flops': 2214805229440}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1898404492032}, {'type': 'perplexity', 'in_batch_dim': [8, 112], 'batch_size': 8, 'flops': 2214805229440}, {'type': 'perplexity', 'in_batch_dim': [8, 128], 'batch_size': 8, 'flops': 2531205966848}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1582003754624}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1898404492032}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1898404492032}, {'type': 'perplexity', 'in_batch_dim': [8, 192], 'batch_size': 8, 'flops': 3796808916480}, {'type': 'perplexity', 'in_batch_dim': [8, 64], 'batch_size': 8, 'flops': 1265603017216}, {'type': 'perplexity', 'in_batch_dim': [8, 144], 'batch_size': 8, 'flops': 2847606704256}, {'type': 'perplexity', 'in_batch_dim': [8, 64], 'batch_size': 8, 'flops': 1265603017216}, {'type': 'perplexity', 'in_batch_dim': [8, 144], 'batch_size': 8, 'flops': 2847606704256}, {'type': 'perplexity', 'in_batch_dim': [8, 64], 'batch_size': 8, 'flops': 1265603017216}, {'type': 'perplexity', 'in_batch_dim': [8, 64], 'batch_size': 8, 'flops': 1265603017216}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1898404492032}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1898404492032}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1582003754624}, {'type': 'perplexity', 'in_batch_dim': [8, 128], 'batch_size': 8, 'flops': 2531205966848}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1898404492032}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1898404492032}, {'type': 'perplexity', 'in_batch_dim': [8, 112], 'batch_size': 8, 'flops': 2214805229440}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1582003754624}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1582003754624}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1898404492032}, {'type': 'perplexity', 'in_batch_dim': [8, 64], 'batch_size': 8, 'flops': 1265603017216}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1898404492032}, {'type': 'perplexity', 'in_batch_dim': [8, 112], 'batch_size': 8, 'flops': 2214805229440}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1898404492032}, {'type': 'perplexity', 'in_batch_dim': [8, 64], 'batch_size': 8, 'flops': 1265603017216}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1582003754624}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1898404492032}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1898404492032}, {'type': 'perplexity', 'in_batch_dim': [8, 64], 'batch_size': 8, 'flops': 1265603017216}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1898404492032}, {'type': 'perplexity', 'in_batch_dim': [8, 64], 'batch_size': 8, 'flops': 1265603017216}, {'type': 'perplexity', 'in_batch_dim': [8, 112], 'batch_size': 8, 'flops': 2214805229440}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1582003754624}, {'type': 'perplexity', 'in_batch_dim': [8, 128], 'batch_size': 8, 'flops': 2531205966848}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1582003754624}, {'type': 'perplexity', 'in_batch_dim': [8, 112], 'batch_size': 8, 'flops': 2214805229440}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1898404492032}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1582003754624}, {'type': 'perplexity', 'in_batch_dim': [8, 112], 'batch_size': 8, 'flops': 2214805229440}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1582003754624}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1898404492032}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1898404492032}, {'type': 'perplexity', 'in_batch_dim': [8, 128], 'batch_size': 8, 'flops': 2531205966848}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1898404492032}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1898404492032}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1582003754624}, {'type': 'perplexity', 'in_batch_dim': [8, 112], 'batch_size': 8, 'flops': 2214805229440}, {'type': 'perplexity', 'in_batch_dim': [8, 128], 'batch_size': 8, 'flops': 2531205966848}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1898404492032}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1582003754624}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1582003754624}, {'type': 'perplexity', 'in_batch_dim': [8, 128], 'batch_size': 8, 'flops': 2531205966848}, {'type': 'perplexity', 'in_batch_dim': [8, 112], 'batch_size': 8, 'flops': 2214805229440}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1582003754624}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1582003754624}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1898404492032}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1582003754624}, {'type': 'perplexity', 'in_batch_dim': [8, 64], 'batch_size': 8, 'flops': 1265603017216}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1582003754624}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1582003754624}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1582003754624}, {'type': 'perplexity', 'in_batch_dim': [8, 112], 'batch_size': 8, 'flops': 2214805229440}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1582003754624}, {'type': 'perplexity', 'in_batch_dim': [8, 64], 'batch_size': 8, 'flops': 1265603017216}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1582003754624}, {'type': 'perplexity', 'in_batch_dim': [8, 128], 'batch_size': 8, 'flops': 2531205966848}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1898404492032}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1898404492032}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1582003754624}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1898404492032}, {'type': 'perplexity', 'in_batch_dim': [8, 64], 'batch_size': 8, 'flops': 1265603017216}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1898404492032}, {'type': 'perplexity', 'in_batch_dim': [8, 112], 'batch_size': 8, 'flops': 2214805229440}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1898404492032}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1582003754624}, {'type': 'perplexity', 'in_batch_dim': [8, 112], 'batch_size': 8, 'flops': 2214805229440}, {'type': 'perplexity', 'in_batch_dim': [8, 112], 'batch_size': 8, 'flops': 2214805229440}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1898404492032}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1898404492032}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1898404492032}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1898404492032}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1582003754624}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1582003754624}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1582003754624}, {'type': 'perplexity', 'in_batch_dim': [8, 112], 'batch_size': 8, 'flops': 2214805229440}, {'type': 'perplexity', 'in_batch_dim': [8, 64], 'batch_size': 8, 'flops': 1265603017216}, {'type': 'perplexity', 'in_batch_dim': [8, 144], 'batch_size': 8, 'flops': 2847606704256}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1898404492032}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1898404492032}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1898404492032}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1898404492032}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1898404492032}, {'type': 'perplexity', 'in_batch_dim': [8, 112], 'batch_size': 8, 'flops': 2214805229440}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1582003754624}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1582003754624}, {'type': 'perplexity', 'in_batch_dim': [8, 64], 'batch_size': 8, 'flops': 1265603017216}, {'type': 'perplexity', 'in_batch_dim': [8, 112], 'batch_size': 8, 'flops': 2214805229440}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1582003754624}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1898404492032}, {'type': 'perplexity', 'in_batch_dim': [8, 64], 'batch_size': 8, 'flops': 1265603017216}, {'type': 'perplexity', 'in_batch_dim': [8, 112], 'batch_size': 8, 'flops': 2214805229440}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1898404492032}, {'type': 'perplexity', 'in_batch_dim': [8, 64], 'batch_size': 8, 'flops': 1265603017216}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1582003754624}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1582003754624}, {'type': 'perplexity', 'in_batch_dim': [8, 64], 'batch_size': 8, 'flops': 1265603017216}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1898404492032}, {'type': 'perplexity', 'in_batch_dim': [8, 64], 'batch_size': 8, 'flops': 1265603017216}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1582003754624}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1898404492032}, {'type': 'perplexity', 'in_batch_dim': [8, 112], 'batch_size': 8, 'flops': 2214805229440}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1582003754624}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1898404492032}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1582003754624}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1898404492032}, {'type': 'perplexity', 'in_batch_dim': [8, 112], 'batch_size': 8, 'flops': 2214805229440}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1898404492032}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1898404492032}, {'type': 'perplexity', 'in_batch_dim': [8, 64], 'batch_size': 8, 'flops': 1265603017216}, {'type': 'perplexity', 'in_batch_dim': [8, 64], 'batch_size': 8, 'flops': 1265603017216}, {'type': 'perplexity', 'in_batch_dim': [8, 128], 'batch_size': 8, 'flops': 2531205966848}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1582003754624}, {'type': 'perplexity', 'in_batch_dim': [8, 64], 'batch_size': 8, 'flops': 1265603017216}, {'type': 'perplexity', 'in_batch_dim': [8, 112], 'batch_size': 8, 'flops': 2214805229440}, {'type': 'perplexity', 'in_batch_dim': [8, 112], 'batch_size': 8, 'flops': 2214805229440}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1898404492032}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1582003754624}, {'type': 'perplexity', 'in_batch_dim': [8, 128], 'batch_size': 8, 'flops': 2531205966848}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1898404492032}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1582003754624}, {'type': 'perplexity', 'in_batch_dim': [8, 112], 'batch_size': 8, 'flops': 2214805229440}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1898404492032}, {'type': 'perplexity', 'in_batch_dim': [8, 64], 'batch_size': 8, 'flops': 1265603017216}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1582003754624}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1898404492032}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1898404492032}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1582003754624}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1582003754624}, {'type': 'perplexity', 'in_batch_dim': [8, 64], 'batch_size': 8, 'flops': 1265603017216}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1898404492032}, {'type': 'perplexity', 'in_batch_dim': [8, 112], 'batch_size': 8, 'flops': 2214805229440}, {'type': 'perplexity', 'in_batch_dim': [8, 64], 'batch_size': 8, 'flops': 1265603017216}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1898404492032}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1582003754624}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1582003754624}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1898404492032}, {'type': 'perplexity', 'in_batch_dim': [8, 112], 'batch_size': 8, 'flops': 2214805229440}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1898404492032}, {'type': 'perplexity', 'in_batch_dim': [8, 112], 'batch_size': 8, 'flops': 2214805229440}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1582003754624}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1582003754624}, {'type': 'perplexity', 'in_batch_dim': [8, 64], 'batch_size': 8, 'flops': 1265603017216}, {'type': 'perplexity', 'in_batch_dim': [8, 112], 'batch_size': 8, 'flops': 2214805229440}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1582003754624}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1898404492032}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1582003754624}, {'type': 'perplexity', 'in_batch_dim': [8, 112], 'batch_size': 8, 'flops': 2214805229440}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1898404492032}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1898404492032}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1582003754624}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1898404492032}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1582003754624}, {'type': 'perplexity', 'in_batch_dim': [8, 112], 'batch_size': 8, 'flops': 2214805229440}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1582003754624}, {'type': 'perplexity', 'in_batch_dim': [8, 64], 'batch_size': 8, 'flops': 1265603017216}, {'type': 'perplexity', 'in_batch_dim': [8, 64], 'batch_size': 8, 'flops': 1265603017216}, {'type': 'perplexity', 'in_batch_dim': [8, 48], 'batch_size': 8, 'flops': 949202279808}, {'type': 'perplexity', 'in_batch_dim': [8, 112], 'batch_size': 8, 'flops': 2214805229440}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1582003754624}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1898404492032}, {'type': 'perplexity', 'in_batch_dim': [8, 128], 'batch_size': 8, 'flops': 2531205966848}, {'type': 'perplexity', 'in_batch_dim': [8, 112], 'batch_size': 8, 'flops': 2214805229440}, {'type': 'perplexity', 'in_batch_dim': [8, 128], 'batch_size': 8, 'flops': 2531205966848}, {'type': 'perplexity', 'in_batch_dim': [8, 112], 'batch_size': 8, 'flops': 2214805229440}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1582003754624}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1898404492032}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1582003754624}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1582003754624}, {'type': 'perplexity', 'in_batch_dim': [8, 112], 'batch_size': 8, 'flops': 2214805229440}, {'type': 'perplexity', 'in_batch_dim': [8, 112], 'batch_size': 8, 'flops': 2214805229440}, {'type': 'perplexity', 'in_batch_dim': [8, 64], 'batch_size': 8, 'flops': 1265603017216}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1898404492032}, {'type': 'perplexity', 'in_batch_dim': [8, 112], 'batch_size': 8, 'flops': 2214805229440}, {'type': 'perplexity', 'in_batch_dim': [8, 64], 'batch_size': 8, 'flops': 1265603017216}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1898404492032}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1582003754624}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1898404492032}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1898404492032}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1898404492032}, {'type': 'perplexity', 'in_batch_dim': [8, 48], 'batch_size': 8, 'flops': 949202279808}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1898404492032}, {'type': 'perplexity', 'in_batch_dim': [8, 112], 'batch_size': 8, 'flops': 2214805229440}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1582003754624}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1898404492032}, {'type': 'perplexity', 'in_batch_dim': [8, 64], 'batch_size': 8, 'flops': 1265603017216}, {'type': 'perplexity', 'in_batch_dim': [8, 112], 'batch_size': 8, 'flops': 2214805229440}, {'type': 'perplexity', 'in_batch_dim': [8, 64], 'batch_size': 8, 'flops': 1265603017216}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1582003754624}, {'type': 'perplexity', 'in_batch_dim': [8, 64], 'batch_size': 8, 'flops': 1265603017216}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1582003754624}, {'type': 'perplexity', 'in_batch_dim': [8, 112], 'batch_size': 8, 'flops': 2214805229440}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1582003754624}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1582003754624}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1582003754624}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1582003754624}, {'type': 'perplexity', 'in_batch_dim': [8, 112], 'batch_size': 8, 'flops': 2214805229440}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1582003754624}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1898404492032}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1898404492032}, {'type': 'perplexity', 'in_batch_dim': [8, 112], 'batch_size': 8, 'flops': 2214805229440}, {'type': 'perplexity', 'in_batch_dim': [8, 144], 'batch_size': 8, 'flops': 2847606704256}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1582003754624}, {'type': 'perplexity', 'in_batch_dim': [8, 64], 'batch_size': 8, 'flops': 1265603017216}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1898404492032}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1898404492032}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1582003754624}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1898404492032}, {'type': 'perplexity', 'in_batch_dim': [8, 128], 'batch_size': 8, 'flops': 2531205966848}, {'type': 'perplexity', 'in_batch_dim': [8, 112], 'batch_size': 8, 'flops': 2214805229440}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1898404492032}, {'type': 'perplexity', 'in_batch_dim': [8, 64], 'batch_size': 8, 'flops': 1265603017216}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1898404492032}, {'type': 'perplexity', 'in_batch_dim': [8, 160], 'batch_size': 8, 'flops': 3164007441664}, {'type': 'perplexity', 'in_batch_dim': [8, 64], 'batch_size': 8, 'flops': 1265603017216}, {'type': 'perplexity', 'in_batch_dim': [8, 112], 'batch_size': 8, 'flops': 2214805229440}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1898404492032}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1898404492032}, {'type': 'perplexity', 'in_batch_dim': [8, 144], 'batch_size': 8, 'flops': 2847606704256}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1582003754624}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1898404492032}, {'type': 'perplexity', 'in_batch_dim': [8, 64], 'batch_size': 8, 'flops': 1265603017216}, {'type': 'perplexity', 'in_batch_dim': [8, 64], 'batch_size': 8, 'flops': 1265603017216}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1898404492032}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1898404492032}, {'type': 'perplexity', 'in_batch_dim': [8, 112], 'batch_size': 8, 'flops': 2214805229440}, {'type': 'perplexity', 'in_batch_dim': [8, 112], 'batch_size': 8, 'flops': 2214805229440}, {'type': 'perplexity', 'in_batch_dim': [8, 128], 'batch_size': 8, 'flops': 2531205966848}, {'type': 'perplexity', 'in_batch_dim': [8, 112], 'batch_size': 8, 'flops': 2214805229440}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1582003754624}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1582003754624}, {'type': 'perplexity', 'in_batch_dim': [8, 112], 'batch_size': 8, 'flops': 2214805229440}, {'type': 'perplexity', 'in_batch_dim': [8, 128], 'batch_size': 8, 'flops': 2531205966848}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1582003754624}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1582003754624}, {'type': 'perplexity', 'in_batch_dim': [8, 64], 'batch_size': 8, 'flops': 1265603017216}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1898404492032}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1582003754624}, {'type': 'perplexity', 'in_batch_dim': [8, 112], 'batch_size': 8, 'flops': 2214805229440}, {'type': 'perplexity', 'in_batch_dim': [8, 64], 'batch_size': 8, 'flops': 1265603017216}, {'type': 'perplexity', 'in_batch_dim': [8, 64], 'batch_size': 8, 'flops': 1265603017216}, {'type': 'perplexity', 'in_batch_dim': [8, 128], 'batch_size': 8, 'flops': 2531205966848}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1898404492032}, {'type': 'perplexity', 'in_batch_dim': [8, 112], 'batch_size': 8, 'flops': 2214805229440}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1582003754624}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1582003754624}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1582003754624}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1582003754624}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1582003754624}, {'type': 'perplexity', 'in_batch_dim': [8, 112], 'batch_size': 8, 'flops': 2214805229440}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1898404492032}, {'type': 'perplexity', 'in_batch_dim': [8, 112], 'batch_size': 8, 'flops': 2214805229440}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1898404492032}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1582003754624}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1582003754624}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1898404492032}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1898404492032}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1582003754624}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1582003754624}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1582003754624}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1582003754624}, {'type': 'perplexity', 'in_batch_dim': [8, 48], 'batch_size': 8, 'flops': 949202279808}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1582003754624}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1898404492032}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1582003754624}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1898404492032}, {'type': 'perplexity', 'in_batch_dim': [8, 112], 'batch_size': 8, 'flops': 2214805229440}, {'type': 'perplexity', 'in_batch_dim': [8, 128], 'batch_size': 8, 'flops': 2531205966848}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1898404492032}, {'type': 'perplexity', 'in_batch_dim': [8, 64], 'batch_size': 8, 'flops': 1265603017216}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1898404492032}, {'type': 'perplexity', 'in_batch_dim': [8, 64], 'batch_size': 8, 'flops': 1265603017216}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1582003754624}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1582003754624}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1582003754624}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1582003754624}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1898404492032}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1582003754624}, {'type': 'perplexity', 'in_batch_dim': [8, 112], 'batch_size': 8, 'flops': 2214805229440}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1898404492032}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1582003754624}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1898404492032}, {'type': 'perplexity', 'in_batch_dim': [8, 64], 'batch_size': 8, 'flops': 1265603017216}, {'type': 'perplexity', 'in_batch_dim': [8, 64], 'batch_size': 8, 'flops': 1265603017216}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1898404492032}, {'type': 'perplexity', 'in_batch_dim': [8, 64], 'batch_size': 8, 'flops': 1265603017216}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1582003754624}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1582003754624}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1582003754624}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1582003754624}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1582003754624}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1582003754624}, {'type': 'perplexity', 'in_batch_dim': [8, 64], 'batch_size': 8, 'flops': 1265603017216}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1582003754624}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1582003754624}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1898404492032}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1898404492032}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1898404492032}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1898404492032}, {'type': 'perplexity', 'in_batch_dim': [8, 128], 'batch_size': 8, 'flops': 2531205966848}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1582003754624}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1898404492032}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1898404492032}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1898404492032}, {'type': 'perplexity', 'in_batch_dim': [8, 64], 'batch_size': 8, 'flops': 1265603017216}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1898404492032}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1898404492032}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1582003754624}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1898404492032}, {'type': 'perplexity', 'in_batch_dim': [8, 112], 'batch_size': 8, 'flops': 2214805229440}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1898404492032}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1582003754624}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1898404492032}, {'type': 'perplexity', 'in_batch_dim': [8, 64], 'batch_size': 8, 'flops': 1265603017216}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1898404492032}, {'type': 'perplexity', 'in_batch_dim': [8, 112], 'batch_size': 8, 'flops': 2214805229440}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1898404492032}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1898404492032}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1582003754624}, {'type': 'perplexity', 'in_batch_dim': [8, 64], 'batch_size': 8, 'flops': 1265603017216}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1582003754624}, {'type': 'perplexity', 'in_batch_dim': [8, 128], 'batch_size': 8, 'flops': 2531205966848}, {'type': 'perplexity', 'in_batch_dim': [8, 64], 'batch_size': 8, 'flops': 1265603017216}, {'type': 'perplexity', 'in_batch_dim': [8, 128], 'batch_size': 8, 'flops': 2531205966848}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1898404492032}, {'type': 'perplexity', 'in_batch_dim': [8, 112], 'batch_size': 8, 'flops': 2214805229440}, {'type': 'perplexity', 'in_batch_dim': [8, 112], 'batch_size': 8, 'flops': 2214805229440}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1898404492032}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1898404492032}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1582003754624}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1582003754624}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1582003754624}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1898404492032}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1582003754624}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1582003754624}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1898404492032}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1582003754624}, {'type': 'perplexity', 'in_batch_dim': [8, 64], 'batch_size': 8, 'flops': 1265603017216}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1898404492032}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1898404492032}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1898404492032}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1898404492032}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1582003754624}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1898404492032}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1582003754624}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1582003754624}, {'type': 'perplexity', 'in_batch_dim': [8, 112], 'batch_size': 8, 'flops': 2214805229440}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1582003754624}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1898404492032}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1582003754624}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1582003754624}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1582003754624}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1898404492032}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1582003754624}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1898404492032}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1582003754624}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1582003754624}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1898404492032}, {'type': 'perplexity', 'in_batch_dim': [8, 112], 'batch_size': 8, 'flops': 2214805229440}, {'type': 'perplexity', 'in_batch_dim': [8, 128], 'batch_size': 8, 'flops': 2531205966848}, {'type': 'perplexity', 'in_batch_dim': [8, 112], 'batch_size': 8, 'flops': 2214805229440}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1582003754624}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1898404492032}, {'type': 'perplexity', 'in_batch_dim': [8, 64], 'batch_size': 8, 'flops': 1265603017216}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1898404492032}, {'type': 'perplexity', 'in_batch_dim': [8, 64], 'batch_size': 8, 'flops': 1265603017216}, {'type': 'perplexity', 'in_batch_dim': [8, 144], 'batch_size': 8, 'flops': 2847606704256}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1582003754624}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1898404492032}, {'type': 'perplexity', 'in_batch_dim': [8, 112], 'batch_size': 8, 'flops': 2214805229440}, {'type': 'perplexity', 'in_batch_dim': [8, 144], 'batch_size': 8, 'flops': 2847606704256}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1582003754624}, {'type': 'perplexity', 'in_batch_dim': [8, 112], 'batch_size': 8, 'flops': 2214805229440}, {'type': 'perplexity', 'in_batch_dim': [8, 64], 'batch_size': 8, 'flops': 1265603017216}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1582003754624}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1898404492032}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1898404492032}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1582003754624}, {'type': 'perplexity', 'in_batch_dim': [8, 112], 'batch_size': 8, 'flops': 2214805229440}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1582003754624}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1582003754624}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1582003754624}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1582003754624}, {'type': 'perplexity', 'in_batch_dim': [8, 112], 'batch_size': 8, 'flops': 2214805229440}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1898404492032}, {'type': 'perplexity', 'in_batch_dim': [8, 128], 'batch_size': 8, 'flops': 2531205966848}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1582003754624}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1582003754624}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1898404492032}, {'type': 'perplexity', 'in_batch_dim': [8, 64], 'batch_size': 8, 'flops': 1265603017216}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1898404492032}, {'type': 'perplexity', 'in_batch_dim': [8, 64], 'batch_size': 8, 'flops': 1265603017216}, {'type': 'perplexity', 'in_batch_dim': [8, 112], 'batch_size': 8, 'flops': 2214805229440}, {'type': 'perplexity', 'in_batch_dim': [8, 112], 'batch_size': 8, 'flops': 2214805229440}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1582003754624}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1898404492032}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1582003754624}, {'type': 'perplexity', 'in_batch_dim': [8, 64], 'batch_size': 8, 'flops': 1265603017216}, {'type': 'perplexity', 'in_batch_dim': [8, 64], 'batch_size': 8, 'flops': 1265603017216}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1898404492032}, {'type': 'perplexity', 'in_batch_dim': [8, 64], 'batch_size': 8, 'flops': 1265603017216}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1898404492032}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1898404492032}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1582003754624}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1898404492032}, {'type': 'perplexity', 'in_batch_dim': [8, 144], 'batch_size': 8, 'flops': 2847606704256}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1898404492032}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1582003754624}, {'type': 'perplexity', 'in_batch_dim': [8, 128], 'batch_size': 8, 'flops': 2531205966848}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1898404492032}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1898404492032}, {'type': 'perplexity', 'in_batch_dim': [8, 112], 'batch_size': 8, 'flops': 2214805229440}, {'type': 'perplexity', 'in_batch_dim': [8, 112], 'batch_size': 8, 'flops': 2214805229440}, {'type': 'perplexity', 'in_batch_dim': [8, 64], 'batch_size': 8, 'flops': 1265603017216}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1582003754624}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1898404492032}, {'type': 'perplexity', 'in_batch_dim': [8, 128], 'batch_size': 8, 'flops': 2531205966848}, {'type': 'perplexity', 'in_batch_dim': [8, 144], 'batch_size': 8, 'flops': 2847606704256}, {'type': 'perplexity', 'in_batch_dim': [8, 112], 'batch_size': 8, 'flops': 2214805229440}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1582003754624}, {'type': 'perplexity', 'in_batch_dim': [8, 64], 'batch_size': 8, 'flops': 1265603017216}, {'type': 'perplexity', 'in_batch_dim': [8, 64], 'batch_size': 8, 'flops': 1265603017216}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1898404492032}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1582003754624}, {'type': 'perplexity', 'in_batch_dim': [8, 64], 'batch_size': 8, 'flops': 1265603017216}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1898404492032}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1582003754624}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1898404492032}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1898404492032}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1582003754624}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1582003754624}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1582003754624}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1582003754624}, {'type': 'perplexity', 'in_batch_dim': [3, 48], 'batch_size': 8, 'flops': 949202279808}], 'timestamp': '2025-09-10 02:29:58.367085', 'step': 3262, 'epoch': 1} {'type': 'pplx', 'content': 9456.329877053366, 'timestamp': '2025-09-10 02:29:58.370293', 'step': 3262, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 128], 'flops': 3797092544000}, 'timestamp': '2025-09-10 02:29:58.400341', 'step': 3262, 'epoch': 1} {'type': 'loss', 'content': 0.1692712903022766, 'timestamp': '2025-09-10 02:29:58.404310', 'step': 3263, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 176], 'flops': 5220903722048}, 'timestamp': '2025-09-10 02:29:58.436615', 'step': 3263, 'epoch': 1} {'type': 'loss', 'content': 0.17496418952941895, 'timestamp': '2025-09-10 02:29:58.461366', 'step': 3264, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 160], 'flops': 4746299996032}, 'timestamp': '2025-09-10 02:29:58.492097', 'step': 3264, 'epoch': 1} {'type': 'loss', 'content': 0.1336602121591568, 'timestamp': '2025-09-10 02:29:58.494841', 'step': 3265, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 3322488817984}, 'timestamp': '2025-09-10 02:29:58.525873', 'step': 3265, 'epoch': 1} {'type': 'loss', 'content': 0.2686935365200043, 'timestamp': '2025-09-10 02:29:58.528630', 'step': 3266, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 128], 'flops': 3797092544000}, 'timestamp': '2025-09-10 02:29:58.560000', 'step': 3266, 'epoch': 1} {'type': 'loss', 'content': 0.23321250081062317, 'timestamp': '2025-09-10 02:29:58.563218', 'step': 3267, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 128], 'flops': 3797092544000}, 'timestamp': '2025-09-10 02:29:58.594898', 'step': 3267, 'epoch': 1} {'type': 'loss', 'content': 0.1381356567144394, 'timestamp': '2025-09-10 02:29:58.618751', 'step': 3268, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 3322488817984}, 'timestamp': '2025-09-10 02:29:58.648348', 'step': 3268, 'epoch': 1} {'type': 'loss', 'content': 0.18288326263427734, 'timestamp': '2025-09-10 02:29:58.650723', 'step': 3269, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 128], 'flops': 3797092544000}, 'timestamp': '2025-09-10 02:29:58.684447', 'step': 3269, 'epoch': 1} {'type': 'loss', 'content': 0.17399407923221588, 'timestamp': '2025-09-10 02:29:58.687170', 'step': 3270, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 3322488817984}, 'timestamp': '2025-09-10 02:29:58.719614', 'step': 3270, 'epoch': 1} {'type': 'loss', 'content': 0.14878077805042267, 'timestamp': '2025-09-10 02:29:58.722132', 'step': 3271, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 128], 'flops': 3797092544000}, 'timestamp': '2025-09-10 02:29:58.753201', 'step': 3271, 'epoch': 1} {'type': 'loss', 'content': 0.08696795254945755, 'timestamp': '2025-09-10 02:29:58.776932', 'step': 3272, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 3322488817984}, 'timestamp': '2025-09-10 02:29:58.810974', 'step': 3272, 'epoch': 1} {'type': 'loss', 'content': 0.17494989931583405, 'timestamp': '2025-09-10 02:29:58.813021', 'step': 3273, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 2847885091968}, 'timestamp': '2025-09-10 02:29:58.842503', 'step': 3273, 'epoch': 1} {'type': 'loss', 'content': 0.18451857566833496, 'timestamp': '2025-09-10 02:29:58.845214', 'step': 3274, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 2847885091968}, 'timestamp': '2025-09-10 02:29:58.877205', 'step': 3274, 'epoch': 1} {'type': 'loss', 'content': 0.08390499651432037, 'timestamp': '2025-09-10 02:29:58.879729', 'step': 3275, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 3322488817984}, 'timestamp': '2025-09-10 02:29:58.910910', 'step': 3275, 'epoch': 1} {'type': 'loss', 'content': 0.1500479280948639, 'timestamp': '2025-09-10 02:29:58.934379', 'step': 3276, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 2847885091968}, 'timestamp': '2025-09-10 02:29:58.964728', 'step': 3276, 'epoch': 1} {'type': 'loss', 'content': 0.17653928697109222, 'timestamp': '2025-09-10 02:29:58.967293', 'step': 3277, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 3322488817984}, 'timestamp': '2025-09-10 02:29:58.997869', 'step': 3277, 'epoch': 1} {'type': 'loss', 'content': 0.09499409794807434, 'timestamp': '2025-09-10 02:29:59.000536', 'step': 3278, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 128], 'flops': 3797092544000}, 'timestamp': '2025-09-10 02:29:59.031641', 'step': 3278, 'epoch': 1} {'type': 'loss', 'content': 0.22293536365032196, 'timestamp': '2025-09-10 02:29:59.035091', 'step': 3279, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 2847885091968}, 'timestamp': '2025-09-10 02:29:59.066971', 'step': 3279, 'epoch': 1} {'type': 'loss', 'content': 0.15748287737369537, 'timestamp': '2025-09-10 02:29:59.092049', 'step': 3280, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 160], 'flops': 4746299996032}, 'timestamp': '2025-09-10 02:29:59.124470', 'step': 3280, 'epoch': 1} {'type': 'loss', 'content': 0.12751464545726776, 'timestamp': '2025-09-10 02:29:59.127147', 'step': 3281, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 2847885091968}, 'timestamp': '2025-09-10 02:29:59.157664', 'step': 3281, 'epoch': 1} {'type': 'loss', 'content': 0.06466534733772278, 'timestamp': '2025-09-10 02:29:59.159622', 'step': 3282, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 3322488817984}, 'timestamp': '2025-09-10 02:29:59.190004', 'step': 3282, 'epoch': 1} {'type': 'loss', 'content': 0.21056468784809113, 'timestamp': '2025-09-10 02:29:59.193224', 'step': 3283, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 128], 'flops': 3797092544000}, 'timestamp': '2025-09-10 02:29:59.225025', 'step': 3283, 'epoch': 1} {'type': 'loss', 'content': 0.16283413767814636, 'timestamp': '2025-09-10 02:29:59.248836', 'step': 3284, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 128], 'flops': 3797092544000}, 'timestamp': '2025-09-10 02:29:59.280196', 'step': 3284, 'epoch': 1} {'type': 'loss', 'content': 0.2443685531616211, 'timestamp': '2025-09-10 02:29:59.282780', 'step': 3285, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 3322488817984}, 'timestamp': '2025-09-10 02:29:59.315311', 'step': 3285, 'epoch': 1} {'type': 'loss', 'content': 0.16776934266090393, 'timestamp': '2025-09-10 02:29:59.317677', 'step': 3286, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 2847885091968}, 'timestamp': '2025-09-10 02:29:59.349500', 'step': 3286, 'epoch': 1} {'type': 'loss', 'content': 0.14209768176078796, 'timestamp': '2025-09-10 02:29:59.352202', 'step': 3287, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 3322488817984}, 'timestamp': '2025-09-10 02:29:59.383307', 'step': 3287, 'epoch': 1} {'type': 'loss', 'content': 0.20240989327430725, 'timestamp': '2025-09-10 02:29:59.407785', 'step': 3288, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 80], 'flops': 2373281365952}, 'timestamp': '2025-09-10 02:29:59.438383', 'step': 3288, 'epoch': 1} {'type': 'loss', 'content': 0.1658492088317871, 'timestamp': '2025-09-10 02:29:59.441255', 'step': 3289, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 3322488817984}, 'timestamp': '2025-09-10 02:29:59.471688', 'step': 3289, 'epoch': 1} {'type': 'loss', 'content': 0.2798406779766083, 'timestamp': '2025-09-10 02:29:59.475976', 'step': 3290, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 3322488817984}, 'timestamp': '2025-09-10 02:29:59.505925', 'step': 3290, 'epoch': 1} {'type': 'loss', 'content': 0.17240749299526215, 'timestamp': '2025-09-10 02:29:59.509194', 'step': 3291, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 2847885091968}, 'timestamp': '2025-09-10 02:29:59.540538', 'step': 3291, 'epoch': 1} {'type': 'loss', 'content': 0.05689078941941261, 'timestamp': '2025-09-10 02:29:59.564659', 'step': 3292, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 128], 'flops': 3797092544000}, 'timestamp': '2025-09-10 02:29:59.595075', 'step': 3292, 'epoch': 1} {'type': 'loss', 'content': 0.26871809363365173, 'timestamp': '2025-09-10 02:29:59.597505', 'step': 3293, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 2847885091968}, 'timestamp': '2025-09-10 02:29:59.629370', 'step': 3293, 'epoch': 1} {'type': 'loss', 'content': 0.19889549911022186, 'timestamp': '2025-09-10 02:29:59.633419', 'step': 3294, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 128], 'flops': 3797092544000}, 'timestamp': '2025-09-10 02:29:59.663657', 'step': 3294, 'epoch': 1} {'type': 'loss', 'content': 0.13614815473556519, 'timestamp': '2025-09-10 02:29:59.666481', 'step': 3295, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 160], 'flops': 4746299996032}, 'timestamp': '2025-09-10 02:29:59.698972', 'step': 3295, 'epoch': 1} {'type': 'loss', 'content': 0.12718389928340912, 'timestamp': '2025-09-10 02:29:59.722354', 'step': 3296, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 128], 'flops': 3797092544000}, 'timestamp': '2025-09-10 02:29:59.753388', 'step': 3296, 'epoch': 1} {'type': 'loss', 'content': 0.1356283724308014, 'timestamp': '2025-09-10 02:29:59.760627', 'step': 3297, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 128], 'flops': 3797092544000}, 'timestamp': '2025-09-10 02:29:59.810023', 'step': 3297, 'epoch': 1} {'type': 'loss', 'content': 0.15029235184192657, 'timestamp': '2025-09-10 02:29:59.815383', 'step': 3298, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 3322488817984}, 'timestamp': '2025-09-10 02:29:59.855290', 'step': 3298, 'epoch': 1} {'type': 'loss', 'content': 0.10783114284276962, 'timestamp': '2025-09-10 02:29:59.858145', 'step': 3299, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 2847885091968}, 'timestamp': '2025-09-10 02:29:59.889509', 'step': 3299, 'epoch': 1} {'type': 'loss', 'content': 0.15503165125846863, 'timestamp': '2025-09-10 02:29:59.918025', 'step': 3300, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 3322488817984}, 'timestamp': '2025-09-10 02:29:59.949457', 'step': 3300, 'epoch': 1} {'type': 'loss', 'content': 0.1224883571267128, 'timestamp': '2025-09-10 02:29:59.954791', 'step': 3301, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 3322488817984}, 'timestamp': '2025-09-10 02:29:59.992378', 'step': 3301, 'epoch': 1} {'type': 'loss', 'content': 0.2625923156738281, 'timestamp': '2025-09-10 02:29:59.997257', 'step': 3302, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 3322488817984}, 'timestamp': '2025-09-10 02:30:00.034555', 'step': 3302, 'epoch': 1} {'type': 'loss', 'content': 0.20826418697834015, 'timestamp': '2025-09-10 02:30:00.040641', 'step': 3303, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 3322488817984}, 'timestamp': '2025-09-10 02:30:00.079865', 'step': 3303, 'epoch': 1} {'type': 'loss', 'content': 0.12555797398090363, 'timestamp': '2025-09-10 02:30:00.105565', 'step': 3304, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 144], 'flops': 4271696270016}, 'timestamp': '2025-09-10 02:30:00.143181', 'step': 3304, 'epoch': 1} {'type': 'loss', 'content': 0.26516029238700867, 'timestamp': '2025-09-10 02:30:00.146499', 'step': 3305, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 2847885091968}, 'timestamp': '2025-09-10 02:30:00.178392', 'step': 3305, 'epoch': 1} {'type': 'loss', 'content': 0.11991115659475327, 'timestamp': '2025-09-10 02:30:00.183795', 'step': 3306, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 2847885091968}, 'timestamp': '2025-09-10 02:30:00.216567', 'step': 3306, 'epoch': 1} {'type': 'loss', 'content': 0.1634092926979065, 'timestamp': '2025-09-10 02:30:00.222264', 'step': 3307, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 2847885091968}, 'timestamp': '2025-09-10 02:30:00.252198', 'step': 3307, 'epoch': 1} {'type': 'loss', 'content': 0.1360584944486618, 'timestamp': '2025-09-10 02:30:00.276405', 'step': 3308, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 3322488817984}, 'timestamp': '2025-09-10 02:30:00.322673', 'step': 3308, 'epoch': 1} {'type': 'loss', 'content': 0.1603572815656662, 'timestamp': '2025-09-10 02:30:00.325426', 'step': 3309, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 3322488817984}, 'timestamp': '2025-09-10 02:30:00.362322', 'step': 3309, 'epoch': 1} {'type': 'loss', 'content': 0.14833930134773254, 'timestamp': '2025-09-10 02:30:00.363951', 'step': 3310, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 3322488817984}, 'timestamp': '2025-09-10 02:30:00.393026', 'step': 3310, 'epoch': 1} {'type': 'loss', 'content': 0.18940432369709015, 'timestamp': '2025-09-10 02:30:00.395316', 'step': 3311, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 2847885091968}, 'timestamp': '2025-09-10 02:30:00.425871', 'step': 3311, 'epoch': 1} {'type': 'loss', 'content': 0.16702960431575775, 'timestamp': '2025-09-10 02:30:00.449082', 'step': 3312, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 2847885091968}, 'timestamp': '2025-09-10 02:30:00.484919', 'step': 3312, 'epoch': 1} {'type': 'loss', 'content': 0.14445067942142487, 'timestamp': '2025-09-10 02:30:00.488294', 'step': 3313, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 144], 'flops': 4271696270016}, 'timestamp': '2025-09-10 02:30:00.519187', 'step': 3313, 'epoch': 1} {'type': 'loss', 'content': 0.16272243857383728, 'timestamp': '2025-09-10 02:30:00.521838', 'step': 3314, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 144], 'flops': 4271696270016}, 'timestamp': '2025-09-10 02:30:00.551666', 'step': 3314, 'epoch': 1} {'type': 'loss', 'content': 0.14354948699474335, 'timestamp': '2025-09-10 02:30:00.553988', 'step': 3315, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 3322488817984}, 'timestamp': '2025-09-10 02:30:00.586689', 'step': 3315, 'epoch': 1} {'type': 'loss', 'content': 0.12819312512874603, 'timestamp': '2025-09-10 02:30:00.611081', 'step': 3316, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 144], 'flops': 4271696270016}, 'timestamp': '2025-09-10 02:30:00.642334', 'step': 3316, 'epoch': 1} {'type': 'loss', 'content': 0.12850432097911835, 'timestamp': '2025-09-10 02:30:00.644422', 'step': 3317, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 128], 'flops': 3797092544000}, 'timestamp': '2025-09-10 02:30:00.674349', 'step': 3317, 'epoch': 1} {'type': 'loss', 'content': 0.1512690633535385, 'timestamp': '2025-09-10 02:30:00.676108', 'step': 3318, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 2847885091968}, 'timestamp': '2025-09-10 02:30:00.706345', 'step': 3318, 'epoch': 1} {'type': 'loss', 'content': 0.18131624162197113, 'timestamp': '2025-09-10 02:30:00.708521', 'step': 3319, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 3322488817984}, 'timestamp': '2025-09-10 02:30:00.738239', 'step': 3319, 'epoch': 1} {'type': 'loss', 'content': 0.14139868319034576, 'timestamp': '2025-09-10 02:30:00.761295', 'step': 3320, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 128], 'flops': 3797092544000}, 'timestamp': '2025-09-10 02:30:00.791691', 'step': 3320, 'epoch': 1} {'type': 'loss', 'content': 0.20465341210365295, 'timestamp': '2025-09-10 02:30:00.793666', 'step': 3321, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 3322488817984}, 'timestamp': '2025-09-10 02:30:00.824632', 'step': 3321, 'epoch': 1} {'type': 'loss', 'content': 0.10433832556009293, 'timestamp': '2025-09-10 02:30:00.827167', 'step': 3322, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 176], 'flops': 5220903722048}, 'timestamp': '2025-09-10 02:30:00.858500', 'step': 3322, 'epoch': 1} {'type': 'loss', 'content': 0.0910247415304184, 'timestamp': '2025-09-10 02:30:00.862613', 'step': 3323, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 128], 'flops': 3797092544000}, 'timestamp': '2025-09-10 02:30:00.893420', 'step': 3323, 'epoch': 1} {'type': 'loss', 'content': 0.13591507077217102, 'timestamp': '2025-09-10 02:30:00.916644', 'step': 3324, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 128], 'flops': 3797092544000}, 'timestamp': '2025-09-10 02:30:00.946772', 'step': 3324, 'epoch': 1} {'type': 'loss', 'content': 0.2805560827255249, 'timestamp': '2025-09-10 02:30:00.949282', 'step': 3325, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 2847885091968}, 'timestamp': '2025-09-10 02:30:00.982204', 'step': 3325, 'epoch': 1} {'type': 'loss', 'content': 0.13270433247089386, 'timestamp': '2025-09-10 02:30:00.984273', 'step': 3326, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 3322488817984}, 'timestamp': '2025-09-10 02:30:01.018462', 'step': 3326, 'epoch': 1} {'type': 'loss', 'content': 0.2079595923423767, 'timestamp': '2025-09-10 02:30:01.021008', 'step': 3327, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 192], 'flops': 5695507448064}, 'timestamp': '2025-09-10 02:30:01.054562', 'step': 3327, 'epoch': 1} {'type': 'loss', 'content': 0.21074369549751282, 'timestamp': '2025-09-10 02:30:01.079946', 'step': 3328, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 3322488817984}, 'timestamp': '2025-09-10 02:30:01.110969', 'step': 3328, 'epoch': 1} {'type': 'loss', 'content': 0.21982915699481964, 'timestamp': '2025-09-10 02:30:01.112912', 'step': 3329, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 144], 'flops': 4271696270016}, 'timestamp': '2025-09-10 02:30:01.143976', 'step': 3329, 'epoch': 1} {'type': 'loss', 'content': 0.11506625264883041, 'timestamp': '2025-09-10 02:30:01.146083', 'step': 3330, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 144], 'flops': 4271696270016}, 'timestamp': '2025-09-10 02:30:01.177902', 'step': 3330, 'epoch': 1} {'type': 'loss', 'content': 0.08453745394945145, 'timestamp': '2025-09-10 02:30:01.180017', 'step': 3331, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 2847885091968}, 'timestamp': '2025-09-10 02:30:01.210850', 'step': 3331, 'epoch': 1} {'type': 'loss', 'content': 0.20173174142837524, 'timestamp': '2025-09-10 02:30:01.234278', 'step': 3332, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 128], 'flops': 3797092544000}, 'timestamp': '2025-09-10 02:30:01.265430', 'step': 3332, 'epoch': 1} {'type': 'loss', 'content': 0.10672577470541, 'timestamp': '2025-09-10 02:30:01.267203', 'step': 3333, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 2847885091968}, 'timestamp': '2025-09-10 02:30:01.297602', 'step': 3333, 'epoch': 1} {'type': 'loss', 'content': 0.2315254509449005, 'timestamp': '2025-09-10 02:30:01.299411', 'step': 3334, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 80], 'flops': 2373281365952}, 'timestamp': '2025-09-10 02:30:01.330982', 'step': 3334, 'epoch': 1} {'type': 'loss', 'content': 0.14710263907909393, 'timestamp': '2025-09-10 02:30:01.332981', 'step': 3335, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 128], 'flops': 3797092544000}, 'timestamp': '2025-09-10 02:30:01.363254', 'step': 3335, 'epoch': 1} {'type': 'loss', 'content': 0.17461428046226501, 'timestamp': '2025-09-10 02:30:01.386711', 'step': 3336, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 3322488817984}, 'timestamp': '2025-09-10 02:30:01.420547', 'step': 3336, 'epoch': 1} {'type': 'loss', 'content': 0.11583846807479858, 'timestamp': '2025-09-10 02:30:01.423069', 'step': 3337, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 3322488817984}, 'timestamp': '2025-09-10 02:30:01.454488', 'step': 3337, 'epoch': 1} {'type': 'loss', 'content': 0.129347562789917, 'timestamp': '2025-09-10 02:30:01.457184', 'step': 3338, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 2847885091968}, 'timestamp': '2025-09-10 02:30:01.487983', 'step': 3338, 'epoch': 1} {'type': 'loss', 'content': 0.28298503160476685, 'timestamp': '2025-09-10 02:30:01.490658', 'step': 3339, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 432], 'flops': 12814563338304}, 'timestamp': '2025-09-10 02:30:01.529775', 'step': 3339, 'epoch': 1} {'type': 'loss', 'content': 0.17947986721992493, 'timestamp': '2025-09-10 02:30:01.566652', 'step': 3340, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 3322488817984}, 'timestamp': '2025-09-10 02:30:01.607207', 'step': 3340, 'epoch': 1} {'type': 'loss', 'content': 0.2116166353225708, 'timestamp': '2025-09-10 02:30:01.609249', 'step': 3341, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 128], 'flops': 3797092544000}, 'timestamp': '2025-09-10 02:30:01.640481', 'step': 3341, 'epoch': 1} {'type': 'loss', 'content': 0.2616614103317261, 'timestamp': '2025-09-10 02:30:01.642512', 'step': 3342, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 3322488817984}, 'timestamp': '2025-09-10 02:30:01.674134', 'step': 3342, 'epoch': 1} {'type': 'loss', 'content': 0.1672903299331665, 'timestamp': '2025-09-10 02:30:01.675979', 'step': 3343, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 3322488817984}, 'timestamp': '2025-09-10 02:30:01.706048', 'step': 3343, 'epoch': 1} {'type': 'loss', 'content': 0.2299618124961853, 'timestamp': '2025-09-10 02:30:01.729496', 'step': 3344, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 128], 'flops': 3797092544000}, 'timestamp': '2025-09-10 02:30:01.760107', 'step': 3344, 'epoch': 1} {'type': 'loss', 'content': 0.15564219653606415, 'timestamp': '2025-09-10 02:30:01.762353', 'step': 3345, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 2847885091968}, 'timestamp': '2025-09-10 02:30:01.794320', 'step': 3345, 'epoch': 1} {'type': 'loss', 'content': 0.17490486800670624, 'timestamp': '2025-09-10 02:30:01.801746', 'step': 3346, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 2847885091968}, 'timestamp': '2025-09-10 02:30:01.837203', 'step': 3346, 'epoch': 1} {'type': 'loss', 'content': 0.15696828067302704, 'timestamp': '2025-09-10 02:30:01.842158', 'step': 3347, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 3322488817984}, 'timestamp': '2025-09-10 02:30:01.876007', 'step': 3347, 'epoch': 1} {'type': 'loss', 'content': 0.15071776509284973, 'timestamp': '2025-09-10 02:30:01.899561', 'step': 3348, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 3322488817984}, 'timestamp': '2025-09-10 02:30:01.932211', 'step': 3348, 'epoch': 1} {'type': 'loss', 'content': 0.3145962357521057, 'timestamp': '2025-09-10 02:30:01.936559', 'step': 3349, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 128], 'flops': 3797092544000}, 'timestamp': '2025-09-10 02:30:01.977569', 'step': 3349, 'epoch': 1} {'type': 'loss', 'content': 0.19461047649383545, 'timestamp': '2025-09-10 02:30:01.979863', 'step': 3350, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 3322488817984}, 'timestamp': '2025-09-10 02:30:02.010340', 'step': 3350, 'epoch': 1} {'type': 'loss', 'content': 0.12023547291755676, 'timestamp': '2025-09-10 02:30:02.012360', 'step': 3351, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 3322488817984}, 'timestamp': '2025-09-10 02:30:02.043890', 'step': 3351, 'epoch': 1} {'type': 'loss', 'content': 0.15765561163425446, 'timestamp': '2025-09-10 02:30:02.070218', 'step': 3352, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 80], 'flops': 2373281365952}, 'timestamp': '2025-09-10 02:30:02.107248', 'step': 3352, 'epoch': 1} {'type': 'loss', 'content': 0.15661606192588806, 'timestamp': '2025-09-10 02:30:02.110456', 'step': 3353, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 2847885091968}, 'timestamp': '2025-09-10 02:30:02.145421', 'step': 3353, 'epoch': 1} {'type': 'loss', 'content': 0.12835371494293213, 'timestamp': '2025-09-10 02:30:02.147803', 'step': 3354, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 128], 'flops': 3797092544000}, 'timestamp': '2025-09-10 02:30:02.187883', 'step': 3354, 'epoch': 1} {'type': 'loss', 'content': 0.22902214527130127, 'timestamp': '2025-09-10 02:30:02.190094', 'step': 3355, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 144], 'flops': 4271696270016}, 'timestamp': '2025-09-10 02:30:02.225818', 'step': 3355, 'epoch': 1} {'type': 'loss', 'content': 0.20868369936943054, 'timestamp': '2025-09-10 02:30:02.248960', 'step': 3356, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 3322488817984}, 'timestamp': '2025-09-10 02:30:02.283755', 'step': 3356, 'epoch': 1} {'type': 'loss', 'content': 0.13389737904071808, 'timestamp': '2025-09-10 02:30:02.286372', 'step': 3357, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 144], 'flops': 4271696270016}, 'timestamp': '2025-09-10 02:30:02.320166', 'step': 3357, 'epoch': 1} {'type': 'loss', 'content': 0.13293838500976562, 'timestamp': '2025-09-10 02:30:02.322343', 'step': 3358, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 144], 'flops': 4271696270016}, 'timestamp': '2025-09-10 02:30:02.353214', 'step': 3358, 'epoch': 1} {'type': 'loss', 'content': 0.10449735820293427, 'timestamp': '2025-09-10 02:30:02.355410', 'step': 3359, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 2847885091968}, 'timestamp': '2025-09-10 02:30:02.390223', 'step': 3359, 'epoch': 1} {'type': 'loss', 'content': 0.1957971751689911, 'timestamp': '2025-09-10 02:30:02.413847', 'step': 3360, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 3322488817984}, 'timestamp': '2025-09-10 02:30:02.450974', 'step': 3360, 'epoch': 1} {'type': 'loss', 'content': 0.16349920630455017, 'timestamp': '2025-09-10 02:30:02.456164', 'step': 3361, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 3322488817984}, 'timestamp': '2025-09-10 02:30:02.501096', 'step': 3361, 'epoch': 1} {'type': 'loss', 'content': 0.163295179605484, 'timestamp': '2025-09-10 02:30:02.510264', 'step': 3362, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 128], 'flops': 3797092544000}, 'timestamp': '2025-09-10 02:30:02.547914', 'step': 3362, 'epoch': 1} {'type': 'loss', 'content': 0.08733636140823364, 'timestamp': '2025-09-10 02:30:02.549974', 'step': 3363, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 3322488817984}, 'timestamp': '2025-09-10 02:30:02.580943', 'step': 3363, 'epoch': 1} {'type': 'loss', 'content': 0.16743895411491394, 'timestamp': '2025-09-10 02:30:02.605931', 'step': 3364, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 3322488817984}, 'timestamp': '2025-09-10 02:30:02.640630', 'step': 3364, 'epoch': 1} {'type': 'loss', 'content': 0.28924766182899475, 'timestamp': '2025-09-10 02:30:02.643173', 'step': 3365, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 128], 'flops': 3797092544000}, 'timestamp': '2025-09-10 02:30:02.675216', 'step': 3365, 'epoch': 1} {'type': 'loss', 'content': 0.11121852695941925, 'timestamp': '2025-09-10 02:30:02.677476', 'step': 3366, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 2847885091968}, 'timestamp': '2025-09-10 02:30:02.708904', 'step': 3366, 'epoch': 1} {'type': 'loss', 'content': 0.1894960105419159, 'timestamp': '2025-09-10 02:30:02.711460', 'step': 3367, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 176], 'flops': 5220903722048}, 'timestamp': '2025-09-10 02:30:02.745289', 'step': 3367, 'epoch': 1} {'type': 'loss', 'content': 0.16275765001773834, 'timestamp': '2025-09-10 02:30:02.769314', 'step': 3368, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 3322488817984}, 'timestamp': '2025-09-10 02:30:02.812770', 'step': 3368, 'epoch': 1} {'type': 'loss', 'content': 0.23905231058597565, 'timestamp': '2025-09-10 02:30:02.815211', 'step': 3369, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 128], 'flops': 3797092544000}, 'timestamp': '2025-09-10 02:30:02.850651', 'step': 3369, 'epoch': 1} {'type': 'loss', 'content': 0.131266251206398, 'timestamp': '2025-09-10 02:30:02.852865', 'step': 3370, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 2847885091968}, 'timestamp': '2025-09-10 02:30:02.883870', 'step': 3370, 'epoch': 1} {'type': 'loss', 'content': 0.211116224527359, 'timestamp': '2025-09-10 02:30:02.886898', 'step': 3371, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 2847885091968}, 'timestamp': '2025-09-10 02:30:02.917755', 'step': 3371, 'epoch': 1} {'type': 'loss', 'content': 0.14815905690193176, 'timestamp': '2025-09-10 02:30:02.942296', 'step': 3372, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 2847885091968}, 'timestamp': '2025-09-10 02:30:02.979197', 'step': 3372, 'epoch': 1} {'type': 'loss', 'content': 0.13700906932353973, 'timestamp': '2025-09-10 02:30:02.981147', 'step': 3373, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 160], 'flops': 4746299996032}, 'timestamp': '2025-09-10 02:30:03.011596', 'step': 3373, 'epoch': 1} {'type': 'loss', 'content': 0.14935997128486633, 'timestamp': '2025-09-10 02:30:03.017271', 'step': 3374, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 3322488817984}, 'timestamp': '2025-09-10 02:30:03.050574', 'step': 3374, 'epoch': 1} {'type': 'loss', 'content': 0.20517569780349731, 'timestamp': '2025-09-10 02:30:03.052981', 'step': 3375, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 3322488817984}, 'timestamp': '2025-09-10 02:30:03.093242', 'step': 3375, 'epoch': 1} {'type': 'loss', 'content': 0.2501614987850189, 'timestamp': '2025-09-10 02:30:03.120565', 'step': 3376, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 128], 'flops': 3797092544000}, 'timestamp': '2025-09-10 02:30:03.153916', 'step': 3376, 'epoch': 1} {'type': 'loss', 'content': 0.23473802208900452, 'timestamp': '2025-09-10 02:30:03.156663', 'step': 3377, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 2847885091968}, 'timestamp': '2025-09-10 02:30:03.187665', 'step': 3377, 'epoch': 1} {'type': 'loss', 'content': 0.16503974795341492, 'timestamp': '2025-09-10 02:30:03.191259', 'step': 3378, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 3322488817984}, 'timestamp': '2025-09-10 02:30:03.221691', 'step': 3378, 'epoch': 1} {'type': 'loss', 'content': 0.13430549204349518, 'timestamp': '2025-09-10 02:30:03.224520', 'step': 3379, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 128], 'flops': 3797092544000}, 'timestamp': '2025-09-10 02:30:03.254775', 'step': 3379, 'epoch': 1} {'type': 'loss', 'content': 0.17150577902793884, 'timestamp': '2025-09-10 02:30:03.278356', 'step': 3380, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 128], 'flops': 3797092544000}, 'timestamp': '2025-09-10 02:30:03.321307', 'step': 3380, 'epoch': 1} {'type': 'loss', 'content': 0.19993440806865692, 'timestamp': '2025-09-10 02:30:03.327846', 'step': 3381, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 2847885091968}, 'timestamp': '2025-09-10 02:30:03.358260', 'step': 3381, 'epoch': 1} {'type': 'loss', 'content': 0.2146424949169159, 'timestamp': '2025-09-10 02:30:03.360702', 'step': 3382, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 3322488817984}, 'timestamp': '2025-09-10 02:30:03.392735', 'step': 3382, 'epoch': 1} {'type': 'loss', 'content': 0.1738186925649643, 'timestamp': '2025-09-10 02:30:03.398669', 'step': 3383, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 3322488817984}, 'timestamp': '2025-09-10 02:30:03.430016', 'step': 3383, 'epoch': 1} {'type': 'loss', 'content': 0.13704444468021393, 'timestamp': '2025-09-10 02:30:03.452832', 'step': 3384, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 3322488817984}, 'timestamp': '2025-09-10 02:30:03.483891', 'step': 3384, 'epoch': 1} {'type': 'loss', 'content': 0.1940314769744873, 'timestamp': '2025-09-10 02:30:03.487450', 'step': 3385, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 2847885091968}, 'timestamp': '2025-09-10 02:30:03.518600', 'step': 3385, 'epoch': 1} {'type': 'loss', 'content': 0.17528028786182404, 'timestamp': '2025-09-10 02:30:03.521481', 'step': 3386, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 2847885091968}, 'timestamp': '2025-09-10 02:30:03.556615', 'step': 3386, 'epoch': 1} {'type': 'loss', 'content': 0.12047625333070755, 'timestamp': '2025-09-10 02:30:03.559861', 'step': 3387, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 3322488817984}, 'timestamp': '2025-09-10 02:30:03.594046', 'step': 3387, 'epoch': 1} {'type': 'loss', 'content': 0.09826316684484482, 'timestamp': '2025-09-10 02:30:03.619772', 'step': 3388, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 3322488817984}, 'timestamp': '2025-09-10 02:30:03.652500', 'step': 3388, 'epoch': 1} {'type': 'loss', 'content': 0.16627272963523865, 'timestamp': '2025-09-10 02:30:03.654786', 'step': 3389, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 3322488817984}, 'timestamp': '2025-09-10 02:30:03.686525', 'step': 3389, 'epoch': 1} {'type': 'loss', 'content': 0.22402150928974152, 'timestamp': '2025-09-10 02:30:03.689171', 'step': 3390, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 3322488817984}, 'timestamp': '2025-09-10 02:30:03.722393', 'step': 3390, 'epoch': 1} {'type': 'loss', 'content': 0.17445342242717743, 'timestamp': '2025-09-10 02:30:03.724428', 'step': 3391, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 128], 'flops': 3797092544000}, 'timestamp': '2025-09-10 02:30:03.758049', 'step': 3391, 'epoch': 1} {'type': 'loss', 'content': 0.16768917441368103, 'timestamp': '2025-09-10 02:30:03.781066', 'step': 3392, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 2847885091968}, 'timestamp': '2025-09-10 02:30:03.824164', 'step': 3392, 'epoch': 1} {'type': 'loss', 'content': 0.1185452863574028, 'timestamp': '2025-09-10 02:30:03.826702', 'step': 3393, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 3322488817984}, 'timestamp': '2025-09-10 02:30:03.859188', 'step': 3393, 'epoch': 1} {'type': 'loss', 'content': 0.12490630894899368, 'timestamp': '2025-09-10 02:30:03.861487', 'step': 3394, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 2847885091968}, 'timestamp': '2025-09-10 02:30:03.891972', 'step': 3394, 'epoch': 1} {'type': 'loss', 'content': 0.1024746373295784, 'timestamp': '2025-09-10 02:30:03.895450', 'step': 3395, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 2847885091968}, 'timestamp': '2025-09-10 02:30:03.927515', 'step': 3395, 'epoch': 1} {'type': 'loss', 'content': 0.20874406397342682, 'timestamp': '2025-09-10 02:30:03.950888', 'step': 3396, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 3322488817984}, 'timestamp': '2025-09-10 02:30:03.980932', 'step': 3396, 'epoch': 1} {'type': 'loss', 'content': 0.1768663078546524, 'timestamp': '2025-09-10 02:30:03.983090', 'step': 3397, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 2847885091968}, 'timestamp': '2025-09-10 02:30:04.012968', 'step': 3397, 'epoch': 1} {'type': 'loss', 'content': 0.18427130579948425, 'timestamp': '2025-09-10 02:30:04.015000', 'step': 3398, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 2847885091968}, 'timestamp': '2025-09-10 02:30:04.044825', 'step': 3398, 'epoch': 1} {'type': 'loss', 'content': 0.20814140141010284, 'timestamp': '2025-09-10 02:30:04.046710', 'step': 3399, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 2847885091968}, 'timestamp': '2025-09-10 02:30:04.076022', 'step': 3399, 'epoch': 1} {'type': 'loss', 'content': 0.13103759288787842, 'timestamp': '2025-09-10 02:30:04.099614', 'step': 3400, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 2847885091968}, 'timestamp': '2025-09-10 02:30:04.130369', 'step': 3400, 'epoch': 1} {'type': 'loss', 'content': 0.1657283753156662, 'timestamp': '2025-09-10 02:30:04.132305', 'step': 3401, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 128], 'flops': 3797092544000}, 'timestamp': '2025-09-10 02:30:04.163892', 'step': 3401, 'epoch': 1} {'type': 'loss', 'content': 0.12709729373455048, 'timestamp': '2025-09-10 02:30:04.166266', 'step': 3402, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 144], 'flops': 4271696270016}, 'timestamp': '2025-09-10 02:30:04.197216', 'step': 3402, 'epoch': 1} {'type': 'loss', 'content': 0.2818969190120697, 'timestamp': '2025-09-10 02:30:04.199393', 'step': 3403, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 128], 'flops': 3797092544000}, 'timestamp': '2025-09-10 02:30:04.230026', 'step': 3403, 'epoch': 1} {'type': 'loss', 'content': 0.186140775680542, 'timestamp': '2025-09-10 02:30:04.253201', 'step': 3404, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 2847885091968}, 'timestamp': '2025-09-10 02:30:04.283559', 'step': 3404, 'epoch': 1} {'type': 'loss', 'content': 0.16672837734222412, 'timestamp': '2025-09-10 02:30:04.285938', 'step': 3405, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 3322488817984}, 'timestamp': '2025-09-10 02:30:04.315628', 'step': 3405, 'epoch': 1} {'type': 'loss', 'content': 0.14097018539905548, 'timestamp': '2025-09-10 02:30:04.317917', 'step': 3406, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 160], 'flops': 4746299996032}, 'timestamp': '2025-09-10 02:30:04.348540', 'step': 3406, 'epoch': 1} {'type': 'loss', 'content': 0.1947997510433197, 'timestamp': '2025-09-10 02:30:04.351271', 'step': 3407, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 128], 'flops': 3797092544000}, 'timestamp': '2025-09-10 02:30:04.382549', 'step': 3407, 'epoch': 1} {'type': 'loss', 'content': 0.19583219289779663, 'timestamp': '2025-09-10 02:30:04.409155', 'step': 3408, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 3322488817984}, 'timestamp': '2025-09-10 02:30:04.441939', 'step': 3408, 'epoch': 1} {'type': 'loss', 'content': 0.11801169812679291, 'timestamp': '2025-09-10 02:30:04.444412', 'step': 3409, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 128], 'flops': 3797092544000}, 'timestamp': '2025-09-10 02:30:04.476289', 'step': 3409, 'epoch': 1} {'type': 'loss', 'content': 0.139032244682312, 'timestamp': '2025-09-10 02:30:04.483550', 'step': 3410, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 128], 'flops': 3797092544000}, 'timestamp': '2025-09-10 02:30:04.516982', 'step': 3410, 'epoch': 1} {'type': 'loss', 'content': 0.05972002446651459, 'timestamp': '2025-09-10 02:30:04.518956', 'step': 3411, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 128], 'flops': 3797092544000}, 'timestamp': '2025-09-10 02:30:04.549592', 'step': 3411, 'epoch': 1} {'type': 'loss', 'content': 0.24793091416358948, 'timestamp': '2025-09-10 02:30:04.573505', 'step': 3412, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 2847885091968}, 'timestamp': '2025-09-10 02:30:04.605897', 'step': 3412, 'epoch': 1} {'type': 'loss', 'content': 0.18140864372253418, 'timestamp': '2025-09-10 02:30:04.608547', 'step': 3413, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 3322488817984}, 'timestamp': '2025-09-10 02:30:04.644652', 'step': 3413, 'epoch': 1} {'type': 'loss', 'content': 0.1448400914669037, 'timestamp': '2025-09-10 02:30:04.647767', 'step': 3414, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 2847885091968}, 'timestamp': '2025-09-10 02:30:04.680259', 'step': 3414, 'epoch': 1} {'type': 'loss', 'content': 0.12613758444786072, 'timestamp': '2025-09-10 02:30:04.683187', 'step': 3415, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 3322488817984}, 'timestamp': '2025-09-10 02:30:04.716876', 'step': 3415, 'epoch': 1} {'type': 'loss', 'content': 0.1822115182876587, 'timestamp': '2025-09-10 02:30:04.740521', 'step': 3416, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 3322488817984}, 'timestamp': '2025-09-10 02:30:04.780633', 'step': 3416, 'epoch': 1} {'type': 'loss', 'content': 0.11772366613149643, 'timestamp': '2025-09-10 02:30:04.783215', 'step': 3417, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 2847885091968}, 'timestamp': '2025-09-10 02:30:04.818406', 'step': 3417, 'epoch': 1} {'type': 'loss', 'content': 0.13227197527885437, 'timestamp': '2025-09-10 02:30:04.820278', 'step': 3418, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 2847885091968}, 'timestamp': '2025-09-10 02:30:04.852255', 'step': 3418, 'epoch': 1} {'type': 'loss', 'content': 0.13294607400894165, 'timestamp': '2025-09-10 02:30:04.854348', 'step': 3419, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 2847885091968}, 'timestamp': '2025-09-10 02:30:04.885538', 'step': 3419, 'epoch': 1} {'type': 'loss', 'content': 0.09961279481649399, 'timestamp': '2025-09-10 02:30:04.909775', 'step': 3420, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 3322488817984}, 'timestamp': '2025-09-10 02:30:04.942699', 'step': 3420, 'epoch': 1} {'type': 'loss', 'content': 0.21197931468486786, 'timestamp': '2025-09-10 02:30:04.945264', 'step': 3421, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 3322488817984}, 'timestamp': '2025-09-10 02:30:04.977893', 'step': 3421, 'epoch': 1} {'type': 'loss', 'content': 0.13500866293907166, 'timestamp': '2025-09-10 02:30:04.980252', 'step': 3422, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 80], 'flops': 2373281365952}, 'timestamp': '2025-09-10 02:30:05.011993', 'step': 3422, 'epoch': 1} {'type': 'loss', 'content': 0.1633947640657425, 'timestamp': '2025-09-10 02:30:05.014435', 'step': 3423, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 2847885091968}, 'timestamp': '2025-09-10 02:30:05.050361', 'step': 3423, 'epoch': 1} {'type': 'loss', 'content': 0.10224058479070663, 'timestamp': '2025-09-10 02:30:05.073507', 'step': 3424, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 3322488817984}, 'timestamp': '2025-09-10 02:30:05.106698', 'step': 3424, 'epoch': 1} {'type': 'loss', 'content': 0.10735945403575897, 'timestamp': '2025-09-10 02:30:05.108886', 'step': 3425, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 3322488817984}, 'timestamp': '2025-09-10 02:30:05.139535', 'step': 3425, 'epoch': 1} {'type': 'loss', 'content': 0.1458677500486374, 'timestamp': '2025-09-10 02:30:05.141786', 'step': 3426, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 128], 'flops': 3797092544000}, 'timestamp': '2025-09-10 02:30:05.172798', 'step': 3426, 'epoch': 1} {'type': 'loss', 'content': 0.2587486505508423, 'timestamp': '2025-09-10 02:30:05.175640', 'step': 3427, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 2847885091968}, 'timestamp': '2025-09-10 02:30:05.205949', 'step': 3427, 'epoch': 1} {'type': 'loss', 'content': 0.2758069634437561, 'timestamp': '2025-09-10 02:30:05.229486', 'step': 3428, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 128], 'flops': 3797092544000}, 'timestamp': '2025-09-10 02:30:05.259991', 'step': 3428, 'epoch': 1} {'type': 'loss', 'content': 0.17644257843494415, 'timestamp': '2025-09-10 02:30:05.263098', 'step': 3429, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 144], 'flops': 4271696270016}, 'timestamp': '2025-09-10 02:30:05.297696', 'step': 3429, 'epoch': 1} {'type': 'loss', 'content': 0.24103814363479614, 'timestamp': '2025-09-10 02:30:05.300442', 'step': 3430, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 3322488817984}, 'timestamp': '2025-09-10 02:30:05.332964', 'step': 3430, 'epoch': 1} {'type': 'loss', 'content': 0.14610330760478973, 'timestamp': '2025-09-10 02:30:05.334773', 'step': 3431, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 3322488817984}, 'timestamp': '2025-09-10 02:30:05.364801', 'step': 3431, 'epoch': 1} {'type': 'loss', 'content': 0.14957623183727264, 'timestamp': '2025-09-10 02:30:05.388514', 'step': 3432, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 3322488817984}, 'timestamp': '2025-09-10 02:30:05.418648', 'step': 3432, 'epoch': 1} {'type': 'loss', 'content': 0.08493838459253311, 'timestamp': '2025-09-10 02:30:05.421251', 'step': 3433, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 3322488817984}, 'timestamp': '2025-09-10 02:30:05.453784', 'step': 3433, 'epoch': 1} {'type': 'loss', 'content': 0.18840709328651428, 'timestamp': '2025-09-10 02:30:05.455936', 'step': 3434, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 3322488817984}, 'timestamp': '2025-09-10 02:30:05.485968', 'step': 3434, 'epoch': 1} {'type': 'loss', 'content': 0.17611217498779297, 'timestamp': '2025-09-10 02:30:05.487769', 'step': 3435, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 3322488817984}, 'timestamp': '2025-09-10 02:30:05.517613', 'step': 3435, 'epoch': 1} {'type': 'loss', 'content': 0.2739306092262268, 'timestamp': '2025-09-10 02:30:05.540893', 'step': 3436, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 80], 'flops': 2373281365952}, 'timestamp': '2025-09-10 02:30:05.571102', 'step': 3436, 'epoch': 1} {'type': 'loss', 'content': 0.11684049665927887, 'timestamp': '2025-09-10 02:30:05.573098', 'step': 3437, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 3322488817984}, 'timestamp': '2025-09-10 02:30:05.603992', 'step': 3437, 'epoch': 1} {'type': 'loss', 'content': 0.25415095686912537, 'timestamp': '2025-09-10 02:30:05.606401', 'step': 3438, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 144], 'flops': 4271696270016}, 'timestamp': '2025-09-10 02:30:05.637459', 'step': 3438, 'epoch': 1} {'type': 'loss', 'content': 0.1147756576538086, 'timestamp': '2025-09-10 02:30:05.640001', 'step': 3439, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 2847885091968}, 'timestamp': '2025-09-10 02:30:05.669461', 'step': 3439, 'epoch': 1} {'type': 'loss', 'content': 0.1649940460920334, 'timestamp': '2025-09-10 02:30:05.695186', 'step': 3440, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 144], 'flops': 4271696270016}, 'timestamp': '2025-09-10 02:30:05.724987', 'step': 3440, 'epoch': 1} {'type': 'loss', 'content': 0.13224433362483978, 'timestamp': '2025-09-10 02:30:05.727129', 'step': 3441, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 2847885091968}, 'timestamp': '2025-09-10 02:30:05.757556', 'step': 3441, 'epoch': 1} {'type': 'loss', 'content': 0.1765958070755005, 'timestamp': '2025-09-10 02:30:05.759874', 'step': 3442, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 3322488817984}, 'timestamp': '2025-09-10 02:30:05.789965', 'step': 3442, 'epoch': 1} {'type': 'loss', 'content': 0.16678309440612793, 'timestamp': '2025-09-10 02:30:05.796745', 'step': 3443, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 80], 'flops': 2373281365952}, 'timestamp': '2025-09-10 02:30:05.827634', 'step': 3443, 'epoch': 1} {'type': 'loss', 'content': 0.10885177552700043, 'timestamp': '2025-09-10 02:30:05.851690', 'step': 3444, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 128], 'flops': 3797092544000}, 'timestamp': '2025-09-10 02:30:05.883561', 'step': 3444, 'epoch': 1} {'type': 'loss', 'content': 0.18805545568466187, 'timestamp': '2025-09-10 02:30:05.885928', 'step': 3445, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 128], 'flops': 3797092544000}, 'timestamp': '2025-09-10 02:30:05.916303', 'step': 3445, 'epoch': 1} {'type': 'loss', 'content': 0.15332865715026855, 'timestamp': '2025-09-10 02:30:05.918616', 'step': 3446, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 3322488817984}, 'timestamp': '2025-09-10 02:30:05.949151', 'step': 3446, 'epoch': 1} {'type': 'loss', 'content': 0.17152924835681915, 'timestamp': '2025-09-10 02:30:05.951141', 'step': 3447, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 3322488817984}, 'timestamp': '2025-09-10 02:30:05.982245', 'step': 3447, 'epoch': 1} {'type': 'loss', 'content': 0.198723703622818, 'timestamp': '2025-09-10 02:30:06.005357', 'step': 3448, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 144], 'flops': 4271696270016}, 'timestamp': '2025-09-10 02:30:06.036041', 'step': 3448, 'epoch': 1} {'type': 'loss', 'content': 0.13509142398834229, 'timestamp': '2025-09-10 02:30:06.037691', 'step': 3449, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 144], 'flops': 4271696270016}, 'timestamp': '2025-09-10 02:30:06.068264', 'step': 3449, 'epoch': 1} {'type': 'loss', 'content': 0.18474537134170532, 'timestamp': '2025-09-10 02:30:06.071263', 'step': 3450, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 2847885091968}, 'timestamp': '2025-09-10 02:30:06.102256', 'step': 3450, 'epoch': 1} {'type': 'loss', 'content': 0.20522478222846985, 'timestamp': '2025-09-10 02:30:06.104288', 'step': 3451, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 144], 'flops': 4271696270016}, 'timestamp': '2025-09-10 02:30:06.134965', 'step': 3451, 'epoch': 1} {'type': 'loss', 'content': 0.19469763338565826, 'timestamp': '2025-09-10 02:30:06.158240', 'step': 3452, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 128], 'flops': 3797092544000}, 'timestamp': '2025-09-10 02:30:06.189969', 'step': 3452, 'epoch': 1} {'type': 'loss', 'content': 0.21822591125965118, 'timestamp': '2025-09-10 02:30:06.192314', 'step': 3453, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 144], 'flops': 4271696270016}, 'timestamp': '2025-09-10 02:30:06.222424', 'step': 3453, 'epoch': 1} {'type': 'loss', 'content': 0.19163775444030762, 'timestamp': '2025-09-10 02:30:06.224842', 'step': 3454, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 3322488817984}, 'timestamp': '2025-09-10 02:30:06.255465', 'step': 3454, 'epoch': 1} {'type': 'loss', 'content': 0.1344255805015564, 'timestamp': '2025-09-10 02:30:06.257106', 'step': 3455, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 2847885091968}, 'timestamp': '2025-09-10 02:30:06.288942', 'step': 3455, 'epoch': 1} {'type': 'loss', 'content': 0.19528473913669586, 'timestamp': '2025-09-10 02:30:06.312532', 'step': 3456, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 3322488817984}, 'timestamp': '2025-09-10 02:30:06.342623', 'step': 3456, 'epoch': 1} {'type': 'loss', 'content': 0.07049690186977386, 'timestamp': '2025-09-10 02:30:06.344769', 'step': 3457, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 3322488817984}, 'timestamp': '2025-09-10 02:30:06.374444', 'step': 3457, 'epoch': 1} {'type': 'loss', 'content': 0.12492454051971436, 'timestamp': '2025-09-10 02:30:06.376455', 'step': 3458, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 3322488817984}, 'timestamp': '2025-09-10 02:30:06.405764', 'step': 3458, 'epoch': 1} {'type': 'loss', 'content': 0.20215736329555511, 'timestamp': '2025-09-10 02:30:06.407577', 'step': 3459, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 128], 'flops': 3797092544000}, 'timestamp': '2025-09-10 02:30:06.438618', 'step': 3459, 'epoch': 1} {'type': 'loss', 'content': 0.13610981404781342, 'timestamp': '2025-09-10 02:30:06.462191', 'step': 3460, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 2847885091968}, 'timestamp': '2025-09-10 02:30:06.493463', 'step': 3460, 'epoch': 1} {'type': 'loss', 'content': 0.16485711932182312, 'timestamp': '2025-09-10 02:30:06.495768', 'step': 3461, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 2847885091968}, 'timestamp': '2025-09-10 02:30:06.526299', 'step': 3461, 'epoch': 1} {'type': 'loss', 'content': 0.10153144598007202, 'timestamp': '2025-09-10 02:30:06.529682', 'step': 3462, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 3322488817984}, 'timestamp': '2025-09-10 02:30:06.560904', 'step': 3462, 'epoch': 1} {'type': 'loss', 'content': 0.14116759598255157, 'timestamp': '2025-09-10 02:30:06.563109', 'step': 3463, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 3322488817984}, 'timestamp': '2025-09-10 02:30:06.594818', 'step': 3463, 'epoch': 1} {'type': 'loss', 'content': 0.13910526037216187, 'timestamp': '2025-09-10 02:30:06.618516', 'step': 3464, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 3322488817984}, 'timestamp': '2025-09-10 02:30:06.649842', 'step': 3464, 'epoch': 1} {'type': 'loss', 'content': 0.17313572764396667, 'timestamp': '2025-09-10 02:30:06.652460', 'step': 3465, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 3322488817984}, 'timestamp': '2025-09-10 02:30:06.681760', 'step': 3465, 'epoch': 1} {'type': 'loss', 'content': 0.12393353134393692, 'timestamp': '2025-09-10 02:30:06.683916', 'step': 3466, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 3322488817984}, 'timestamp': '2025-09-10 02:30:06.713638', 'step': 3466, 'epoch': 1} {'type': 'loss', 'content': 0.19119948148727417, 'timestamp': '2025-09-10 02:30:06.715905', 'step': 3467, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 2847885091968}, 'timestamp': '2025-09-10 02:30:06.746326', 'step': 3467, 'epoch': 1} {'type': 'loss', 'content': 0.15837909281253815, 'timestamp': '2025-09-10 02:30:06.769287', 'step': 3468, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 2847885091968}, 'timestamp': '2025-09-10 02:30:06.799498', 'step': 3468, 'epoch': 1} {'type': 'loss', 'content': 0.16710087656974792, 'timestamp': '2025-09-10 02:30:06.809459', 'step': 3469, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 3322488817984}, 'timestamp': '2025-09-10 02:30:06.859907', 'step': 3469, 'epoch': 1} {'type': 'loss', 'content': 0.1650351732969284, 'timestamp': '2025-09-10 02:30:06.862063', 'step': 3470, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 144], 'flops': 4271696270016}, 'timestamp': '2025-09-10 02:30:06.894042', 'step': 3470, 'epoch': 1} {'type': 'loss', 'content': 0.22910872101783752, 'timestamp': '2025-09-10 02:30:06.896516', 'step': 3471, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 2847885091968}, 'timestamp': '2025-09-10 02:30:06.928308', 'step': 3471, 'epoch': 1} {'type': 'loss', 'content': 0.14630930125713348, 'timestamp': '2025-09-10 02:30:06.951552', 'step': 3472, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 128], 'flops': 3797092544000}, 'timestamp': '2025-09-10 02:30:06.983251', 'step': 3472, 'epoch': 1} {'type': 'loss', 'content': 0.14454670250415802, 'timestamp': '2025-09-10 02:30:06.985433', 'step': 3473, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 128], 'flops': 3797092544000}, 'timestamp': '2025-09-10 02:30:07.015718', 'step': 3473, 'epoch': 1} {'type': 'loss', 'content': 0.18614500761032104, 'timestamp': '2025-09-10 02:30:07.017624', 'step': 3474, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 144], 'flops': 4271696270016}, 'timestamp': '2025-09-10 02:30:07.047751', 'step': 3474, 'epoch': 1} {'type': 'loss', 'content': 0.1622529774904251, 'timestamp': '2025-09-10 02:30:07.060959', 'step': 3475, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 2847885091968}, 'timestamp': '2025-09-10 02:30:07.096252', 'step': 3475, 'epoch': 1} {'type': 'loss', 'content': 0.2661825120449066, 'timestamp': '2025-09-10 02:30:07.119226', 'step': 3476, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 3322488817984}, 'timestamp': '2025-09-10 02:30:07.148462', 'step': 3476, 'epoch': 1} {'type': 'loss', 'content': 0.15034368634223938, 'timestamp': '2025-09-10 02:30:07.150171', 'step': 3477, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 3322488817984}, 'timestamp': '2025-09-10 02:30:07.179519', 'step': 3477, 'epoch': 1} {'type': 'loss', 'content': 0.15022584795951843, 'timestamp': '2025-09-10 02:30:07.182641', 'step': 3478, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 2847885091968}, 'timestamp': '2025-09-10 02:30:07.213241', 'step': 3478, 'epoch': 1} {'type': 'loss', 'content': 0.16305844485759735, 'timestamp': '2025-09-10 02:30:07.215568', 'step': 3479, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 2847885091968}, 'timestamp': '2025-09-10 02:30:07.245568', 'step': 3479, 'epoch': 1} {'type': 'loss', 'content': 0.1980602890253067, 'timestamp': '2025-09-10 02:30:07.268592', 'step': 3480, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 128], 'flops': 3797092544000}, 'timestamp': '2025-09-10 02:30:07.299842', 'step': 3480, 'epoch': 1} {'type': 'loss', 'content': 0.15324810147285461, 'timestamp': '2025-09-10 02:30:07.301555', 'step': 3481, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 128], 'flops': 3797092544000}, 'timestamp': '2025-09-10 02:30:07.331292', 'step': 3481, 'epoch': 1} {'type': 'loss', 'content': 0.23920094966888428, 'timestamp': '2025-09-10 02:30:07.333421', 'step': 3482, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 144], 'flops': 4271696270016}, 'timestamp': '2025-09-10 02:30:07.362796', 'step': 3482, 'epoch': 1} {'type': 'loss', 'content': 0.19320861995220184, 'timestamp': '2025-09-10 02:30:07.365402', 'step': 3483, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 2847885091968}, 'timestamp': '2025-09-10 02:30:07.396094', 'step': 3483, 'epoch': 1} {'type': 'loss', 'content': 0.1579984873533249, 'timestamp': '2025-09-10 02:30:07.419343', 'step': 3484, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 3322488817984}, 'timestamp': '2025-09-10 02:30:07.449664', 'step': 3484, 'epoch': 1} {'type': 'loss', 'content': 0.134184792637825, 'timestamp': '2025-09-10 02:30:07.451376', 'step': 3485, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 3322488817984}, 'timestamp': '2025-09-10 02:30:07.481873', 'step': 3485, 'epoch': 1} {'type': 'loss', 'content': 0.11725206673145294, 'timestamp': '2025-09-10 02:30:07.483660', 'step': 3486, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 2847885091968}, 'timestamp': '2025-09-10 02:30:07.514301', 'step': 3486, 'epoch': 1} {'type': 'loss', 'content': 0.18171623349189758, 'timestamp': '2025-09-10 02:30:07.516233', 'step': 3487, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 144], 'flops': 4271696270016}, 'timestamp': '2025-09-10 02:30:07.546823', 'step': 3487, 'epoch': 1} {'type': 'loss', 'content': 0.14437516033649445, 'timestamp': '2025-09-10 02:30:07.569914', 'step': 3488, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 3322488817984}, 'timestamp': '2025-09-10 02:30:07.603272', 'step': 3488, 'epoch': 1} {'type': 'loss', 'content': 0.1887580305337906, 'timestamp': '2025-09-10 02:30:07.605644', 'step': 3489, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 144], 'flops': 4271696270016}, 'timestamp': '2025-09-10 02:30:07.636429', 'step': 3489, 'epoch': 1} {'type': 'loss', 'content': 0.13880564272403717, 'timestamp': '2025-09-10 02:30:07.638744', 'step': 3490, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 2847885091968}, 'timestamp': '2025-09-10 02:30:07.667928', 'step': 3490, 'epoch': 1} {'type': 'loss', 'content': 0.19290897250175476, 'timestamp': '2025-09-10 02:30:07.669865', 'step': 3491, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 3322488817984}, 'timestamp': '2025-09-10 02:30:07.699458', 'step': 3491, 'epoch': 1} {'type': 'loss', 'content': 0.15255415439605713, 'timestamp': '2025-09-10 02:30:07.723017', 'step': 3492, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 3322488817984}, 'timestamp': '2025-09-10 02:30:07.753073', 'step': 3492, 'epoch': 1} {'type': 'loss', 'content': 0.09469921141862869, 'timestamp': '2025-09-10 02:30:07.754927', 'step': 3493, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 3322488817984}, 'timestamp': '2025-09-10 02:30:07.785077', 'step': 3493, 'epoch': 1} {'type': 'loss', 'content': 0.19132108986377716, 'timestamp': '2025-09-10 02:30:07.786880', 'step': 3494, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 128], 'flops': 3797092544000}, 'timestamp': '2025-09-10 02:30:07.820474', 'step': 3494, 'epoch': 1} {'type': 'loss', 'content': 0.14074759185314178, 'timestamp': '2025-09-10 02:30:07.823127', 'step': 3495, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 128], 'flops': 3797092544000}, 'timestamp': '2025-09-10 02:30:07.854256', 'step': 3495, 'epoch': 1} {'type': 'loss', 'content': 0.34547239542007446, 'timestamp': '2025-09-10 02:30:07.877837', 'step': 3496, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 192], 'flops': 5695507448064}, 'timestamp': '2025-09-10 02:30:07.910366', 'step': 3496, 'epoch': 1} {'type': 'loss', 'content': 0.10569901019334793, 'timestamp': '2025-09-10 02:30:07.912641', 'step': 3497, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 3322488817984}, 'timestamp': '2025-09-10 02:30:07.943315', 'step': 3497, 'epoch': 1} {'type': 'loss', 'content': 0.1896829456090927, 'timestamp': '2025-09-10 02:30:07.945025', 'step': 3498, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 3322488817984}, 'timestamp': '2025-09-10 02:30:07.974685', 'step': 3498, 'epoch': 1} {'type': 'loss', 'content': 0.1287507563829422, 'timestamp': '2025-09-10 02:30:07.977687', 'step': 3499, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 2847885091968}, 'timestamp': '2025-09-10 02:30:08.009599', 'step': 3499, 'epoch': 1} {'type': 'loss', 'content': 0.10148019343614578, 'timestamp': '2025-09-10 02:30:08.032873', 'step': 3500, 'epoch': 1} {'type': 'info', 'content': 'Checkpoint saved at step 3500', 'timestamp': '2025-09-10 02:30:12.852231', 'step': 3500, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 3322488817984}, 'timestamp': '2025-09-10 02:30:12.901808', 'step': 3500, 'epoch': 1} {'type': 'loss', 'content': 0.1969059705734253, 'timestamp': '2025-09-10 02:30:12.911684', 'step': 3501, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 2847885091968}, 'timestamp': '2025-09-10 02:30:12.951828', 'step': 3501, 'epoch': 1} {'type': 'loss', 'content': 0.11697427183389664, 'timestamp': '2025-09-10 02:30:12.954209', 'step': 3502, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 128], 'flops': 3797092544000}, 'timestamp': '2025-09-10 02:30:12.984825', 'step': 3502, 'epoch': 1} {'type': 'loss', 'content': 0.1231897696852684, 'timestamp': '2025-09-10 02:30:12.987823', 'step': 3503, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 128], 'flops': 3797092544000}, 'timestamp': '2025-09-10 02:30:13.020419', 'step': 3503, 'epoch': 1} {'type': 'loss', 'content': 0.1922757774591446, 'timestamp': '2025-09-10 02:30:13.043951', 'step': 3504, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 2847885091968}, 'timestamp': '2025-09-10 02:30:13.079612', 'step': 3504, 'epoch': 1} {'type': 'loss', 'content': 0.1370275616645813, 'timestamp': '2025-09-10 02:30:13.081811', 'step': 3505, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 144], 'flops': 4271696270016}, 'timestamp': '2025-09-10 02:30:13.122288', 'step': 3505, 'epoch': 1} {'type': 'loss', 'content': 0.12650999426841736, 'timestamp': '2025-09-10 02:30:13.124784', 'step': 3506, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 3322488817984}, 'timestamp': '2025-09-10 02:30:13.159444', 'step': 3506, 'epoch': 1} {'type': 'loss', 'content': 0.1926342099905014, 'timestamp': '2025-09-10 02:30:13.162067', 'step': 3507, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 2847885091968}, 'timestamp': '2025-09-10 02:30:13.193796', 'step': 3507, 'epoch': 1} {'type': 'loss', 'content': 0.1303957998752594, 'timestamp': '2025-09-10 02:30:13.217405', 'step': 3508, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 2847885091968}, 'timestamp': '2025-09-10 02:30:13.250801', 'step': 3508, 'epoch': 1} {'type': 'loss', 'content': 0.13674330711364746, 'timestamp': '2025-09-10 02:30:13.257307', 'step': 3509, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 3322488817984}, 'timestamp': '2025-09-10 02:30:13.299579', 'step': 3509, 'epoch': 1} {'type': 'loss', 'content': 0.19180165231227875, 'timestamp': '2025-09-10 02:30:13.301990', 'step': 3510, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 3322488817984}, 'timestamp': '2025-09-10 02:30:13.334964', 'step': 3510, 'epoch': 1} {'type': 'loss', 'content': 0.22713446617126465, 'timestamp': '2025-09-10 02:30:13.337634', 'step': 3511, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 128], 'flops': 3797092544000}, 'timestamp': '2025-09-10 02:30:13.370052', 'step': 3511, 'epoch': 1} {'type': 'loss', 'content': 0.16618426144123077, 'timestamp': '2025-09-10 02:30:13.396028', 'step': 3512, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 2847885091968}, 'timestamp': '2025-09-10 02:30:13.433259', 'step': 3512, 'epoch': 1} {'type': 'loss', 'content': 0.1520688235759735, 'timestamp': '2025-09-10 02:30:13.435645', 'step': 3513, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 128], 'flops': 3797092544000}, 'timestamp': '2025-09-10 02:30:13.469014', 'step': 3513, 'epoch': 1} {'type': 'loss', 'content': 0.17512311041355133, 'timestamp': '2025-09-10 02:30:13.472609', 'step': 3514, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 3322488817984}, 'timestamp': '2025-09-10 02:30:13.515182', 'step': 3514, 'epoch': 1} {'type': 'loss', 'content': 0.28376224637031555, 'timestamp': '2025-09-10 02:30:13.517958', 'step': 3515, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 3322488817984}, 'timestamp': '2025-09-10 02:30:13.553524', 'step': 3515, 'epoch': 1} {'type': 'loss', 'content': 0.12961207330226898, 'timestamp': '2025-09-10 02:30:13.578641', 'step': 3516, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 3322488817984}, 'timestamp': '2025-09-10 02:30:13.616560', 'step': 3516, 'epoch': 1} {'type': 'loss', 'content': 0.16064538061618805, 'timestamp': '2025-09-10 02:30:13.619300', 'step': 3517, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 160], 'flops': 4746299996032}, 'timestamp': '2025-09-10 02:30:13.653043', 'step': 3517, 'epoch': 1} {'type': 'loss', 'content': 0.14962001144886017, 'timestamp': '2025-09-10 02:30:13.655946', 'step': 3518, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 144], 'flops': 4271696270016}, 'timestamp': '2025-09-10 02:30:13.689899', 'step': 3518, 'epoch': 1} {'type': 'loss', 'content': 0.21392980217933655, 'timestamp': '2025-09-10 02:30:13.693141', 'step': 3519, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 2847885091968}, 'timestamp': '2025-09-10 02:30:13.729448', 'step': 3519, 'epoch': 1} {'type': 'loss', 'content': 0.19843590259552002, 'timestamp': '2025-09-10 02:30:13.753242', 'step': 3520, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 128], 'flops': 3797092544000}, 'timestamp': '2025-09-10 02:30:13.786953', 'step': 3520, 'epoch': 1} {'type': 'loss', 'content': 0.1780824363231659, 'timestamp': '2025-09-10 02:30:13.789529', 'step': 3521, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 3322488817984}, 'timestamp': '2025-09-10 02:30:13.821231', 'step': 3521, 'epoch': 1} {'type': 'loss', 'content': 0.10870222002267838, 'timestamp': '2025-09-10 02:30:13.825226', 'step': 3522, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 144], 'flops': 4271696270016}, 'timestamp': '2025-09-10 02:30:13.862189', 'step': 3522, 'epoch': 1} {'type': 'loss', 'content': 0.20332863926887512, 'timestamp': '2025-09-10 02:30:13.866364', 'step': 3523, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 2847885091968}, 'timestamp': '2025-09-10 02:30:13.901721', 'step': 3523, 'epoch': 1} {'type': 'loss', 'content': 0.12394332140684128, 'timestamp': '2025-09-10 02:30:13.926580', 'step': 3524, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 192], 'flops': 5695507448064}, 'timestamp': '2025-09-10 02:30:13.957790', 'step': 3524, 'epoch': 1} {'type': 'loss', 'content': 0.11823434382677078, 'timestamp': '2025-09-10 02:30:13.960530', 'step': 3525, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 3322488817984}, 'timestamp': '2025-09-10 02:30:13.994826', 'step': 3525, 'epoch': 1} {'type': 'loss', 'content': 0.14443905651569366, 'timestamp': '2025-09-10 02:30:13.997481', 'step': 3526, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 2847885091968}, 'timestamp': '2025-09-10 02:30:14.028038', 'step': 3526, 'epoch': 1} {'type': 'loss', 'content': 0.26700475811958313, 'timestamp': '2025-09-10 02:30:14.030863', 'step': 3527, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 160], 'flops': 4746299996032}, 'timestamp': '2025-09-10 02:30:14.062072', 'step': 3527, 'epoch': 1} {'type': 'loss', 'content': 0.21627923846244812, 'timestamp': '2025-09-10 02:30:14.087914', 'step': 3528, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 3322488817984}, 'timestamp': '2025-09-10 02:30:14.121472', 'step': 3528, 'epoch': 1} {'type': 'loss', 'content': 0.13655583560466766, 'timestamp': '2025-09-10 02:30:14.123743', 'step': 3529, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 3322488817984}, 'timestamp': '2025-09-10 02:30:14.154751', 'step': 3529, 'epoch': 1} {'type': 'loss', 'content': 0.21067345142364502, 'timestamp': '2025-09-10 02:30:14.157029', 'step': 3530, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 80], 'flops': 2373281365952}, 'timestamp': '2025-09-10 02:30:14.186793', 'step': 3530, 'epoch': 1} {'type': 'loss', 'content': 0.22653894126415253, 'timestamp': '2025-09-10 02:30:14.189751', 'step': 3531, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 3322488817984}, 'timestamp': '2025-09-10 02:30:14.222445', 'step': 3531, 'epoch': 1} {'type': 'loss', 'content': 0.1305496096611023, 'timestamp': '2025-09-10 02:30:14.246237', 'step': 3532, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 2847885091968}, 'timestamp': '2025-09-10 02:30:14.277611', 'step': 3532, 'epoch': 1} {'type': 'loss', 'content': 0.21591360867023468, 'timestamp': '2025-09-10 02:30:14.280011', 'step': 3533, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 128], 'flops': 3797092544000}, 'timestamp': '2025-09-10 02:30:14.310899', 'step': 3533, 'epoch': 1} {'type': 'loss', 'content': 0.18599703907966614, 'timestamp': '2025-09-10 02:30:14.313595', 'step': 3534, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 2847885091968}, 'timestamp': '2025-09-10 02:30:14.345276', 'step': 3534, 'epoch': 1} {'type': 'loss', 'content': 0.12176522612571716, 'timestamp': '2025-09-10 02:30:14.347838', 'step': 3535, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 2847885091968}, 'timestamp': '2025-09-10 02:30:14.378537', 'step': 3535, 'epoch': 1} {'type': 'loss', 'content': 0.1786407232284546, 'timestamp': '2025-09-10 02:30:14.402773', 'step': 3536, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 2847885091968}, 'timestamp': '2025-09-10 02:30:14.433706', 'step': 3536, 'epoch': 1} {'type': 'loss', 'content': 0.17272333800792694, 'timestamp': '2025-09-10 02:30:14.436096', 'step': 3537, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 128], 'flops': 3797092544000}, 'timestamp': '2025-09-10 02:30:14.466770', 'step': 3537, 'epoch': 1} {'type': 'loss', 'content': 0.083725206553936, 'timestamp': '2025-09-10 02:30:14.470794', 'step': 3538, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 128], 'flops': 3797092544000}, 'timestamp': '2025-09-10 02:30:14.502636', 'step': 3538, 'epoch': 1} {'type': 'loss', 'content': 0.23608660697937012, 'timestamp': '2025-09-10 02:30:14.505138', 'step': 3539, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 3322488817984}, 'timestamp': '2025-09-10 02:30:14.537438', 'step': 3539, 'epoch': 1} {'type': 'loss', 'content': 0.23389649391174316, 'timestamp': '2025-09-10 02:30:14.561159', 'step': 3540, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 128], 'flops': 3797092544000}, 'timestamp': '2025-09-10 02:30:14.594573', 'step': 3540, 'epoch': 1} {'type': 'loss', 'content': 0.25013792514801025, 'timestamp': '2025-09-10 02:30:14.597670', 'step': 3541, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 2847885091968}, 'timestamp': '2025-09-10 02:30:14.628771', 'step': 3541, 'epoch': 1} {'type': 'loss', 'content': 0.2180318683385849, 'timestamp': '2025-09-10 02:30:14.631279', 'step': 3542, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 80], 'flops': 2373281365952}, 'timestamp': '2025-09-10 02:30:14.662602', 'step': 3542, 'epoch': 1} {'type': 'loss', 'content': 0.17439335584640503, 'timestamp': '2025-09-10 02:30:14.664787', 'step': 3543, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 3322488817984}, 'timestamp': '2025-09-10 02:30:14.699222', 'step': 3543, 'epoch': 1} {'type': 'loss', 'content': 0.12814907729625702, 'timestamp': '2025-09-10 02:30:14.723425', 'step': 3544, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 3322488817984}, 'timestamp': '2025-09-10 02:30:14.755470', 'step': 3544, 'epoch': 1} {'type': 'loss', 'content': 0.19979257881641388, 'timestamp': '2025-09-10 02:30:14.757825', 'step': 3545, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 3322488817984}, 'timestamp': '2025-09-10 02:30:14.788429', 'step': 3545, 'epoch': 1} {'type': 'loss', 'content': 0.23389221727848053, 'timestamp': '2025-09-10 02:30:14.791213', 'step': 3546, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 176], 'flops': 5220903722048}, 'timestamp': '2025-09-10 02:30:14.827208', 'step': 3546, 'epoch': 1} {'type': 'loss', 'content': 0.16425122320652008, 'timestamp': '2025-09-10 02:30:14.831501', 'step': 3547, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 144], 'flops': 4271696270016}, 'timestamp': '2025-09-10 02:30:14.862982', 'step': 3547, 'epoch': 1} {'type': 'loss', 'content': 0.09375584870576859, 'timestamp': '2025-09-10 02:30:14.886851', 'step': 3548, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 128], 'flops': 3797092544000}, 'timestamp': '2025-09-10 02:30:14.922514', 'step': 3548, 'epoch': 1} {'type': 'loss', 'content': 0.13731026649475098, 'timestamp': '2025-09-10 02:30:14.926876', 'step': 3549, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 3322488817984}, 'timestamp': '2025-09-10 02:30:14.957499', 'step': 3549, 'epoch': 1} {'type': 'loss', 'content': 0.20924624800682068, 'timestamp': '2025-09-10 02:30:14.959389', 'step': 3550, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 2847885091968}, 'timestamp': '2025-09-10 02:30:14.990098', 'step': 3550, 'epoch': 1} {'type': 'loss', 'content': 0.11078847199678421, 'timestamp': '2025-09-10 02:30:14.992981', 'step': 3551, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 128], 'flops': 3797092544000}, 'timestamp': '2025-09-10 02:30:15.024449', 'step': 3551, 'epoch': 1} {'type': 'loss', 'content': 0.11008476465940475, 'timestamp': '2025-09-10 02:30:15.048220', 'step': 3552, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 3322488817984}, 'timestamp': '2025-09-10 02:30:15.080635', 'step': 3552, 'epoch': 1} {'type': 'loss', 'content': 0.10329203307628632, 'timestamp': '2025-09-10 02:30:15.083303', 'step': 3553, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 144], 'flops': 4271696270016}, 'timestamp': '2025-09-10 02:30:15.114628', 'step': 3553, 'epoch': 1} {'type': 'loss', 'content': 0.10865867137908936, 'timestamp': '2025-09-10 02:30:15.117204', 'step': 3554, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 2847885091968}, 'timestamp': '2025-09-10 02:30:15.148102', 'step': 3554, 'epoch': 1} {'type': 'loss', 'content': 0.16440905630588531, 'timestamp': '2025-09-10 02:30:15.150340', 'step': 3555, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 2847885091968}, 'timestamp': '2025-09-10 02:30:15.181118', 'step': 3555, 'epoch': 1} {'type': 'loss', 'content': 0.2055884301662445, 'timestamp': '2025-09-10 02:30:15.205146', 'step': 3556, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 2847885091968}, 'timestamp': '2025-09-10 02:30:15.238463', 'step': 3556, 'epoch': 1} {'type': 'loss', 'content': 0.144230455160141, 'timestamp': '2025-09-10 02:30:15.240799', 'step': 3557, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 3322488817984}, 'timestamp': '2025-09-10 02:30:15.272074', 'step': 3557, 'epoch': 1} {'type': 'loss', 'content': 0.1792898327112198, 'timestamp': '2025-09-10 02:30:15.274881', 'step': 3558, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 3322488817984}, 'timestamp': '2025-09-10 02:30:15.308127', 'step': 3558, 'epoch': 1} {'type': 'loss', 'content': 0.23533351719379425, 'timestamp': '2025-09-10 02:30:15.310514', 'step': 3559, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 3322488817984}, 'timestamp': '2025-09-10 02:30:15.341508', 'step': 3559, 'epoch': 1} {'type': 'loss', 'content': 0.07880992442369461, 'timestamp': '2025-09-10 02:30:15.365231', 'step': 3560, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 128], 'flops': 3797092544000}, 'timestamp': '2025-09-10 02:30:15.396554', 'step': 3560, 'epoch': 1} {'type': 'loss', 'content': 0.2179645150899887, 'timestamp': '2025-09-10 02:30:15.399570', 'step': 3561, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 144], 'flops': 4271696270016}, 'timestamp': '2025-09-10 02:30:15.431807', 'step': 3561, 'epoch': 1} {'type': 'loss', 'content': 0.2031492292881012, 'timestamp': '2025-09-10 02:30:15.434609', 'step': 3562, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 80], 'flops': 2373281365952}, 'timestamp': '2025-09-10 02:30:15.465298', 'step': 3562, 'epoch': 1} {'type': 'loss', 'content': 0.13540960848331451, 'timestamp': '2025-09-10 02:30:15.467782', 'step': 3563, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 3322488817984}, 'timestamp': '2025-09-10 02:30:15.498360', 'step': 3563, 'epoch': 1} {'type': 'loss', 'content': 0.16452085971832275, 'timestamp': '2025-09-10 02:30:15.522362', 'step': 3564, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 2847885091968}, 'timestamp': '2025-09-10 02:30:15.553694', 'step': 3564, 'epoch': 1} {'type': 'loss', 'content': 0.14094725251197815, 'timestamp': '2025-09-10 02:30:15.557132', 'step': 3565, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 144], 'flops': 4271696270016}, 'timestamp': '2025-09-10 02:30:15.591045', 'step': 3565, 'epoch': 1} {'type': 'loss', 'content': 0.25033673644065857, 'timestamp': '2025-09-10 02:30:15.593522', 'step': 3566, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 2847885091968}, 'timestamp': '2025-09-10 02:30:15.625684', 'step': 3566, 'epoch': 1} {'type': 'loss', 'content': 0.07326680421829224, 'timestamp': '2025-09-10 02:30:15.628199', 'step': 3567, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 2847885091968}, 'timestamp': '2025-09-10 02:30:15.659084', 'step': 3567, 'epoch': 1} {'type': 'loss', 'content': 0.12050502002239227, 'timestamp': '2025-09-10 02:30:15.682500', 'step': 3568, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 144], 'flops': 4271696270016}, 'timestamp': '2025-09-10 02:30:15.716704', 'step': 3568, 'epoch': 1} {'type': 'loss', 'content': 0.21232619881629944, 'timestamp': '2025-09-10 02:30:15.719257', 'step': 3569, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 2847885091968}, 'timestamp': '2025-09-10 02:30:15.752456', 'step': 3569, 'epoch': 1} {'type': 'loss', 'content': 0.2216765433549881, 'timestamp': '2025-09-10 02:30:15.754744', 'step': 3570, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 160], 'flops': 4746299996032}, 'timestamp': '2025-09-10 02:30:15.786076', 'step': 3570, 'epoch': 1} {'type': 'loss', 'content': 0.1726328730583191, 'timestamp': '2025-09-10 02:30:15.788864', 'step': 3571, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 128], 'flops': 3797092544000}, 'timestamp': '2025-09-10 02:30:15.820521', 'step': 3571, 'epoch': 1} {'type': 'loss', 'content': 0.15739478170871735, 'timestamp': '2025-09-10 02:30:15.845553', 'step': 3572, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 144], 'flops': 4271696270016}, 'timestamp': '2025-09-10 02:30:15.878348', 'step': 3572, 'epoch': 1} {'type': 'loss', 'content': 0.16115660965442657, 'timestamp': '2025-09-10 02:30:15.881646', 'step': 3573, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 3322488817984}, 'timestamp': '2025-09-10 02:30:15.913628', 'step': 3573, 'epoch': 1} {'type': 'loss', 'content': 0.22748415172100067, 'timestamp': '2025-09-10 02:30:15.916114', 'step': 3574, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 80], 'flops': 2373281365952}, 'timestamp': '2025-09-10 02:30:15.946897', 'step': 3574, 'epoch': 1} {'type': 'loss', 'content': 0.1656392514705658, 'timestamp': '2025-09-10 02:30:15.949187', 'step': 3575, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 2847885091968}, 'timestamp': '2025-09-10 02:30:15.979722', 'step': 3575, 'epoch': 1} {'type': 'loss', 'content': 0.23595890402793884, 'timestamp': '2025-09-10 02:30:16.005516', 'step': 3576, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 128], 'flops': 3797092544000}, 'timestamp': '2025-09-10 02:30:16.037773', 'step': 3576, 'epoch': 1} {'type': 'loss', 'content': 0.24830389022827148, 'timestamp': '2025-09-10 02:30:16.039981', 'step': 3577, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 128], 'flops': 3797092544000}, 'timestamp': '2025-09-10 02:30:16.070621', 'step': 3577, 'epoch': 1} {'type': 'loss', 'content': 0.19512905180454254, 'timestamp': '2025-09-10 02:30:16.072553', 'step': 3578, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 128], 'flops': 3797092544000}, 'timestamp': '2025-09-10 02:30:16.102990', 'step': 3578, 'epoch': 1} {'type': 'loss', 'content': 0.08177857846021652, 'timestamp': '2025-09-10 02:30:16.105688', 'step': 3579, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 2847885091968}, 'timestamp': '2025-09-10 02:30:16.136992', 'step': 3579, 'epoch': 1} {'type': 'loss', 'content': 0.08539135754108429, 'timestamp': '2025-09-10 02:30:16.160611', 'step': 3580, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 3322488817984}, 'timestamp': '2025-09-10 02:30:16.191286', 'step': 3580, 'epoch': 1} {'type': 'loss', 'content': 0.10395694524049759, 'timestamp': '2025-09-10 02:30:16.193544', 'step': 3581, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 128], 'flops': 3797092544000}, 'timestamp': '2025-09-10 02:30:16.225906', 'step': 3581, 'epoch': 1} {'type': 'loss', 'content': 0.12892693281173706, 'timestamp': '2025-09-10 02:30:16.228519', 'step': 3582, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 2847885091968}, 'timestamp': '2025-09-10 02:30:16.260663', 'step': 3582, 'epoch': 1} {'type': 'loss', 'content': 0.16468381881713867, 'timestamp': '2025-09-10 02:30:16.262916', 'step': 3583, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 144], 'flops': 4271696270016}, 'timestamp': '2025-09-10 02:30:16.294085', 'step': 3583, 'epoch': 1} {'type': 'loss', 'content': 0.15726809203624725, 'timestamp': '2025-09-10 02:30:16.318082', 'step': 3584, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 2847885091968}, 'timestamp': '2025-09-10 02:30:16.349949', 'step': 3584, 'epoch': 1} {'type': 'loss', 'content': 0.20334669947624207, 'timestamp': '2025-09-10 02:30:16.352515', 'step': 3585, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 3322488817984}, 'timestamp': '2025-09-10 02:30:16.384076', 'step': 3585, 'epoch': 1} {'type': 'loss', 'content': 0.28573349118232727, 'timestamp': '2025-09-10 02:30:16.386354', 'step': 3586, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 2847885091968}, 'timestamp': '2025-09-10 02:30:16.417783', 'step': 3586, 'epoch': 1} {'type': 'loss', 'content': 0.13010144233703613, 'timestamp': '2025-09-10 02:30:16.420043', 'step': 3587, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 2847885091968}, 'timestamp': '2025-09-10 02:30:16.452483', 'step': 3587, 'epoch': 1} {'type': 'loss', 'content': 0.12962207198143005, 'timestamp': '2025-09-10 02:30:16.476736', 'step': 3588, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 3322488817984}, 'timestamp': '2025-09-10 02:30:16.508363', 'step': 3588, 'epoch': 1} {'type': 'loss', 'content': 0.14719103276729584, 'timestamp': '2025-09-10 02:30:16.510637', 'step': 3589, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 3322488817984}, 'timestamp': '2025-09-10 02:30:16.543130', 'step': 3589, 'epoch': 1} {'type': 'loss', 'content': 0.21215131878852844, 'timestamp': '2025-09-10 02:30:16.545571', 'step': 3590, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 3322488817984}, 'timestamp': '2025-09-10 02:30:16.576611', 'step': 3590, 'epoch': 1} {'type': 'loss', 'content': 0.1089826226234436, 'timestamp': '2025-09-10 02:30:16.578871', 'step': 3591, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 2847885091968}, 'timestamp': '2025-09-10 02:30:16.609972', 'step': 3591, 'epoch': 1} {'type': 'loss', 'content': 0.1240103468298912, 'timestamp': '2025-09-10 02:30:16.633849', 'step': 3592, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 144], 'flops': 4271696270016}, 'timestamp': '2025-09-10 02:30:16.665509', 'step': 3592, 'epoch': 1} {'type': 'loss', 'content': 0.14972324669361115, 'timestamp': '2025-09-10 02:30:16.667896', 'step': 3593, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 3322488817984}, 'timestamp': '2025-09-10 02:30:16.700451', 'step': 3593, 'epoch': 1} {'type': 'loss', 'content': 0.19196729362010956, 'timestamp': '2025-09-10 02:30:16.703412', 'step': 3594, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 128], 'flops': 3797092544000}, 'timestamp': '2025-09-10 02:30:16.736502', 'step': 3594, 'epoch': 1} {'type': 'loss', 'content': 0.14164559543132782, 'timestamp': '2025-09-10 02:30:16.738516', 'step': 3595, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 128], 'flops': 3797092544000}, 'timestamp': '2025-09-10 02:30:16.769672', 'step': 3595, 'epoch': 1} {'type': 'loss', 'content': 0.09529614448547363, 'timestamp': '2025-09-10 02:30:16.793610', 'step': 3596, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 144], 'flops': 4271696270016}, 'timestamp': '2025-09-10 02:30:16.829481', 'step': 3596, 'epoch': 1} {'type': 'loss', 'content': 0.2153232991695404, 'timestamp': '2025-09-10 02:30:16.834911', 'step': 3597, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 160], 'flops': 4746299996032}, 'timestamp': '2025-09-10 02:30:16.867143', 'step': 3597, 'epoch': 1} {'type': 'loss', 'content': 0.11157292872667313, 'timestamp': '2025-09-10 02:30:16.869828', 'step': 3598, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 3322488817984}, 'timestamp': '2025-09-10 02:30:16.902658', 'step': 3598, 'epoch': 1} {'type': 'loss', 'content': 0.13893848657608032, 'timestamp': '2025-09-10 02:30:16.906708', 'step': 3599, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 2847885091968}, 'timestamp': '2025-09-10 02:30:16.947792', 'step': 3599, 'epoch': 1} {'type': 'loss', 'content': 0.21918414533138275, 'timestamp': '2025-09-10 02:30:16.972072', 'step': 3600, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 3322488817984}, 'timestamp': '2025-09-10 02:30:17.003490', 'step': 3600, 'epoch': 1} {'type': 'loss', 'content': 0.1842346042394638, 'timestamp': '2025-09-10 02:30:17.005912', 'step': 3601, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 3322488817984}, 'timestamp': '2025-09-10 02:30:17.040809', 'step': 3601, 'epoch': 1} {'type': 'loss', 'content': 0.11769503355026245, 'timestamp': '2025-09-10 02:30:17.043320', 'step': 3602, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 128], 'flops': 3797092544000}, 'timestamp': '2025-09-10 02:30:17.075151', 'step': 3602, 'epoch': 1} {'type': 'loss', 'content': 0.10792194306850433, 'timestamp': '2025-09-10 02:30:17.077732', 'step': 3603, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 128], 'flops': 3797092544000}, 'timestamp': '2025-09-10 02:30:17.108617', 'step': 3603, 'epoch': 1} {'type': 'loss', 'content': 0.17331819236278534, 'timestamp': '2025-09-10 02:30:17.132230', 'step': 3604, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 3322488817984}, 'timestamp': '2025-09-10 02:30:17.163713', 'step': 3604, 'epoch': 1} {'type': 'loss', 'content': 0.09271150082349777, 'timestamp': '2025-09-10 02:30:17.165778', 'step': 3605, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 144], 'flops': 4271696270016}, 'timestamp': '2025-09-10 02:30:17.197497', 'step': 3605, 'epoch': 1} {'type': 'loss', 'content': 0.14619693160057068, 'timestamp': '2025-09-10 02:30:17.200098', 'step': 3606, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 3322488817984}, 'timestamp': '2025-09-10 02:30:17.235709', 'step': 3606, 'epoch': 1} {'type': 'loss', 'content': 0.2715449333190918, 'timestamp': '2025-09-10 02:30:17.238092', 'step': 3607, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 3322488817984}, 'timestamp': '2025-09-10 02:30:17.270078', 'step': 3607, 'epoch': 1} {'type': 'loss', 'content': 0.24956218898296356, 'timestamp': '2025-09-10 02:30:17.294280', 'step': 3608, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 3322488817984}, 'timestamp': '2025-09-10 02:30:17.325357', 'step': 3608, 'epoch': 1} {'type': 'loss', 'content': 0.1655322015285492, 'timestamp': '2025-09-10 02:30:17.328149', 'step': 3609, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 128], 'flops': 3797092544000}, 'timestamp': '2025-09-10 02:30:17.359057', 'step': 3609, 'epoch': 1} {'type': 'loss', 'content': 0.14733751118183136, 'timestamp': '2025-09-10 02:30:17.361945', 'step': 3610, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 3322488817984}, 'timestamp': '2025-09-10 02:30:17.393320', 'step': 3610, 'epoch': 1} {'type': 'loss', 'content': 0.2601996660232544, 'timestamp': '2025-09-10 02:30:17.395742', 'step': 3611, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 2847885091968}, 'timestamp': '2025-09-10 02:30:17.427932', 'step': 3611, 'epoch': 1} {'type': 'loss', 'content': 0.19863681495189667, 'timestamp': '2025-09-10 02:30:17.452105', 'step': 3612, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 80], 'flops': 2373281365952}, 'timestamp': '2025-09-10 02:30:17.483278', 'step': 3612, 'epoch': 1} {'type': 'loss', 'content': 0.1447916030883789, 'timestamp': '2025-09-10 02:30:17.485863', 'step': 3613, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 3322488817984}, 'timestamp': '2025-09-10 02:30:17.516718', 'step': 3613, 'epoch': 1} {'type': 'loss', 'content': 0.1895916759967804, 'timestamp': '2025-09-10 02:30:17.519585', 'step': 3614, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 144], 'flops': 4271696270016}, 'timestamp': '2025-09-10 02:30:17.551646', 'step': 3614, 'epoch': 1} {'type': 'loss', 'content': 0.09778494387865067, 'timestamp': '2025-09-10 02:30:17.554112', 'step': 3615, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 2847885091968}, 'timestamp': '2025-09-10 02:30:17.584635', 'step': 3615, 'epoch': 1} {'type': 'loss', 'content': 0.19183409214019775, 'timestamp': '2025-09-10 02:30:17.609046', 'step': 3616, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 80], 'flops': 2373281365952}, 'timestamp': '2025-09-10 02:30:17.641628', 'step': 3616, 'epoch': 1} {'type': 'loss', 'content': 0.12379614263772964, 'timestamp': '2025-09-10 02:30:17.644718', 'step': 3617, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 128], 'flops': 3797092544000}, 'timestamp': '2025-09-10 02:30:17.676153', 'step': 3617, 'epoch': 1} {'type': 'loss', 'content': 0.1727534830570221, 'timestamp': '2025-09-10 02:30:17.678632', 'step': 3618, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 3322488817984}, 'timestamp': '2025-09-10 02:30:17.709670', 'step': 3618, 'epoch': 1} {'type': 'loss', 'content': 0.1740787923336029, 'timestamp': '2025-09-10 02:30:17.712979', 'step': 3619, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 128], 'flops': 3797092544000}, 'timestamp': '2025-09-10 02:30:17.744188', 'step': 3619, 'epoch': 1} {'type': 'loss', 'content': 0.21224269270896912, 'timestamp': '2025-09-10 02:30:17.767969', 'step': 3620, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 2847885091968}, 'timestamp': '2025-09-10 02:30:17.798371', 'step': 3620, 'epoch': 1} {'type': 'loss', 'content': 0.18187950551509857, 'timestamp': '2025-09-10 02:30:17.801426', 'step': 3621, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 160], 'flops': 4746299996032}, 'timestamp': '2025-09-10 02:30:17.833836', 'step': 3621, 'epoch': 1} {'type': 'loss', 'content': 0.12558740377426147, 'timestamp': '2025-09-10 02:30:17.845391', 'step': 3622, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 128], 'flops': 3797092544000}, 'timestamp': '2025-09-10 02:30:17.886963', 'step': 3622, 'epoch': 1} {'type': 'loss', 'content': 0.10272984206676483, 'timestamp': '2025-09-10 02:30:17.889373', 'step': 3623, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 3322488817984}, 'timestamp': '2025-09-10 02:30:17.920505', 'step': 3623, 'epoch': 1} {'type': 'loss', 'content': 0.2930670380592346, 'timestamp': '2025-09-10 02:30:17.944027', 'step': 3624, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 3322488817984}, 'timestamp': '2025-09-10 02:30:17.974839', 'step': 3624, 'epoch': 1} {'type': 'loss', 'content': 0.21005019545555115, 'timestamp': '2025-09-10 02:30:17.977294', 'step': 3625, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 2847885091968}, 'timestamp': '2025-09-10 02:30:18.008143', 'step': 3625, 'epoch': 1} {'type': 'loss', 'content': 0.08095932006835938, 'timestamp': '2025-09-10 02:30:18.010828', 'step': 3626, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 3322488817984}, 'timestamp': '2025-09-10 02:30:18.044056', 'step': 3626, 'epoch': 1} {'type': 'loss', 'content': 0.15537796914577484, 'timestamp': '2025-09-10 02:30:18.046774', 'step': 3627, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 128], 'flops': 3797092544000}, 'timestamp': '2025-09-10 02:30:18.078844', 'step': 3627, 'epoch': 1} {'type': 'loss', 'content': 0.13215628266334534, 'timestamp': '2025-09-10 02:30:18.102946', 'step': 3628, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 128], 'flops': 3797092544000}, 'timestamp': '2025-09-10 02:30:18.134936', 'step': 3628, 'epoch': 1} {'type': 'loss', 'content': 0.23927061259746552, 'timestamp': '2025-09-10 02:30:18.137212', 'step': 3629, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 2847885091968}, 'timestamp': '2025-09-10 02:30:18.168425', 'step': 3629, 'epoch': 1} {'type': 'loss', 'content': 0.17642073333263397, 'timestamp': '2025-09-10 02:30:18.172559', 'step': 3630, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 128], 'flops': 3797092544000}, 'timestamp': '2025-09-10 02:30:18.205564', 'step': 3630, 'epoch': 1} {'type': 'loss', 'content': 0.2036905735731125, 'timestamp': '2025-09-10 02:30:18.208152', 'step': 3631, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 3322488817984}, 'timestamp': '2025-09-10 02:30:18.239700', 'step': 3631, 'epoch': 1} {'type': 'loss', 'content': 0.18147936463356018, 'timestamp': '2025-09-10 02:30:18.265420', 'step': 3632, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 3322488817984}, 'timestamp': '2025-09-10 02:30:18.296754', 'step': 3632, 'epoch': 1} {'type': 'loss', 'content': 0.2563856542110443, 'timestamp': '2025-09-10 02:30:18.301167', 'step': 3633, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 3322488817984}, 'timestamp': '2025-09-10 02:30:18.334601', 'step': 3633, 'epoch': 1} {'type': 'loss', 'content': 0.1813947558403015, 'timestamp': '2025-09-10 02:30:18.338826', 'step': 3634, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 80], 'flops': 2373281365952}, 'timestamp': '2025-09-10 02:30:18.372961', 'step': 3634, 'epoch': 1} {'type': 'loss', 'content': 0.1076381579041481, 'timestamp': '2025-09-10 02:30:18.375435', 'step': 3635, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 2847885091968}, 'timestamp': '2025-09-10 02:30:18.410412', 'step': 3635, 'epoch': 1} {'type': 'loss', 'content': 0.2317906767129898, 'timestamp': '2025-09-10 02:30:18.434431', 'step': 3636, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 160], 'flops': 4746299996032}, 'timestamp': '2025-09-10 02:30:18.471095', 'step': 3636, 'epoch': 1} {'type': 'loss', 'content': 0.1546238511800766, 'timestamp': '2025-09-10 02:30:18.473892', 'step': 3637, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 2847885091968}, 'timestamp': '2025-09-10 02:30:18.511111', 'step': 3637, 'epoch': 1} {'type': 'loss', 'content': 0.14637070894241333, 'timestamp': '2025-09-10 02:30:18.513783', 'step': 3638, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 128], 'flops': 3797092544000}, 'timestamp': '2025-09-10 02:30:18.547372', 'step': 3638, 'epoch': 1} {'type': 'loss', 'content': 0.1701122373342514, 'timestamp': '2025-09-10 02:30:18.550454', 'step': 3639, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 128], 'flops': 3797092544000}, 'timestamp': '2025-09-10 02:30:18.582769', 'step': 3639, 'epoch': 1} {'type': 'loss', 'content': 0.1890607327222824, 'timestamp': '2025-09-10 02:30:18.606339', 'step': 3640, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 144], 'flops': 4271696270016}, 'timestamp': '2025-09-10 02:30:18.638494', 'step': 3640, 'epoch': 1} {'type': 'loss', 'content': 0.20386043190956116, 'timestamp': '2025-09-10 02:30:18.641699', 'step': 3641, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 2847885091968}, 'timestamp': '2025-09-10 02:30:18.676413', 'step': 3641, 'epoch': 1} {'type': 'loss', 'content': 0.17365694046020508, 'timestamp': '2025-09-10 02:30:18.679738', 'step': 3642, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 3322488817984}, 'timestamp': '2025-09-10 02:30:18.714602', 'step': 3642, 'epoch': 1} {'type': 'loss', 'content': 0.13885320723056793, 'timestamp': '2025-09-10 02:30:18.717529', 'step': 3643, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 128], 'flops': 3797092544000}, 'timestamp': '2025-09-10 02:30:18.755084', 'step': 3643, 'epoch': 1} {'type': 'loss', 'content': 0.10015822947025299, 'timestamp': '2025-09-10 02:30:18.781016', 'step': 3644, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 144], 'flops': 4271696270016}, 'timestamp': '2025-09-10 02:30:18.814638', 'step': 3644, 'epoch': 1} {'type': 'loss', 'content': 0.08653202652931213, 'timestamp': '2025-09-10 02:30:18.820601', 'step': 3645, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 144], 'flops': 4271696270016}, 'timestamp': '2025-09-10 02:30:18.869231', 'step': 3645, 'epoch': 1} {'type': 'loss', 'content': 0.13885526359081268, 'timestamp': '2025-09-10 02:30:18.872009', 'step': 3646, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 3322488817984}, 'timestamp': '2025-09-10 02:30:18.903734', 'step': 3646, 'epoch': 1} {'type': 'loss', 'content': 0.16592863202095032, 'timestamp': '2025-09-10 02:30:18.906153', 'step': 3647, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 128], 'flops': 3797092544000}, 'timestamp': '2025-09-10 02:30:18.941663', 'step': 3647, 'epoch': 1} {'type': 'loss', 'content': 0.2641632854938507, 'timestamp': '2025-09-10 02:30:18.965696', 'step': 3648, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 3322488817984}, 'timestamp': '2025-09-10 02:30:18.997551', 'step': 3648, 'epoch': 1} {'type': 'loss', 'content': 0.18279831111431122, 'timestamp': '2025-09-10 02:30:19.000234', 'step': 3649, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 144], 'flops': 4271696270016}, 'timestamp': '2025-09-10 02:30:19.033877', 'step': 3649, 'epoch': 1} {'type': 'loss', 'content': 0.2888513207435608, 'timestamp': '2025-09-10 02:30:19.036236', 'step': 3650, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 2847885091968}, 'timestamp': '2025-09-10 02:30:19.068125', 'step': 3650, 'epoch': 1} {'type': 'loss', 'content': 0.14251987636089325, 'timestamp': '2025-09-10 02:30:19.070285', 'step': 3651, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 352], 'flops': 10441544708224}, 'timestamp': '2025-09-10 02:30:19.124657', 'step': 3651, 'epoch': 1} {'type': 'loss', 'content': 0.1216425746679306, 'timestamp': '2025-09-10 02:30:19.152578', 'step': 3652, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 3322488817984}, 'timestamp': '2025-09-10 02:30:19.186733', 'step': 3652, 'epoch': 1} {'type': 'loss', 'content': 0.13330936431884766, 'timestamp': '2025-09-10 02:30:19.189132', 'step': 3653, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 2847885091968}, 'timestamp': '2025-09-10 02:30:19.220465', 'step': 3653, 'epoch': 1} {'type': 'loss', 'content': 0.14585918188095093, 'timestamp': '2025-09-10 02:30:19.225180', 'step': 3654, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 2847885091968}, 'timestamp': '2025-09-10 02:30:19.260471', 'step': 3654, 'epoch': 1} {'type': 'loss', 'content': 0.12681782245635986, 'timestamp': '2025-09-10 02:30:19.262628', 'step': 3655, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 2847885091968}, 'timestamp': '2025-09-10 02:30:19.294281', 'step': 3655, 'epoch': 1} {'type': 'loss', 'content': 0.14324982464313507, 'timestamp': '2025-09-10 02:30:19.319259', 'step': 3656, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 3322488817984}, 'timestamp': '2025-09-10 02:30:19.352562', 'step': 3656, 'epoch': 1} {'type': 'loss', 'content': 0.19469283521175385, 'timestamp': '2025-09-10 02:30:19.356523', 'step': 3657, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 2847885091968}, 'timestamp': '2025-09-10 02:30:19.389074', 'step': 3657, 'epoch': 1} {'type': 'loss', 'content': 0.16296446323394775, 'timestamp': '2025-09-10 02:30:19.392154', 'step': 3658, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 3322488817984}, 'timestamp': '2025-09-10 02:30:19.427077', 'step': 3658, 'epoch': 1} {'type': 'loss', 'content': 0.12207749485969543, 'timestamp': '2025-09-10 02:30:19.434671', 'step': 3659, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 144], 'flops': 4271696270016}, 'timestamp': '2025-09-10 02:30:19.477807', 'step': 3659, 'epoch': 1} {'type': 'loss', 'content': 0.29525864124298096, 'timestamp': '2025-09-10 02:30:19.501492', 'step': 3660, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 2847885091968}, 'timestamp': '2025-09-10 02:30:19.540843', 'step': 3660, 'epoch': 1} {'type': 'loss', 'content': 0.20346680283546448, 'timestamp': '2025-09-10 02:30:19.543240', 'step': 3661, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 144], 'flops': 4271696270016}, 'timestamp': '2025-09-10 02:30:19.575096', 'step': 3661, 'epoch': 1} {'type': 'loss', 'content': 0.1959007829427719, 'timestamp': '2025-09-10 02:30:19.577976', 'step': 3662, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 2847885091968}, 'timestamp': '2025-09-10 02:30:19.614620', 'step': 3662, 'epoch': 1} {'type': 'loss', 'content': 0.26288115978240967, 'timestamp': '2025-09-10 02:30:19.617268', 'step': 3663, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 2847885091968}, 'timestamp': '2025-09-10 02:30:19.648391', 'step': 3663, 'epoch': 1} {'type': 'loss', 'content': 0.15045087039470673, 'timestamp': '2025-09-10 02:30:19.672159', 'step': 3664, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 176], 'flops': 5220903722048}, 'timestamp': '2025-09-10 02:30:19.703325', 'step': 3664, 'epoch': 1} {'type': 'loss', 'content': 0.13519926369190216, 'timestamp': '2025-09-10 02:30:19.706875', 'step': 3665, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 128], 'flops': 3797092544000}, 'timestamp': '2025-09-10 02:30:19.748176', 'step': 3665, 'epoch': 1} {'type': 'loss', 'content': 0.2005123645067215, 'timestamp': '2025-09-10 02:30:19.750549', 'step': 3666, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 128], 'flops': 3797092544000}, 'timestamp': '2025-09-10 02:30:19.782677', 'step': 3666, 'epoch': 1} {'type': 'loss', 'content': 0.2011776864528656, 'timestamp': '2025-09-10 02:30:19.787373', 'step': 3667, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 2847885091968}, 'timestamp': '2025-09-10 02:30:19.819650', 'step': 3667, 'epoch': 1} {'type': 'loss', 'content': 0.1616307944059372, 'timestamp': '2025-09-10 02:30:19.844023', 'step': 3668, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 144], 'flops': 4271696270016}, 'timestamp': '2025-09-10 02:30:19.894745', 'step': 3668, 'epoch': 1} {'type': 'loss', 'content': 0.13044685125350952, 'timestamp': '2025-09-10 02:30:19.897696', 'step': 3669, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 128], 'flops': 3797092544000}, 'timestamp': '2025-09-10 02:30:19.932782', 'step': 3669, 'epoch': 1} {'type': 'loss', 'content': 0.15174159407615662, 'timestamp': '2025-09-10 02:30:19.936973', 'step': 3670, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 2847885091968}, 'timestamp': '2025-09-10 02:30:19.973193', 'step': 3670, 'epoch': 1} {'type': 'loss', 'content': 0.15117746591567993, 'timestamp': '2025-09-10 02:30:19.976535', 'step': 3671, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 144], 'flops': 4271696270016}, 'timestamp': '2025-09-10 02:30:20.009084', 'step': 3671, 'epoch': 1} {'type': 'loss', 'content': 0.24448855221271515, 'timestamp': '2025-09-10 02:30:20.033312', 'step': 3672, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 3322488817984}, 'timestamp': '2025-09-10 02:30:20.072872', 'step': 3672, 'epoch': 1} {'type': 'loss', 'content': 0.10148905217647552, 'timestamp': '2025-09-10 02:30:20.080335', 'step': 3673, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 128], 'flops': 3797092544000}, 'timestamp': '2025-09-10 02:30:20.113924', 'step': 3673, 'epoch': 1} {'type': 'loss', 'content': 0.1766008585691452, 'timestamp': '2025-09-10 02:30:20.116378', 'step': 3674, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 3322488817984}, 'timestamp': '2025-09-10 02:30:20.148080', 'step': 3674, 'epoch': 1} {'type': 'loss', 'content': 0.26209935545921326, 'timestamp': '2025-09-10 02:30:20.150709', 'step': 3675, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 128], 'flops': 3797092544000}, 'timestamp': '2025-09-10 02:30:20.182877', 'step': 3675, 'epoch': 1} {'type': 'loss', 'content': 0.07245077937841415, 'timestamp': '2025-09-10 02:30:20.210769', 'step': 3676, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 128], 'flops': 3797092544000}, 'timestamp': '2025-09-10 02:30:20.244866', 'step': 3676, 'epoch': 1} {'type': 'loss', 'content': 0.1589319109916687, 'timestamp': '2025-09-10 02:30:20.247303', 'step': 3677, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 2847885091968}, 'timestamp': '2025-09-10 02:30:20.279127', 'step': 3677, 'epoch': 1} {'type': 'loss', 'content': 0.14608848094940186, 'timestamp': '2025-09-10 02:30:20.281763', 'step': 3678, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 3322488817984}, 'timestamp': '2025-09-10 02:30:20.314053', 'step': 3678, 'epoch': 1} {'type': 'loss', 'content': 0.14918895065784454, 'timestamp': '2025-09-10 02:30:20.317005', 'step': 3679, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 3322488817984}, 'timestamp': '2025-09-10 02:30:20.351468', 'step': 3679, 'epoch': 1} {'type': 'loss', 'content': 0.2086973786354065, 'timestamp': '2025-09-10 02:30:20.376882', 'step': 3680, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 192], 'flops': 5695507448064}, 'timestamp': '2025-09-10 02:30:20.409373', 'step': 3680, 'epoch': 1} {'type': 'loss', 'content': 0.1517936885356903, 'timestamp': '2025-09-10 02:30:20.413446', 'step': 3681, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 3322488817984}, 'timestamp': '2025-09-10 02:30:20.446254', 'step': 3681, 'epoch': 1} {'type': 'loss', 'content': 0.1227240115404129, 'timestamp': '2025-09-10 02:30:20.449922', 'step': 3682, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 3322488817984}, 'timestamp': '2025-09-10 02:30:20.482438', 'step': 3682, 'epoch': 1} {'type': 'loss', 'content': 0.14939939975738525, 'timestamp': '2025-09-10 02:30:20.486882', 'step': 3683, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 3322488817984}, 'timestamp': '2025-09-10 02:30:20.520083', 'step': 3683, 'epoch': 1} {'type': 'loss', 'content': 0.15178906917572021, 'timestamp': '2025-09-10 02:30:20.544178', 'step': 3684, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 128], 'flops': 3797092544000}, 'timestamp': '2025-09-10 02:30:20.578107', 'step': 3684, 'epoch': 1} {'type': 'loss', 'content': 0.12508952617645264, 'timestamp': '2025-09-10 02:30:20.580876', 'step': 3685, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 3322488817984}, 'timestamp': '2025-09-10 02:30:20.622112', 'step': 3685, 'epoch': 1} {'type': 'loss', 'content': 0.33752164244651794, 'timestamp': '2025-09-10 02:30:20.629565', 'step': 3686, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 2847885091968}, 'timestamp': '2025-09-10 02:30:20.667192', 'step': 3686, 'epoch': 1} {'type': 'loss', 'content': 0.1927785575389862, 'timestamp': '2025-09-10 02:30:20.669859', 'step': 3687, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 3322488817984}, 'timestamp': '2025-09-10 02:30:20.701446', 'step': 3687, 'epoch': 1} {'type': 'loss', 'content': 0.1836523860692978, 'timestamp': '2025-09-10 02:30:20.738495', 'step': 3688, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 144], 'flops': 4271696270016}, 'timestamp': '2025-09-10 02:30:21.184177', 'step': 3688, 'epoch': 1} {'type': 'loss', 'content': 0.19116972386837006, 'timestamp': '2025-09-10 02:30:21.191810', 'step': 3689, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 128], 'flops': 3797092544000}, 'timestamp': '2025-09-10 02:30:21.225483', 'step': 3689, 'epoch': 1} {'type': 'loss', 'content': 0.21726927161216736, 'timestamp': '2025-09-10 02:30:21.228247', 'step': 3690, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 2847885091968}, 'timestamp': '2025-09-10 02:30:21.265084', 'step': 3690, 'epoch': 1} {'type': 'loss', 'content': 0.15814080834388733, 'timestamp': '2025-09-10 02:30:21.271614', 'step': 3691, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 128], 'flops': 3797092544000}, 'timestamp': '2025-09-10 02:30:21.318518', 'step': 3691, 'epoch': 1} {'type': 'loss', 'content': 0.14305271208286285, 'timestamp': '2025-09-10 02:30:21.342273', 'step': 3692, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 2847885091968}, 'timestamp': '2025-09-10 02:30:21.379922', 'step': 3692, 'epoch': 1} {'type': 'loss', 'content': 0.14815711975097656, 'timestamp': '2025-09-10 02:30:21.382594', 'step': 3693, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 3322488817984}, 'timestamp': '2025-09-10 02:30:21.421963', 'step': 3693, 'epoch': 1} {'type': 'loss', 'content': 0.2521477937698364, 'timestamp': '2025-09-10 02:30:21.424094', 'step': 3694, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 3322488817984}, 'timestamp': '2025-09-10 02:30:21.455812', 'step': 3694, 'epoch': 1} {'type': 'loss', 'content': 0.2640446722507477, 'timestamp': '2025-09-10 02:30:21.461942', 'step': 3695, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 3322488817984}, 'timestamp': '2025-09-10 02:30:21.495754', 'step': 3695, 'epoch': 1} {'type': 'loss', 'content': 0.13586461544036865, 'timestamp': '2025-09-10 02:30:21.520039', 'step': 3696, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 176], 'flops': 5220903722048}, 'timestamp': '2025-09-10 02:30:21.556319', 'step': 3696, 'epoch': 1} {'type': 'loss', 'content': 0.0714545026421547, 'timestamp': '2025-09-10 02:30:21.558958', 'step': 3697, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 3322488817984}, 'timestamp': '2025-09-10 02:30:21.597060', 'step': 3697, 'epoch': 1} {'type': 'loss', 'content': 0.1879846602678299, 'timestamp': '2025-09-10 02:30:21.600099', 'step': 3698, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 3322488817984}, 'timestamp': '2025-09-10 02:30:21.634344', 'step': 3698, 'epoch': 1} {'type': 'loss', 'content': 0.12096007168292999, 'timestamp': '2025-09-10 02:30:21.636799', 'step': 3699, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 144], 'flops': 4271696270016}, 'timestamp': '2025-09-10 02:30:21.671381', 'step': 3699, 'epoch': 1} {'type': 'loss', 'content': 0.07344863563776016, 'timestamp': '2025-09-10 02:30:21.695254', 'step': 3700, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 2847885091968}, 'timestamp': '2025-09-10 02:30:21.730813', 'step': 3700, 'epoch': 1} {'type': 'loss', 'content': 0.2069837898015976, 'timestamp': '2025-09-10 02:30:21.733844', 'step': 3701, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 3322488817984}, 'timestamp': '2025-09-10 02:30:21.769706', 'step': 3701, 'epoch': 1} {'type': 'loss', 'content': 0.15127040445804596, 'timestamp': '2025-09-10 02:30:21.772282', 'step': 3702, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 144], 'flops': 4271696270016}, 'timestamp': '2025-09-10 02:30:21.803998', 'step': 3702, 'epoch': 1} {'type': 'loss', 'content': 0.17084956169128418, 'timestamp': '2025-09-10 02:30:21.807210', 'step': 3703, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 2847885091968}, 'timestamp': '2025-09-10 02:30:21.840806', 'step': 3703, 'epoch': 1} {'type': 'loss', 'content': 0.2563324570655823, 'timestamp': '2025-09-10 02:30:21.875582', 'step': 3704, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 128], 'flops': 3797092544000}, 'timestamp': '2025-09-10 02:30:21.909055', 'step': 3704, 'epoch': 1} {'type': 'loss', 'content': 0.14763499796390533, 'timestamp': '2025-09-10 02:30:21.914674', 'step': 3705, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 3322488817984}, 'timestamp': '2025-09-10 02:30:21.952726', 'step': 3705, 'epoch': 1} {'type': 'loss', 'content': 0.13680550456047058, 'timestamp': '2025-09-10 02:30:21.956981', 'step': 3706, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 3322488817984}, 'timestamp': '2025-09-10 02:30:21.996634', 'step': 3706, 'epoch': 1} {'type': 'loss', 'content': 0.21205221116542816, 'timestamp': '2025-09-10 02:30:22.003586', 'step': 3707, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 3322488817984}, 'timestamp': '2025-09-10 02:30:22.044281', 'step': 3707, 'epoch': 1} {'type': 'loss', 'content': 0.1983245611190796, 'timestamp': '2025-09-10 02:30:22.068606', 'step': 3708, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 128], 'flops': 3797092544000}, 'timestamp': '2025-09-10 02:30:22.103533', 'step': 3708, 'epoch': 1} {'type': 'loss', 'content': 0.1518266648054123, 'timestamp': '2025-09-10 02:30:22.106008', 'step': 3709, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 144], 'flops': 4271696270016}, 'timestamp': '2025-09-10 02:30:22.138702', 'step': 3709, 'epoch': 1} {'type': 'loss', 'content': 0.13189877569675446, 'timestamp': '2025-09-10 02:30:22.150601', 'step': 3710, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 2847885091968}, 'timestamp': '2025-09-10 02:30:22.183252', 'step': 3710, 'epoch': 1} {'type': 'loss', 'content': 0.19496269524097443, 'timestamp': '2025-09-10 02:30:22.185868', 'step': 3711, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 128], 'flops': 3797092544000}, 'timestamp': '2025-09-10 02:30:22.217981', 'step': 3711, 'epoch': 1} {'type': 'loss', 'content': 0.14498281478881836, 'timestamp': '2025-09-10 02:30:22.242156', 'step': 3712, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 3322488817984}, 'timestamp': '2025-09-10 02:30:22.275417', 'step': 3712, 'epoch': 1} {'type': 'loss', 'content': 0.2332724630832672, 'timestamp': '2025-09-10 02:30:22.281889', 'step': 3713, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 3322488817984}, 'timestamp': '2025-09-10 02:30:22.317119', 'step': 3713, 'epoch': 1} {'type': 'loss', 'content': 0.16590821743011475, 'timestamp': '2025-09-10 02:30:22.319642', 'step': 3714, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 80], 'flops': 2373281365952}, 'timestamp': '2025-09-10 02:30:22.353511', 'step': 3714, 'epoch': 1} {'type': 'loss', 'content': 0.19823460280895233, 'timestamp': '2025-09-10 02:30:22.356000', 'step': 3715, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 3322488817984}, 'timestamp': '2025-09-10 02:30:22.393469', 'step': 3715, 'epoch': 1} {'type': 'loss', 'content': 0.2693086266517639, 'timestamp': '2025-09-10 02:30:22.420053', 'step': 3716, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 3322488817984}, 'timestamp': '2025-09-10 02:30:22.457107', 'step': 3716, 'epoch': 1} {'type': 'loss', 'content': 0.12983949482440948, 'timestamp': '2025-09-10 02:30:22.461745', 'step': 3717, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 2847885091968}, 'timestamp': '2025-09-10 02:30:22.500854', 'step': 3717, 'epoch': 1} {'type': 'loss', 'content': 0.17333300411701202, 'timestamp': '2025-09-10 02:30:22.503965', 'step': 3718, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 128], 'flops': 3797092544000}, 'timestamp': '2025-09-10 02:30:22.535947', 'step': 3718, 'epoch': 1} {'type': 'loss', 'content': 0.16865845024585724, 'timestamp': '2025-09-10 02:30:22.538649', 'step': 3719, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 3322488817984}, 'timestamp': '2025-09-10 02:30:22.572142', 'step': 3719, 'epoch': 1} {'type': 'loss', 'content': 0.13465365767478943, 'timestamp': '2025-09-10 02:30:22.597261', 'step': 3720, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 160], 'flops': 4746299996032}, 'timestamp': '2025-09-10 02:30:22.634804', 'step': 3720, 'epoch': 1} {'type': 'loss', 'content': 0.17252732813358307, 'timestamp': '2025-09-10 02:30:22.637207', 'step': 3721, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 2847885091968}, 'timestamp': '2025-09-10 02:30:22.669936', 'step': 3721, 'epoch': 1} {'type': 'loss', 'content': 0.11262159049510956, 'timestamp': '2025-09-10 02:30:22.672244', 'step': 3722, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 2847885091968}, 'timestamp': '2025-09-10 02:30:22.705975', 'step': 3722, 'epoch': 1} {'type': 'loss', 'content': 0.20489436388015747, 'timestamp': '2025-09-10 02:30:22.710756', 'step': 3723, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 2847885091968}, 'timestamp': '2025-09-10 02:30:22.746228', 'step': 3723, 'epoch': 1} {'type': 'loss', 'content': 0.21307498216629028, 'timestamp': '2025-09-10 02:30:22.770194', 'step': 3724, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 2847885091968}, 'timestamp': '2025-09-10 02:30:22.805701', 'step': 3724, 'epoch': 1} {'type': 'loss', 'content': 0.13009239733219147, 'timestamp': '2025-09-10 02:30:22.808545', 'step': 3725, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 128], 'flops': 3797092544000}, 'timestamp': '2025-09-10 02:30:22.845081', 'step': 3725, 'epoch': 1} {'type': 'loss', 'content': 0.18604010343551636, 'timestamp': '2025-09-10 02:30:22.858357', 'step': 3726, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 2847885091968}, 'timestamp': '2025-09-10 02:30:22.899427', 'step': 3726, 'epoch': 1} {'type': 'loss', 'content': 0.16456648707389832, 'timestamp': '2025-09-10 02:30:22.902227', 'step': 3727, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 3322488817984}, 'timestamp': '2025-09-10 02:30:22.935436', 'step': 3727, 'epoch': 1} {'type': 'loss', 'content': 0.15067458152770996, 'timestamp': '2025-09-10 02:30:22.958965', 'step': 3728, 'epoch': 1} {'type': 'flops', 'content': [{'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1582003754624}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1898404492032}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1898404492032}, {'type': 'perplexity', 'in_batch_dim': [8, 144], 'batch_size': 8, 'flops': 2847606704256}, {'type': 'perplexity', 'in_batch_dim': [8, 64], 'batch_size': 8, 'flops': 1265603017216}, {'type': 'perplexity', 'in_batch_dim': [8, 112], 'batch_size': 8, 'flops': 2214805229440}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1898404492032}, {'type': 'perplexity', 'in_batch_dim': [8, 112], 'batch_size': 8, 'flops': 2214805229440}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1898404492032}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1898404492032}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1898404492032}, {'type': 'perplexity', 'in_batch_dim': [8, 112], 'batch_size': 8, 'flops': 2214805229440}, {'type': 'perplexity', 'in_batch_dim': [8, 112], 'batch_size': 8, 'flops': 2214805229440}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1582003754624}, {'type': 'perplexity', 'in_batch_dim': [8, 144], 'batch_size': 8, 'flops': 2847606704256}, {'type': 'perplexity', 'in_batch_dim': [8, 112], 'batch_size': 8, 'flops': 2214805229440}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1582003754624}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1898404492032}, {'type': 'perplexity', 'in_batch_dim': [8, 64], 'batch_size': 8, 'flops': 1265603017216}, {'type': 'perplexity', 'in_batch_dim': [8, 64], 'batch_size': 8, 'flops': 1265603017216}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1898404492032}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1582003754624}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1582003754624}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1582003754624}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1582003754624}, {'type': 'perplexity', 'in_batch_dim': [8, 64], 'batch_size': 8, 'flops': 1265603017216}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1898404492032}, {'type': 'perplexity', 'in_batch_dim': [8, 64], 'batch_size': 8, 'flops': 1265603017216}, {'type': 'perplexity', 'in_batch_dim': [8, 64], 'batch_size': 8, 'flops': 1265603017216}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1582003754624}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1582003754624}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1582003754624}, {'type': 'perplexity', 'in_batch_dim': [8, 112], 'batch_size': 8, 'flops': 2214805229440}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1582003754624}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1582003754624}, {'type': 'perplexity', 'in_batch_dim': [8, 160], 'batch_size': 8, 'flops': 3164007441664}, {'type': 'perplexity', 'in_batch_dim': [8, 64], 'batch_size': 8, 'flops': 1265603017216}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1898404492032}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1898404492032}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1898404492032}, {'type': 'perplexity', 'in_batch_dim': [8, 64], 'batch_size': 8, 'flops': 1265603017216}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1582003754624}, {'type': 'perplexity', 'in_batch_dim': [8, 64], 'batch_size': 8, 'flops': 1265603017216}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1582003754624}, {'type': 'perplexity', 'in_batch_dim': [8, 64], 'batch_size': 8, 'flops': 1265603017216}, {'type': 'perplexity', 'in_batch_dim': [8, 112], 'batch_size': 8, 'flops': 2214805229440}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1582003754624}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1898404492032}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1898404492032}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1898404492032}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1898404492032}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1898404492032}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1898404492032}, {'type': 'perplexity', 'in_batch_dim': [8, 64], 'batch_size': 8, 'flops': 1265603017216}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1898404492032}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1582003754624}, {'type': 'perplexity', 'in_batch_dim': [8, 64], 'batch_size': 8, 'flops': 1265603017216}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1898404492032}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1898404492032}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1898404492032}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1898404492032}, {'type': 'perplexity', 'in_batch_dim': [8, 128], 'batch_size': 8, 'flops': 2531205966848}, {'type': 'perplexity', 'in_batch_dim': [8, 112], 'batch_size': 8, 'flops': 2214805229440}, {'type': 'perplexity', 'in_batch_dim': [8, 64], 'batch_size': 8, 'flops': 1265603017216}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1582003754624}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1898404492032}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1582003754624}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1898404492032}, {'type': 'perplexity', 'in_batch_dim': [8, 64], 'batch_size': 8, 'flops': 1265603017216}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1898404492032}, {'type': 'perplexity', 'in_batch_dim': [8, 112], 'batch_size': 8, 'flops': 2214805229440}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1898404492032}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1582003754624}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1582003754624}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1582003754624}, {'type': 'perplexity', 'in_batch_dim': [8, 64], 'batch_size': 8, 'flops': 1265603017216}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1582003754624}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1898404492032}, {'type': 'perplexity', 'in_batch_dim': [8, 64], 'batch_size': 8, 'flops': 1265603017216}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1582003754624}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1898404492032}, {'type': 'perplexity', 'in_batch_dim': [8, 64], 'batch_size': 8, 'flops': 1265603017216}, {'type': 'perplexity', 'in_batch_dim': [8, 112], 'batch_size': 8, 'flops': 2214805229440}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1582003754624}, {'type': 'perplexity', 'in_batch_dim': [8, 144], 'batch_size': 8, 'flops': 2847606704256}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1582003754624}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1582003754624}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1582003754624}, {'type': 'perplexity', 'in_batch_dim': [8, 64], 'batch_size': 8, 'flops': 1265603017216}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1898404492032}, {'type': 'perplexity', 'in_batch_dim': [8, 112], 'batch_size': 8, 'flops': 2214805229440}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1898404492032}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1582003754624}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1582003754624}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1582003754624}, {'type': 'perplexity', 'in_batch_dim': [8, 112], 'batch_size': 8, 'flops': 2214805229440}, {'type': 'perplexity', 'in_batch_dim': [8, 64], 'batch_size': 8, 'flops': 1265603017216}, {'type': 'perplexity', 'in_batch_dim': [8, 112], 'batch_size': 8, 'flops': 2214805229440}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1582003754624}, {'type': 'perplexity', 'in_batch_dim': [8, 64], 'batch_size': 8, 'flops': 1265603017216}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1582003754624}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1582003754624}, {'type': 'perplexity', 'in_batch_dim': [8, 64], 'batch_size': 8, 'flops': 1265603017216}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1582003754624}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1582003754624}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1582003754624}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1898404492032}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1582003754624}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1582003754624}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1898404492032}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1898404492032}, {'type': 'perplexity', 'in_batch_dim': [8, 112], 'batch_size': 8, 'flops': 2214805229440}, {'type': 'perplexity', 'in_batch_dim': [8, 64], 'batch_size': 8, 'flops': 1265603017216}, {'type': 'perplexity', 'in_batch_dim': [8, 112], 'batch_size': 8, 'flops': 2214805229440}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1898404492032}, {'type': 'perplexity', 'in_batch_dim': [8, 112], 'batch_size': 8, 'flops': 2214805229440}, {'type': 'perplexity', 'in_batch_dim': [8, 128], 'batch_size': 8, 'flops': 2531205966848}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1582003754624}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1898404492032}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1898404492032}, {'type': 'perplexity', 'in_batch_dim': [8, 192], 'batch_size': 8, 'flops': 3796808916480}, {'type': 'perplexity', 'in_batch_dim': [8, 64], 'batch_size': 8, 'flops': 1265603017216}, {'type': 'perplexity', 'in_batch_dim': [8, 144], 'batch_size': 8, 'flops': 2847606704256}, {'type': 'perplexity', 'in_batch_dim': [8, 64], 'batch_size': 8, 'flops': 1265603017216}, {'type': 'perplexity', 'in_batch_dim': [8, 144], 'batch_size': 8, 'flops': 2847606704256}, {'type': 'perplexity', 'in_batch_dim': [8, 64], 'batch_size': 8, 'flops': 1265603017216}, {'type': 'perplexity', 'in_batch_dim': [8, 64], 'batch_size': 8, 'flops': 1265603017216}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1898404492032}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1898404492032}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1582003754624}, {'type': 'perplexity', 'in_batch_dim': [8, 128], 'batch_size': 8, 'flops': 2531205966848}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1898404492032}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1898404492032}, {'type': 'perplexity', 'in_batch_dim': [8, 112], 'batch_size': 8, 'flops': 2214805229440}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1582003754624}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1582003754624}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1898404492032}, {'type': 'perplexity', 'in_batch_dim': [8, 64], 'batch_size': 8, 'flops': 1265603017216}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1898404492032}, {'type': 'perplexity', 'in_batch_dim': [8, 112], 'batch_size': 8, 'flops': 2214805229440}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1898404492032}, {'type': 'perplexity', 'in_batch_dim': [8, 64], 'batch_size': 8, 'flops': 1265603017216}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1582003754624}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1898404492032}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1898404492032}, {'type': 'perplexity', 'in_batch_dim': [8, 64], 'batch_size': 8, 'flops': 1265603017216}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1898404492032}, {'type': 'perplexity', 'in_batch_dim': [8, 64], 'batch_size': 8, 'flops': 1265603017216}, {'type': 'perplexity', 'in_batch_dim': [8, 112], 'batch_size': 8, 'flops': 2214805229440}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1582003754624}, {'type': 'perplexity', 'in_batch_dim': [8, 128], 'batch_size': 8, 'flops': 2531205966848}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1582003754624}, {'type': 'perplexity', 'in_batch_dim': [8, 112], 'batch_size': 8, 'flops': 2214805229440}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1898404492032}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1582003754624}, {'type': 'perplexity', 'in_batch_dim': [8, 112], 'batch_size': 8, 'flops': 2214805229440}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1582003754624}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1898404492032}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1898404492032}, {'type': 'perplexity', 'in_batch_dim': [8, 128], 'batch_size': 8, 'flops': 2531205966848}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1898404492032}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1898404492032}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1582003754624}, {'type': 'perplexity', 'in_batch_dim': [8, 112], 'batch_size': 8, 'flops': 2214805229440}, {'type': 'perplexity', 'in_batch_dim': [8, 128], 'batch_size': 8, 'flops': 2531205966848}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1898404492032}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1582003754624}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1582003754624}, {'type': 'perplexity', 'in_batch_dim': [8, 128], 'batch_size': 8, 'flops': 2531205966848}, {'type': 'perplexity', 'in_batch_dim': [8, 112], 'batch_size': 8, 'flops': 2214805229440}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1582003754624}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1582003754624}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1898404492032}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1582003754624}, {'type': 'perplexity', 'in_batch_dim': [8, 64], 'batch_size': 8, 'flops': 1265603017216}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1582003754624}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1582003754624}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1582003754624}, {'type': 'perplexity', 'in_batch_dim': [8, 112], 'batch_size': 8, 'flops': 2214805229440}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1582003754624}, {'type': 'perplexity', 'in_batch_dim': [8, 64], 'batch_size': 8, 'flops': 1265603017216}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1582003754624}, {'type': 'perplexity', 'in_batch_dim': [8, 128], 'batch_size': 8, 'flops': 2531205966848}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1898404492032}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1898404492032}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1582003754624}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1898404492032}, {'type': 'perplexity', 'in_batch_dim': [8, 64], 'batch_size': 8, 'flops': 1265603017216}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1898404492032}, {'type': 'perplexity', 'in_batch_dim': [8, 112], 'batch_size': 8, 'flops': 2214805229440}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1898404492032}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1582003754624}, {'type': 'perplexity', 'in_batch_dim': [8, 112], 'batch_size': 8, 'flops': 2214805229440}, {'type': 'perplexity', 'in_batch_dim': [8, 112], 'batch_size': 8, 'flops': 2214805229440}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1898404492032}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1898404492032}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1898404492032}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1898404492032}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1582003754624}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1582003754624}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1582003754624}, {'type': 'perplexity', 'in_batch_dim': [8, 112], 'batch_size': 8, 'flops': 2214805229440}, {'type': 'perplexity', 'in_batch_dim': [8, 64], 'batch_size': 8, 'flops': 1265603017216}, {'type': 'perplexity', 'in_batch_dim': [8, 144], 'batch_size': 8, 'flops': 2847606704256}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1898404492032}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1898404492032}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1898404492032}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1898404492032}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1898404492032}, {'type': 'perplexity', 'in_batch_dim': [8, 112], 'batch_size': 8, 'flops': 2214805229440}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1582003754624}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1582003754624}, {'type': 'perplexity', 'in_batch_dim': [8, 64], 'batch_size': 8, 'flops': 1265603017216}, {'type': 'perplexity', 'in_batch_dim': [8, 112], 'batch_size': 8, 'flops': 2214805229440}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1582003754624}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1898404492032}, {'type': 'perplexity', 'in_batch_dim': [8, 64], 'batch_size': 8, 'flops': 1265603017216}, {'type': 'perplexity', 'in_batch_dim': [8, 112], 'batch_size': 8, 'flops': 2214805229440}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1898404492032}, {'type': 'perplexity', 'in_batch_dim': [8, 64], 'batch_size': 8, 'flops': 1265603017216}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1582003754624}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1582003754624}, {'type': 'perplexity', 'in_batch_dim': [8, 64], 'batch_size': 8, 'flops': 1265603017216}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1898404492032}, {'type': 'perplexity', 'in_batch_dim': [8, 64], 'batch_size': 8, 'flops': 1265603017216}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1582003754624}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1898404492032}, {'type': 'perplexity', 'in_batch_dim': [8, 112], 'batch_size': 8, 'flops': 2214805229440}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1582003754624}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1898404492032}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1582003754624}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1898404492032}, {'type': 'perplexity', 'in_batch_dim': [8, 112], 'batch_size': 8, 'flops': 2214805229440}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1898404492032}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1898404492032}, {'type': 'perplexity', 'in_batch_dim': [8, 64], 'batch_size': 8, 'flops': 1265603017216}, {'type': 'perplexity', 'in_batch_dim': [8, 64], 'batch_size': 8, 'flops': 1265603017216}, {'type': 'perplexity', 'in_batch_dim': [8, 128], 'batch_size': 8, 'flops': 2531205966848}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1582003754624}, {'type': 'perplexity', 'in_batch_dim': [8, 64], 'batch_size': 8, 'flops': 1265603017216}, {'type': 'perplexity', 'in_batch_dim': [8, 112], 'batch_size': 8, 'flops': 2214805229440}, {'type': 'perplexity', 'in_batch_dim': [8, 112], 'batch_size': 8, 'flops': 2214805229440}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1898404492032}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1582003754624}, {'type': 'perplexity', 'in_batch_dim': [8, 128], 'batch_size': 8, 'flops': 2531205966848}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1898404492032}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1582003754624}, {'type': 'perplexity', 'in_batch_dim': [8, 112], 'batch_size': 8, 'flops': 2214805229440}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1898404492032}, {'type': 'perplexity', 'in_batch_dim': [8, 64], 'batch_size': 8, 'flops': 1265603017216}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1582003754624}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1898404492032}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1898404492032}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1582003754624}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1582003754624}, {'type': 'perplexity', 'in_batch_dim': [8, 64], 'batch_size': 8, 'flops': 1265603017216}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1898404492032}, {'type': 'perplexity', 'in_batch_dim': [8, 112], 'batch_size': 8, 'flops': 2214805229440}, {'type': 'perplexity', 'in_batch_dim': [8, 64], 'batch_size': 8, 'flops': 1265603017216}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1898404492032}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1582003754624}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1582003754624}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1898404492032}, {'type': 'perplexity', 'in_batch_dim': [8, 112], 'batch_size': 8, 'flops': 2214805229440}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1898404492032}, {'type': 'perplexity', 'in_batch_dim': [8, 112], 'batch_size': 8, 'flops': 2214805229440}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1582003754624}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1582003754624}, {'type': 'perplexity', 'in_batch_dim': [8, 64], 'batch_size': 8, 'flops': 1265603017216}, {'type': 'perplexity', 'in_batch_dim': [8, 112], 'batch_size': 8, 'flops': 2214805229440}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1582003754624}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1898404492032}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1582003754624}, {'type': 'perplexity', 'in_batch_dim': [8, 112], 'batch_size': 8, 'flops': 2214805229440}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1898404492032}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1898404492032}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1582003754624}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1898404492032}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1582003754624}, {'type': 'perplexity', 'in_batch_dim': [8, 112], 'batch_size': 8, 'flops': 2214805229440}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1582003754624}, {'type': 'perplexity', 'in_batch_dim': [8, 64], 'batch_size': 8, 'flops': 1265603017216}, {'type': 'perplexity', 'in_batch_dim': [8, 64], 'batch_size': 8, 'flops': 1265603017216}, {'type': 'perplexity', 'in_batch_dim': [8, 48], 'batch_size': 8, 'flops': 949202279808}, {'type': 'perplexity', 'in_batch_dim': [8, 112], 'batch_size': 8, 'flops': 2214805229440}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1582003754624}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1898404492032}, {'type': 'perplexity', 'in_batch_dim': [8, 128], 'batch_size': 8, 'flops': 2531205966848}, {'type': 'perplexity', 'in_batch_dim': [8, 112], 'batch_size': 8, 'flops': 2214805229440}, {'type': 'perplexity', 'in_batch_dim': [8, 128], 'batch_size': 8, 'flops': 2531205966848}, {'type': 'perplexity', 'in_batch_dim': [8, 112], 'batch_size': 8, 'flops': 2214805229440}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1582003754624}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1898404492032}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1582003754624}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1582003754624}, {'type': 'perplexity', 'in_batch_dim': [8, 112], 'batch_size': 8, 'flops': 2214805229440}, {'type': 'perplexity', 'in_batch_dim': [8, 112], 'batch_size': 8, 'flops': 2214805229440}, {'type': 'perplexity', 'in_batch_dim': [8, 64], 'batch_size': 8, 'flops': 1265603017216}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1898404492032}, {'type': 'perplexity', 'in_batch_dim': [8, 112], 'batch_size': 8, 'flops': 2214805229440}, {'type': 'perplexity', 'in_batch_dim': [8, 64], 'batch_size': 8, 'flops': 1265603017216}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1898404492032}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1582003754624}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1898404492032}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1898404492032}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1898404492032}, {'type': 'perplexity', 'in_batch_dim': [8, 48], 'batch_size': 8, 'flops': 949202279808}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1898404492032}, {'type': 'perplexity', 'in_batch_dim': [8, 112], 'batch_size': 8, 'flops': 2214805229440}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1582003754624}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1898404492032}, {'type': 'perplexity', 'in_batch_dim': [8, 64], 'batch_size': 8, 'flops': 1265603017216}, {'type': 'perplexity', 'in_batch_dim': [8, 112], 'batch_size': 8, 'flops': 2214805229440}, {'type': 'perplexity', 'in_batch_dim': [8, 64], 'batch_size': 8, 'flops': 1265603017216}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1582003754624}, {'type': 'perplexity', 'in_batch_dim': [8, 64], 'batch_size': 8, 'flops': 1265603017216}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1582003754624}, {'type': 'perplexity', 'in_batch_dim': [8, 112], 'batch_size': 8, 'flops': 2214805229440}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1582003754624}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1582003754624}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1582003754624}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1582003754624}, {'type': 'perplexity', 'in_batch_dim': [8, 112], 'batch_size': 8, 'flops': 2214805229440}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1582003754624}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1898404492032}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1898404492032}, {'type': 'perplexity', 'in_batch_dim': [8, 112], 'batch_size': 8, 'flops': 2214805229440}, {'type': 'perplexity', 'in_batch_dim': [8, 144], 'batch_size': 8, 'flops': 2847606704256}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1582003754624}, {'type': 'perplexity', 'in_batch_dim': [8, 64], 'batch_size': 8, 'flops': 1265603017216}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1898404492032}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1898404492032}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1582003754624}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1898404492032}, {'type': 'perplexity', 'in_batch_dim': [8, 128], 'batch_size': 8, 'flops': 2531205966848}, {'type': 'perplexity', 'in_batch_dim': [8, 112], 'batch_size': 8, 'flops': 2214805229440}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1898404492032}, {'type': 'perplexity', 'in_batch_dim': [8, 64], 'batch_size': 8, 'flops': 1265603017216}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1898404492032}, {'type': 'perplexity', 'in_batch_dim': [8, 160], 'batch_size': 8, 'flops': 3164007441664}, {'type': 'perplexity', 'in_batch_dim': [8, 64], 'batch_size': 8, 'flops': 1265603017216}, {'type': 'perplexity', 'in_batch_dim': [8, 112], 'batch_size': 8, 'flops': 2214805229440}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1898404492032}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1898404492032}, {'type': 'perplexity', 'in_batch_dim': [8, 144], 'batch_size': 8, 'flops': 2847606704256}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1582003754624}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1898404492032}, {'type': 'perplexity', 'in_batch_dim': [8, 64], 'batch_size': 8, 'flops': 1265603017216}, {'type': 'perplexity', 'in_batch_dim': [8, 64], 'batch_size': 8, 'flops': 1265603017216}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1898404492032}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1898404492032}, {'type': 'perplexity', 'in_batch_dim': [8, 112], 'batch_size': 8, 'flops': 2214805229440}, {'type': 'perplexity', 'in_batch_dim': [8, 112], 'batch_size': 8, 'flops': 2214805229440}, {'type': 'perplexity', 'in_batch_dim': [8, 128], 'batch_size': 8, 'flops': 2531205966848}, {'type': 'perplexity', 'in_batch_dim': [8, 112], 'batch_size': 8, 'flops': 2214805229440}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1582003754624}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1582003754624}, {'type': 'perplexity', 'in_batch_dim': [8, 112], 'batch_size': 8, 'flops': 2214805229440}, {'type': 'perplexity', 'in_batch_dim': [8, 128], 'batch_size': 8, 'flops': 2531205966848}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1582003754624}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1582003754624}, {'type': 'perplexity', 'in_batch_dim': [8, 64], 'batch_size': 8, 'flops': 1265603017216}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1898404492032}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1582003754624}, {'type': 'perplexity', 'in_batch_dim': [8, 112], 'batch_size': 8, 'flops': 2214805229440}, {'type': 'perplexity', 'in_batch_dim': [8, 64], 'batch_size': 8, 'flops': 1265603017216}, {'type': 'perplexity', 'in_batch_dim': [8, 64], 'batch_size': 8, 'flops': 1265603017216}, {'type': 'perplexity', 'in_batch_dim': [8, 128], 'batch_size': 8, 'flops': 2531205966848}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1898404492032}, {'type': 'perplexity', 'in_batch_dim': [8, 112], 'batch_size': 8, 'flops': 2214805229440}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1582003754624}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1582003754624}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1582003754624}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1582003754624}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1582003754624}, {'type': 'perplexity', 'in_batch_dim': [8, 112], 'batch_size': 8, 'flops': 2214805229440}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1898404492032}, {'type': 'perplexity', 'in_batch_dim': [8, 112], 'batch_size': 8, 'flops': 2214805229440}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1898404492032}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1582003754624}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1582003754624}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1898404492032}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1898404492032}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1582003754624}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1582003754624}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1582003754624}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1582003754624}, {'type': 'perplexity', 'in_batch_dim': [8, 48], 'batch_size': 8, 'flops': 949202279808}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1582003754624}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1898404492032}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1582003754624}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1898404492032}, {'type': 'perplexity', 'in_batch_dim': [8, 112], 'batch_size': 8, 'flops': 2214805229440}, {'type': 'perplexity', 'in_batch_dim': [8, 128], 'batch_size': 8, 'flops': 2531205966848}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1898404492032}, {'type': 'perplexity', 'in_batch_dim': [8, 64], 'batch_size': 8, 'flops': 1265603017216}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1898404492032}, {'type': 'perplexity', 'in_batch_dim': [8, 64], 'batch_size': 8, 'flops': 1265603017216}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1582003754624}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1582003754624}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1582003754624}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1582003754624}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1898404492032}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1582003754624}, {'type': 'perplexity', 'in_batch_dim': [8, 112], 'batch_size': 8, 'flops': 2214805229440}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1898404492032}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1582003754624}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1898404492032}, {'type': 'perplexity', 'in_batch_dim': [8, 64], 'batch_size': 8, 'flops': 1265603017216}, {'type': 'perplexity', 'in_batch_dim': [8, 64], 'batch_size': 8, 'flops': 1265603017216}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1898404492032}, {'type': 'perplexity', 'in_batch_dim': [8, 64], 'batch_size': 8, 'flops': 1265603017216}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1582003754624}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1582003754624}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1582003754624}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1582003754624}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1582003754624}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1582003754624}, {'type': 'perplexity', 'in_batch_dim': [8, 64], 'batch_size': 8, 'flops': 1265603017216}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1582003754624}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1582003754624}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1898404492032}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1898404492032}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1898404492032}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1898404492032}, {'type': 'perplexity', 'in_batch_dim': [8, 128], 'batch_size': 8, 'flops': 2531205966848}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1582003754624}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1898404492032}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1898404492032}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1898404492032}, {'type': 'perplexity', 'in_batch_dim': [8, 64], 'batch_size': 8, 'flops': 1265603017216}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1898404492032}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1898404492032}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1582003754624}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1898404492032}, {'type': 'perplexity', 'in_batch_dim': [8, 112], 'batch_size': 8, 'flops': 2214805229440}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1898404492032}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1582003754624}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1898404492032}, {'type': 'perplexity', 'in_batch_dim': [8, 64], 'batch_size': 8, 'flops': 1265603017216}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1898404492032}, {'type': 'perplexity', 'in_batch_dim': [8, 112], 'batch_size': 8, 'flops': 2214805229440}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1898404492032}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1898404492032}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1582003754624}, {'type': 'perplexity', 'in_batch_dim': [8, 64], 'batch_size': 8, 'flops': 1265603017216}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1582003754624}, {'type': 'perplexity', 'in_batch_dim': [8, 128], 'batch_size': 8, 'flops': 2531205966848}, {'type': 'perplexity', 'in_batch_dim': [8, 64], 'batch_size': 8, 'flops': 1265603017216}, {'type': 'perplexity', 'in_batch_dim': [8, 128], 'batch_size': 8, 'flops': 2531205966848}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1898404492032}, {'type': 'perplexity', 'in_batch_dim': [8, 112], 'batch_size': 8, 'flops': 2214805229440}, {'type': 'perplexity', 'in_batch_dim': [8, 112], 'batch_size': 8, 'flops': 2214805229440}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1898404492032}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1898404492032}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1582003754624}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1582003754624}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1582003754624}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1898404492032}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1582003754624}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1582003754624}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1898404492032}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1582003754624}, {'type': 'perplexity', 'in_batch_dim': [8, 64], 'batch_size': 8, 'flops': 1265603017216}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1898404492032}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1898404492032}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1898404492032}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1898404492032}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1582003754624}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1898404492032}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1582003754624}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1582003754624}, {'type': 'perplexity', 'in_batch_dim': [8, 112], 'batch_size': 8, 'flops': 2214805229440}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1582003754624}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1898404492032}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1582003754624}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1582003754624}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1582003754624}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1898404492032}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1582003754624}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1898404492032}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1582003754624}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1582003754624}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1898404492032}, {'type': 'perplexity', 'in_batch_dim': [8, 112], 'batch_size': 8, 'flops': 2214805229440}, {'type': 'perplexity', 'in_batch_dim': [8, 128], 'batch_size': 8, 'flops': 2531205966848}, {'type': 'perplexity', 'in_batch_dim': [8, 112], 'batch_size': 8, 'flops': 2214805229440}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1582003754624}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1898404492032}, {'type': 'perplexity', 'in_batch_dim': [8, 64], 'batch_size': 8, 'flops': 1265603017216}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1898404492032}, {'type': 'perplexity', 'in_batch_dim': [8, 64], 'batch_size': 8, 'flops': 1265603017216}, {'type': 'perplexity', 'in_batch_dim': [8, 144], 'batch_size': 8, 'flops': 2847606704256}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1582003754624}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1898404492032}, {'type': 'perplexity', 'in_batch_dim': [8, 112], 'batch_size': 8, 'flops': 2214805229440}, {'type': 'perplexity', 'in_batch_dim': [8, 144], 'batch_size': 8, 'flops': 2847606704256}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1582003754624}, {'type': 'perplexity', 'in_batch_dim': [8, 112], 'batch_size': 8, 'flops': 2214805229440}, {'type': 'perplexity', 'in_batch_dim': [8, 64], 'batch_size': 8, 'flops': 1265603017216}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1582003754624}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1898404492032}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1898404492032}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1582003754624}, {'type': 'perplexity', 'in_batch_dim': [8, 112], 'batch_size': 8, 'flops': 2214805229440}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1582003754624}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1582003754624}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1582003754624}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1582003754624}, {'type': 'perplexity', 'in_batch_dim': [8, 112], 'batch_size': 8, 'flops': 2214805229440}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1898404492032}, {'type': 'perplexity', 'in_batch_dim': [8, 128], 'batch_size': 8, 'flops': 2531205966848}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1582003754624}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1582003754624}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1898404492032}, {'type': 'perplexity', 'in_batch_dim': [8, 64], 'batch_size': 8, 'flops': 1265603017216}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1898404492032}, {'type': 'perplexity', 'in_batch_dim': [8, 64], 'batch_size': 8, 'flops': 1265603017216}, {'type': 'perplexity', 'in_batch_dim': [8, 112], 'batch_size': 8, 'flops': 2214805229440}, {'type': 'perplexity', 'in_batch_dim': [8, 112], 'batch_size': 8, 'flops': 2214805229440}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1582003754624}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1898404492032}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1582003754624}, {'type': 'perplexity', 'in_batch_dim': [8, 64], 'batch_size': 8, 'flops': 1265603017216}, {'type': 'perplexity', 'in_batch_dim': [8, 64], 'batch_size': 8, 'flops': 1265603017216}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1898404492032}, {'type': 'perplexity', 'in_batch_dim': [8, 64], 'batch_size': 8, 'flops': 1265603017216}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1898404492032}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1898404492032}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1582003754624}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1898404492032}, {'type': 'perplexity', 'in_batch_dim': [8, 144], 'batch_size': 8, 'flops': 2847606704256}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1898404492032}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1582003754624}, {'type': 'perplexity', 'in_batch_dim': [8, 128], 'batch_size': 8, 'flops': 2531205966848}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1898404492032}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1898404492032}, {'type': 'perplexity', 'in_batch_dim': [8, 112], 'batch_size': 8, 'flops': 2214805229440}, {'type': 'perplexity', 'in_batch_dim': [8, 112], 'batch_size': 8, 'flops': 2214805229440}, {'type': 'perplexity', 'in_batch_dim': [8, 64], 'batch_size': 8, 'flops': 1265603017216}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1582003754624}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1898404492032}, {'type': 'perplexity', 'in_batch_dim': [8, 128], 'batch_size': 8, 'flops': 2531205966848}, {'type': 'perplexity', 'in_batch_dim': [8, 144], 'batch_size': 8, 'flops': 2847606704256}, {'type': 'perplexity', 'in_batch_dim': [8, 112], 'batch_size': 8, 'flops': 2214805229440}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1582003754624}, {'type': 'perplexity', 'in_batch_dim': [8, 64], 'batch_size': 8, 'flops': 1265603017216}, {'type': 'perplexity', 'in_batch_dim': [8, 64], 'batch_size': 8, 'flops': 1265603017216}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1898404492032}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1582003754624}, {'type': 'perplexity', 'in_batch_dim': [8, 64], 'batch_size': 8, 'flops': 1265603017216}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1898404492032}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1582003754624}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1898404492032}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1898404492032}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1582003754624}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1582003754624}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1582003754624}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1582003754624}, {'type': 'perplexity', 'in_batch_dim': [3, 48], 'batch_size': 8, 'flops': 949202279808}], 'timestamp': '2025-09-10 02:30:32.317095', 'step': 3728, 'epoch': 1} {'type': 'pplx', 'content': 8389.124261631248, 'timestamp': '2025-09-10 02:30:32.325767', 'step': 3728, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 144], 'flops': 4271696270016}, 'timestamp': '2025-09-10 02:30:32.377440', 'step': 3728, 'epoch': 1} {'type': 'loss', 'content': 0.1311854124069214, 'timestamp': '2025-09-10 02:30:32.389600', 'step': 3729, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 128], 'flops': 3797092544000}, 'timestamp': '2025-09-10 02:30:32.453375', 'step': 3729, 'epoch': 1} {'type': 'loss', 'content': 0.19346201419830322, 'timestamp': '2025-09-10 02:30:32.456985', 'step': 3730, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 2847885091968}, 'timestamp': '2025-09-10 02:30:32.491511', 'step': 3730, 'epoch': 1} {'type': 'loss', 'content': 0.19528251886367798, 'timestamp': '2025-09-10 02:30:32.497061', 'step': 3731, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 2847885091968}, 'timestamp': '2025-09-10 02:30:32.532439', 'step': 3731, 'epoch': 1} {'type': 'loss', 'content': 0.27754873037338257, 'timestamp': '2025-09-10 02:30:32.559711', 'step': 3732, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 3322488817984}, 'timestamp': '2025-09-10 02:30:32.600842', 'step': 3732, 'epoch': 1} {'type': 'loss', 'content': 0.17484958469867706, 'timestamp': '2025-09-10 02:30:32.604697', 'step': 3733, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 3322488817984}, 'timestamp': '2025-09-10 02:30:32.642868', 'step': 3733, 'epoch': 1} {'type': 'loss', 'content': 0.20848149061203003, 'timestamp': '2025-09-10 02:30:32.650076', 'step': 3734, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 80], 'flops': 2373281365952}, 'timestamp': '2025-09-10 02:30:32.708379', 'step': 3734, 'epoch': 1} {'type': 'loss', 'content': 0.1828313171863556, 'timestamp': '2025-09-10 02:30:32.712733', 'step': 3735, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 3322488817984}, 'timestamp': '2025-09-10 02:30:32.783094', 'step': 3735, 'epoch': 1} {'type': 'loss', 'content': 0.22458477318286896, 'timestamp': '2025-09-10 02:30:32.813212', 'step': 3736, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 3322488817984}, 'timestamp': '2025-09-10 02:30:32.846218', 'step': 3736, 'epoch': 1} {'type': 'loss', 'content': 0.156229168176651, 'timestamp': '2025-09-10 02:30:32.851204', 'step': 3737, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 2847885091968}, 'timestamp': '2025-09-10 02:30:32.885769', 'step': 3737, 'epoch': 1} {'type': 'loss', 'content': 0.08292918652296066, 'timestamp': '2025-09-10 02:30:32.889743', 'step': 3738, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 144], 'flops': 4271696270016}, 'timestamp': '2025-09-10 02:30:32.929021', 'step': 3738, 'epoch': 1} {'type': 'loss', 'content': 0.17557655274868011, 'timestamp': '2025-09-10 02:30:32.938865', 'step': 3739, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 128], 'flops': 3797092544000}, 'timestamp': '2025-09-10 02:30:33.018000', 'step': 3739, 'epoch': 1} {'type': 'loss', 'content': 0.19916455447673798, 'timestamp': '2025-09-10 02:30:33.043819', 'step': 3740, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 128], 'flops': 3797092544000}, 'timestamp': '2025-09-10 02:30:33.132152', 'step': 3740, 'epoch': 1} {'type': 'loss', 'content': 0.20532678067684174, 'timestamp': '2025-09-10 02:30:33.150502', 'step': 3741, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 128], 'flops': 3797092544000}, 'timestamp': '2025-09-10 02:30:33.226946', 'step': 3741, 'epoch': 1} {'type': 'loss', 'content': 0.10648204386234283, 'timestamp': '2025-09-10 02:30:33.234942', 'step': 3742, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 2847885091968}, 'timestamp': '2025-09-10 02:30:33.277312', 'step': 3742, 'epoch': 1} {'type': 'loss', 'content': 0.14825588464736938, 'timestamp': '2025-09-10 02:30:33.280007', 'step': 3743, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 128], 'flops': 3797092544000}, 'timestamp': '2025-09-10 02:30:33.314046', 'step': 3743, 'epoch': 1} {'type': 'loss', 'content': 0.2229338437318802, 'timestamp': '2025-09-10 02:30:33.338505', 'step': 3744, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 2847885091968}, 'timestamp': '2025-09-10 02:30:33.392019', 'step': 3744, 'epoch': 1} {'type': 'loss', 'content': 0.15185005962848663, 'timestamp': '2025-09-10 02:30:33.397251', 'step': 3745, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 3322488817984}, 'timestamp': '2025-09-10 02:30:33.435123', 'step': 3745, 'epoch': 1} {'type': 'loss', 'content': 0.12358137965202332, 'timestamp': '2025-09-10 02:30:33.441893', 'step': 3746, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 3322488817984}, 'timestamp': '2025-09-10 02:30:33.482271', 'step': 3746, 'epoch': 1} {'type': 'loss', 'content': 0.14846689999103546, 'timestamp': '2025-09-10 02:30:33.499312', 'step': 3747, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 2847885091968}, 'timestamp': '2025-09-10 02:30:33.572893', 'step': 3747, 'epoch': 1} {'type': 'loss', 'content': 0.14072807133197784, 'timestamp': '2025-09-10 02:30:33.600249', 'step': 3748, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 128], 'flops': 3797092544000}, 'timestamp': '2025-09-10 02:30:33.646340', 'step': 3748, 'epoch': 1} {'type': 'loss', 'content': 0.24871540069580078, 'timestamp': '2025-09-10 02:30:33.657386', 'step': 3749, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 3322488817984}, 'timestamp': '2025-09-10 02:30:33.707092', 'step': 3749, 'epoch': 1} {'type': 'loss', 'content': 0.16555489599704742, 'timestamp': '2025-09-10 02:30:33.710407', 'step': 3750, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 128], 'flops': 3797092544000}, 'timestamp': '2025-09-10 02:30:33.749273', 'step': 3750, 'epoch': 1} {'type': 'loss', 'content': 0.18929705023765564, 'timestamp': '2025-09-10 02:30:33.754021', 'step': 3751, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 3322488817984}, 'timestamp': '2025-09-10 02:30:33.786082', 'step': 3751, 'epoch': 1} {'type': 'loss', 'content': 0.0871628150343895, 'timestamp': '2025-09-10 02:30:33.809485', 'step': 3752, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 128], 'flops': 3797092544000}, 'timestamp': '2025-09-10 02:30:33.841614', 'step': 3752, 'epoch': 1} {'type': 'loss', 'content': 0.1789841204881668, 'timestamp': '2025-09-10 02:30:33.844859', 'step': 3753, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 128], 'flops': 3797092544000}, 'timestamp': '2025-09-10 02:30:33.877564', 'step': 3753, 'epoch': 1} {'type': 'loss', 'content': 0.12920866906642914, 'timestamp': '2025-09-10 02:30:33.879905', 'step': 3754, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 144], 'flops': 4271696270016}, 'timestamp': '2025-09-10 02:30:33.918300', 'step': 3754, 'epoch': 1} {'type': 'loss', 'content': 0.153732031583786, 'timestamp': '2025-09-10 02:30:33.921423', 'step': 3755, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 2847885091968}, 'timestamp': '2025-09-10 02:30:33.954453', 'step': 3755, 'epoch': 1} {'type': 'loss', 'content': 0.1945505440235138, 'timestamp': '2025-09-10 02:30:33.979480', 'step': 3756, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 128], 'flops': 3797092544000}, 'timestamp': '2025-09-10 02:30:34.014240', 'step': 3756, 'epoch': 1} {'type': 'loss', 'content': 0.15973295271396637, 'timestamp': '2025-09-10 02:30:34.016734', 'step': 3757, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 192], 'flops': 5695507448064}, 'timestamp': '2025-09-10 02:30:34.050219', 'step': 3757, 'epoch': 1} {'type': 'loss', 'content': 0.25329846143722534, 'timestamp': '2025-09-10 02:30:34.054600', 'step': 3758, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 3322488817984}, 'timestamp': '2025-09-10 02:30:34.088494', 'step': 3758, 'epoch': 1} {'type': 'loss', 'content': 0.12054415792226791, 'timestamp': '2025-09-10 02:30:34.090824', 'step': 3759, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 2847885091968}, 'timestamp': '2025-09-10 02:30:34.122336', 'step': 3759, 'epoch': 1} {'type': 'loss', 'content': 0.09407666325569153, 'timestamp': '2025-09-10 02:30:34.151063', 'step': 3760, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 2847885091968}, 'timestamp': '2025-09-10 02:30:34.191018', 'step': 3760, 'epoch': 1} {'type': 'loss', 'content': 0.12587310373783112, 'timestamp': '2025-09-10 02:30:34.193422', 'step': 3761, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 160], 'flops': 4746299996032}, 'timestamp': '2025-09-10 02:30:34.226602', 'step': 3761, 'epoch': 1} {'type': 'loss', 'content': 0.12772104144096375, 'timestamp': '2025-09-10 02:30:34.229353', 'step': 3762, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 3322488817984}, 'timestamp': '2025-09-10 02:30:34.262314', 'step': 3762, 'epoch': 1} {'type': 'loss', 'content': 0.15643608570098877, 'timestamp': '2025-09-10 02:30:34.265022', 'step': 3763, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 128], 'flops': 3797092544000}, 'timestamp': '2025-09-10 02:30:34.297280', 'step': 3763, 'epoch': 1} {'type': 'loss', 'content': 0.13297590613365173, 'timestamp': '2025-09-10 02:30:34.320776', 'step': 3764, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 80], 'flops': 2373281365952}, 'timestamp': '2025-09-10 02:30:34.352670', 'step': 3764, 'epoch': 1} {'type': 'loss', 'content': 0.11524497717618942, 'timestamp': '2025-09-10 02:30:34.355209', 'step': 3765, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 128], 'flops': 3797092544000}, 'timestamp': '2025-09-10 02:30:34.387797', 'step': 3765, 'epoch': 1} {'type': 'loss', 'content': 0.07163156569004059, 'timestamp': '2025-09-10 02:30:34.392384', 'step': 3766, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 3322488817984}, 'timestamp': '2025-09-10 02:30:34.433319', 'step': 3766, 'epoch': 1} {'type': 'loss', 'content': 0.1159341111779213, 'timestamp': '2025-09-10 02:30:34.436150', 'step': 3767, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 2847885091968}, 'timestamp': '2025-09-10 02:30:34.468465', 'step': 3767, 'epoch': 1} {'type': 'loss', 'content': 0.20781883597373962, 'timestamp': '2025-09-10 02:30:34.493582', 'step': 3768, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 2847885091968}, 'timestamp': '2025-09-10 02:30:34.528001', 'step': 3768, 'epoch': 1} {'type': 'loss', 'content': 0.18553604185581207, 'timestamp': '2025-09-10 02:30:34.534958', 'step': 3769, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 160], 'flops': 4746299996032}, 'timestamp': '2025-09-10 02:30:34.571157', 'step': 3769, 'epoch': 1} {'type': 'loss', 'content': 0.12055405229330063, 'timestamp': '2025-09-10 02:30:34.579803', 'step': 3770, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 3322488817984}, 'timestamp': '2025-09-10 02:30:34.623924', 'step': 3770, 'epoch': 1} {'type': 'loss', 'content': 0.11993355304002762, 'timestamp': '2025-09-10 02:30:34.626219', 'step': 3771, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 3322488817984}, 'timestamp': '2025-09-10 02:30:34.661941', 'step': 3771, 'epoch': 1} {'type': 'loss', 'content': 0.12910693883895874, 'timestamp': '2025-09-10 02:30:34.686112', 'step': 3772, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 3322488817984}, 'timestamp': '2025-09-10 02:30:34.719597', 'step': 3772, 'epoch': 1} {'type': 'loss', 'content': 0.17208196222782135, 'timestamp': '2025-09-10 02:30:34.722328', 'step': 3773, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 128], 'flops': 3797092544000}, 'timestamp': '2025-09-10 02:30:34.755988', 'step': 3773, 'epoch': 1} {'type': 'loss', 'content': 0.22928348183631897, 'timestamp': '2025-09-10 02:30:34.760465', 'step': 3774, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 3322488817984}, 'timestamp': '2025-09-10 02:30:34.799422', 'step': 3774, 'epoch': 1} {'type': 'loss', 'content': 0.059122104197740555, 'timestamp': '2025-09-10 02:30:34.802505', 'step': 3775, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 2847885091968}, 'timestamp': '2025-09-10 02:30:34.844745', 'step': 3775, 'epoch': 1} {'type': 'loss', 'content': 0.14134608209133148, 'timestamp': '2025-09-10 02:30:34.868681', 'step': 3776, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 128], 'flops': 3797092544000}, 'timestamp': '2025-09-10 02:30:34.901600', 'step': 3776, 'epoch': 1} {'type': 'loss', 'content': 0.16350455582141876, 'timestamp': '2025-09-10 02:30:34.904284', 'step': 3777, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 3322488817984}, 'timestamp': '2025-09-10 02:30:34.936408', 'step': 3777, 'epoch': 1} {'type': 'loss', 'content': 0.18094667792320251, 'timestamp': '2025-09-10 02:30:34.940619', 'step': 3778, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 128], 'flops': 3797092544000}, 'timestamp': '2025-09-10 02:30:34.975963', 'step': 3778, 'epoch': 1} {'type': 'loss', 'content': 0.12147475779056549, 'timestamp': '2025-09-10 02:30:34.979055', 'step': 3779, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 128], 'flops': 3797092544000}, 'timestamp': '2025-09-10 02:30:35.011028', 'step': 3779, 'epoch': 1} {'type': 'loss', 'content': 0.13560698926448822, 'timestamp': '2025-09-10 02:30:35.034658', 'step': 3780, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 128], 'flops': 3797092544000}, 'timestamp': '2025-09-10 02:30:35.067174', 'step': 3780, 'epoch': 1} {'type': 'loss', 'content': 0.13640139997005463, 'timestamp': '2025-09-10 02:30:35.069705', 'step': 3781, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 2847885091968}, 'timestamp': '2025-09-10 02:30:35.101496', 'step': 3781, 'epoch': 1} {'type': 'loss', 'content': 0.10113521665334702, 'timestamp': '2025-09-10 02:30:35.103827', 'step': 3782, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 3322488817984}, 'timestamp': '2025-09-10 02:30:35.135356', 'step': 3782, 'epoch': 1} {'type': 'loss', 'content': 0.188189297914505, 'timestamp': '2025-09-10 02:30:35.138395', 'step': 3783, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 3322488817984}, 'timestamp': '2025-09-10 02:30:35.172314', 'step': 3783, 'epoch': 1} {'type': 'loss', 'content': 0.1166093572974205, 'timestamp': '2025-09-10 02:30:35.196146', 'step': 3784, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 128], 'flops': 3797092544000}, 'timestamp': '2025-09-10 02:30:35.228626', 'step': 3784, 'epoch': 1} {'type': 'loss', 'content': 0.18608863651752472, 'timestamp': '2025-09-10 02:30:35.230976', 'step': 3785, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 3322488817984}, 'timestamp': '2025-09-10 02:30:35.262949', 'step': 3785, 'epoch': 1} {'type': 'loss', 'content': 0.09463272988796234, 'timestamp': '2025-09-10 02:30:35.265384', 'step': 3786, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 3322488817984}, 'timestamp': '2025-09-10 02:30:35.297505', 'step': 3786, 'epoch': 1} {'type': 'loss', 'content': 0.14342762529850006, 'timestamp': '2025-09-10 02:30:35.300272', 'step': 3787, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 176], 'flops': 5220903722048}, 'timestamp': '2025-09-10 02:30:35.331670', 'step': 3787, 'epoch': 1} {'type': 'loss', 'content': 0.16232335567474365, 'timestamp': '2025-09-10 02:30:35.356469', 'step': 3788, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 80], 'flops': 2373281365952}, 'timestamp': '2025-09-10 02:30:35.389936', 'step': 3788, 'epoch': 1} {'type': 'loss', 'content': 0.20110595226287842, 'timestamp': '2025-09-10 02:30:35.392367', 'step': 3789, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 144], 'flops': 4271696270016}, 'timestamp': '2025-09-10 02:30:35.423968', 'step': 3789, 'epoch': 1} {'type': 'loss', 'content': 0.1462220400571823, 'timestamp': '2025-09-10 02:30:35.426597', 'step': 3790, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 144], 'flops': 4271696270016}, 'timestamp': '2025-09-10 02:30:35.457997', 'step': 3790, 'epoch': 1} {'type': 'loss', 'content': 0.14553384482860565, 'timestamp': '2025-09-10 02:30:35.460376', 'step': 3791, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 2847885091968}, 'timestamp': '2025-09-10 02:30:35.492435', 'step': 3791, 'epoch': 1} {'type': 'loss', 'content': 0.1691882163286209, 'timestamp': '2025-09-10 02:30:35.516296', 'step': 3792, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 128], 'flops': 3797092544000}, 'timestamp': '2025-09-10 02:30:35.547418', 'step': 3792, 'epoch': 1} {'type': 'loss', 'content': 0.25533434748649597, 'timestamp': '2025-09-10 02:30:35.549804', 'step': 3793, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 128], 'flops': 3797092544000}, 'timestamp': '2025-09-10 02:30:35.582913', 'step': 3793, 'epoch': 1} {'type': 'loss', 'content': 0.18098293244838715, 'timestamp': '2025-09-10 02:30:35.585345', 'step': 3794, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 3322488817984}, 'timestamp': '2025-09-10 02:30:35.618064', 'step': 3794, 'epoch': 1} {'type': 'loss', 'content': 0.21020446717739105, 'timestamp': '2025-09-10 02:30:35.620521', 'step': 3795, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 2847885091968}, 'timestamp': '2025-09-10 02:30:35.650890', 'step': 3795, 'epoch': 1} {'type': 'loss', 'content': 0.15012416243553162, 'timestamp': '2025-09-10 02:30:35.674890', 'step': 3796, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 3322488817984}, 'timestamp': '2025-09-10 02:30:35.709578', 'step': 3796, 'epoch': 1} {'type': 'loss', 'content': 0.0753297358751297, 'timestamp': '2025-09-10 02:30:35.711989', 'step': 3797, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 128], 'flops': 3797092544000}, 'timestamp': '2025-09-10 02:30:35.743689', 'step': 3797, 'epoch': 1} {'type': 'loss', 'content': 0.16626568138599396, 'timestamp': '2025-09-10 02:30:35.746151', 'step': 3798, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 2847885091968}, 'timestamp': '2025-09-10 02:30:35.777961', 'step': 3798, 'epoch': 1} {'type': 'loss', 'content': 0.2866518497467041, 'timestamp': '2025-09-10 02:30:35.780921', 'step': 3799, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 2847885091968}, 'timestamp': '2025-09-10 02:30:35.812604', 'step': 3799, 'epoch': 1} {'type': 'loss', 'content': 0.08030354976654053, 'timestamp': '2025-09-10 02:30:35.836002', 'step': 3800, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 3322488817984}, 'timestamp': '2025-09-10 02:30:35.869276', 'step': 3800, 'epoch': 1} {'type': 'loss', 'content': 0.11755923926830292, 'timestamp': '2025-09-10 02:30:35.871489', 'step': 3801, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 3322488817984}, 'timestamp': '2025-09-10 02:30:35.903953', 'step': 3801, 'epoch': 1} {'type': 'loss', 'content': 0.12849906086921692, 'timestamp': '2025-09-10 02:30:35.906958', 'step': 3802, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 3322488817984}, 'timestamp': '2025-09-10 02:30:35.940088', 'step': 3802, 'epoch': 1} {'type': 'loss', 'content': 0.12387958914041519, 'timestamp': '2025-09-10 02:30:35.943031', 'step': 3803, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 2847885091968}, 'timestamp': '2025-09-10 02:30:35.975010', 'step': 3803, 'epoch': 1} {'type': 'loss', 'content': 0.20735925436019897, 'timestamp': '2025-09-10 02:30:35.998663', 'step': 3804, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 2847885091968}, 'timestamp': '2025-09-10 02:30:36.036364', 'step': 3804, 'epoch': 1} {'type': 'loss', 'content': 0.16347509622573853, 'timestamp': '2025-09-10 02:30:36.040867', 'step': 3805, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 2847885091968}, 'timestamp': '2025-09-10 02:30:36.077996', 'step': 3805, 'epoch': 1} {'type': 'loss', 'content': 0.1316630244255066, 'timestamp': '2025-09-10 02:30:36.081312', 'step': 3806, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 2847885091968}, 'timestamp': '2025-09-10 02:30:36.113503', 'step': 3806, 'epoch': 1} {'type': 'loss', 'content': 0.23244158923625946, 'timestamp': '2025-09-10 02:30:36.115651', 'step': 3807, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 128], 'flops': 3797092544000}, 'timestamp': '2025-09-10 02:30:36.148182', 'step': 3807, 'epoch': 1} {'type': 'loss', 'content': 0.08509618043899536, 'timestamp': '2025-09-10 02:30:36.172311', 'step': 3808, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 3322488817984}, 'timestamp': '2025-09-10 02:30:36.206655', 'step': 3808, 'epoch': 1} {'type': 'loss', 'content': 0.16657480597496033, 'timestamp': '2025-09-10 02:30:36.209436', 'step': 3809, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 128], 'flops': 3797092544000}, 'timestamp': '2025-09-10 02:30:36.241132', 'step': 3809, 'epoch': 1} {'type': 'loss', 'content': 0.17418751120567322, 'timestamp': '2025-09-10 02:30:36.245321', 'step': 3810, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 128], 'flops': 3797092544000}, 'timestamp': '2025-09-10 02:30:36.277503', 'step': 3810, 'epoch': 1} {'type': 'loss', 'content': 0.12969177961349487, 'timestamp': '2025-09-10 02:30:36.280350', 'step': 3811, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 3322488817984}, 'timestamp': '2025-09-10 02:30:36.312469', 'step': 3811, 'epoch': 1} {'type': 'loss', 'content': 0.1973484307527542, 'timestamp': '2025-09-10 02:30:36.336183', 'step': 3812, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 2847885091968}, 'timestamp': '2025-09-10 02:30:36.369810', 'step': 3812, 'epoch': 1} {'type': 'loss', 'content': 0.16435939073562622, 'timestamp': '2025-09-10 02:30:36.372100', 'step': 3813, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 3322488817984}, 'timestamp': '2025-09-10 02:30:36.403100', 'step': 3813, 'epoch': 1} {'type': 'loss', 'content': 0.1573863923549652, 'timestamp': '2025-09-10 02:30:36.405586', 'step': 3814, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 3322488817984}, 'timestamp': '2025-09-10 02:30:36.437145', 'step': 3814, 'epoch': 1} {'type': 'loss', 'content': 0.115667924284935, 'timestamp': '2025-09-10 02:30:36.440030', 'step': 3815, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 144], 'flops': 4271696270016}, 'timestamp': '2025-09-10 02:30:36.472100', 'step': 3815, 'epoch': 1} {'type': 'loss', 'content': 0.22145165503025055, 'timestamp': '2025-09-10 02:30:36.496132', 'step': 3816, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 2847885091968}, 'timestamp': '2025-09-10 02:30:36.528334', 'step': 3816, 'epoch': 1} {'type': 'loss', 'content': 0.14848408102989197, 'timestamp': '2025-09-10 02:30:36.530792', 'step': 3817, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 144], 'flops': 4271696270016}, 'timestamp': '2025-09-10 02:30:36.564740', 'step': 3817, 'epoch': 1} {'type': 'loss', 'content': 0.09646134078502655, 'timestamp': '2025-09-10 02:30:36.567293', 'step': 3818, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 128], 'flops': 3797092544000}, 'timestamp': '2025-09-10 02:30:36.601364', 'step': 3818, 'epoch': 1} {'type': 'loss', 'content': 0.1727709025144577, 'timestamp': '2025-09-10 02:30:36.604137', 'step': 3819, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 3322488817984}, 'timestamp': '2025-09-10 02:30:36.635410', 'step': 3819, 'epoch': 1} {'type': 'loss', 'content': 0.1493392139673233, 'timestamp': '2025-09-10 02:30:36.659312', 'step': 3820, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 3322488817984}, 'timestamp': '2025-09-10 02:30:36.692650', 'step': 3820, 'epoch': 1} {'type': 'loss', 'content': 0.11656058579683304, 'timestamp': '2025-09-10 02:30:36.695091', 'step': 3821, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 3322488817984}, 'timestamp': '2025-09-10 02:30:36.733209', 'step': 3821, 'epoch': 1} {'type': 'loss', 'content': 0.1828843206167221, 'timestamp': '2025-09-10 02:30:36.736501', 'step': 3822, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 3322488817984}, 'timestamp': '2025-09-10 02:30:36.768467', 'step': 3822, 'epoch': 1} {'type': 'loss', 'content': 0.1033753901720047, 'timestamp': '2025-09-10 02:30:36.771262', 'step': 3823, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 3322488817984}, 'timestamp': '2025-09-10 02:30:36.806083', 'step': 3823, 'epoch': 1} {'type': 'loss', 'content': 0.19713176786899567, 'timestamp': '2025-09-10 02:30:36.830323', 'step': 3824, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 2847885091968}, 'timestamp': '2025-09-10 02:30:36.862918', 'step': 3824, 'epoch': 1} {'type': 'loss', 'content': 0.1009640321135521, 'timestamp': '2025-09-10 02:30:36.865358', 'step': 3825, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 160], 'flops': 4746299996032}, 'timestamp': '2025-09-10 02:30:36.897802', 'step': 3825, 'epoch': 1} {'type': 'loss', 'content': 0.1872960776090622, 'timestamp': '2025-09-10 02:30:36.901981', 'step': 3826, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 3322488817984}, 'timestamp': '2025-09-10 02:30:36.933900', 'step': 3826, 'epoch': 1} {'type': 'loss', 'content': 0.1300494372844696, 'timestamp': '2025-09-10 02:30:36.936218', 'step': 3827, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 2847885091968}, 'timestamp': '2025-09-10 02:30:36.967695', 'step': 3827, 'epoch': 1} {'type': 'loss', 'content': 0.19911424815654755, 'timestamp': '2025-09-10 02:30:36.991355', 'step': 3828, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 128], 'flops': 3797092544000}, 'timestamp': '2025-09-10 02:30:37.023450', 'step': 3828, 'epoch': 1} {'type': 'loss', 'content': 0.1439683586359024, 'timestamp': '2025-09-10 02:30:37.025884', 'step': 3829, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 2847885091968}, 'timestamp': '2025-09-10 02:30:37.057271', 'step': 3829, 'epoch': 1} {'type': 'loss', 'content': 0.10356423258781433, 'timestamp': '2025-09-10 02:30:37.059650', 'step': 3830, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 2847885091968}, 'timestamp': '2025-09-10 02:30:37.091018', 'step': 3830, 'epoch': 1} {'type': 'loss', 'content': 0.1527346968650818, 'timestamp': '2025-09-10 02:30:37.105435', 'step': 3831, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 128], 'flops': 3797092544000}, 'timestamp': '2025-09-10 02:30:37.138751', 'step': 3831, 'epoch': 1} {'type': 'loss', 'content': 0.21173110604286194, 'timestamp': '2025-09-10 02:30:37.162447', 'step': 3832, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 2847885091968}, 'timestamp': '2025-09-10 02:30:37.194414', 'step': 3832, 'epoch': 1} {'type': 'loss', 'content': 0.19191071391105652, 'timestamp': '2025-09-10 02:30:37.196867', 'step': 3833, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 2847885091968}, 'timestamp': '2025-09-10 02:30:37.229086', 'step': 3833, 'epoch': 1} {'type': 'loss', 'content': 0.2048712968826294, 'timestamp': '2025-09-10 02:30:37.231319', 'step': 3834, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 3322488817984}, 'timestamp': '2025-09-10 02:30:37.262952', 'step': 3834, 'epoch': 1} {'type': 'loss', 'content': 0.2255478799343109, 'timestamp': '2025-09-10 02:30:37.265391', 'step': 3835, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 3322488817984}, 'timestamp': '2025-09-10 02:30:37.296630', 'step': 3835, 'epoch': 1} {'type': 'loss', 'content': 0.15573301911354065, 'timestamp': '2025-09-10 02:30:37.320594', 'step': 3836, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 2847885091968}, 'timestamp': '2025-09-10 02:30:37.353323', 'step': 3836, 'epoch': 1} {'type': 'loss', 'content': 0.1795174777507782, 'timestamp': '2025-09-10 02:30:37.356235', 'step': 3837, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 3322488817984}, 'timestamp': '2025-09-10 02:30:37.388281', 'step': 3837, 'epoch': 1} {'type': 'loss', 'content': 0.10908888280391693, 'timestamp': '2025-09-10 02:30:37.391444', 'step': 3838, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 2847885091968}, 'timestamp': '2025-09-10 02:30:37.422935', 'step': 3838, 'epoch': 1} {'type': 'loss', 'content': 0.18115182220935822, 'timestamp': '2025-09-10 02:30:37.425384', 'step': 3839, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 2847885091968}, 'timestamp': '2025-09-10 02:30:37.456937', 'step': 3839, 'epoch': 1} {'type': 'loss', 'content': 0.18198667466640472, 'timestamp': '2025-09-10 02:30:37.480662', 'step': 3840, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 144], 'flops': 4271696270016}, 'timestamp': '2025-09-10 02:30:37.514130', 'step': 3840, 'epoch': 1} {'type': 'loss', 'content': 0.20148979127407074, 'timestamp': '2025-09-10 02:30:37.516508', 'step': 3841, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 3322488817984}, 'timestamp': '2025-09-10 02:30:37.547346', 'step': 3841, 'epoch': 1} {'type': 'loss', 'content': 0.2104201763868332, 'timestamp': '2025-09-10 02:30:37.549813', 'step': 3842, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 2847885091968}, 'timestamp': '2025-09-10 02:30:37.581642', 'step': 3842, 'epoch': 1} {'type': 'loss', 'content': 0.19758698344230652, 'timestamp': '2025-09-10 02:30:37.585584', 'step': 3843, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 128], 'flops': 3797092544000}, 'timestamp': '2025-09-10 02:30:37.616900', 'step': 3843, 'epoch': 1} {'type': 'loss', 'content': 0.18672716617584229, 'timestamp': '2025-09-10 02:30:37.640451', 'step': 3844, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 3322488817984}, 'timestamp': '2025-09-10 02:30:37.672869', 'step': 3844, 'epoch': 1} {'type': 'loss', 'content': 0.2542346715927124, 'timestamp': '2025-09-10 02:30:37.675207', 'step': 3845, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 128], 'flops': 3797092544000}, 'timestamp': '2025-09-10 02:30:37.707172', 'step': 3845, 'epoch': 1} {'type': 'loss', 'content': 0.1797669678926468, 'timestamp': '2025-09-10 02:30:37.709504', 'step': 3846, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 2847885091968}, 'timestamp': '2025-09-10 02:30:37.741180', 'step': 3846, 'epoch': 1} {'type': 'loss', 'content': 0.28706127405166626, 'timestamp': '2025-09-10 02:30:37.743469', 'step': 3847, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 3322488817984}, 'timestamp': '2025-09-10 02:30:37.775035', 'step': 3847, 'epoch': 1} {'type': 'loss', 'content': 0.2500710189342499, 'timestamp': '2025-09-10 02:30:37.799286', 'step': 3848, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 3322488817984}, 'timestamp': '2025-09-10 02:30:37.831889', 'step': 3848, 'epoch': 1} {'type': 'loss', 'content': 0.18904797732830048, 'timestamp': '2025-09-10 02:30:37.834503', 'step': 3849, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 3322488817984}, 'timestamp': '2025-09-10 02:30:37.865649', 'step': 3849, 'epoch': 1} {'type': 'loss', 'content': 0.16143614053726196, 'timestamp': '2025-09-10 02:30:37.868815', 'step': 3850, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 2847885091968}, 'timestamp': '2025-09-10 02:30:37.901898', 'step': 3850, 'epoch': 1} {'type': 'loss', 'content': 0.1986718326807022, 'timestamp': '2025-09-10 02:30:37.905984', 'step': 3851, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 3322488817984}, 'timestamp': '2025-09-10 02:30:37.946148', 'step': 3851, 'epoch': 1} {'type': 'loss', 'content': 0.1899619996547699, 'timestamp': '2025-09-10 02:30:37.970093', 'step': 3852, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 2847885091968}, 'timestamp': '2025-09-10 02:30:38.003688', 'step': 3852, 'epoch': 1} {'type': 'loss', 'content': 0.18308153748512268, 'timestamp': '2025-09-10 02:30:38.010183', 'step': 3853, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 144], 'flops': 4271696270016}, 'timestamp': '2025-09-10 02:30:38.049012', 'step': 3853, 'epoch': 1} {'type': 'loss', 'content': 0.17360295355319977, 'timestamp': '2025-09-10 02:30:38.054026', 'step': 3854, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 128], 'flops': 3797092544000}, 'timestamp': '2025-09-10 02:30:38.090362', 'step': 3854, 'epoch': 1} {'type': 'loss', 'content': 0.17499908804893494, 'timestamp': '2025-09-10 02:30:38.095503', 'step': 3855, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 3322488817984}, 'timestamp': '2025-09-10 02:30:38.134719', 'step': 3855, 'epoch': 1} {'type': 'loss', 'content': 0.18104490637779236, 'timestamp': '2025-09-10 02:30:38.159550', 'step': 3856, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 3322488817984}, 'timestamp': '2025-09-10 02:30:38.191000', 'step': 3856, 'epoch': 1} {'type': 'loss', 'content': 0.15942692756652832, 'timestamp': '2025-09-10 02:30:38.195159', 'step': 3857, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 3322488817984}, 'timestamp': '2025-09-10 02:30:38.228456', 'step': 3857, 'epoch': 1} {'type': 'loss', 'content': 0.16069917380809784, 'timestamp': '2025-09-10 02:30:38.230870', 'step': 3858, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 2847885091968}, 'timestamp': '2025-09-10 02:30:38.261856', 'step': 3858, 'epoch': 1} {'type': 'loss', 'content': 0.13363750278949738, 'timestamp': '2025-09-10 02:30:38.264206', 'step': 3859, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 144], 'flops': 4271696270016}, 'timestamp': '2025-09-10 02:30:38.295820', 'step': 3859, 'epoch': 1} {'type': 'loss', 'content': 0.09559249877929688, 'timestamp': '2025-09-10 02:30:38.319736', 'step': 3860, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 3322488817984}, 'timestamp': '2025-09-10 02:30:38.352000', 'step': 3860, 'epoch': 1} {'type': 'loss', 'content': 0.23869895935058594, 'timestamp': '2025-09-10 02:30:38.354478', 'step': 3861, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 3322488817984}, 'timestamp': '2025-09-10 02:30:38.386132', 'step': 3861, 'epoch': 1} {'type': 'loss', 'content': 0.1786145120859146, 'timestamp': '2025-09-10 02:30:38.388506', 'step': 3862, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 144], 'flops': 4271696270016}, 'timestamp': '2025-09-10 02:30:38.421202', 'step': 3862, 'epoch': 1} {'type': 'loss', 'content': 0.13536337018013, 'timestamp': '2025-09-10 02:30:38.423329', 'step': 3863, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 80], 'flops': 2373281365952}, 'timestamp': '2025-09-10 02:30:38.454351', 'step': 3863, 'epoch': 1} {'type': 'loss', 'content': 0.08935926109552383, 'timestamp': '2025-09-10 02:30:38.478083', 'step': 3864, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 144], 'flops': 4271696270016}, 'timestamp': '2025-09-10 02:30:38.510430', 'step': 3864, 'epoch': 1} {'type': 'loss', 'content': 0.21668143570423126, 'timestamp': '2025-09-10 02:30:38.513383', 'step': 3865, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 128], 'flops': 3797092544000}, 'timestamp': '2025-09-10 02:30:38.546095', 'step': 3865, 'epoch': 1} {'type': 'loss', 'content': 0.2515595257282257, 'timestamp': '2025-09-10 02:30:38.549627', 'step': 3866, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 3322488817984}, 'timestamp': '2025-09-10 02:30:38.581918', 'step': 3866, 'epoch': 1} {'type': 'loss', 'content': 0.1524156928062439, 'timestamp': '2025-09-10 02:30:38.584544', 'step': 3867, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 144], 'flops': 4271696270016}, 'timestamp': '2025-09-10 02:30:38.616338', 'step': 3867, 'epoch': 1} {'type': 'loss', 'content': 0.1148868203163147, 'timestamp': '2025-09-10 02:30:38.639923', 'step': 3868, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 3322488817984}, 'timestamp': '2025-09-10 02:30:38.672139', 'step': 3868, 'epoch': 1} {'type': 'loss', 'content': 0.20107220113277435, 'timestamp': '2025-09-10 02:30:38.674678', 'step': 3869, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 2847885091968}, 'timestamp': '2025-09-10 02:30:38.706696', 'step': 3869, 'epoch': 1} {'type': 'loss', 'content': 0.18782715499401093, 'timestamp': '2025-09-10 02:30:38.708869', 'step': 3870, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 3322488817984}, 'timestamp': '2025-09-10 02:30:38.740336', 'step': 3870, 'epoch': 1} {'type': 'loss', 'content': 0.13059701025485992, 'timestamp': '2025-09-10 02:30:38.742344', 'step': 3871, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 80], 'flops': 2373281365952}, 'timestamp': '2025-09-10 02:30:38.773386', 'step': 3871, 'epoch': 1} {'type': 'loss', 'content': 0.2685604691505432, 'timestamp': '2025-09-10 02:30:38.798013', 'step': 3872, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 128], 'flops': 3797092544000}, 'timestamp': '2025-09-10 02:30:38.844827', 'step': 3872, 'epoch': 1} {'type': 'loss', 'content': 0.2634928226470947, 'timestamp': '2025-09-10 02:30:38.846943', 'step': 3873, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 128], 'flops': 3797092544000}, 'timestamp': '2025-09-10 02:30:38.880104', 'step': 3873, 'epoch': 1} {'type': 'loss', 'content': 0.1419406235218048, 'timestamp': '2025-09-10 02:30:38.882706', 'step': 3874, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 128], 'flops': 3797092544000}, 'timestamp': '2025-09-10 02:30:38.916561', 'step': 3874, 'epoch': 1} {'type': 'loss', 'content': 0.09602735191583633, 'timestamp': '2025-09-10 02:30:38.919755', 'step': 3875, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 128], 'flops': 3797092544000}, 'timestamp': '2025-09-10 02:30:38.966660', 'step': 3875, 'epoch': 1} {'type': 'loss', 'content': 0.12474636733531952, 'timestamp': '2025-09-10 02:30:38.990184', 'step': 3876, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 2847885091968}, 'timestamp': '2025-09-10 02:30:39.026108', 'step': 3876, 'epoch': 1} {'type': 'loss', 'content': 0.12892195582389832, 'timestamp': '2025-09-10 02:30:39.028371', 'step': 3877, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 3322488817984}, 'timestamp': '2025-09-10 02:30:39.059207', 'step': 3877, 'epoch': 1} {'type': 'loss', 'content': 0.10898943990468979, 'timestamp': '2025-09-10 02:30:39.061450', 'step': 3878, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 2847885091968}, 'timestamp': '2025-09-10 02:30:39.093616', 'step': 3878, 'epoch': 1} {'type': 'loss', 'content': 0.2060815840959549, 'timestamp': '2025-09-10 02:30:39.095744', 'step': 3879, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 2847885091968}, 'timestamp': '2025-09-10 02:30:39.126723', 'step': 3879, 'epoch': 1} {'type': 'loss', 'content': 0.10454711318016052, 'timestamp': '2025-09-10 02:30:39.150147', 'step': 3880, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 2847885091968}, 'timestamp': '2025-09-10 02:30:39.184412', 'step': 3880, 'epoch': 1} {'type': 'loss', 'content': 0.2004450112581253, 'timestamp': '2025-09-10 02:30:39.186504', 'step': 3881, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 128], 'flops': 3797092544000}, 'timestamp': '2025-09-10 02:30:39.217911', 'step': 3881, 'epoch': 1} {'type': 'loss', 'content': 0.17023208737373352, 'timestamp': '2025-09-10 02:30:39.223713', 'step': 3882, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 3322488817984}, 'timestamp': '2025-09-10 02:30:39.263318', 'step': 3882, 'epoch': 1} {'type': 'loss', 'content': 0.12561503052711487, 'timestamp': '2025-09-10 02:30:39.267121', 'step': 3883, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 3322488817984}, 'timestamp': '2025-09-10 02:30:39.300542', 'step': 3883, 'epoch': 1} {'type': 'loss', 'content': 0.1962878257036209, 'timestamp': '2025-09-10 02:30:39.326277', 'step': 3884, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 128], 'flops': 3797092544000}, 'timestamp': '2025-09-10 02:30:39.357506', 'step': 3884, 'epoch': 1} {'type': 'loss', 'content': 0.236554816365242, 'timestamp': '2025-09-10 02:30:39.359743', 'step': 3885, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 128], 'flops': 3797092544000}, 'timestamp': '2025-09-10 02:30:39.392066', 'step': 3885, 'epoch': 1} {'type': 'loss', 'content': 0.12712997198104858, 'timestamp': '2025-09-10 02:30:39.396989', 'step': 3886, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 2847885091968}, 'timestamp': '2025-09-10 02:30:39.428947', 'step': 3886, 'epoch': 1} {'type': 'loss', 'content': 0.12808458507061005, 'timestamp': '2025-09-10 02:30:39.433164', 'step': 3887, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 128], 'flops': 3797092544000}, 'timestamp': '2025-09-10 02:30:39.465247', 'step': 3887, 'epoch': 1} {'type': 'loss', 'content': 0.183441162109375, 'timestamp': '2025-09-10 02:30:39.488754', 'step': 3888, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 3322488817984}, 'timestamp': '2025-09-10 02:30:39.520338', 'step': 3888, 'epoch': 1} {'type': 'loss', 'content': 0.20606715977191925, 'timestamp': '2025-09-10 02:30:39.522236', 'step': 3889, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 3322488817984}, 'timestamp': '2025-09-10 02:30:39.552719', 'step': 3889, 'epoch': 1} {'type': 'loss', 'content': 0.1812993437051773, 'timestamp': '2025-09-10 02:30:39.559255', 'step': 3890, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 128], 'flops': 3797092544000}, 'timestamp': '2025-09-10 02:30:39.594772', 'step': 3890, 'epoch': 1} {'type': 'loss', 'content': 0.17818139493465424, 'timestamp': '2025-09-10 02:30:39.600121', 'step': 3891, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 160], 'flops': 4746299996032}, 'timestamp': '2025-09-10 02:30:39.638259', 'step': 3891, 'epoch': 1} {'type': 'loss', 'content': 0.18508215248584747, 'timestamp': '2025-09-10 02:30:39.661559', 'step': 3892, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 128], 'flops': 3797092544000}, 'timestamp': '2025-09-10 02:30:39.692956', 'step': 3892, 'epoch': 1} {'type': 'loss', 'content': 0.1249084621667862, 'timestamp': '2025-09-10 02:30:39.695487', 'step': 3893, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 144], 'flops': 4271696270016}, 'timestamp': '2025-09-10 02:30:39.730463', 'step': 3893, 'epoch': 1} {'type': 'loss', 'content': 0.15816453099250793, 'timestamp': '2025-09-10 02:30:39.734921', 'step': 3894, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 144], 'flops': 4271696270016}, 'timestamp': '2025-09-10 02:30:39.769527', 'step': 3894, 'epoch': 1} {'type': 'loss', 'content': 0.14946623146533966, 'timestamp': '2025-09-10 02:30:39.771824', 'step': 3895, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 3322488817984}, 'timestamp': '2025-09-10 02:30:39.806055', 'step': 3895, 'epoch': 1} {'type': 'loss', 'content': 0.14194491505622864, 'timestamp': '2025-09-10 02:30:39.829560', 'step': 3896, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 3322488817984}, 'timestamp': '2025-09-10 02:30:39.861729', 'step': 3896, 'epoch': 1} {'type': 'loss', 'content': 0.1161305382847786, 'timestamp': '2025-09-10 02:30:39.864923', 'step': 3897, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 128], 'flops': 3797092544000}, 'timestamp': '2025-09-10 02:30:39.899877', 'step': 3897, 'epoch': 1} {'type': 'loss', 'content': 0.15285789966583252, 'timestamp': '2025-09-10 02:30:39.901783', 'step': 3898, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 2847885091968}, 'timestamp': '2025-09-10 02:30:39.933466', 'step': 3898, 'epoch': 1} {'type': 'loss', 'content': 0.22609761357307434, 'timestamp': '2025-09-10 02:30:39.936593', 'step': 3899, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 80], 'flops': 2373281365952}, 'timestamp': '2025-09-10 02:30:39.972807', 'step': 3899, 'epoch': 1} {'type': 'loss', 'content': 0.18294551968574524, 'timestamp': '2025-09-10 02:30:39.996249', 'step': 3900, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 128], 'flops': 3797092544000}, 'timestamp': '2025-09-10 02:30:40.035775', 'step': 3900, 'epoch': 1} {'type': 'loss', 'content': 0.19218745827674866, 'timestamp': '2025-09-10 02:30:40.037906', 'step': 3901, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 128], 'flops': 3797092544000}, 'timestamp': '2025-09-10 02:30:40.069982', 'step': 3901, 'epoch': 1} {'type': 'loss', 'content': 0.10273370146751404, 'timestamp': '2025-09-10 02:30:40.073302', 'step': 3902, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 128], 'flops': 3797092544000}, 'timestamp': '2025-09-10 02:30:40.106140', 'step': 3902, 'epoch': 1} {'type': 'loss', 'content': 0.14678016304969788, 'timestamp': '2025-09-10 02:30:40.108507', 'step': 3903, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 128], 'flops': 3797092544000}, 'timestamp': '2025-09-10 02:30:40.139201', 'step': 3903, 'epoch': 1} {'type': 'loss', 'content': 0.1390053927898407, 'timestamp': '2025-09-10 02:30:40.162321', 'step': 3904, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 3322488817984}, 'timestamp': '2025-09-10 02:30:40.195472', 'step': 3904, 'epoch': 1} {'type': 'loss', 'content': 0.18665623664855957, 'timestamp': '2025-09-10 02:30:40.199976', 'step': 3905, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 2847885091968}, 'timestamp': '2025-09-10 02:30:40.233118', 'step': 3905, 'epoch': 1} {'type': 'loss', 'content': 0.13664127886295319, 'timestamp': '2025-09-10 02:30:40.235289', 'step': 3906, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 144], 'flops': 4271696270016}, 'timestamp': '2025-09-10 02:30:40.266998', 'step': 3906, 'epoch': 1} {'type': 'loss', 'content': 0.1790093630552292, 'timestamp': '2025-09-10 02:30:40.269811', 'step': 3907, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 144], 'flops': 4271696270016}, 'timestamp': '2025-09-10 02:30:40.301630', 'step': 3907, 'epoch': 1} {'type': 'loss', 'content': 0.111982062458992, 'timestamp': '2025-09-10 02:30:40.325667', 'step': 3908, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 2847885091968}, 'timestamp': '2025-09-10 02:30:40.357333', 'step': 3908, 'epoch': 1} {'type': 'loss', 'content': 0.20332619547843933, 'timestamp': '2025-09-10 02:30:40.359310', 'step': 3909, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 3322488817984}, 'timestamp': '2025-09-10 02:30:40.392866', 'step': 3909, 'epoch': 1} {'type': 'loss', 'content': 0.17323067784309387, 'timestamp': '2025-09-10 02:30:40.396250', 'step': 3910, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 3322488817984}, 'timestamp': '2025-09-10 02:30:40.431664', 'step': 3910, 'epoch': 1} {'type': 'loss', 'content': 0.16024397313594818, 'timestamp': '2025-09-10 02:30:40.433701', 'step': 3911, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 144], 'flops': 4271696270016}, 'timestamp': '2025-09-10 02:30:40.466196', 'step': 3911, 'epoch': 1} {'type': 'loss', 'content': 0.1439538449048996, 'timestamp': '2025-09-10 02:30:40.491881', 'step': 3912, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 3322488817984}, 'timestamp': '2025-09-10 02:30:40.531448', 'step': 3912, 'epoch': 1} {'type': 'loss', 'content': 0.1294115036725998, 'timestamp': '2025-09-10 02:30:40.533737', 'step': 3913, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 128], 'flops': 3797092544000}, 'timestamp': '2025-09-10 02:30:40.567135', 'step': 3913, 'epoch': 1} {'type': 'loss', 'content': 0.13454972207546234, 'timestamp': '2025-09-10 02:30:40.571267', 'step': 3914, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 3322488817984}, 'timestamp': '2025-09-10 02:30:40.605101', 'step': 3914, 'epoch': 1} {'type': 'loss', 'content': 0.18079547584056854, 'timestamp': '2025-09-10 02:30:40.609261', 'step': 3915, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 2847885091968}, 'timestamp': '2025-09-10 02:30:40.641696', 'step': 3915, 'epoch': 1} {'type': 'loss', 'content': 0.10110573470592499, 'timestamp': '2025-09-10 02:30:40.666526', 'step': 3916, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 144], 'flops': 4271696270016}, 'timestamp': '2025-09-10 02:30:40.700790', 'step': 3916, 'epoch': 1} {'type': 'loss', 'content': 0.23132747411727905, 'timestamp': '2025-09-10 02:30:40.702872', 'step': 3917, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 144], 'flops': 4271696270016}, 'timestamp': '2025-09-10 02:30:40.739085', 'step': 3917, 'epoch': 1} {'type': 'loss', 'content': 0.24574726819992065, 'timestamp': '2025-09-10 02:30:40.741510', 'step': 3918, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 3322488817984}, 'timestamp': '2025-09-10 02:30:40.780483', 'step': 3918, 'epoch': 1} {'type': 'loss', 'content': 0.2642548978328705, 'timestamp': '2025-09-10 02:30:40.782912', 'step': 3919, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 2847885091968}, 'timestamp': '2025-09-10 02:30:40.813511', 'step': 3919, 'epoch': 1} {'type': 'loss', 'content': 0.1990572065114975, 'timestamp': '2025-09-10 02:30:40.836923', 'step': 3920, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 144], 'flops': 4271696270016}, 'timestamp': '2025-09-10 02:30:40.870309', 'step': 3920, 'epoch': 1} {'type': 'loss', 'content': 0.12275093048810959, 'timestamp': '2025-09-10 02:30:40.873144', 'step': 3921, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 3322488817984}, 'timestamp': '2025-09-10 02:30:40.903987', 'step': 3921, 'epoch': 1} {'type': 'loss', 'content': 0.1946115344762802, 'timestamp': '2025-09-10 02:30:40.906768', 'step': 3922, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 160], 'flops': 4746299996032}, 'timestamp': '2025-09-10 02:30:40.941665', 'step': 3922, 'epoch': 1} {'type': 'loss', 'content': 0.13320492208003998, 'timestamp': '2025-09-10 02:30:40.944647', 'step': 3923, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 2847885091968}, 'timestamp': '2025-09-10 02:30:40.979555', 'step': 3923, 'epoch': 1} {'type': 'loss', 'content': 0.13275869190692902, 'timestamp': '2025-09-10 02:30:41.005501', 'step': 3924, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 3322488817984}, 'timestamp': '2025-09-10 02:30:41.038476', 'step': 3924, 'epoch': 1} {'type': 'loss', 'content': 0.12282407283782959, 'timestamp': '2025-09-10 02:30:41.040618', 'step': 3925, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 3322488817984}, 'timestamp': '2025-09-10 02:30:41.071841', 'step': 3925, 'epoch': 1} {'type': 'loss', 'content': 0.18326450884342194, 'timestamp': '2025-09-10 02:30:41.074517', 'step': 3926, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 2847885091968}, 'timestamp': '2025-09-10 02:30:41.106359', 'step': 3926, 'epoch': 1} {'type': 'loss', 'content': 0.15976428985595703, 'timestamp': '2025-09-10 02:30:41.108241', 'step': 3927, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 3322488817984}, 'timestamp': '2025-09-10 02:30:41.142876', 'step': 3927, 'epoch': 1} {'type': 'loss', 'content': 0.16197971999645233, 'timestamp': '2025-09-10 02:30:41.166255', 'step': 3928, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 3322488817984}, 'timestamp': '2025-09-10 02:30:41.207605', 'step': 3928, 'epoch': 1} {'type': 'loss', 'content': 0.12126778066158295, 'timestamp': '2025-09-10 02:30:41.209734', 'step': 3929, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 128], 'flops': 3797092544000}, 'timestamp': '2025-09-10 02:30:41.242505', 'step': 3929, 'epoch': 1} {'type': 'loss', 'content': 0.12703180313110352, 'timestamp': '2025-09-10 02:30:41.246113', 'step': 3930, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 2847885091968}, 'timestamp': '2025-09-10 02:30:41.278428', 'step': 3930, 'epoch': 1} {'type': 'loss', 'content': 0.23722535371780396, 'timestamp': '2025-09-10 02:30:41.281037', 'step': 3931, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 3322488817984}, 'timestamp': '2025-09-10 02:30:41.315773', 'step': 3931, 'epoch': 1} {'type': 'loss', 'content': 0.14776866137981415, 'timestamp': '2025-09-10 02:30:41.338955', 'step': 3932, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 128], 'flops': 3797092544000}, 'timestamp': '2025-09-10 02:30:41.370859', 'step': 3932, 'epoch': 1} {'type': 'loss', 'content': 0.16710346937179565, 'timestamp': '2025-09-10 02:30:41.373347', 'step': 3933, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 2847885091968}, 'timestamp': '2025-09-10 02:30:41.406051', 'step': 3933, 'epoch': 1} {'type': 'loss', 'content': 0.14090405404567719, 'timestamp': '2025-09-10 02:30:41.416417', 'step': 3934, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 2847885091968}, 'timestamp': '2025-09-10 02:30:41.452909', 'step': 3934, 'epoch': 1} {'type': 'loss', 'content': 0.2192622572183609, 'timestamp': '2025-09-10 02:30:41.456325', 'step': 3935, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 2847885091968}, 'timestamp': '2025-09-10 02:30:41.492658', 'step': 3935, 'epoch': 1} {'type': 'loss', 'content': 0.08932220190763474, 'timestamp': '2025-09-10 02:30:41.517233', 'step': 3936, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 3322488817984}, 'timestamp': '2025-09-10 02:30:41.550822', 'step': 3936, 'epoch': 1} {'type': 'loss', 'content': 0.15426935255527496, 'timestamp': '2025-09-10 02:30:41.555369', 'step': 3937, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 2847885091968}, 'timestamp': '2025-09-10 02:30:41.595468', 'step': 3937, 'epoch': 1} {'type': 'loss', 'content': 0.16728681325912476, 'timestamp': '2025-09-10 02:30:41.599248', 'step': 3938, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 128], 'flops': 3797092544000}, 'timestamp': '2025-09-10 02:30:41.637289', 'step': 3938, 'epoch': 1} {'type': 'loss', 'content': 0.1975630670785904, 'timestamp': '2025-09-10 02:30:41.639719', 'step': 3939, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 80], 'flops': 2373281365952}, 'timestamp': '2025-09-10 02:30:41.672094', 'step': 3939, 'epoch': 1} {'type': 'loss', 'content': 0.1907169073820114, 'timestamp': '2025-09-10 02:30:41.700238', 'step': 3940, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 128], 'flops': 3797092544000}, 'timestamp': '2025-09-10 02:30:41.734994', 'step': 3940, 'epoch': 1} {'type': 'loss', 'content': 0.21595658361911774, 'timestamp': '2025-09-10 02:30:41.739217', 'step': 3941, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 3322488817984}, 'timestamp': '2025-09-10 02:30:41.771147', 'step': 3941, 'epoch': 1} {'type': 'loss', 'content': 0.21102400124073029, 'timestamp': '2025-09-10 02:30:41.773544', 'step': 3942, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 128], 'flops': 3797092544000}, 'timestamp': '2025-09-10 02:30:41.804322', 'step': 3942, 'epoch': 1} {'type': 'loss', 'content': 0.19190917909145355, 'timestamp': '2025-09-10 02:30:41.807378', 'step': 3943, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 3322488817984}, 'timestamp': '2025-09-10 02:30:41.841573', 'step': 3943, 'epoch': 1} {'type': 'loss', 'content': 0.21203505992889404, 'timestamp': '2025-09-10 02:30:41.865131', 'step': 3944, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 2847885091968}, 'timestamp': '2025-09-10 02:30:41.902788', 'step': 3944, 'epoch': 1} {'type': 'loss', 'content': 0.18768556416034698, 'timestamp': '2025-09-10 02:30:41.907663', 'step': 3945, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 128], 'flops': 3797092544000}, 'timestamp': '2025-09-10 02:30:41.944477', 'step': 3945, 'epoch': 1} {'type': 'loss', 'content': 0.19331756234169006, 'timestamp': '2025-09-10 02:30:41.947564', 'step': 3946, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 160], 'flops': 4746299996032}, 'timestamp': '2025-09-10 02:30:41.982427', 'step': 3946, 'epoch': 1} {'type': 'loss', 'content': 0.1699475347995758, 'timestamp': '2025-09-10 02:30:41.986850', 'step': 3947, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 3322488817984}, 'timestamp': '2025-09-10 02:30:42.020678', 'step': 3947, 'epoch': 1} {'type': 'loss', 'content': 0.22561495006084442, 'timestamp': '2025-09-10 02:30:42.044850', 'step': 3948, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 160], 'flops': 4746299996032}, 'timestamp': '2025-09-10 02:30:42.076042', 'step': 3948, 'epoch': 1} {'type': 'loss', 'content': 0.14068971574306488, 'timestamp': '2025-09-10 02:30:42.078010', 'step': 3949, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 3322488817984}, 'timestamp': '2025-09-10 02:30:42.110669', 'step': 3949, 'epoch': 1} {'type': 'loss', 'content': 0.1605885922908783, 'timestamp': '2025-09-10 02:30:42.114973', 'step': 3950, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 3322488817984}, 'timestamp': '2025-09-10 02:30:42.149911', 'step': 3950, 'epoch': 1} {'type': 'loss', 'content': 0.3249995708465576, 'timestamp': '2025-09-10 02:30:42.152329', 'step': 3951, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 128], 'flops': 3797092544000}, 'timestamp': '2025-09-10 02:30:42.198118', 'step': 3951, 'epoch': 1} {'type': 'loss', 'content': 0.15623801946640015, 'timestamp': '2025-09-10 02:30:42.221668', 'step': 3952, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 3322488817984}, 'timestamp': '2025-09-10 02:30:42.260923', 'step': 3952, 'epoch': 1} {'type': 'loss', 'content': 0.22645626962184906, 'timestamp': '2025-09-10 02:30:42.266809', 'step': 3953, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 144], 'flops': 4271696270016}, 'timestamp': '2025-09-10 02:30:42.301936', 'step': 3953, 'epoch': 1} {'type': 'loss', 'content': 0.1228109672665596, 'timestamp': '2025-09-10 02:30:42.304737', 'step': 3954, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 128], 'flops': 3797092544000}, 'timestamp': '2025-09-10 02:30:42.341047', 'step': 3954, 'epoch': 1} {'type': 'loss', 'content': 0.11320241540670395, 'timestamp': '2025-09-10 02:30:42.342606', 'step': 3955, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 3322488817984}, 'timestamp': '2025-09-10 02:30:42.376597', 'step': 3955, 'epoch': 1} {'type': 'loss', 'content': 0.09162913262844086, 'timestamp': '2025-09-10 02:30:42.400591', 'step': 3956, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 128], 'flops': 3797092544000}, 'timestamp': '2025-09-10 02:30:42.435938', 'step': 3956, 'epoch': 1} {'type': 'loss', 'content': 0.12076813727617264, 'timestamp': '2025-09-10 02:30:42.438155', 'step': 3957, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 2847885091968}, 'timestamp': '2025-09-10 02:30:42.471813', 'step': 3957, 'epoch': 1} {'type': 'loss', 'content': 0.18420732021331787, 'timestamp': '2025-09-10 02:30:42.476879', 'step': 3958, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 3322488817984}, 'timestamp': '2025-09-10 02:30:42.512937', 'step': 3958, 'epoch': 1} {'type': 'loss', 'content': 0.182316392660141, 'timestamp': '2025-09-10 02:30:42.517400', 'step': 3959, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 2847885091968}, 'timestamp': '2025-09-10 02:30:42.550057', 'step': 3959, 'epoch': 1} {'type': 'loss', 'content': 0.10509555041790009, 'timestamp': '2025-09-10 02:30:42.577832', 'step': 3960, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 2847885091968}, 'timestamp': '2025-09-10 02:30:42.611898', 'step': 3960, 'epoch': 1} {'type': 'loss', 'content': 0.2214735746383667, 'timestamp': '2025-09-10 02:30:42.617093', 'step': 3961, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 3322488817984}, 'timestamp': '2025-09-10 02:30:42.654655', 'step': 3961, 'epoch': 1} {'type': 'loss', 'content': 0.17147251963615417, 'timestamp': '2025-09-10 02:30:42.657238', 'step': 3962, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 144], 'flops': 4271696270016}, 'timestamp': '2025-09-10 02:30:42.690083', 'step': 3962, 'epoch': 1} {'type': 'loss', 'content': 0.18323121964931488, 'timestamp': '2025-09-10 02:30:42.692527', 'step': 3963, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 128], 'flops': 3797092544000}, 'timestamp': '2025-09-10 02:30:42.724118', 'step': 3963, 'epoch': 1} {'type': 'loss', 'content': 0.13533857464790344, 'timestamp': '2025-09-10 02:30:42.748059', 'step': 3964, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 2847885091968}, 'timestamp': '2025-09-10 02:30:42.781139', 'step': 3964, 'epoch': 1} {'type': 'loss', 'content': 0.1442348062992096, 'timestamp': '2025-09-10 02:30:42.783727', 'step': 3965, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 2847885091968}, 'timestamp': '2025-09-10 02:30:42.817034', 'step': 3965, 'epoch': 1} {'type': 'loss', 'content': 0.22337812185287476, 'timestamp': '2025-09-10 02:30:42.819103', 'step': 3966, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 3322488817984}, 'timestamp': '2025-09-10 02:30:42.850083', 'step': 3966, 'epoch': 1} {'type': 'loss', 'content': 0.09390285611152649, 'timestamp': '2025-09-10 02:30:42.853646', 'step': 3967, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 128], 'flops': 3797092544000}, 'timestamp': '2025-09-10 02:30:42.896592', 'step': 3967, 'epoch': 1} {'type': 'loss', 'content': 0.1520547866821289, 'timestamp': '2025-09-10 02:30:42.920123', 'step': 3968, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 3322488817984}, 'timestamp': '2025-09-10 02:30:42.955144', 'step': 3968, 'epoch': 1} {'type': 'loss', 'content': 0.2829248309135437, 'timestamp': '2025-09-10 02:30:42.962151', 'step': 3969, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 2847885091968}, 'timestamp': '2025-09-10 02:30:43.002827', 'step': 3969, 'epoch': 1} {'type': 'loss', 'content': 0.1943046897649765, 'timestamp': '2025-09-10 02:30:43.005158', 'step': 3970, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 160], 'flops': 4746299996032}, 'timestamp': '2025-09-10 02:30:43.049864', 'step': 3970, 'epoch': 1} {'type': 'loss', 'content': 0.11809173226356506, 'timestamp': '2025-09-10 02:30:43.055707', 'step': 3971, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 144], 'flops': 4271696270016}, 'timestamp': '2025-09-10 02:30:43.092144', 'step': 3971, 'epoch': 1} {'type': 'loss', 'content': 0.17260046303272247, 'timestamp': '2025-09-10 02:30:43.115978', 'step': 3972, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 144], 'flops': 4271696270016}, 'timestamp': '2025-09-10 02:30:43.148671', 'step': 3972, 'epoch': 1} {'type': 'loss', 'content': 0.20609444379806519, 'timestamp': '2025-09-10 02:30:43.152419', 'step': 3973, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 3322488817984}, 'timestamp': '2025-09-10 02:30:43.186953', 'step': 3973, 'epoch': 1} {'type': 'loss', 'content': 0.16217517852783203, 'timestamp': '2025-09-10 02:30:43.189323', 'step': 3974, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 144], 'flops': 4271696270016}, 'timestamp': '2025-09-10 02:30:43.221542', 'step': 3974, 'epoch': 1} {'type': 'loss', 'content': 0.2061905413866043, 'timestamp': '2025-09-10 02:30:43.223625', 'step': 3975, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 144], 'flops': 4271696270016}, 'timestamp': '2025-09-10 02:30:43.257869', 'step': 3975, 'epoch': 1} {'type': 'loss', 'content': 0.14371924102306366, 'timestamp': '2025-09-10 02:30:43.282227', 'step': 3976, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 2847885091968}, 'timestamp': '2025-09-10 02:30:43.314123', 'step': 3976, 'epoch': 1} {'type': 'loss', 'content': 0.162789985537529, 'timestamp': '2025-09-10 02:30:43.316497', 'step': 3977, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 2847885091968}, 'timestamp': '2025-09-10 02:30:43.348520', 'step': 3977, 'epoch': 1} {'type': 'loss', 'content': 0.14587414264678955, 'timestamp': '2025-09-10 02:30:43.350454', 'step': 3978, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 128], 'flops': 3797092544000}, 'timestamp': '2025-09-10 02:30:43.385081', 'step': 3978, 'epoch': 1} {'type': 'loss', 'content': 0.1761462539434433, 'timestamp': '2025-09-10 02:30:43.387606', 'step': 3979, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 128], 'flops': 3797092544000}, 'timestamp': '2025-09-10 02:30:43.419104', 'step': 3979, 'epoch': 1} {'type': 'loss', 'content': 0.19258050620555878, 'timestamp': '2025-09-10 02:30:43.442962', 'step': 3980, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 128], 'flops': 3797092544000}, 'timestamp': '2025-09-10 02:30:43.475905', 'step': 3980, 'epoch': 1} {'type': 'loss', 'content': 0.10481608659029007, 'timestamp': '2025-09-10 02:30:43.478436', 'step': 3981, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 192], 'flops': 5695507448064}, 'timestamp': '2025-09-10 02:30:43.519143', 'step': 3981, 'epoch': 1} {'type': 'loss', 'content': 0.158010333776474, 'timestamp': '2025-09-10 02:30:43.522935', 'step': 3982, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 3322488817984}, 'timestamp': '2025-09-10 02:30:43.554974', 'step': 3982, 'epoch': 1} {'type': 'loss', 'content': 0.131734699010849, 'timestamp': '2025-09-10 02:30:43.563722', 'step': 3983, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 3322488817984}, 'timestamp': '2025-09-10 02:30:43.595449', 'step': 3983, 'epoch': 1} {'type': 'loss', 'content': 0.09453931450843811, 'timestamp': '2025-09-10 02:30:43.619315', 'step': 3984, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 3322488817984}, 'timestamp': '2025-09-10 02:30:43.652887', 'step': 3984, 'epoch': 1} {'type': 'loss', 'content': 0.1453840732574463, 'timestamp': '2025-09-10 02:30:43.656922', 'step': 3985, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 144], 'flops': 4271696270016}, 'timestamp': '2025-09-10 02:30:43.692550', 'step': 3985, 'epoch': 1} {'type': 'loss', 'content': 0.12769319117069244, 'timestamp': '2025-09-10 02:30:43.697505', 'step': 3986, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 3322488817984}, 'timestamp': '2025-09-10 02:30:43.730076', 'step': 3986, 'epoch': 1} {'type': 'loss', 'content': 0.1154169961810112, 'timestamp': '2025-09-10 02:30:43.731877', 'step': 3987, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 2847885091968}, 'timestamp': '2025-09-10 02:30:43.762564', 'step': 3987, 'epoch': 1} {'type': 'loss', 'content': 0.09300840646028519, 'timestamp': '2025-09-10 02:30:43.794316', 'step': 3988, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 3322488817984}, 'timestamp': '2025-09-10 02:30:43.826462', 'step': 3988, 'epoch': 1} {'type': 'loss', 'content': 0.16524910926818848, 'timestamp': '2025-09-10 02:30:43.828730', 'step': 3989, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 176], 'flops': 5220903722048}, 'timestamp': '2025-09-10 02:30:43.860030', 'step': 3989, 'epoch': 1} {'type': 'loss', 'content': 0.13472586870193481, 'timestamp': '2025-09-10 02:30:43.863963', 'step': 3990, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 2847885091968}, 'timestamp': '2025-09-10 02:30:43.898914', 'step': 3990, 'epoch': 1} {'type': 'loss', 'content': 0.1813565343618393, 'timestamp': '2025-09-10 02:30:43.902258', 'step': 3991, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 128], 'flops': 3797092544000}, 'timestamp': '2025-09-10 02:30:43.935798', 'step': 3991, 'epoch': 1} {'type': 'loss', 'content': 0.12980729341506958, 'timestamp': '2025-09-10 02:30:43.959275', 'step': 3992, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 2847885091968}, 'timestamp': '2025-09-10 02:30:44.014949', 'step': 3992, 'epoch': 1} {'type': 'loss', 'content': 0.10620493441820145, 'timestamp': '2025-09-10 02:30:44.017265', 'step': 3993, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 128], 'flops': 3797092544000}, 'timestamp': '2025-09-10 02:30:44.049595', 'step': 3993, 'epoch': 1} {'type': 'loss', 'content': 0.14653046429157257, 'timestamp': '2025-09-10 02:30:44.052383', 'step': 3994, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 128], 'flops': 3797092544000}, 'timestamp': '2025-09-10 02:30:44.084603', 'step': 3994, 'epoch': 1} {'type': 'loss', 'content': 0.13193272054195404, 'timestamp': '2025-09-10 02:30:44.086913', 'step': 3995, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 3322488817984}, 'timestamp': '2025-09-10 02:30:44.120104', 'step': 3995, 'epoch': 1} {'type': 'loss', 'content': 0.19667476415634155, 'timestamp': '2025-09-10 02:30:44.144300', 'step': 3996, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 2847885091968}, 'timestamp': '2025-09-10 02:30:44.176128', 'step': 3996, 'epoch': 1} {'type': 'loss', 'content': 0.13852731883525848, 'timestamp': '2025-09-10 02:30:44.178651', 'step': 3997, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 128], 'flops': 3797092544000}, 'timestamp': '2025-09-10 02:30:44.220327', 'step': 3997, 'epoch': 1} {'type': 'loss', 'content': 0.223822221159935, 'timestamp': '2025-09-10 02:30:44.222601', 'step': 3998, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 3322488817984}, 'timestamp': '2025-09-10 02:30:44.254772', 'step': 3998, 'epoch': 1} {'type': 'loss', 'content': 0.13085654377937317, 'timestamp': '2025-09-10 02:30:44.257096', 'step': 3999, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 80], 'flops': 2373281365952}, 'timestamp': '2025-09-10 02:30:44.290010', 'step': 3999, 'epoch': 1} {'type': 'loss', 'content': 0.2129199206829071, 'timestamp': '2025-09-10 02:30:44.313399', 'step': 4000, 'epoch': 1} {'type': 'info', 'content': 'Checkpoint saved at step 4000', 'timestamp': '2025-09-10 02:30:49.927256', 'step': 4000, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 128], 'flops': 3797092544000}, 'timestamp': '2025-09-10 02:30:49.961635', 'step': 4000, 'epoch': 1} {'type': 'loss', 'content': 0.15634910762310028, 'timestamp': '2025-09-10 02:30:49.964684', 'step': 4001, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 2847885091968}, 'timestamp': '2025-09-10 02:30:49.997359', 'step': 4001, 'epoch': 1} {'type': 'loss', 'content': 0.11737383157014847, 'timestamp': '2025-09-10 02:30:50.000592', 'step': 4002, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 3322488817984}, 'timestamp': '2025-09-10 02:30:50.035732', 'step': 4002, 'epoch': 1} {'type': 'loss', 'content': 0.1374484747648239, 'timestamp': '2025-09-10 02:30:50.039339', 'step': 4003, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 144], 'flops': 4271696270016}, 'timestamp': '2025-09-10 02:30:50.086939', 'step': 4003, 'epoch': 1} {'type': 'loss', 'content': 0.20805181562900543, 'timestamp': '2025-09-10 02:30:50.110305', 'step': 4004, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 144], 'flops': 4271696270016}, 'timestamp': '2025-09-10 02:30:50.143338', 'step': 4004, 'epoch': 1} {'type': 'loss', 'content': 0.21068809926509857, 'timestamp': '2025-09-10 02:30:50.145807', 'step': 4005, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 128], 'flops': 3797092544000}, 'timestamp': '2025-09-10 02:30:50.180546', 'step': 4005, 'epoch': 1} {'type': 'loss', 'content': 0.20670904219150543, 'timestamp': '2025-09-10 02:30:50.183085', 'step': 4006, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 2847885091968}, 'timestamp': '2025-09-10 02:30:50.215025', 'step': 4006, 'epoch': 1} {'type': 'loss', 'content': 0.1710900515317917, 'timestamp': '2025-09-10 02:30:50.217183', 'step': 4007, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 3322488817984}, 'timestamp': '2025-09-10 02:30:50.249783', 'step': 4007, 'epoch': 1} {'type': 'loss', 'content': 0.13611000776290894, 'timestamp': '2025-09-10 02:30:50.273568', 'step': 4008, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 2847885091968}, 'timestamp': '2025-09-10 02:30:50.308764', 'step': 4008, 'epoch': 1} {'type': 'loss', 'content': 0.2308662086725235, 'timestamp': '2025-09-10 02:30:50.311239', 'step': 4009, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 128], 'flops': 3797092544000}, 'timestamp': '2025-09-10 02:30:50.343383', 'step': 4009, 'epoch': 1} {'type': 'loss', 'content': 0.12109295278787613, 'timestamp': '2025-09-10 02:30:50.346206', 'step': 4010, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 80], 'flops': 2373281365952}, 'timestamp': '2025-09-10 02:30:50.380881', 'step': 4010, 'epoch': 1} {'type': 'loss', 'content': 0.22895343601703644, 'timestamp': '2025-09-10 02:30:50.383393', 'step': 4011, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 2847885091968}, 'timestamp': '2025-09-10 02:30:50.416141', 'step': 4011, 'epoch': 1} {'type': 'loss', 'content': 0.10315114259719849, 'timestamp': '2025-09-10 02:30:50.441064', 'step': 4012, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 2847885091968}, 'timestamp': '2025-09-10 02:30:50.477944', 'step': 4012, 'epoch': 1} {'type': 'loss', 'content': 0.2572678029537201, 'timestamp': '2025-09-10 02:30:50.481856', 'step': 4013, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 128], 'flops': 3797092544000}, 'timestamp': '2025-09-10 02:30:50.514882', 'step': 4013, 'epoch': 1} {'type': 'loss', 'content': 0.13272036612033844, 'timestamp': '2025-09-10 02:30:50.517016', 'step': 4014, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 144], 'flops': 4271696270016}, 'timestamp': '2025-09-10 02:30:50.548463', 'step': 4014, 'epoch': 1} {'type': 'loss', 'content': 0.14795571565628052, 'timestamp': '2025-09-10 02:30:50.550919', 'step': 4015, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 144], 'flops': 4271696270016}, 'timestamp': '2025-09-10 02:30:50.584121', 'step': 4015, 'epoch': 1} {'type': 'loss', 'content': 0.1943153440952301, 'timestamp': '2025-09-10 02:30:50.608074', 'step': 4016, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 3322488817984}, 'timestamp': '2025-09-10 02:30:50.640988', 'step': 4016, 'epoch': 1} {'type': 'loss', 'content': 0.10103365033864975, 'timestamp': '2025-09-10 02:30:50.644814', 'step': 4017, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 128], 'flops': 3797092544000}, 'timestamp': '2025-09-10 02:30:50.676084', 'step': 4017, 'epoch': 1} {'type': 'loss', 'content': 0.14627547562122345, 'timestamp': '2025-09-10 02:30:50.682513', 'step': 4018, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 3322488817984}, 'timestamp': '2025-09-10 02:30:50.739219', 'step': 4018, 'epoch': 1} {'type': 'loss', 'content': 0.16807225346565247, 'timestamp': '2025-09-10 02:30:50.742653', 'step': 4019, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 3322488817984}, 'timestamp': '2025-09-10 02:30:50.776576', 'step': 4019, 'epoch': 1} {'type': 'loss', 'content': 0.10647650808095932, 'timestamp': '2025-09-10 02:30:50.803162', 'step': 4020, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 2847885091968}, 'timestamp': '2025-09-10 02:30:50.842442', 'step': 4020, 'epoch': 1} {'type': 'loss', 'content': 0.14131955802440643, 'timestamp': '2025-09-10 02:30:50.846912', 'step': 4021, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 128], 'flops': 3797092544000}, 'timestamp': '2025-09-10 02:30:50.888652', 'step': 4021, 'epoch': 1} {'type': 'loss', 'content': 0.31758204102516174, 'timestamp': '2025-09-10 02:30:50.892636', 'step': 4022, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 3322488817984}, 'timestamp': '2025-09-10 02:30:50.948675', 'step': 4022, 'epoch': 1} {'type': 'loss', 'content': 0.12387603521347046, 'timestamp': '2025-09-10 02:30:50.970019', 'step': 4023, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 128], 'flops': 3797092544000}, 'timestamp': '2025-09-10 02:30:51.059074', 'step': 4023, 'epoch': 1} {'type': 'loss', 'content': 0.1946994960308075, 'timestamp': '2025-09-10 02:30:51.085129', 'step': 4024, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 3322488817984}, 'timestamp': '2025-09-10 02:30:51.130618', 'step': 4024, 'epoch': 1} {'type': 'loss', 'content': 0.11671885848045349, 'timestamp': '2025-09-10 02:30:51.141026', 'step': 4025, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 2847885091968}, 'timestamp': '2025-09-10 02:30:51.186552', 'step': 4025, 'epoch': 1} {'type': 'loss', 'content': 0.13575464487075806, 'timestamp': '2025-09-10 02:30:51.189080', 'step': 4026, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 3322488817984}, 'timestamp': '2025-09-10 02:30:51.223992', 'step': 4026, 'epoch': 1} {'type': 'loss', 'content': 0.1717621088027954, 'timestamp': '2025-09-10 02:30:51.240303', 'step': 4027, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 2847885091968}, 'timestamp': '2025-09-10 02:30:51.326787', 'step': 4027, 'epoch': 1} {'type': 'loss', 'content': 0.15919619798660278, 'timestamp': '2025-09-10 02:30:51.353507', 'step': 4028, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 3322488817984}, 'timestamp': '2025-09-10 02:30:51.386867', 'step': 4028, 'epoch': 1} {'type': 'loss', 'content': 0.19768279790878296, 'timestamp': '2025-09-10 02:30:51.390299', 'step': 4029, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 128], 'flops': 3797092544000}, 'timestamp': '2025-09-10 02:30:51.422864', 'step': 4029, 'epoch': 1} {'type': 'loss', 'content': 0.14776171743869781, 'timestamp': '2025-09-10 02:30:51.427348', 'step': 4030, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 144], 'flops': 4271696270016}, 'timestamp': '2025-09-10 02:30:51.461413', 'step': 4030, 'epoch': 1} {'type': 'loss', 'content': 0.17344684898853302, 'timestamp': '2025-09-10 02:30:51.465184', 'step': 4031, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 128], 'flops': 3797092544000}, 'timestamp': '2025-09-10 02:30:51.512443', 'step': 4031, 'epoch': 1} {'type': 'loss', 'content': 0.17137114703655243, 'timestamp': '2025-09-10 02:30:51.536828', 'step': 4032, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 128], 'flops': 3797092544000}, 'timestamp': '2025-09-10 02:30:51.570423', 'step': 4032, 'epoch': 1} {'type': 'loss', 'content': 0.17705734074115753, 'timestamp': '2025-09-10 02:30:51.572900', 'step': 4033, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 144], 'flops': 4271696270016}, 'timestamp': '2025-09-10 02:30:51.604667', 'step': 4033, 'epoch': 1} {'type': 'loss', 'content': 0.18991215527057648, 'timestamp': '2025-09-10 02:30:51.619896', 'step': 4034, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 3322488817984}, 'timestamp': '2025-09-10 02:30:51.720104', 'step': 4034, 'epoch': 1} {'type': 'loss', 'content': 0.17993289232254028, 'timestamp': '2025-09-10 02:30:51.737364', 'step': 4035, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 80], 'flops': 2373281365952}, 'timestamp': '2025-09-10 02:30:51.820528', 'step': 4035, 'epoch': 1} {'type': 'loss', 'content': 0.23619617521762848, 'timestamp': '2025-09-10 02:30:51.856624', 'step': 4036, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 3322488817984}, 'timestamp': '2025-09-10 02:30:51.910748', 'step': 4036, 'epoch': 1} {'type': 'loss', 'content': 0.17733058333396912, 'timestamp': '2025-09-10 02:30:51.917780', 'step': 4037, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 3322488817984}, 'timestamp': '2025-09-10 02:30:51.958367', 'step': 4037, 'epoch': 1} {'type': 'loss', 'content': 0.16715610027313232, 'timestamp': '2025-09-10 02:30:51.960981', 'step': 4038, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 3322488817984}, 'timestamp': '2025-09-10 02:30:51.994850', 'step': 4038, 'epoch': 1} {'type': 'loss', 'content': 0.1479576975107193, 'timestamp': '2025-09-10 02:30:52.000184', 'step': 4039, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 2847885091968}, 'timestamp': '2025-09-10 02:30:52.034906', 'step': 4039, 'epoch': 1} {'type': 'loss', 'content': 0.0952206701040268, 'timestamp': '2025-09-10 02:30:52.060415', 'step': 4040, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 3322488817984}, 'timestamp': '2025-09-10 02:30:52.100132', 'step': 4040, 'epoch': 1} {'type': 'loss', 'content': 0.1586407870054245, 'timestamp': '2025-09-10 02:30:52.111112', 'step': 4041, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 3322488817984}, 'timestamp': '2025-09-10 02:30:52.153080', 'step': 4041, 'epoch': 1} {'type': 'loss', 'content': 0.13151107728481293, 'timestamp': '2025-09-10 02:30:52.167677', 'step': 4042, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 3322488817984}, 'timestamp': '2025-09-10 02:30:52.246467', 'step': 4042, 'epoch': 1} {'type': 'loss', 'content': 0.20426134765148163, 'timestamp': '2025-09-10 02:30:52.264097', 'step': 4043, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 128], 'flops': 3797092544000}, 'timestamp': '2025-09-10 02:30:52.328504', 'step': 4043, 'epoch': 1} {'type': 'loss', 'content': 0.161401629447937, 'timestamp': '2025-09-10 02:30:52.369899', 'step': 4044, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 144], 'flops': 4271696270016}, 'timestamp': '2025-09-10 02:30:52.442362', 'step': 4044, 'epoch': 1} {'type': 'loss', 'content': 0.14877933263778687, 'timestamp': '2025-09-10 02:30:52.445940', 'step': 4045, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 3322488817984}, 'timestamp': '2025-09-10 02:30:52.481048', 'step': 4045, 'epoch': 1} {'type': 'loss', 'content': 0.11194940656423569, 'timestamp': '2025-09-10 02:30:52.485955', 'step': 4046, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 2847885091968}, 'timestamp': '2025-09-10 02:30:52.526373', 'step': 4046, 'epoch': 1} {'type': 'loss', 'content': 0.12978409230709076, 'timestamp': '2025-09-10 02:30:52.535406', 'step': 4047, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 2847885091968}, 'timestamp': '2025-09-10 02:30:52.578824', 'step': 4047, 'epoch': 1} {'type': 'loss', 'content': 0.22451114654541016, 'timestamp': '2025-09-10 02:30:52.603444', 'step': 4048, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 3322488817984}, 'timestamp': '2025-09-10 02:30:52.643291', 'step': 4048, 'epoch': 1} {'type': 'loss', 'content': 0.1285315603017807, 'timestamp': '2025-09-10 02:30:52.646103', 'step': 4049, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 3322488817984}, 'timestamp': '2025-09-10 02:30:52.680103', 'step': 4049, 'epoch': 1} {'type': 'loss', 'content': 0.20096446573734283, 'timestamp': '2025-09-10 02:30:52.684341', 'step': 4050, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 3322488817984}, 'timestamp': '2025-09-10 02:30:52.715666', 'step': 4050, 'epoch': 1} {'type': 'loss', 'content': 0.1525275707244873, 'timestamp': '2025-09-10 02:30:52.718310', 'step': 4051, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 128], 'flops': 3797092544000}, 'timestamp': '2025-09-10 02:30:52.753527', 'step': 4051, 'epoch': 1} {'type': 'loss', 'content': 0.1766415238380432, 'timestamp': '2025-09-10 02:30:52.777446', 'step': 4052, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 2847885091968}, 'timestamp': '2025-09-10 02:30:52.815225', 'step': 4052, 'epoch': 1} {'type': 'loss', 'content': 0.07945606857538223, 'timestamp': '2025-09-10 02:30:52.818855', 'step': 4053, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 3322488817984}, 'timestamp': '2025-09-10 02:30:52.850315', 'step': 4053, 'epoch': 1} {'type': 'loss', 'content': 0.2376088798046112, 'timestamp': '2025-09-10 02:30:52.852778', 'step': 4054, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 80], 'flops': 2373281365952}, 'timestamp': '2025-09-10 02:30:52.884630', 'step': 4054, 'epoch': 1} {'type': 'loss', 'content': 0.14381344616413116, 'timestamp': '2025-09-10 02:30:52.888017', 'step': 4055, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 3322488817984}, 'timestamp': '2025-09-10 02:30:52.920503', 'step': 4055, 'epoch': 1} {'type': 'loss', 'content': 0.16189086437225342, 'timestamp': '2025-09-10 02:30:52.945983', 'step': 4056, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 3322488817984}, 'timestamp': '2025-09-10 02:30:52.983867', 'step': 4056, 'epoch': 1} {'type': 'loss', 'content': 0.21383024752140045, 'timestamp': '2025-09-10 02:30:52.985812', 'step': 4057, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 3322488817984}, 'timestamp': '2025-09-10 02:30:53.017601', 'step': 4057, 'epoch': 1} {'type': 'loss', 'content': 0.1985180824995041, 'timestamp': '2025-09-10 02:30:53.022276', 'step': 4058, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 144], 'flops': 4271696270016}, 'timestamp': '2025-09-10 02:30:53.063127', 'step': 4058, 'epoch': 1} {'type': 'loss', 'content': 0.18851959705352783, 'timestamp': '2025-09-10 02:30:53.067210', 'step': 4059, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 3322488817984}, 'timestamp': '2025-09-10 02:30:53.104355', 'step': 4059, 'epoch': 1} {'type': 'loss', 'content': 0.18953417241573334, 'timestamp': '2025-09-10 02:30:53.130051', 'step': 4060, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 2847885091968}, 'timestamp': '2025-09-10 02:30:53.162095', 'step': 4060, 'epoch': 1} {'type': 'loss', 'content': 0.23589755594730377, 'timestamp': '2025-09-10 02:30:53.164125', 'step': 4061, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 128], 'flops': 3797092544000}, 'timestamp': '2025-09-10 02:30:53.195659', 'step': 4061, 'epoch': 1} {'type': 'loss', 'content': 0.16539353132247925, 'timestamp': '2025-09-10 02:30:53.198041', 'step': 4062, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 3322488817984}, 'timestamp': '2025-09-10 02:30:53.229893', 'step': 4062, 'epoch': 1} {'type': 'loss', 'content': 0.19134993851184845, 'timestamp': '2025-09-10 02:30:53.232156', 'step': 4063, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 128], 'flops': 3797092544000}, 'timestamp': '2025-09-10 02:30:53.263957', 'step': 4063, 'epoch': 1} {'type': 'loss', 'content': 0.22995109856128693, 'timestamp': '2025-09-10 02:30:53.287525', 'step': 4064, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 3322488817984}, 'timestamp': '2025-09-10 02:30:53.320449', 'step': 4064, 'epoch': 1} {'type': 'loss', 'content': 0.14072947204113007, 'timestamp': '2025-09-10 02:30:53.322362', 'step': 4065, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 3322488817984}, 'timestamp': '2025-09-10 02:30:53.355053', 'step': 4065, 'epoch': 1} {'type': 'loss', 'content': 0.16365946829319, 'timestamp': '2025-09-10 02:30:53.357386', 'step': 4066, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 128], 'flops': 3797092544000}, 'timestamp': '2025-09-10 02:30:53.388356', 'step': 4066, 'epoch': 1} {'type': 'loss', 'content': 0.17673633992671967, 'timestamp': '2025-09-10 02:30:53.390728', 'step': 4067, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 3322488817984}, 'timestamp': '2025-09-10 02:30:53.421902', 'step': 4067, 'epoch': 1} {'type': 'loss', 'content': 0.1098785400390625, 'timestamp': '2025-09-10 02:30:53.447061', 'step': 4068, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 128], 'flops': 3797092544000}, 'timestamp': '2025-09-10 02:30:53.478013', 'step': 4068, 'epoch': 1} {'type': 'loss', 'content': 0.1844097077846527, 'timestamp': '2025-09-10 02:30:53.480009', 'step': 4069, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 160], 'flops': 4746299996032}, 'timestamp': '2025-09-10 02:30:53.510650', 'step': 4069, 'epoch': 1} {'type': 'loss', 'content': 0.13183429837226868, 'timestamp': '2025-09-10 02:30:53.513401', 'step': 4070, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 3322488817984}, 'timestamp': '2025-09-10 02:30:53.545087', 'step': 4070, 'epoch': 1} {'type': 'loss', 'content': 0.11103082448244095, 'timestamp': '2025-09-10 02:30:53.548725', 'step': 4071, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 3322488817984}, 'timestamp': '2025-09-10 02:30:53.580485', 'step': 4071, 'epoch': 1} {'type': 'loss', 'content': 0.14868558943271637, 'timestamp': '2025-09-10 02:30:53.604402', 'step': 4072, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 3322488817984}, 'timestamp': '2025-09-10 02:30:53.635489', 'step': 4072, 'epoch': 1} {'type': 'loss', 'content': 0.2434757947921753, 'timestamp': '2025-09-10 02:30:53.637965', 'step': 4073, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 2847885091968}, 'timestamp': '2025-09-10 02:30:53.669011', 'step': 4073, 'epoch': 1} {'type': 'loss', 'content': 0.08656105399131775, 'timestamp': '2025-09-10 02:30:53.671676', 'step': 4074, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 80], 'flops': 2373281365952}, 'timestamp': '2025-09-10 02:30:53.704153', 'step': 4074, 'epoch': 1} {'type': 'loss', 'content': 0.18163253366947174, 'timestamp': '2025-09-10 02:30:53.706664', 'step': 4075, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 144], 'flops': 4271696270016}, 'timestamp': '2025-09-10 02:30:53.738352', 'step': 4075, 'epoch': 1} {'type': 'loss', 'content': 0.2031688094139099, 'timestamp': '2025-09-10 02:30:53.762208', 'step': 4076, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 128], 'flops': 3797092544000}, 'timestamp': '2025-09-10 02:30:53.792626', 'step': 4076, 'epoch': 1} {'type': 'loss', 'content': 0.17371971905231476, 'timestamp': '2025-09-10 02:30:53.794920', 'step': 4077, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 3322488817984}, 'timestamp': '2025-09-10 02:30:53.826022', 'step': 4077, 'epoch': 1} {'type': 'loss', 'content': 0.14065968990325928, 'timestamp': '2025-09-10 02:30:53.828373', 'step': 4078, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 3322488817984}, 'timestamp': '2025-09-10 02:30:53.861608', 'step': 4078, 'epoch': 1} {'type': 'loss', 'content': 0.11667321622371674, 'timestamp': '2025-09-10 02:30:53.863918', 'step': 4079, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 3322488817984}, 'timestamp': '2025-09-10 02:30:53.894463', 'step': 4079, 'epoch': 1} {'type': 'loss', 'content': 0.2710956335067749, 'timestamp': '2025-09-10 02:30:53.917978', 'step': 4080, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 3322488817984}, 'timestamp': '2025-09-10 02:30:53.948626', 'step': 4080, 'epoch': 1} {'type': 'loss', 'content': 0.16275392472743988, 'timestamp': '2025-09-10 02:30:53.951049', 'step': 4081, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 3322488817984}, 'timestamp': '2025-09-10 02:30:53.982490', 'step': 4081, 'epoch': 1} {'type': 'loss', 'content': 0.2032880187034607, 'timestamp': '2025-09-10 02:30:53.984781', 'step': 4082, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 3322488817984}, 'timestamp': '2025-09-10 02:30:54.015866', 'step': 4082, 'epoch': 1} {'type': 'loss', 'content': 0.1172296553850174, 'timestamp': '2025-09-10 02:30:54.018116', 'step': 4083, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 144], 'flops': 4271696270016}, 'timestamp': '2025-09-10 02:30:54.048768', 'step': 4083, 'epoch': 1} {'type': 'loss', 'content': 0.17880967259407043, 'timestamp': '2025-09-10 02:30:54.072494', 'step': 4084, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 128], 'flops': 3797092544000}, 'timestamp': '2025-09-10 02:30:54.122379', 'step': 4084, 'epoch': 1} {'type': 'loss', 'content': 0.18616671860218048, 'timestamp': '2025-09-10 02:30:54.126794', 'step': 4085, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 3322488817984}, 'timestamp': '2025-09-10 02:30:54.171073', 'step': 4085, 'epoch': 1} {'type': 'loss', 'content': 0.1473257839679718, 'timestamp': '2025-09-10 02:30:54.176617', 'step': 4086, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 128], 'flops': 3797092544000}, 'timestamp': '2025-09-10 02:30:54.215822', 'step': 4086, 'epoch': 1} {'type': 'loss', 'content': 0.16854551434516907, 'timestamp': '2025-09-10 02:30:54.218311', 'step': 4087, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 128], 'flops': 3797092544000}, 'timestamp': '2025-09-10 02:30:54.250762', 'step': 4087, 'epoch': 1} {'type': 'loss', 'content': 0.18504922091960907, 'timestamp': '2025-09-10 02:30:54.274832', 'step': 4088, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 3322488817984}, 'timestamp': '2025-09-10 02:30:54.305588', 'step': 4088, 'epoch': 1} {'type': 'loss', 'content': 0.09459348767995834, 'timestamp': '2025-09-10 02:30:54.307580', 'step': 4089, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 2847885091968}, 'timestamp': '2025-09-10 02:30:54.338028', 'step': 4089, 'epoch': 1} {'type': 'loss', 'content': 0.057787202298641205, 'timestamp': '2025-09-10 02:30:54.340389', 'step': 4090, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 2847885091968}, 'timestamp': '2025-09-10 02:30:54.371354', 'step': 4090, 'epoch': 1} {'type': 'loss', 'content': 0.163971409201622, 'timestamp': '2025-09-10 02:30:54.373543', 'step': 4091, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 160], 'flops': 4746299996032}, 'timestamp': '2025-09-10 02:30:54.404285', 'step': 4091, 'epoch': 1} {'type': 'loss', 'content': 0.12691469490528107, 'timestamp': '2025-09-10 02:30:54.428159', 'step': 4092, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 2847885091968}, 'timestamp': '2025-09-10 02:30:54.459136', 'step': 4092, 'epoch': 1} {'type': 'loss', 'content': 0.19350890815258026, 'timestamp': '2025-09-10 02:30:54.461319', 'step': 4093, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 3322488817984}, 'timestamp': '2025-09-10 02:30:54.492380', 'step': 4093, 'epoch': 1} {'type': 'loss', 'content': 0.10298188775777817, 'timestamp': '2025-09-10 02:30:54.494680', 'step': 4094, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 2847885091968}, 'timestamp': '2025-09-10 02:30:54.525811', 'step': 4094, 'epoch': 1} {'type': 'loss', 'content': 0.1476115584373474, 'timestamp': '2025-09-10 02:30:54.528132', 'step': 4095, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 3322488817984}, 'timestamp': '2025-09-10 02:30:54.558664', 'step': 4095, 'epoch': 1} {'type': 'loss', 'content': 0.18919290602207184, 'timestamp': '2025-09-10 02:30:54.582247', 'step': 4096, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 3322488817984}, 'timestamp': '2025-09-10 02:30:54.614280', 'step': 4096, 'epoch': 1} {'type': 'loss', 'content': 0.12141860276460648, 'timestamp': '2025-09-10 02:30:54.616914', 'step': 4097, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 160], 'flops': 4746299996032}, 'timestamp': '2025-09-10 02:30:54.647886', 'step': 4097, 'epoch': 1} {'type': 'loss', 'content': 0.1511303335428238, 'timestamp': '2025-09-10 02:30:54.650476', 'step': 4098, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 3322488817984}, 'timestamp': '2025-09-10 02:30:54.681405', 'step': 4098, 'epoch': 1} {'type': 'loss', 'content': 0.21917589008808136, 'timestamp': '2025-09-10 02:30:54.683705', 'step': 4099, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 3322488817984}, 'timestamp': '2025-09-10 02:30:54.717286', 'step': 4099, 'epoch': 1} {'type': 'loss', 'content': 0.16345733404159546, 'timestamp': '2025-09-10 02:30:54.741370', 'step': 4100, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 3322488817984}, 'timestamp': '2025-09-10 02:30:54.773472', 'step': 4100, 'epoch': 1} {'type': 'loss', 'content': 0.17298541963100433, 'timestamp': '2025-09-10 02:30:54.775714', 'step': 4101, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 144], 'flops': 4271696270016}, 'timestamp': '2025-09-10 02:30:54.810597', 'step': 4101, 'epoch': 1} {'type': 'loss', 'content': 0.1522553712129593, 'timestamp': '2025-09-10 02:30:54.812659', 'step': 4102, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 3322488817984}, 'timestamp': '2025-09-10 02:30:54.843715', 'step': 4102, 'epoch': 1} {'type': 'loss', 'content': 0.21289344131946564, 'timestamp': '2025-09-10 02:30:54.846363', 'step': 4103, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 2847885091968}, 'timestamp': '2025-09-10 02:30:54.877054', 'step': 4103, 'epoch': 1} {'type': 'loss', 'content': 0.10360848158597946, 'timestamp': '2025-09-10 02:30:54.900683', 'step': 4104, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 144], 'flops': 4271696270016}, 'timestamp': '2025-09-10 02:30:54.932400', 'step': 4104, 'epoch': 1} {'type': 'loss', 'content': 0.0885457694530487, 'timestamp': '2025-09-10 02:30:54.934855', 'step': 4105, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 128], 'flops': 3797092544000}, 'timestamp': '2025-09-10 02:30:54.966134', 'step': 4105, 'epoch': 1} {'type': 'loss', 'content': 0.17319358885288239, 'timestamp': '2025-09-10 02:30:54.968379', 'step': 4106, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 2847885091968}, 'timestamp': '2025-09-10 02:30:54.998171', 'step': 4106, 'epoch': 1} {'type': 'loss', 'content': 0.19086357951164246, 'timestamp': '2025-09-10 02:30:55.000426', 'step': 4107, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 3322488817984}, 'timestamp': '2025-09-10 02:30:55.033754', 'step': 4107, 'epoch': 1} {'type': 'loss', 'content': 0.16695837676525116, 'timestamp': '2025-09-10 02:30:55.059964', 'step': 4108, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 128], 'flops': 3797092544000}, 'timestamp': '2025-09-10 02:30:55.092953', 'step': 4108, 'epoch': 1} {'type': 'loss', 'content': 0.17362438142299652, 'timestamp': '2025-09-10 02:30:55.095737', 'step': 4109, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 2847885091968}, 'timestamp': '2025-09-10 02:30:55.127083', 'step': 4109, 'epoch': 1} {'type': 'loss', 'content': 0.1358492076396942, 'timestamp': '2025-09-10 02:30:55.129313', 'step': 4110, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 2847885091968}, 'timestamp': '2025-09-10 02:30:55.160328', 'step': 4110, 'epoch': 1} {'type': 'loss', 'content': 0.2753163278102875, 'timestamp': '2025-09-10 02:30:55.162636', 'step': 4111, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 128], 'flops': 3797092544000}, 'timestamp': '2025-09-10 02:30:55.194015', 'step': 4111, 'epoch': 1} {'type': 'loss', 'content': 0.11409542709589005, 'timestamp': '2025-09-10 02:30:55.217582', 'step': 4112, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 3322488817984}, 'timestamp': '2025-09-10 02:30:55.248684', 'step': 4112, 'epoch': 1} {'type': 'loss', 'content': 0.1376177817583084, 'timestamp': '2025-09-10 02:30:55.251204', 'step': 4113, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 160], 'flops': 4746299996032}, 'timestamp': '2025-09-10 02:30:55.282658', 'step': 4113, 'epoch': 1} {'type': 'loss', 'content': 0.14938755333423615, 'timestamp': '2025-09-10 02:30:55.285169', 'step': 4114, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 2847885091968}, 'timestamp': '2025-09-10 02:30:55.317201', 'step': 4114, 'epoch': 1} {'type': 'loss', 'content': 0.13340571522712708, 'timestamp': '2025-09-10 02:30:55.319437', 'step': 4115, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 80], 'flops': 2373281365952}, 'timestamp': '2025-09-10 02:30:55.350328', 'step': 4115, 'epoch': 1} {'type': 'loss', 'content': 0.09759601950645447, 'timestamp': '2025-09-10 02:30:55.374306', 'step': 4116, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 2847885091968}, 'timestamp': '2025-09-10 02:30:55.405578', 'step': 4116, 'epoch': 1} {'type': 'loss', 'content': 0.17713500559329987, 'timestamp': '2025-09-10 02:30:55.407878', 'step': 4117, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 144], 'flops': 4271696270016}, 'timestamp': '2025-09-10 02:30:55.440204', 'step': 4117, 'epoch': 1} {'type': 'loss', 'content': 0.11204838007688522, 'timestamp': '2025-09-10 02:30:55.442637', 'step': 4118, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 3322488817984}, 'timestamp': '2025-09-10 02:30:55.473882', 'step': 4118, 'epoch': 1} {'type': 'loss', 'content': 0.18375509977340698, 'timestamp': '2025-09-10 02:30:55.476972', 'step': 4119, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 3322488817984}, 'timestamp': '2025-09-10 02:30:55.508502', 'step': 4119, 'epoch': 1} {'type': 'loss', 'content': 0.14381366968154907, 'timestamp': '2025-09-10 02:30:55.532119', 'step': 4120, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 3322488817984}, 'timestamp': '2025-09-10 02:30:55.562563', 'step': 4120, 'epoch': 1} {'type': 'loss', 'content': 0.19287531077861786, 'timestamp': '2025-09-10 02:30:55.565788', 'step': 4121, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 128], 'flops': 3797092544000}, 'timestamp': '2025-09-10 02:30:55.597549', 'step': 4121, 'epoch': 1} {'type': 'loss', 'content': 0.13750632107257843, 'timestamp': '2025-09-10 02:30:55.602661', 'step': 4122, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 3322488817984}, 'timestamp': '2025-09-10 02:30:55.634358', 'step': 4122, 'epoch': 1} {'type': 'loss', 'content': 0.2034996747970581, 'timestamp': '2025-09-10 02:30:55.636745', 'step': 4123, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 3322488817984}, 'timestamp': '2025-09-10 02:30:55.667421', 'step': 4123, 'epoch': 1} {'type': 'loss', 'content': 0.19086411595344543, 'timestamp': '2025-09-10 02:30:55.691032', 'step': 4124, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 128], 'flops': 3797092544000}, 'timestamp': '2025-09-10 02:30:55.722121', 'step': 4124, 'epoch': 1} {'type': 'loss', 'content': 0.18743760883808136, 'timestamp': '2025-09-10 02:30:55.724384', 'step': 4125, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 3322488817984}, 'timestamp': '2025-09-10 02:30:55.756130', 'step': 4125, 'epoch': 1} {'type': 'loss', 'content': 0.20463243126869202, 'timestamp': '2025-09-10 02:30:55.758560', 'step': 4126, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 2847885091968}, 'timestamp': '2025-09-10 02:30:55.790924', 'step': 4126, 'epoch': 1} {'type': 'loss', 'content': 0.10651064664125443, 'timestamp': '2025-09-10 02:30:55.793331', 'step': 4127, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 80], 'flops': 2373281365952}, 'timestamp': '2025-09-10 02:30:55.824342', 'step': 4127, 'epoch': 1} {'type': 'loss', 'content': 0.13222834467887878, 'timestamp': '2025-09-10 02:30:55.848122', 'step': 4128, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 128], 'flops': 3797092544000}, 'timestamp': '2025-09-10 02:30:55.881587', 'step': 4128, 'epoch': 1} {'type': 'loss', 'content': 0.08024729043245316, 'timestamp': '2025-09-10 02:30:55.883997', 'step': 4129, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 2847885091968}, 'timestamp': '2025-09-10 02:30:55.915541', 'step': 4129, 'epoch': 1} {'type': 'loss', 'content': 0.18599846959114075, 'timestamp': '2025-09-10 02:30:55.917750', 'step': 4130, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 3322488817984}, 'timestamp': '2025-09-10 02:30:55.949846', 'step': 4130, 'epoch': 1} {'type': 'loss', 'content': 0.18016396462917328, 'timestamp': '2025-09-10 02:30:55.951886', 'step': 4131, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 128], 'flops': 3797092544000}, 'timestamp': '2025-09-10 02:30:55.983981', 'step': 4131, 'epoch': 1} {'type': 'loss', 'content': 0.13006962835788727, 'timestamp': '2025-09-10 02:30:56.007302', 'step': 4132, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 128], 'flops': 3797092544000}, 'timestamp': '2025-09-10 02:30:56.038184', 'step': 4132, 'epoch': 1} {'type': 'loss', 'content': 0.2188817411661148, 'timestamp': '2025-09-10 02:30:56.040648', 'step': 4133, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 144], 'flops': 4271696270016}, 'timestamp': '2025-09-10 02:30:56.072883', 'step': 4133, 'epoch': 1} {'type': 'loss', 'content': 0.1749047040939331, 'timestamp': '2025-09-10 02:30:56.074933', 'step': 4134, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 3322488817984}, 'timestamp': '2025-09-10 02:30:56.106406', 'step': 4134, 'epoch': 1} {'type': 'loss', 'content': 0.1769828200340271, 'timestamp': '2025-09-10 02:30:56.108821', 'step': 4135, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 3322488817984}, 'timestamp': '2025-09-10 02:30:56.139929', 'step': 4135, 'epoch': 1} {'type': 'loss', 'content': 0.19946865737438202, 'timestamp': '2025-09-10 02:30:56.163506', 'step': 4136, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 3322488817984}, 'timestamp': '2025-09-10 02:30:56.195031', 'step': 4136, 'epoch': 1} {'type': 'loss', 'content': 0.10518722236156464, 'timestamp': '2025-09-10 02:30:56.197408', 'step': 4137, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 144], 'flops': 4271696270016}, 'timestamp': '2025-09-10 02:30:56.228409', 'step': 4137, 'epoch': 1} {'type': 'loss', 'content': 0.19900083541870117, 'timestamp': '2025-09-10 02:30:56.230779', 'step': 4138, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 2847885091968}, 'timestamp': '2025-09-10 02:30:56.262097', 'step': 4138, 'epoch': 1} {'type': 'loss', 'content': 0.18857628107070923, 'timestamp': '2025-09-10 02:30:56.264505', 'step': 4139, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 3322488817984}, 'timestamp': '2025-09-10 02:30:56.296132', 'step': 4139, 'epoch': 1} {'type': 'loss', 'content': 0.11940864473581314, 'timestamp': '2025-09-10 02:30:56.320286', 'step': 4140, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 2847885091968}, 'timestamp': '2025-09-10 02:30:56.352262', 'step': 4140, 'epoch': 1} {'type': 'loss', 'content': 0.12334708869457245, 'timestamp': '2025-09-10 02:30:56.354425', 'step': 4141, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 3322488817984}, 'timestamp': '2025-09-10 02:30:56.385098', 'step': 4141, 'epoch': 1} {'type': 'loss', 'content': 0.08664724230766296, 'timestamp': '2025-09-10 02:30:56.387518', 'step': 4142, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 2847885091968}, 'timestamp': '2025-09-10 02:30:56.418789', 'step': 4142, 'epoch': 1} {'type': 'loss', 'content': 0.1603151261806488, 'timestamp': '2025-09-10 02:30:56.420810', 'step': 4143, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 3322488817984}, 'timestamp': '2025-09-10 02:30:56.451871', 'step': 4143, 'epoch': 1} {'type': 'loss', 'content': 0.15979981422424316, 'timestamp': '2025-09-10 02:30:56.475806', 'step': 4144, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 2847885091968}, 'timestamp': '2025-09-10 02:30:56.507346', 'step': 4144, 'epoch': 1} {'type': 'loss', 'content': 0.160322904586792, 'timestamp': '2025-09-10 02:30:56.510328', 'step': 4145, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 3322488817984}, 'timestamp': '2025-09-10 02:30:56.541043', 'step': 4145, 'epoch': 1} {'type': 'loss', 'content': 0.18343815207481384, 'timestamp': '2025-09-10 02:30:56.544167', 'step': 4146, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 160], 'flops': 4746299996032}, 'timestamp': '2025-09-10 02:30:56.576823', 'step': 4146, 'epoch': 1} {'type': 'loss', 'content': 0.1946943998336792, 'timestamp': '2025-09-10 02:30:56.579730', 'step': 4147, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 3322488817984}, 'timestamp': '2025-09-10 02:30:56.610600', 'step': 4147, 'epoch': 1} {'type': 'loss', 'content': 0.13690122961997986, 'timestamp': '2025-09-10 02:30:56.634302', 'step': 4148, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 80], 'flops': 2373281365952}, 'timestamp': '2025-09-10 02:30:56.665784', 'step': 4148, 'epoch': 1} {'type': 'loss', 'content': 0.09847600758075714, 'timestamp': '2025-09-10 02:30:56.669114', 'step': 4149, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 144], 'flops': 4271696270016}, 'timestamp': '2025-09-10 02:30:56.700526', 'step': 4149, 'epoch': 1} {'type': 'loss', 'content': 0.16956810653209686, 'timestamp': '2025-09-10 02:30:56.702659', 'step': 4150, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 2847885091968}, 'timestamp': '2025-09-10 02:30:56.735166', 'step': 4150, 'epoch': 1} {'type': 'loss', 'content': 0.17466799914836884, 'timestamp': '2025-09-10 02:30:56.737914', 'step': 4151, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 2847885091968}, 'timestamp': '2025-09-10 02:30:56.768615', 'step': 4151, 'epoch': 1} {'type': 'loss', 'content': 0.08511383086442947, 'timestamp': '2025-09-10 02:30:56.792872', 'step': 4152, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 128], 'flops': 3797092544000}, 'timestamp': '2025-09-10 02:30:56.826354', 'step': 4152, 'epoch': 1} {'type': 'loss', 'content': 0.1697448343038559, 'timestamp': '2025-09-10 02:30:56.829314', 'step': 4153, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 128], 'flops': 3797092544000}, 'timestamp': '2025-09-10 02:30:56.863208', 'step': 4153, 'epoch': 1} {'type': 'loss', 'content': 0.18476590514183044, 'timestamp': '2025-09-10 02:30:56.865405', 'step': 4154, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 3322488817984}, 'timestamp': '2025-09-10 02:30:56.900333', 'step': 4154, 'epoch': 1} {'type': 'loss', 'content': 0.06321478635072708, 'timestamp': '2025-09-10 02:30:56.903201', 'step': 4155, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 3322488817984}, 'timestamp': '2025-09-10 02:30:56.935706', 'step': 4155, 'epoch': 1} {'type': 'loss', 'content': 0.1718444973230362, 'timestamp': '2025-09-10 02:30:56.960025', 'step': 4156, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 128], 'flops': 3797092544000}, 'timestamp': '2025-09-10 02:30:56.992260', 'step': 4156, 'epoch': 1} {'type': 'loss', 'content': 0.212217316031456, 'timestamp': '2025-09-10 02:30:56.994394', 'step': 4157, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 160], 'flops': 4746299996032}, 'timestamp': '2025-09-10 02:30:57.025871', 'step': 4157, 'epoch': 1} {'type': 'loss', 'content': 0.1401662975549698, 'timestamp': '2025-09-10 02:30:57.028323', 'step': 4158, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 3322488817984}, 'timestamp': '2025-09-10 02:30:57.060656', 'step': 4158, 'epoch': 1} {'type': 'loss', 'content': 0.19736266136169434, 'timestamp': '2025-09-10 02:30:57.062912', 'step': 4159, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 3322488817984}, 'timestamp': '2025-09-10 02:30:57.093024', 'step': 4159, 'epoch': 1} {'type': 'loss', 'content': 0.21128685772418976, 'timestamp': '2025-09-10 02:30:57.116688', 'step': 4160, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 3322488817984}, 'timestamp': '2025-09-10 02:30:57.148407', 'step': 4160, 'epoch': 1} {'type': 'loss', 'content': 0.12475761026144028, 'timestamp': '2025-09-10 02:30:57.152547', 'step': 4161, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 144], 'flops': 4271696270016}, 'timestamp': '2025-09-10 02:30:57.186222', 'step': 4161, 'epoch': 1} {'type': 'loss', 'content': 0.1928679645061493, 'timestamp': '2025-09-10 02:30:57.188548', 'step': 4162, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 3322488817984}, 'timestamp': '2025-09-10 02:30:57.220128', 'step': 4162, 'epoch': 1} {'type': 'loss', 'content': 0.1661931723356247, 'timestamp': '2025-09-10 02:30:57.222111', 'step': 4163, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 80], 'flops': 2373281365952}, 'timestamp': '2025-09-10 02:30:57.253417', 'step': 4163, 'epoch': 1} {'type': 'loss', 'content': 0.08916787058115005, 'timestamp': '2025-09-10 02:30:57.277084', 'step': 4164, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 3322488817984}, 'timestamp': '2025-09-10 02:30:57.310216', 'step': 4164, 'epoch': 1} {'type': 'loss', 'content': 0.14222724735736847, 'timestamp': '2025-09-10 02:30:57.312366', 'step': 4165, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 3322488817984}, 'timestamp': '2025-09-10 02:30:57.342876', 'step': 4165, 'epoch': 1} {'type': 'loss', 'content': 0.16550064086914062, 'timestamp': '2025-09-10 02:30:57.347250', 'step': 4166, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 3322488817984}, 'timestamp': '2025-09-10 02:30:57.378040', 'step': 4166, 'epoch': 1} {'type': 'loss', 'content': 0.14285030961036682, 'timestamp': '2025-09-10 02:30:57.380246', 'step': 4167, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 3322488817984}, 'timestamp': '2025-09-10 02:30:57.410841', 'step': 4167, 'epoch': 1} {'type': 'loss', 'content': 0.09880006313323975, 'timestamp': '2025-09-10 02:30:57.434861', 'step': 4168, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 80], 'flops': 2373281365952}, 'timestamp': '2025-09-10 02:30:57.467876', 'step': 4168, 'epoch': 1} {'type': 'loss', 'content': 0.10750340670347214, 'timestamp': '2025-09-10 02:30:57.473146', 'step': 4169, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 2847885091968}, 'timestamp': '2025-09-10 02:30:57.512317', 'step': 4169, 'epoch': 1} {'type': 'loss', 'content': 0.11174848675727844, 'timestamp': '2025-09-10 02:30:57.514693', 'step': 4170, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 2847885091968}, 'timestamp': '2025-09-10 02:30:57.546748', 'step': 4170, 'epoch': 1} {'type': 'loss', 'content': 0.12305848300457001, 'timestamp': '2025-09-10 02:30:57.549118', 'step': 4171, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 128], 'flops': 3797092544000}, 'timestamp': '2025-09-10 02:30:57.580033', 'step': 4171, 'epoch': 1} {'type': 'loss', 'content': 0.13073915243148804, 'timestamp': '2025-09-10 02:30:57.603819', 'step': 4172, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 2847885091968}, 'timestamp': '2025-09-10 02:30:57.637249', 'step': 4172, 'epoch': 1} {'type': 'loss', 'content': 0.15921126306056976, 'timestamp': '2025-09-10 02:30:57.642345', 'step': 4173, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 2847885091968}, 'timestamp': '2025-09-10 02:30:57.677983', 'step': 4173, 'epoch': 1} {'type': 'loss', 'content': 0.11820285767316818, 'timestamp': '2025-09-10 02:30:57.682101', 'step': 4174, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 3322488817984}, 'timestamp': '2025-09-10 02:30:57.712964', 'step': 4174, 'epoch': 1} {'type': 'loss', 'content': 0.09172306209802628, 'timestamp': '2025-09-10 02:30:57.715873', 'step': 4175, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 144], 'flops': 4271696270016}, 'timestamp': '2025-09-10 02:30:57.747107', 'step': 4175, 'epoch': 1} {'type': 'loss', 'content': 0.09574870020151138, 'timestamp': '2025-09-10 02:30:57.773440', 'step': 4176, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 3322488817984}, 'timestamp': '2025-09-10 02:30:57.816563', 'step': 4176, 'epoch': 1} {'type': 'loss', 'content': 0.18207786977291107, 'timestamp': '2025-09-10 02:30:57.819081', 'step': 4177, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 3322488817984}, 'timestamp': '2025-09-10 02:30:57.851550', 'step': 4177, 'epoch': 1} {'type': 'loss', 'content': 0.16698022186756134, 'timestamp': '2025-09-10 02:30:57.856813', 'step': 4178, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 128], 'flops': 3797092544000}, 'timestamp': '2025-09-10 02:30:57.899589', 'step': 4178, 'epoch': 1} {'type': 'loss', 'content': 0.1904481053352356, 'timestamp': '2025-09-10 02:30:57.902262', 'step': 4179, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 128], 'flops': 3797092544000}, 'timestamp': '2025-09-10 02:30:57.933391', 'step': 4179, 'epoch': 1} {'type': 'loss', 'content': 0.13852174580097198, 'timestamp': '2025-09-10 02:30:57.957094', 'step': 4180, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 3322488817984}, 'timestamp': '2025-09-10 02:30:57.991376', 'step': 4180, 'epoch': 1} {'type': 'loss', 'content': 0.04898062348365784, 'timestamp': '2025-09-10 02:30:57.994089', 'step': 4181, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 3322488817984}, 'timestamp': '2025-09-10 02:30:58.026414', 'step': 4181, 'epoch': 1} {'type': 'loss', 'content': 0.1327393800020218, 'timestamp': '2025-09-10 02:30:58.029106', 'step': 4182, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 3322488817984}, 'timestamp': '2025-09-10 02:30:58.060725', 'step': 4182, 'epoch': 1} {'type': 'loss', 'content': 0.21567481756210327, 'timestamp': '2025-09-10 02:30:58.064906', 'step': 4183, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 128], 'flops': 3797092544000}, 'timestamp': '2025-09-10 02:30:58.099724', 'step': 4183, 'epoch': 1} {'type': 'loss', 'content': 0.12904757261276245, 'timestamp': '2025-09-10 02:30:58.123663', 'step': 4184, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 2847885091968}, 'timestamp': '2025-09-10 02:30:58.156050', 'step': 4184, 'epoch': 1} {'type': 'loss', 'content': 0.15275311470031738, 'timestamp': '2025-09-10 02:30:58.158232', 'step': 4185, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 2847885091968}, 'timestamp': '2025-09-10 02:30:58.188764', 'step': 4185, 'epoch': 1} {'type': 'loss', 'content': 0.10034968703985214, 'timestamp': '2025-09-10 02:30:58.191365', 'step': 4186, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 3322488817984}, 'timestamp': '2025-09-10 02:30:58.225255', 'step': 4186, 'epoch': 1} {'type': 'loss', 'content': 0.20997737348079681, 'timestamp': '2025-09-10 02:30:58.227400', 'step': 4187, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 3322488817984}, 'timestamp': '2025-09-10 02:30:58.257935', 'step': 4187, 'epoch': 1} {'type': 'loss', 'content': 0.10453683882951736, 'timestamp': '2025-09-10 02:30:58.281485', 'step': 4188, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 128], 'flops': 3797092544000}, 'timestamp': '2025-09-10 02:30:58.313879', 'step': 4188, 'epoch': 1} {'type': 'loss', 'content': 0.09696681797504425, 'timestamp': '2025-09-10 02:30:58.316222', 'step': 4189, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 144], 'flops': 4271696270016}, 'timestamp': '2025-09-10 02:30:58.347724', 'step': 4189, 'epoch': 1} {'type': 'loss', 'content': 0.22594809532165527, 'timestamp': '2025-09-10 02:30:58.350012', 'step': 4190, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 3322488817984}, 'timestamp': '2025-09-10 02:30:58.381565', 'step': 4190, 'epoch': 1} {'type': 'loss', 'content': 0.1434735804796219, 'timestamp': '2025-09-10 02:30:58.383686', 'step': 4191, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 3322488817984}, 'timestamp': '2025-09-10 02:30:58.415862', 'step': 4191, 'epoch': 1} {'type': 'loss', 'content': 0.14824505150318146, 'timestamp': '2025-09-10 02:30:58.439320', 'step': 4192, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 2847885091968}, 'timestamp': '2025-09-10 02:30:58.471197', 'step': 4192, 'epoch': 1} {'type': 'loss', 'content': 0.16453222930431366, 'timestamp': '2025-09-10 02:30:58.473503', 'step': 4193, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 80], 'flops': 2373281365952}, 'timestamp': '2025-09-10 02:30:58.504026', 'step': 4193, 'epoch': 1} {'type': 'loss', 'content': 0.22387976944446564, 'timestamp': '2025-09-10 02:30:58.506351', 'step': 4194, 'epoch': 1} {'type': 'flops', 'content': [{'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1582003754624}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1898404492032}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1898404492032}, {'type': 'perplexity', 'in_batch_dim': [8, 144], 'batch_size': 8, 'flops': 2847606704256}, {'type': 'perplexity', 'in_batch_dim': [8, 64], 'batch_size': 8, 'flops': 1265603017216}, {'type': 'perplexity', 'in_batch_dim': [8, 112], 'batch_size': 8, 'flops': 2214805229440}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1898404492032}, {'type': 'perplexity', 'in_batch_dim': [8, 112], 'batch_size': 8, 'flops': 2214805229440}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1898404492032}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1898404492032}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1898404492032}, {'type': 'perplexity', 'in_batch_dim': [8, 112], 'batch_size': 8, 'flops': 2214805229440}, {'type': 'perplexity', 'in_batch_dim': [8, 112], 'batch_size': 8, 'flops': 2214805229440}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1582003754624}, {'type': 'perplexity', 'in_batch_dim': [8, 144], 'batch_size': 8, 'flops': 2847606704256}, {'type': 'perplexity', 'in_batch_dim': [8, 112], 'batch_size': 8, 'flops': 2214805229440}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1582003754624}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1898404492032}, {'type': 'perplexity', 'in_batch_dim': [8, 64], 'batch_size': 8, 'flops': 1265603017216}, {'type': 'perplexity', 'in_batch_dim': [8, 64], 'batch_size': 8, 'flops': 1265603017216}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1898404492032}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1582003754624}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1582003754624}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1582003754624}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1582003754624}, {'type': 'perplexity', 'in_batch_dim': [8, 64], 'batch_size': 8, 'flops': 1265603017216}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1898404492032}, {'type': 'perplexity', 'in_batch_dim': [8, 64], 'batch_size': 8, 'flops': 1265603017216}, {'type': 'perplexity', 'in_batch_dim': [8, 64], 'batch_size': 8, 'flops': 1265603017216}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1582003754624}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1582003754624}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1582003754624}, {'type': 'perplexity', 'in_batch_dim': [8, 112], 'batch_size': 8, 'flops': 2214805229440}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1582003754624}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1582003754624}, {'type': 'perplexity', 'in_batch_dim': [8, 160], 'batch_size': 8, 'flops': 3164007441664}, {'type': 'perplexity', 'in_batch_dim': [8, 64], 'batch_size': 8, 'flops': 1265603017216}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1898404492032}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1898404492032}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1898404492032}, {'type': 'perplexity', 'in_batch_dim': [8, 64], 'batch_size': 8, 'flops': 1265603017216}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1582003754624}, {'type': 'perplexity', 'in_batch_dim': [8, 64], 'batch_size': 8, 'flops': 1265603017216}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1582003754624}, {'type': 'perplexity', 'in_batch_dim': [8, 64], 'batch_size': 8, 'flops': 1265603017216}, {'type': 'perplexity', 'in_batch_dim': [8, 112], 'batch_size': 8, 'flops': 2214805229440}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1582003754624}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1898404492032}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1898404492032}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1898404492032}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1898404492032}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1898404492032}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1898404492032}, {'type': 'perplexity', 'in_batch_dim': [8, 64], 'batch_size': 8, 'flops': 1265603017216}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1898404492032}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1582003754624}, {'type': 'perplexity', 'in_batch_dim': [8, 64], 'batch_size': 8, 'flops': 1265603017216}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1898404492032}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1898404492032}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1898404492032}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1898404492032}, {'type': 'perplexity', 'in_batch_dim': [8, 128], 'batch_size': 8, 'flops': 2531205966848}, {'type': 'perplexity', 'in_batch_dim': [8, 112], 'batch_size': 8, 'flops': 2214805229440}, {'type': 'perplexity', 'in_batch_dim': [8, 64], 'batch_size': 8, 'flops': 1265603017216}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1582003754624}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1898404492032}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1582003754624}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1898404492032}, {'type': 'perplexity', 'in_batch_dim': [8, 64], 'batch_size': 8, 'flops': 1265603017216}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1898404492032}, {'type': 'perplexity', 'in_batch_dim': [8, 112], 'batch_size': 8, 'flops': 2214805229440}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1898404492032}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1582003754624}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1582003754624}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1582003754624}, {'type': 'perplexity', 'in_batch_dim': [8, 64], 'batch_size': 8, 'flops': 1265603017216}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1582003754624}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1898404492032}, {'type': 'perplexity', 'in_batch_dim': [8, 64], 'batch_size': 8, 'flops': 1265603017216}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1582003754624}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1898404492032}, {'type': 'perplexity', 'in_batch_dim': [8, 64], 'batch_size': 8, 'flops': 1265603017216}, {'type': 'perplexity', 'in_batch_dim': [8, 112], 'batch_size': 8, 'flops': 2214805229440}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1582003754624}, {'type': 'perplexity', 'in_batch_dim': [8, 144], 'batch_size': 8, 'flops': 2847606704256}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1582003754624}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1582003754624}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1582003754624}, {'type': 'perplexity', 'in_batch_dim': [8, 64], 'batch_size': 8, 'flops': 1265603017216}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1898404492032}, {'type': 'perplexity', 'in_batch_dim': [8, 112], 'batch_size': 8, 'flops': 2214805229440}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1898404492032}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1582003754624}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1582003754624}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1582003754624}, {'type': 'perplexity', 'in_batch_dim': [8, 112], 'batch_size': 8, 'flops': 2214805229440}, {'type': 'perplexity', 'in_batch_dim': [8, 64], 'batch_size': 8, 'flops': 1265603017216}, {'type': 'perplexity', 'in_batch_dim': [8, 112], 'batch_size': 8, 'flops': 2214805229440}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1582003754624}, {'type': 'perplexity', 'in_batch_dim': [8, 64], 'batch_size': 8, 'flops': 1265603017216}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1582003754624}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1582003754624}, {'type': 'perplexity', 'in_batch_dim': [8, 64], 'batch_size': 8, 'flops': 1265603017216}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1582003754624}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1582003754624}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1582003754624}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1898404492032}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1582003754624}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1582003754624}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1898404492032}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1898404492032}, {'type': 'perplexity', 'in_batch_dim': [8, 112], 'batch_size': 8, 'flops': 2214805229440}, {'type': 'perplexity', 'in_batch_dim': [8, 64], 'batch_size': 8, 'flops': 1265603017216}, {'type': 'perplexity', 'in_batch_dim': [8, 112], 'batch_size': 8, 'flops': 2214805229440}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1898404492032}, {'type': 'perplexity', 'in_batch_dim': [8, 112], 'batch_size': 8, 'flops': 2214805229440}, {'type': 'perplexity', 'in_batch_dim': [8, 128], 'batch_size': 8, 'flops': 2531205966848}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1582003754624}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1898404492032}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1898404492032}, {'type': 'perplexity', 'in_batch_dim': [8, 192], 'batch_size': 8, 'flops': 3796808916480}, {'type': 'perplexity', 'in_batch_dim': [8, 64], 'batch_size': 8, 'flops': 1265603017216}, {'type': 'perplexity', 'in_batch_dim': [8, 144], 'batch_size': 8, 'flops': 2847606704256}, {'type': 'perplexity', 'in_batch_dim': [8, 64], 'batch_size': 8, 'flops': 1265603017216}, {'type': 'perplexity', 'in_batch_dim': [8, 144], 'batch_size': 8, 'flops': 2847606704256}, {'type': 'perplexity', 'in_batch_dim': [8, 64], 'batch_size': 8, 'flops': 1265603017216}, {'type': 'perplexity', 'in_batch_dim': [8, 64], 'batch_size': 8, 'flops': 1265603017216}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1898404492032}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1898404492032}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1582003754624}, {'type': 'perplexity', 'in_batch_dim': [8, 128], 'batch_size': 8, 'flops': 2531205966848}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1898404492032}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1898404492032}, {'type': 'perplexity', 'in_batch_dim': [8, 112], 'batch_size': 8, 'flops': 2214805229440}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1582003754624}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1582003754624}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1898404492032}, {'type': 'perplexity', 'in_batch_dim': [8, 64], 'batch_size': 8, 'flops': 1265603017216}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1898404492032}, {'type': 'perplexity', 'in_batch_dim': [8, 112], 'batch_size': 8, 'flops': 2214805229440}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1898404492032}, {'type': 'perplexity', 'in_batch_dim': [8, 64], 'batch_size': 8, 'flops': 1265603017216}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1582003754624}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1898404492032}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1898404492032}, {'type': 'perplexity', 'in_batch_dim': [8, 64], 'batch_size': 8, 'flops': 1265603017216}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1898404492032}, {'type': 'perplexity', 'in_batch_dim': [8, 64], 'batch_size': 8, 'flops': 1265603017216}, {'type': 'perplexity', 'in_batch_dim': [8, 112], 'batch_size': 8, 'flops': 2214805229440}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1582003754624}, {'type': 'perplexity', 'in_batch_dim': [8, 128], 'batch_size': 8, 'flops': 2531205966848}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1582003754624}, {'type': 'perplexity', 'in_batch_dim': [8, 112], 'batch_size': 8, 'flops': 2214805229440}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1898404492032}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1582003754624}, {'type': 'perplexity', 'in_batch_dim': [8, 112], 'batch_size': 8, 'flops': 2214805229440}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1582003754624}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1898404492032}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1898404492032}, {'type': 'perplexity', 'in_batch_dim': [8, 128], 'batch_size': 8, 'flops': 2531205966848}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1898404492032}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1898404492032}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1582003754624}, {'type': 'perplexity', 'in_batch_dim': [8, 112], 'batch_size': 8, 'flops': 2214805229440}, {'type': 'perplexity', 'in_batch_dim': [8, 128], 'batch_size': 8, 'flops': 2531205966848}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1898404492032}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1582003754624}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1582003754624}, {'type': 'perplexity', 'in_batch_dim': [8, 128], 'batch_size': 8, 'flops': 2531205966848}, {'type': 'perplexity', 'in_batch_dim': [8, 112], 'batch_size': 8, 'flops': 2214805229440}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1582003754624}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1582003754624}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1898404492032}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1582003754624}, {'type': 'perplexity', 'in_batch_dim': [8, 64], 'batch_size': 8, 'flops': 1265603017216}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1582003754624}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1582003754624}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1582003754624}, {'type': 'perplexity', 'in_batch_dim': [8, 112], 'batch_size': 8, 'flops': 2214805229440}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1582003754624}, {'type': 'perplexity', 'in_batch_dim': [8, 64], 'batch_size': 8, 'flops': 1265603017216}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1582003754624}, {'type': 'perplexity', 'in_batch_dim': [8, 128], 'batch_size': 8, 'flops': 2531205966848}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1898404492032}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1898404492032}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1582003754624}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1898404492032}, {'type': 'perplexity', 'in_batch_dim': [8, 64], 'batch_size': 8, 'flops': 1265603017216}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1898404492032}, {'type': 'perplexity', 'in_batch_dim': [8, 112], 'batch_size': 8, 'flops': 2214805229440}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1898404492032}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1582003754624}, {'type': 'perplexity', 'in_batch_dim': [8, 112], 'batch_size': 8, 'flops': 2214805229440}, {'type': 'perplexity', 'in_batch_dim': [8, 112], 'batch_size': 8, 'flops': 2214805229440}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1898404492032}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1898404492032}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1898404492032}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1898404492032}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1582003754624}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1582003754624}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1582003754624}, {'type': 'perplexity', 'in_batch_dim': [8, 112], 'batch_size': 8, 'flops': 2214805229440}, {'type': 'perplexity', 'in_batch_dim': [8, 64], 'batch_size': 8, 'flops': 1265603017216}, {'type': 'perplexity', 'in_batch_dim': [8, 144], 'batch_size': 8, 'flops': 2847606704256}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1898404492032}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1898404492032}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1898404492032}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1898404492032}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1898404492032}, {'type': 'perplexity', 'in_batch_dim': [8, 112], 'batch_size': 8, 'flops': 2214805229440}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1582003754624}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1582003754624}, {'type': 'perplexity', 'in_batch_dim': [8, 64], 'batch_size': 8, 'flops': 1265603017216}, {'type': 'perplexity', 'in_batch_dim': [8, 112], 'batch_size': 8, 'flops': 2214805229440}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1582003754624}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1898404492032}, {'type': 'perplexity', 'in_batch_dim': [8, 64], 'batch_size': 8, 'flops': 1265603017216}, {'type': 'perplexity', 'in_batch_dim': [8, 112], 'batch_size': 8, 'flops': 2214805229440}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1898404492032}, {'type': 'perplexity', 'in_batch_dim': [8, 64], 'batch_size': 8, 'flops': 1265603017216}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1582003754624}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1582003754624}, {'type': 'perplexity', 'in_batch_dim': [8, 64], 'batch_size': 8, 'flops': 1265603017216}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1898404492032}, {'type': 'perplexity', 'in_batch_dim': [8, 64], 'batch_size': 8, 'flops': 1265603017216}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1582003754624}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1898404492032}, {'type': 'perplexity', 'in_batch_dim': [8, 112], 'batch_size': 8, 'flops': 2214805229440}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1582003754624}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1898404492032}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1582003754624}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1898404492032}, {'type': 'perplexity', 'in_batch_dim': [8, 112], 'batch_size': 8, 'flops': 2214805229440}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1898404492032}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1898404492032}, {'type': 'perplexity', 'in_batch_dim': [8, 64], 'batch_size': 8, 'flops': 1265603017216}, {'type': 'perplexity', 'in_batch_dim': [8, 64], 'batch_size': 8, 'flops': 1265603017216}, {'type': 'perplexity', 'in_batch_dim': [8, 128], 'batch_size': 8, 'flops': 2531205966848}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1582003754624}, {'type': 'perplexity', 'in_batch_dim': [8, 64], 'batch_size': 8, 'flops': 1265603017216}, {'type': 'perplexity', 'in_batch_dim': [8, 112], 'batch_size': 8, 'flops': 2214805229440}, {'type': 'perplexity', 'in_batch_dim': [8, 112], 'batch_size': 8, 'flops': 2214805229440}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1898404492032}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1582003754624}, {'type': 'perplexity', 'in_batch_dim': [8, 128], 'batch_size': 8, 'flops': 2531205966848}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1898404492032}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1582003754624}, {'type': 'perplexity', 'in_batch_dim': [8, 112], 'batch_size': 8, 'flops': 2214805229440}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1898404492032}, {'type': 'perplexity', 'in_batch_dim': [8, 64], 'batch_size': 8, 'flops': 1265603017216}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1582003754624}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1898404492032}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1898404492032}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1582003754624}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1582003754624}, {'type': 'perplexity', 'in_batch_dim': [8, 64], 'batch_size': 8, 'flops': 1265603017216}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1898404492032}, {'type': 'perplexity', 'in_batch_dim': [8, 112], 'batch_size': 8, 'flops': 2214805229440}, {'type': 'perplexity', 'in_batch_dim': [8, 64], 'batch_size': 8, 'flops': 1265603017216}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1898404492032}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1582003754624}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1582003754624}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1898404492032}, {'type': 'perplexity', 'in_batch_dim': [8, 112], 'batch_size': 8, 'flops': 2214805229440}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1898404492032}, {'type': 'perplexity', 'in_batch_dim': [8, 112], 'batch_size': 8, 'flops': 2214805229440}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1582003754624}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1582003754624}, {'type': 'perplexity', 'in_batch_dim': [8, 64], 'batch_size': 8, 'flops': 1265603017216}, {'type': 'perplexity', 'in_batch_dim': [8, 112], 'batch_size': 8, 'flops': 2214805229440}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1582003754624}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1898404492032}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1582003754624}, {'type': 'perplexity', 'in_batch_dim': [8, 112], 'batch_size': 8, 'flops': 2214805229440}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1898404492032}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1898404492032}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1582003754624}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1898404492032}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1582003754624}, {'type': 'perplexity', 'in_batch_dim': [8, 112], 'batch_size': 8, 'flops': 2214805229440}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1582003754624}, {'type': 'perplexity', 'in_batch_dim': [8, 64], 'batch_size': 8, 'flops': 1265603017216}, {'type': 'perplexity', 'in_batch_dim': [8, 64], 'batch_size': 8, 'flops': 1265603017216}, {'type': 'perplexity', 'in_batch_dim': [8, 48], 'batch_size': 8, 'flops': 949202279808}, {'type': 'perplexity', 'in_batch_dim': [8, 112], 'batch_size': 8, 'flops': 2214805229440}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1582003754624}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1898404492032}, {'type': 'perplexity', 'in_batch_dim': [8, 128], 'batch_size': 8, 'flops': 2531205966848}, {'type': 'perplexity', 'in_batch_dim': [8, 112], 'batch_size': 8, 'flops': 2214805229440}, {'type': 'perplexity', 'in_batch_dim': [8, 128], 'batch_size': 8, 'flops': 2531205966848}, {'type': 'perplexity', 'in_batch_dim': [8, 112], 'batch_size': 8, 'flops': 2214805229440}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1582003754624}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1898404492032}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1582003754624}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1582003754624}, {'type': 'perplexity', 'in_batch_dim': [8, 112], 'batch_size': 8, 'flops': 2214805229440}, {'type': 'perplexity', 'in_batch_dim': [8, 112], 'batch_size': 8, 'flops': 2214805229440}, {'type': 'perplexity', 'in_batch_dim': [8, 64], 'batch_size': 8, 'flops': 1265603017216}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1898404492032}, {'type': 'perplexity', 'in_batch_dim': [8, 112], 'batch_size': 8, 'flops': 2214805229440}, {'type': 'perplexity', 'in_batch_dim': [8, 64], 'batch_size': 8, 'flops': 1265603017216}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1898404492032}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1582003754624}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1898404492032}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1898404492032}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1898404492032}, {'type': 'perplexity', 'in_batch_dim': [8, 48], 'batch_size': 8, 'flops': 949202279808}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1898404492032}, {'type': 'perplexity', 'in_batch_dim': [8, 112], 'batch_size': 8, 'flops': 2214805229440}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1582003754624}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1898404492032}, {'type': 'perplexity', 'in_batch_dim': [8, 64], 'batch_size': 8, 'flops': 1265603017216}, {'type': 'perplexity', 'in_batch_dim': [8, 112], 'batch_size': 8, 'flops': 2214805229440}, {'type': 'perplexity', 'in_batch_dim': [8, 64], 'batch_size': 8, 'flops': 1265603017216}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1582003754624}, {'type': 'perplexity', 'in_batch_dim': [8, 64], 'batch_size': 8, 'flops': 1265603017216}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1582003754624}, {'type': 'perplexity', 'in_batch_dim': [8, 112], 'batch_size': 8, 'flops': 2214805229440}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1582003754624}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1582003754624}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1582003754624}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1582003754624}, {'type': 'perplexity', 'in_batch_dim': [8, 112], 'batch_size': 8, 'flops': 2214805229440}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1582003754624}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1898404492032}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1898404492032}, {'type': 'perplexity', 'in_batch_dim': [8, 112], 'batch_size': 8, 'flops': 2214805229440}, {'type': 'perplexity', 'in_batch_dim': [8, 144], 'batch_size': 8, 'flops': 2847606704256}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1582003754624}, {'type': 'perplexity', 'in_batch_dim': [8, 64], 'batch_size': 8, 'flops': 1265603017216}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1898404492032}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1898404492032}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1582003754624}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1898404492032}, {'type': 'perplexity', 'in_batch_dim': [8, 128], 'batch_size': 8, 'flops': 2531205966848}, {'type': 'perplexity', 'in_batch_dim': [8, 112], 'batch_size': 8, 'flops': 2214805229440}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1898404492032}, {'type': 'perplexity', 'in_batch_dim': [8, 64], 'batch_size': 8, 'flops': 1265603017216}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1898404492032}, {'type': 'perplexity', 'in_batch_dim': [8, 160], 'batch_size': 8, 'flops': 3164007441664}, {'type': 'perplexity', 'in_batch_dim': [8, 64], 'batch_size': 8, 'flops': 1265603017216}, {'type': 'perplexity', 'in_batch_dim': [8, 112], 'batch_size': 8, 'flops': 2214805229440}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1898404492032}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1898404492032}, {'type': 'perplexity', 'in_batch_dim': [8, 144], 'batch_size': 8, 'flops': 2847606704256}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1582003754624}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1898404492032}, {'type': 'perplexity', 'in_batch_dim': [8, 64], 'batch_size': 8, 'flops': 1265603017216}, {'type': 'perplexity', 'in_batch_dim': [8, 64], 'batch_size': 8, 'flops': 1265603017216}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1898404492032}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1898404492032}, {'type': 'perplexity', 'in_batch_dim': [8, 112], 'batch_size': 8, 'flops': 2214805229440}, {'type': 'perplexity', 'in_batch_dim': [8, 112], 'batch_size': 8, 'flops': 2214805229440}, {'type': 'perplexity', 'in_batch_dim': [8, 128], 'batch_size': 8, 'flops': 2531205966848}, {'type': 'perplexity', 'in_batch_dim': [8, 112], 'batch_size': 8, 'flops': 2214805229440}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1582003754624}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1582003754624}, {'type': 'perplexity', 'in_batch_dim': [8, 112], 'batch_size': 8, 'flops': 2214805229440}, {'type': 'perplexity', 'in_batch_dim': [8, 128], 'batch_size': 8, 'flops': 2531205966848}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1582003754624}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1582003754624}, {'type': 'perplexity', 'in_batch_dim': [8, 64], 'batch_size': 8, 'flops': 1265603017216}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1898404492032}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1582003754624}, {'type': 'perplexity', 'in_batch_dim': [8, 112], 'batch_size': 8, 'flops': 2214805229440}, {'type': 'perplexity', 'in_batch_dim': [8, 64], 'batch_size': 8, 'flops': 1265603017216}, {'type': 'perplexity', 'in_batch_dim': [8, 64], 'batch_size': 8, 'flops': 1265603017216}, {'type': 'perplexity', 'in_batch_dim': [8, 128], 'batch_size': 8, 'flops': 2531205966848}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1898404492032}, {'type': 'perplexity', 'in_batch_dim': [8, 112], 'batch_size': 8, 'flops': 2214805229440}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1582003754624}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1582003754624}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1582003754624}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1582003754624}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1582003754624}, {'type': 'perplexity', 'in_batch_dim': [8, 112], 'batch_size': 8, 'flops': 2214805229440}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1898404492032}, {'type': 'perplexity', 'in_batch_dim': [8, 112], 'batch_size': 8, 'flops': 2214805229440}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1898404492032}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1582003754624}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1582003754624}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1898404492032}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1898404492032}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1582003754624}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1582003754624}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1582003754624}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1582003754624}, {'type': 'perplexity', 'in_batch_dim': [8, 48], 'batch_size': 8, 'flops': 949202279808}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1582003754624}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1898404492032}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1582003754624}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1898404492032}, {'type': 'perplexity', 'in_batch_dim': [8, 112], 'batch_size': 8, 'flops': 2214805229440}, {'type': 'perplexity', 'in_batch_dim': [8, 128], 'batch_size': 8, 'flops': 2531205966848}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1898404492032}, {'type': 'perplexity', 'in_batch_dim': [8, 64], 'batch_size': 8, 'flops': 1265603017216}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1898404492032}, {'type': 'perplexity', 'in_batch_dim': [8, 64], 'batch_size': 8, 'flops': 1265603017216}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1582003754624}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1582003754624}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1582003754624}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1582003754624}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1898404492032}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1582003754624}, {'type': 'perplexity', 'in_batch_dim': [8, 112], 'batch_size': 8, 'flops': 2214805229440}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1898404492032}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1582003754624}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1898404492032}, {'type': 'perplexity', 'in_batch_dim': [8, 64], 'batch_size': 8, 'flops': 1265603017216}, {'type': 'perplexity', 'in_batch_dim': [8, 64], 'batch_size': 8, 'flops': 1265603017216}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1898404492032}, {'type': 'perplexity', 'in_batch_dim': [8, 64], 'batch_size': 8, 'flops': 1265603017216}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1582003754624}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1582003754624}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1582003754624}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1582003754624}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1582003754624}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1582003754624}, {'type': 'perplexity', 'in_batch_dim': [8, 64], 'batch_size': 8, 'flops': 1265603017216}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1582003754624}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1582003754624}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1898404492032}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1898404492032}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1898404492032}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1898404492032}, {'type': 'perplexity', 'in_batch_dim': [8, 128], 'batch_size': 8, 'flops': 2531205966848}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1582003754624}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1898404492032}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1898404492032}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1898404492032}, {'type': 'perplexity', 'in_batch_dim': [8, 64], 'batch_size': 8, 'flops': 1265603017216}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1898404492032}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1898404492032}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1582003754624}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1898404492032}, {'type': 'perplexity', 'in_batch_dim': [8, 112], 'batch_size': 8, 'flops': 2214805229440}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1898404492032}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1582003754624}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1898404492032}, {'type': 'perplexity', 'in_batch_dim': [8, 64], 'batch_size': 8, 'flops': 1265603017216}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1898404492032}, {'type': 'perplexity', 'in_batch_dim': [8, 112], 'batch_size': 8, 'flops': 2214805229440}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1898404492032}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1898404492032}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1582003754624}, {'type': 'perplexity', 'in_batch_dim': [8, 64], 'batch_size': 8, 'flops': 1265603017216}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1582003754624}, {'type': 'perplexity', 'in_batch_dim': [8, 128], 'batch_size': 8, 'flops': 2531205966848}, {'type': 'perplexity', 'in_batch_dim': [8, 64], 'batch_size': 8, 'flops': 1265603017216}, {'type': 'perplexity', 'in_batch_dim': [8, 128], 'batch_size': 8, 'flops': 2531205966848}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1898404492032}, {'type': 'perplexity', 'in_batch_dim': [8, 112], 'batch_size': 8, 'flops': 2214805229440}, {'type': 'perplexity', 'in_batch_dim': [8, 112], 'batch_size': 8, 'flops': 2214805229440}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1898404492032}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1898404492032}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1582003754624}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1582003754624}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1582003754624}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1898404492032}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1582003754624}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1582003754624}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1898404492032}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1582003754624}, {'type': 'perplexity', 'in_batch_dim': [8, 64], 'batch_size': 8, 'flops': 1265603017216}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1898404492032}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1898404492032}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1898404492032}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1898404492032}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1582003754624}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1898404492032}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1582003754624}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1582003754624}, {'type': 'perplexity', 'in_batch_dim': [8, 112], 'batch_size': 8, 'flops': 2214805229440}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1582003754624}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1898404492032}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1582003754624}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1582003754624}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1582003754624}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1898404492032}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1582003754624}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1898404492032}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1582003754624}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1582003754624}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1898404492032}, {'type': 'perplexity', 'in_batch_dim': [8, 112], 'batch_size': 8, 'flops': 2214805229440}, {'type': 'perplexity', 'in_batch_dim': [8, 128], 'batch_size': 8, 'flops': 2531205966848}, {'type': 'perplexity', 'in_batch_dim': [8, 112], 'batch_size': 8, 'flops': 2214805229440}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1582003754624}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1898404492032}, {'type': 'perplexity', 'in_batch_dim': [8, 64], 'batch_size': 8, 'flops': 1265603017216}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1898404492032}, {'type': 'perplexity', 'in_batch_dim': [8, 64], 'batch_size': 8, 'flops': 1265603017216}, {'type': 'perplexity', 'in_batch_dim': [8, 144], 'batch_size': 8, 'flops': 2847606704256}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1582003754624}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1898404492032}, {'type': 'perplexity', 'in_batch_dim': [8, 112], 'batch_size': 8, 'flops': 2214805229440}, {'type': 'perplexity', 'in_batch_dim': [8, 144], 'batch_size': 8, 'flops': 2847606704256}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1582003754624}, {'type': 'perplexity', 'in_batch_dim': [8, 112], 'batch_size': 8, 'flops': 2214805229440}, {'type': 'perplexity', 'in_batch_dim': [8, 64], 'batch_size': 8, 'flops': 1265603017216}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1582003754624}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1898404492032}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1898404492032}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1582003754624}, {'type': 'perplexity', 'in_batch_dim': [8, 112], 'batch_size': 8, 'flops': 2214805229440}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1582003754624}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1582003754624}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1582003754624}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1582003754624}, {'type': 'perplexity', 'in_batch_dim': [8, 112], 'batch_size': 8, 'flops': 2214805229440}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1898404492032}, {'type': 'perplexity', 'in_batch_dim': [8, 128], 'batch_size': 8, 'flops': 2531205966848}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1582003754624}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1582003754624}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1898404492032}, {'type': 'perplexity', 'in_batch_dim': [8, 64], 'batch_size': 8, 'flops': 1265603017216}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1898404492032}, {'type': 'perplexity', 'in_batch_dim': [8, 64], 'batch_size': 8, 'flops': 1265603017216}, {'type': 'perplexity', 'in_batch_dim': [8, 112], 'batch_size': 8, 'flops': 2214805229440}, {'type': 'perplexity', 'in_batch_dim': [8, 112], 'batch_size': 8, 'flops': 2214805229440}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1582003754624}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1898404492032}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1582003754624}, {'type': 'perplexity', 'in_batch_dim': [8, 64], 'batch_size': 8, 'flops': 1265603017216}, {'type': 'perplexity', 'in_batch_dim': [8, 64], 'batch_size': 8, 'flops': 1265603017216}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1898404492032}, {'type': 'perplexity', 'in_batch_dim': [8, 64], 'batch_size': 8, 'flops': 1265603017216}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1898404492032}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1898404492032}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1582003754624}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1898404492032}, {'type': 'perplexity', 'in_batch_dim': [8, 144], 'batch_size': 8, 'flops': 2847606704256}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1898404492032}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1582003754624}, {'type': 'perplexity', 'in_batch_dim': [8, 128], 'batch_size': 8, 'flops': 2531205966848}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1898404492032}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1898404492032}, {'type': 'perplexity', 'in_batch_dim': [8, 112], 'batch_size': 8, 'flops': 2214805229440}, {'type': 'perplexity', 'in_batch_dim': [8, 112], 'batch_size': 8, 'flops': 2214805229440}, {'type': 'perplexity', 'in_batch_dim': [8, 64], 'batch_size': 8, 'flops': 1265603017216}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1582003754624}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1898404492032}, {'type': 'perplexity', 'in_batch_dim': [8, 128], 'batch_size': 8, 'flops': 2531205966848}, {'type': 'perplexity', 'in_batch_dim': [8, 144], 'batch_size': 8, 'flops': 2847606704256}, {'type': 'perplexity', 'in_batch_dim': [8, 112], 'batch_size': 8, 'flops': 2214805229440}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1582003754624}, {'type': 'perplexity', 'in_batch_dim': [8, 64], 'batch_size': 8, 'flops': 1265603017216}, {'type': 'perplexity', 'in_batch_dim': [8, 64], 'batch_size': 8, 'flops': 1265603017216}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1898404492032}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1582003754624}, {'type': 'perplexity', 'in_batch_dim': [8, 64], 'batch_size': 8, 'flops': 1265603017216}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1898404492032}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1582003754624}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1898404492032}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1898404492032}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1582003754624}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1582003754624}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1582003754624}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1582003754624}, {'type': 'perplexity', 'in_batch_dim': [3, 48], 'batch_size': 8, 'flops': 949202279808}], 'timestamp': '2025-09-10 02:31:07.057774', 'step': 4194, 'epoch': 1} {'type': 'pplx', 'content': 9093.522336362104, 'timestamp': '2025-09-10 02:31:07.060517', 'step': 4194, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 128], 'flops': 3797092544000}, 'timestamp': '2025-09-10 02:31:07.091034', 'step': 4194, 'epoch': 1} {'type': 'loss', 'content': 0.17152516543865204, 'timestamp': '2025-09-10 02:31:07.093507', 'step': 4195, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 128], 'flops': 3797092544000}, 'timestamp': '2025-09-10 02:31:07.124814', 'step': 4195, 'epoch': 1} {'type': 'loss', 'content': 0.12280377745628357, 'timestamp': '2025-09-10 02:31:07.150529', 'step': 4196, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 3322488817984}, 'timestamp': '2025-09-10 02:31:07.182673', 'step': 4196, 'epoch': 1} {'type': 'loss', 'content': 0.13051946461200714, 'timestamp': '2025-09-10 02:31:07.185003', 'step': 4197, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 3322488817984}, 'timestamp': '2025-09-10 02:31:07.219175', 'step': 4197, 'epoch': 1} {'type': 'loss', 'content': 0.12557950615882874, 'timestamp': '2025-09-10 02:31:07.222602', 'step': 4198, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 2847885091968}, 'timestamp': '2025-09-10 02:31:07.254232', 'step': 4198, 'epoch': 1} {'type': 'loss', 'content': 0.11881562322378159, 'timestamp': '2025-09-10 02:31:07.256903', 'step': 4199, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 3322488817984}, 'timestamp': '2025-09-10 02:31:07.291826', 'step': 4199, 'epoch': 1} {'type': 'loss', 'content': 0.17488068342208862, 'timestamp': '2025-09-10 02:31:07.317544', 'step': 4200, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 3322488817984}, 'timestamp': '2025-09-10 02:31:07.349309', 'step': 4200, 'epoch': 1} {'type': 'loss', 'content': 0.10131547600030899, 'timestamp': '2025-09-10 02:31:07.351896', 'step': 4201, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 3322488817984}, 'timestamp': '2025-09-10 02:31:07.384591', 'step': 4201, 'epoch': 1} {'type': 'loss', 'content': 0.24227476119995117, 'timestamp': '2025-09-10 02:31:07.387380', 'step': 4202, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 3322488817984}, 'timestamp': '2025-09-10 02:31:07.421313', 'step': 4202, 'epoch': 1} {'type': 'loss', 'content': 0.198525071144104, 'timestamp': '2025-09-10 02:31:07.429048', 'step': 4203, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 144], 'flops': 4271696270016}, 'timestamp': '2025-09-10 02:31:07.467030', 'step': 4203, 'epoch': 1} {'type': 'loss', 'content': 0.2456703782081604, 'timestamp': '2025-09-10 02:31:07.493178', 'step': 4204, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 2847885091968}, 'timestamp': '2025-09-10 02:31:07.526270', 'step': 4204, 'epoch': 1} {'type': 'loss', 'content': 0.13254699110984802, 'timestamp': '2025-09-10 02:31:07.528915', 'step': 4205, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 2847885091968}, 'timestamp': '2025-09-10 02:31:07.558475', 'step': 4205, 'epoch': 1} {'type': 'loss', 'content': 0.17232289910316467, 'timestamp': '2025-09-10 02:31:07.561607', 'step': 4206, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 2847885091968}, 'timestamp': '2025-09-10 02:31:07.594536', 'step': 4206, 'epoch': 1} {'type': 'loss', 'content': 0.15493451058864594, 'timestamp': '2025-09-10 02:31:07.596539', 'step': 4207, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 2847885091968}, 'timestamp': '2025-09-10 02:31:07.627937', 'step': 4207, 'epoch': 1} {'type': 'loss', 'content': 0.13527911901474, 'timestamp': '2025-09-10 02:31:07.651570', 'step': 4208, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 3322488817984}, 'timestamp': '2025-09-10 02:31:07.684723', 'step': 4208, 'epoch': 1} {'type': 'loss', 'content': 0.21336734294891357, 'timestamp': '2025-09-10 02:31:07.689530', 'step': 4209, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 3322488817984}, 'timestamp': '2025-09-10 02:31:07.726549', 'step': 4209, 'epoch': 1} {'type': 'loss', 'content': 0.1983085423707962, 'timestamp': '2025-09-10 02:31:07.728707', 'step': 4210, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 2847885091968}, 'timestamp': '2025-09-10 02:31:07.763420', 'step': 4210, 'epoch': 1} {'type': 'loss', 'content': 0.11815764009952545, 'timestamp': '2025-09-10 02:31:07.765704', 'step': 4211, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 2847885091968}, 'timestamp': '2025-09-10 02:31:07.799448', 'step': 4211, 'epoch': 1} {'type': 'loss', 'content': 0.2928010821342468, 'timestamp': '2025-09-10 02:31:07.826776', 'step': 4212, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 2847885091968}, 'timestamp': '2025-09-10 02:31:07.874656', 'step': 4212, 'epoch': 1} {'type': 'loss', 'content': 0.12110992521047592, 'timestamp': '2025-09-10 02:31:07.879737', 'step': 4213, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 3322488817984}, 'timestamp': '2025-09-10 02:31:07.933099', 'step': 4213, 'epoch': 1} {'type': 'loss', 'content': 0.15171271562576294, 'timestamp': '2025-09-10 02:31:07.936068', 'step': 4214, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 2847885091968}, 'timestamp': '2025-09-10 02:31:07.975947', 'step': 4214, 'epoch': 1} {'type': 'loss', 'content': 0.18435536324977875, 'timestamp': '2025-09-10 02:31:07.978601', 'step': 4215, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 3322488817984}, 'timestamp': '2025-09-10 02:31:08.011062', 'step': 4215, 'epoch': 1} {'type': 'loss', 'content': 0.16330818831920624, 'timestamp': '2025-09-10 02:31:08.037761', 'step': 4216, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 2847885091968}, 'timestamp': '2025-09-10 02:31:08.079548', 'step': 4216, 'epoch': 1} {'type': 'loss', 'content': 0.15961483120918274, 'timestamp': '2025-09-10 02:31:08.083106', 'step': 4217, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 3322488817984}, 'timestamp': '2025-09-10 02:31:08.119823', 'step': 4217, 'epoch': 1} {'type': 'loss', 'content': 0.2252061367034912, 'timestamp': '2025-09-10 02:31:08.136564', 'step': 4218, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 2847885091968}, 'timestamp': '2025-09-10 02:31:08.182974', 'step': 4218, 'epoch': 1} {'type': 'loss', 'content': 0.17954379320144653, 'timestamp': '2025-09-10 02:31:08.190631', 'step': 4219, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 160], 'flops': 4746299996032}, 'timestamp': '2025-09-10 02:31:08.230589', 'step': 4219, 'epoch': 1} {'type': 'loss', 'content': 0.1494675725698471, 'timestamp': '2025-09-10 02:31:08.263117', 'step': 4220, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 3322488817984}, 'timestamp': '2025-09-10 02:31:08.297680', 'step': 4220, 'epoch': 1} {'type': 'loss', 'content': 0.09850707650184631, 'timestamp': '2025-09-10 02:31:08.300187', 'step': 4221, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 2847885091968}, 'timestamp': '2025-09-10 02:31:08.333851', 'step': 4221, 'epoch': 1} {'type': 'loss', 'content': 0.09550292044878006, 'timestamp': '2025-09-10 02:31:08.337762', 'step': 4222, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 3322488817984}, 'timestamp': '2025-09-10 02:31:08.373939', 'step': 4222, 'epoch': 1} {'type': 'loss', 'content': 0.20541158318519592, 'timestamp': '2025-09-10 02:31:08.377295', 'step': 4223, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 3322488817984}, 'timestamp': '2025-09-10 02:31:08.417066', 'step': 4223, 'epoch': 1} {'type': 'loss', 'content': 0.19462136924266815, 'timestamp': '2025-09-10 02:31:08.443222', 'step': 4224, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 3322488817984}, 'timestamp': '2025-09-10 02:31:08.477586', 'step': 4224, 'epoch': 1} {'type': 'loss', 'content': 0.12622950971126556, 'timestamp': '2025-09-10 02:31:08.481314', 'step': 4225, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 144], 'flops': 4271696270016}, 'timestamp': '2025-09-10 02:31:08.516671', 'step': 4225, 'epoch': 1} {'type': 'loss', 'content': 0.2112990915775299, 'timestamp': '2025-09-10 02:31:08.521834', 'step': 4226, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 2847885091968}, 'timestamp': '2025-09-10 02:31:08.557787', 'step': 4226, 'epoch': 1} {'type': 'loss', 'content': 0.24037165939807892, 'timestamp': '2025-09-10 02:31:08.560693', 'step': 4227, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 3322488817984}, 'timestamp': '2025-09-10 02:31:08.595088', 'step': 4227, 'epoch': 1} {'type': 'loss', 'content': 0.18649642169475555, 'timestamp': '2025-09-10 02:31:08.619731', 'step': 4228, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 144], 'flops': 4271696270016}, 'timestamp': '2025-09-10 02:31:08.664421', 'step': 4228, 'epoch': 1} {'type': 'loss', 'content': 0.09037849307060242, 'timestamp': '2025-09-10 02:31:08.667697', 'step': 4229, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 128], 'flops': 3797092544000}, 'timestamp': '2025-09-10 02:31:08.703328', 'step': 4229, 'epoch': 1} {'type': 'loss', 'content': 0.12348205596208572, 'timestamp': '2025-09-10 02:31:08.706288', 'step': 4230, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 128], 'flops': 3797092544000}, 'timestamp': '2025-09-10 02:31:08.738437', 'step': 4230, 'epoch': 1} {'type': 'loss', 'content': 0.11584454029798508, 'timestamp': '2025-09-10 02:31:08.744358', 'step': 4231, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 3322488817984}, 'timestamp': '2025-09-10 02:31:08.780611', 'step': 4231, 'epoch': 1} {'type': 'loss', 'content': 0.1332472562789917, 'timestamp': '2025-09-10 02:31:08.806600', 'step': 4232, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 2847885091968}, 'timestamp': '2025-09-10 02:31:08.841944', 'step': 4232, 'epoch': 1} {'type': 'loss', 'content': 0.2185291349887848, 'timestamp': '2025-09-10 02:31:08.844704', 'step': 4233, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 3322488817984}, 'timestamp': '2025-09-10 02:31:08.876083', 'step': 4233, 'epoch': 1} {'type': 'loss', 'content': 0.23282326757907867, 'timestamp': '2025-09-10 02:31:08.879310', 'step': 4234, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 128], 'flops': 3797092544000}, 'timestamp': '2025-09-10 02:31:08.912147', 'step': 4234, 'epoch': 1} {'type': 'loss', 'content': 0.27933788299560547, 'timestamp': '2025-09-10 02:31:08.914542', 'step': 4235, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 2847885091968}, 'timestamp': '2025-09-10 02:31:08.948193', 'step': 4235, 'epoch': 1} {'type': 'loss', 'content': 0.19548451900482178, 'timestamp': '2025-09-10 02:31:08.972306', 'step': 4236, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 2847885091968}, 'timestamp': '2025-09-10 02:31:09.006809', 'step': 4236, 'epoch': 1} {'type': 'loss', 'content': 0.18173252046108246, 'timestamp': '2025-09-10 02:31:09.011919', 'step': 4237, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 2847885091968}, 'timestamp': '2025-09-10 02:31:09.046444', 'step': 4237, 'epoch': 1} {'type': 'loss', 'content': 0.13096436858177185, 'timestamp': '2025-09-10 02:31:09.048924', 'step': 4238, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 2847885091968}, 'timestamp': '2025-09-10 02:31:09.081205', 'step': 4238, 'epoch': 1} {'type': 'loss', 'content': 0.15264543890953064, 'timestamp': '2025-09-10 02:31:09.084654', 'step': 4239, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 2847885091968}, 'timestamp': '2025-09-10 02:31:09.118549', 'step': 4239, 'epoch': 1} {'type': 'loss', 'content': 0.059446223080158234, 'timestamp': '2025-09-10 02:31:09.144098', 'step': 4240, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 128], 'flops': 3797092544000}, 'timestamp': '2025-09-10 02:31:09.178918', 'step': 4240, 'epoch': 1} {'type': 'loss', 'content': 0.14531134068965912, 'timestamp': '2025-09-10 02:31:09.182699', 'step': 4241, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 3322488817984}, 'timestamp': '2025-09-10 02:31:09.216290', 'step': 4241, 'epoch': 1} {'type': 'loss', 'content': 0.12288694083690643, 'timestamp': '2025-09-10 02:31:09.218930', 'step': 4242, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 144], 'flops': 4271696270016}, 'timestamp': '2025-09-10 02:31:09.250966', 'step': 4242, 'epoch': 1} {'type': 'loss', 'content': 0.21984905004501343, 'timestamp': '2025-09-10 02:31:09.257314', 'step': 4243, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 2847885091968}, 'timestamp': '2025-09-10 02:31:09.296261', 'step': 4243, 'epoch': 1} {'type': 'loss', 'content': 0.161424458026886, 'timestamp': '2025-09-10 02:31:09.319665', 'step': 4244, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 2847885091968}, 'timestamp': '2025-09-10 02:31:09.352409', 'step': 4244, 'epoch': 1} {'type': 'loss', 'content': 0.12496281415224075, 'timestamp': '2025-09-10 02:31:09.354828', 'step': 4245, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 2847885091968}, 'timestamp': '2025-09-10 02:31:09.386970', 'step': 4245, 'epoch': 1} {'type': 'loss', 'content': 0.15779179334640503, 'timestamp': '2025-09-10 02:31:09.389183', 'step': 4246, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 2847885091968}, 'timestamp': '2025-09-10 02:31:09.425077', 'step': 4246, 'epoch': 1} {'type': 'loss', 'content': 0.24775129556655884, 'timestamp': '2025-09-10 02:31:09.427226', 'step': 4247, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 128], 'flops': 3797092544000}, 'timestamp': '2025-09-10 02:31:09.460205', 'step': 4247, 'epoch': 1} {'type': 'loss', 'content': 0.17665721476078033, 'timestamp': '2025-09-10 02:31:09.483728', 'step': 4248, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 2847885091968}, 'timestamp': '2025-09-10 02:31:09.515294', 'step': 4248, 'epoch': 1} {'type': 'loss', 'content': 0.23802432417869568, 'timestamp': '2025-09-10 02:31:09.517511', 'step': 4249, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 144], 'flops': 4271696270016}, 'timestamp': '2025-09-10 02:31:09.549817', 'step': 4249, 'epoch': 1} {'type': 'loss', 'content': 0.28871655464172363, 'timestamp': '2025-09-10 02:31:09.558409', 'step': 4250, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 2847885091968}, 'timestamp': '2025-09-10 02:31:09.607747', 'step': 4250, 'epoch': 1} {'type': 'loss', 'content': 0.19305184483528137, 'timestamp': '2025-09-10 02:31:09.614569', 'step': 4251, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 3322488817984}, 'timestamp': '2025-09-10 02:31:09.653755', 'step': 4251, 'epoch': 1} {'type': 'loss', 'content': 0.15804094076156616, 'timestamp': '2025-09-10 02:31:09.681922', 'step': 4252, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 128], 'flops': 3797092544000}, 'timestamp': '2025-09-10 02:31:09.728620', 'step': 4252, 'epoch': 1} {'type': 'loss', 'content': 0.13494710624217987, 'timestamp': '2025-09-10 02:31:09.737264', 'step': 4253, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 3322488817984}, 'timestamp': '2025-09-10 02:31:09.786101', 'step': 4253, 'epoch': 1} {'type': 'loss', 'content': 0.17268933355808258, 'timestamp': '2025-09-10 02:31:09.792032', 'step': 4254, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 128], 'flops': 3797092544000}, 'timestamp': '2025-09-10 02:31:09.834726', 'step': 4254, 'epoch': 1} {'type': 'loss', 'content': 0.1476851850748062, 'timestamp': '2025-09-10 02:31:09.842841', 'step': 4255, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 176], 'flops': 5220903722048}, 'timestamp': '2025-09-10 02:31:09.887902', 'step': 4255, 'epoch': 1} {'type': 'loss', 'content': 0.10998678207397461, 'timestamp': '2025-09-10 02:31:09.920811', 'step': 4256, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 160], 'flops': 4746299996032}, 'timestamp': '2025-09-10 02:31:09.974619', 'step': 4256, 'epoch': 1} {'type': 'loss', 'content': 0.20161907374858856, 'timestamp': '2025-09-10 02:31:09.980094', 'step': 4257, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 2847885091968}, 'timestamp': '2025-09-10 02:31:10.023781', 'step': 4257, 'epoch': 1} {'type': 'loss', 'content': 0.27720946073532104, 'timestamp': '2025-09-10 02:31:10.027230', 'step': 4258, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 128], 'flops': 3797092544000}, 'timestamp': '2025-09-10 02:31:10.058166', 'step': 4258, 'epoch': 1} {'type': 'loss', 'content': 0.15097682178020477, 'timestamp': '2025-09-10 02:31:10.060595', 'step': 4259, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 128], 'flops': 3797092544000}, 'timestamp': '2025-09-10 02:31:10.094552', 'step': 4259, 'epoch': 1} {'type': 'loss', 'content': 0.11727308481931686, 'timestamp': '2025-09-10 02:31:10.118279', 'step': 4260, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 144], 'flops': 4271696270016}, 'timestamp': '2025-09-10 02:31:10.150437', 'step': 4260, 'epoch': 1} {'type': 'loss', 'content': 0.1740592122077942, 'timestamp': '2025-09-10 02:31:10.152694', 'step': 4261, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 2847885091968}, 'timestamp': '2025-09-10 02:31:10.183875', 'step': 4261, 'epoch': 1} {'type': 'loss', 'content': 0.16218441724777222, 'timestamp': '2025-09-10 02:31:10.185672', 'step': 4262, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 3322488817984}, 'timestamp': '2025-09-10 02:31:10.217838', 'step': 4262, 'epoch': 1} {'type': 'loss', 'content': 0.17285765707492828, 'timestamp': '2025-09-10 02:31:10.220404', 'step': 4263, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 2847885091968}, 'timestamp': '2025-09-10 02:31:10.252914', 'step': 4263, 'epoch': 1} {'type': 'loss', 'content': 0.08953697979450226, 'timestamp': '2025-09-10 02:31:10.277168', 'step': 4264, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 2847885091968}, 'timestamp': '2025-09-10 02:31:10.309676', 'step': 4264, 'epoch': 1} {'type': 'loss', 'content': 0.1250428706407547, 'timestamp': '2025-09-10 02:31:10.312566', 'step': 4265, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 3322488817984}, 'timestamp': '2025-09-10 02:31:10.345821', 'step': 4265, 'epoch': 1} {'type': 'loss', 'content': 0.1954585760831833, 'timestamp': '2025-09-10 02:31:10.348160', 'step': 4266, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 160], 'flops': 4746299996032}, 'timestamp': '2025-09-10 02:31:10.379221', 'step': 4266, 'epoch': 1} {'type': 'loss', 'content': 0.14097048342227936, 'timestamp': '2025-09-10 02:31:10.381997', 'step': 4267, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 144], 'flops': 4271696270016}, 'timestamp': '2025-09-10 02:31:10.413517', 'step': 4267, 'epoch': 1} {'type': 'loss', 'content': 0.19113275408744812, 'timestamp': '2025-09-10 02:31:10.438324', 'step': 4268, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 144], 'flops': 4271696270016}, 'timestamp': '2025-09-10 02:31:10.472209', 'step': 4268, 'epoch': 1} {'type': 'loss', 'content': 0.19538171589374542, 'timestamp': '2025-09-10 02:31:10.476318', 'step': 4269, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 2847885091968}, 'timestamp': '2025-09-10 02:31:10.507909', 'step': 4269, 'epoch': 1} {'type': 'loss', 'content': 0.08562415838241577, 'timestamp': '2025-09-10 02:31:10.514465', 'step': 4270, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 3322488817984}, 'timestamp': '2025-09-10 02:31:10.547631', 'step': 4270, 'epoch': 1} {'type': 'loss', 'content': 0.1421354115009308, 'timestamp': '2025-09-10 02:31:10.551027', 'step': 4271, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 2847885091968}, 'timestamp': '2025-09-10 02:31:10.581866', 'step': 4271, 'epoch': 1} {'type': 'loss', 'content': 0.1292319893836975, 'timestamp': '2025-09-10 02:31:10.610121', 'step': 4272, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 3322488817984}, 'timestamp': '2025-09-10 02:31:11.291813', 'step': 4272, 'epoch': 1} {'type': 'loss', 'content': 0.17127856612205505, 'timestamp': '2025-09-10 02:31:11.294306', 'step': 4273, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 2847885091968}, 'timestamp': '2025-09-10 02:31:11.326315', 'step': 4273, 'epoch': 1} {'type': 'loss', 'content': 0.23142965137958527, 'timestamp': '2025-09-10 02:31:11.328798', 'step': 4274, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 144], 'flops': 4271696270016}, 'timestamp': '2025-09-10 02:31:11.361347', 'step': 4274, 'epoch': 1} {'type': 'loss', 'content': 0.1340266764163971, 'timestamp': '2025-09-10 02:31:11.364897', 'step': 4275, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 2847885091968}, 'timestamp': '2025-09-10 02:31:11.397616', 'step': 4275, 'epoch': 1} {'type': 'loss', 'content': 0.14142604172229767, 'timestamp': '2025-09-10 02:31:11.422351', 'step': 4276, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 3322488817984}, 'timestamp': '2025-09-10 02:31:11.454876', 'step': 4276, 'epoch': 1} {'type': 'loss', 'content': 0.1170586571097374, 'timestamp': '2025-09-10 02:31:11.457201', 'step': 4277, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 144], 'flops': 4271696270016}, 'timestamp': '2025-09-10 02:31:11.490212', 'step': 4277, 'epoch': 1} {'type': 'loss', 'content': 0.08668683469295502, 'timestamp': '2025-09-10 02:31:11.494166', 'step': 4278, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 144], 'flops': 4271696270016}, 'timestamp': '2025-09-10 02:31:11.533254', 'step': 4278, 'epoch': 1} {'type': 'loss', 'content': 0.15209007263183594, 'timestamp': '2025-09-10 02:31:11.536206', 'step': 4279, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 2847885091968}, 'timestamp': '2025-09-10 02:31:11.568964', 'step': 4279, 'epoch': 1} {'type': 'loss', 'content': 0.22192782163619995, 'timestamp': '2025-09-10 02:31:11.592291', 'step': 4280, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 3322488817984}, 'timestamp': '2025-09-10 02:31:11.627681', 'step': 4280, 'epoch': 1} {'type': 'loss', 'content': 0.13371096551418304, 'timestamp': '2025-09-10 02:31:11.631268', 'step': 4281, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 2847885091968}, 'timestamp': '2025-09-10 02:31:11.667427', 'step': 4281, 'epoch': 1} {'type': 'loss', 'content': 0.09356909245252609, 'timestamp': '2025-09-10 02:31:11.669470', 'step': 4282, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 144], 'flops': 4271696270016}, 'timestamp': '2025-09-10 02:31:11.702014', 'step': 4282, 'epoch': 1} {'type': 'loss', 'content': 0.13573230803012848, 'timestamp': '2025-09-10 02:31:11.706090', 'step': 4283, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 128], 'flops': 3797092544000}, 'timestamp': '2025-09-10 02:31:11.741715', 'step': 4283, 'epoch': 1} {'type': 'loss', 'content': 0.18770195543766022, 'timestamp': '2025-09-10 02:31:11.765522', 'step': 4284, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 3322488817984}, 'timestamp': '2025-09-10 02:31:11.800200', 'step': 4284, 'epoch': 1} {'type': 'loss', 'content': 0.16371917724609375, 'timestamp': '2025-09-10 02:31:11.802953', 'step': 4285, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 3322488817984}, 'timestamp': '2025-09-10 02:31:11.842134', 'step': 4285, 'epoch': 1} {'type': 'loss', 'content': 0.18995605409145355, 'timestamp': '2025-09-10 02:31:11.846545', 'step': 4286, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 80], 'flops': 2373281365952}, 'timestamp': '2025-09-10 02:31:11.883867', 'step': 4286, 'epoch': 1} {'type': 'loss', 'content': 0.16896389424800873, 'timestamp': '2025-09-10 02:31:11.886438', 'step': 4287, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 3322488817984}, 'timestamp': '2025-09-10 02:31:11.919770', 'step': 4287, 'epoch': 1} {'type': 'loss', 'content': 0.1589110642671585, 'timestamp': '2025-09-10 02:31:11.943523', 'step': 4288, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 2847885091968}, 'timestamp': '2025-09-10 02:31:12.002893', 'step': 4288, 'epoch': 1} {'type': 'loss', 'content': 0.18101797997951508, 'timestamp': '2025-09-10 02:31:12.005080', 'step': 4289, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 3322488817984}, 'timestamp': '2025-09-10 02:31:12.040543', 'step': 4289, 'epoch': 1} {'type': 'loss', 'content': 0.23404429852962494, 'timestamp': '2025-09-10 02:31:12.044176', 'step': 4290, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 2847885091968}, 'timestamp': '2025-09-10 02:31:12.078577', 'step': 4290, 'epoch': 1} {'type': 'loss', 'content': 0.13062354922294617, 'timestamp': '2025-09-10 02:31:12.080756', 'step': 4291, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 3322488817984}, 'timestamp': '2025-09-10 02:31:12.112460', 'step': 4291, 'epoch': 1} {'type': 'loss', 'content': 0.16093896329402924, 'timestamp': '2025-09-10 02:31:12.136087', 'step': 4292, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 128], 'flops': 3797092544000}, 'timestamp': '2025-09-10 02:31:12.171185', 'step': 4292, 'epoch': 1} {'type': 'loss', 'content': 0.2734571695327759, 'timestamp': '2025-09-10 02:31:12.173466', 'step': 4293, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 128], 'flops': 3797092544000}, 'timestamp': '2025-09-10 02:31:12.207011', 'step': 4293, 'epoch': 1} {'type': 'loss', 'content': 0.12150593101978302, 'timestamp': '2025-09-10 02:31:12.209209', 'step': 4294, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 160], 'flops': 4746299996032}, 'timestamp': '2025-09-10 02:31:12.242045', 'step': 4294, 'epoch': 1} {'type': 'loss', 'content': 0.13400185108184814, 'timestamp': '2025-09-10 02:31:12.246417', 'step': 4295, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 128], 'flops': 3797092544000}, 'timestamp': '2025-09-10 02:31:12.279155', 'step': 4295, 'epoch': 1} {'type': 'loss', 'content': 0.1635930836200714, 'timestamp': '2025-09-10 02:31:12.307597', 'step': 4296, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 3322488817984}, 'timestamp': '2025-09-10 02:31:12.344972', 'step': 4296, 'epoch': 1} {'type': 'loss', 'content': 0.1695382446050644, 'timestamp': '2025-09-10 02:31:12.347446', 'step': 4297, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 144], 'flops': 4271696270016}, 'timestamp': '2025-09-10 02:31:12.380499', 'step': 4297, 'epoch': 1} {'type': 'loss', 'content': 0.11316270381212234, 'timestamp': '2025-09-10 02:31:12.382789', 'step': 4298, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 3322488817984}, 'timestamp': '2025-09-10 02:31:12.414858', 'step': 4298, 'epoch': 1} {'type': 'loss', 'content': 0.24064625799655914, 'timestamp': '2025-09-10 02:31:12.416939', 'step': 4299, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 3322488817984}, 'timestamp': '2025-09-10 02:31:12.449325', 'step': 4299, 'epoch': 1} {'type': 'loss', 'content': 0.1478482335805893, 'timestamp': '2025-09-10 02:31:12.473542', 'step': 4300, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 176], 'flops': 5220903722048}, 'timestamp': '2025-09-10 02:31:12.507288', 'step': 4300, 'epoch': 1} {'type': 'loss', 'content': 0.19764438271522522, 'timestamp': '2025-09-10 02:31:12.509563', 'step': 4301, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 80], 'flops': 2373281365952}, 'timestamp': '2025-09-10 02:31:12.540739', 'step': 4301, 'epoch': 1} {'type': 'loss', 'content': 0.10830815136432648, 'timestamp': '2025-09-10 02:31:12.542674', 'step': 4302, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 3322488817984}, 'timestamp': '2025-09-10 02:31:12.580790', 'step': 4302, 'epoch': 1} {'type': 'loss', 'content': 0.169451504945755, 'timestamp': '2025-09-10 02:31:12.582811', 'step': 4303, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 3322488817984}, 'timestamp': '2025-09-10 02:31:12.614160', 'step': 4303, 'epoch': 1} {'type': 'loss', 'content': 0.12146374583244324, 'timestamp': '2025-09-10 02:31:12.637737', 'step': 4304, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 3322488817984}, 'timestamp': '2025-09-10 02:31:12.669317', 'step': 4304, 'epoch': 1} {'type': 'loss', 'content': 0.17909757792949677, 'timestamp': '2025-09-10 02:31:12.671276', 'step': 4305, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 128], 'flops': 3797092544000}, 'timestamp': '2025-09-10 02:31:12.703761', 'step': 4305, 'epoch': 1} {'type': 'loss', 'content': 0.09686587750911713, 'timestamp': '2025-09-10 02:31:12.707653', 'step': 4306, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 160], 'flops': 4746299996032}, 'timestamp': '2025-09-10 02:31:12.745888', 'step': 4306, 'epoch': 1} {'type': 'loss', 'content': 0.1233661100268364, 'timestamp': '2025-09-10 02:31:12.748495', 'step': 4307, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 160], 'flops': 4746299996032}, 'timestamp': '2025-09-10 02:31:12.782454', 'step': 4307, 'epoch': 1} {'type': 'loss', 'content': 0.17752298712730408, 'timestamp': '2025-09-10 02:31:12.806150', 'step': 4308, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 3322488817984}, 'timestamp': '2025-09-10 02:31:12.839144', 'step': 4308, 'epoch': 1} {'type': 'loss', 'content': 0.15388864278793335, 'timestamp': '2025-09-10 02:31:12.841843', 'step': 4309, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 2847885091968}, 'timestamp': '2025-09-10 02:31:12.873594', 'step': 4309, 'epoch': 1} {'type': 'loss', 'content': 0.18787775933742523, 'timestamp': '2025-09-10 02:31:12.877417', 'step': 4310, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 2847885091968}, 'timestamp': '2025-09-10 02:31:12.912820', 'step': 4310, 'epoch': 1} {'type': 'loss', 'content': 0.06358691304922104, 'timestamp': '2025-09-10 02:31:12.915162', 'step': 4311, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 3322488817984}, 'timestamp': '2025-09-10 02:31:12.948227', 'step': 4311, 'epoch': 1} {'type': 'loss', 'content': 0.1821787804365158, 'timestamp': '2025-09-10 02:31:12.972469', 'step': 4312, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 3322488817984}, 'timestamp': '2025-09-10 02:31:13.008642', 'step': 4312, 'epoch': 1} {'type': 'loss', 'content': 0.11645307391881943, 'timestamp': '2025-09-10 02:31:13.010981', 'step': 4313, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 3322488817984}, 'timestamp': '2025-09-10 02:31:13.042578', 'step': 4313, 'epoch': 1} {'type': 'loss', 'content': 0.1873520165681839, 'timestamp': '2025-09-10 02:31:13.045128', 'step': 4314, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 128], 'flops': 3797092544000}, 'timestamp': '2025-09-10 02:31:13.076571', 'step': 4314, 'epoch': 1} {'type': 'loss', 'content': 0.2293163686990738, 'timestamp': '2025-09-10 02:31:13.080676', 'step': 4315, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 2847885091968}, 'timestamp': '2025-09-10 02:31:13.112411', 'step': 4315, 'epoch': 1} {'type': 'loss', 'content': 0.11281299591064453, 'timestamp': '2025-09-10 02:31:13.137887', 'step': 4316, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 144], 'flops': 4271696270016}, 'timestamp': '2025-09-10 02:31:13.169040', 'step': 4316, 'epoch': 1} {'type': 'loss', 'content': 0.28132525086402893, 'timestamp': '2025-09-10 02:31:13.171841', 'step': 4317, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 80], 'flops': 2373281365952}, 'timestamp': '2025-09-10 02:31:13.203875', 'step': 4317, 'epoch': 1} {'type': 'loss', 'content': 0.11427208036184311, 'timestamp': '2025-09-10 02:31:13.208232', 'step': 4318, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 3322488817984}, 'timestamp': '2025-09-10 02:31:13.240706', 'step': 4318, 'epoch': 1} {'type': 'loss', 'content': 0.15222816169261932, 'timestamp': '2025-09-10 02:31:13.248805', 'step': 4319, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 2847885091968}, 'timestamp': '2025-09-10 02:31:13.285721', 'step': 4319, 'epoch': 1} {'type': 'loss', 'content': 0.22783370316028595, 'timestamp': '2025-09-10 02:31:13.311936', 'step': 4320, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 2847885091968}, 'timestamp': '2025-09-10 02:31:13.359762', 'step': 4320, 'epoch': 1} {'type': 'loss', 'content': 0.12125413864850998, 'timestamp': '2025-09-10 02:31:13.363738', 'step': 4321, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 2847885091968}, 'timestamp': '2025-09-10 02:31:13.396762', 'step': 4321, 'epoch': 1} {'type': 'loss', 'content': 0.2101483792066574, 'timestamp': '2025-09-10 02:31:13.401613', 'step': 4322, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 3322488817984}, 'timestamp': '2025-09-10 02:31:13.437591', 'step': 4322, 'epoch': 1} {'type': 'loss', 'content': 0.22411857545375824, 'timestamp': '2025-09-10 02:31:13.439898', 'step': 4323, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 160], 'flops': 4746299996032}, 'timestamp': '2025-09-10 02:31:13.472251', 'step': 4323, 'epoch': 1} {'type': 'loss', 'content': 0.20723462104797363, 'timestamp': '2025-09-10 02:31:13.495826', 'step': 4324, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 144], 'flops': 4271696270016}, 'timestamp': '2025-09-10 02:31:13.528434', 'step': 4324, 'epoch': 1} {'type': 'loss', 'content': 0.11053714156150818, 'timestamp': '2025-09-10 02:31:13.532385', 'step': 4325, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 3322488817984}, 'timestamp': '2025-09-10 02:31:13.565003', 'step': 4325, 'epoch': 1} {'type': 'loss', 'content': 0.14083291590213776, 'timestamp': '2025-09-10 02:31:13.568274', 'step': 4326, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 2847885091968}, 'timestamp': '2025-09-10 02:31:13.603936', 'step': 4326, 'epoch': 1} {'type': 'loss', 'content': 0.30772334337234497, 'timestamp': '2025-09-10 02:31:13.606235', 'step': 4327, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 3322488817984}, 'timestamp': '2025-09-10 02:31:13.645911', 'step': 4327, 'epoch': 1} {'type': 'loss', 'content': 0.15253911912441254, 'timestamp': '2025-09-10 02:31:13.670043', 'step': 4328, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 3322488817984}, 'timestamp': '2025-09-10 02:31:13.705242', 'step': 4328, 'epoch': 1} {'type': 'loss', 'content': 0.14513938128948212, 'timestamp': '2025-09-10 02:31:13.710419', 'step': 4329, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 128], 'flops': 3797092544000}, 'timestamp': '2025-09-10 02:31:13.746144', 'step': 4329, 'epoch': 1} {'type': 'loss', 'content': 0.20572251081466675, 'timestamp': '2025-09-10 02:31:13.754449', 'step': 4330, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 2847885091968}, 'timestamp': '2025-09-10 02:31:13.793303', 'step': 4330, 'epoch': 1} {'type': 'loss', 'content': 0.1852225810289383, 'timestamp': '2025-09-10 02:31:13.810429', 'step': 4331, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 3322488817984}, 'timestamp': '2025-09-10 02:31:13.862619', 'step': 4331, 'epoch': 1} {'type': 'loss', 'content': 0.10482712835073471, 'timestamp': '2025-09-10 02:31:13.889151', 'step': 4332, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 3322488817984}, 'timestamp': '2025-09-10 02:31:13.934636', 'step': 4332, 'epoch': 1} {'type': 'loss', 'content': 0.15646010637283325, 'timestamp': '2025-09-10 02:31:13.949300', 'step': 4333, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 2847885091968}, 'timestamp': '2025-09-10 02:31:14.001224', 'step': 4333, 'epoch': 1} {'type': 'loss', 'content': 0.07569701969623566, 'timestamp': '2025-09-10 02:31:14.009420', 'step': 4334, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 3322488817984}, 'timestamp': '2025-09-10 02:31:14.057207', 'step': 4334, 'epoch': 1} {'type': 'loss', 'content': 0.11749899387359619, 'timestamp': '2025-09-10 02:31:14.072134', 'step': 4335, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 128], 'flops': 3797092544000}, 'timestamp': '2025-09-10 02:31:14.156903', 'step': 4335, 'epoch': 1} {'type': 'loss', 'content': 0.19126200675964355, 'timestamp': '2025-09-10 02:31:14.202935', 'step': 4336, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 2847885091968}, 'timestamp': '2025-09-10 02:31:14.252482', 'step': 4336, 'epoch': 1} {'type': 'loss', 'content': 0.12840162217617035, 'timestamp': '2025-09-10 02:31:14.257993', 'step': 4337, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 3322488817984}, 'timestamp': '2025-09-10 02:31:14.306061', 'step': 4337, 'epoch': 1} {'type': 'loss', 'content': 0.20113538205623627, 'timestamp': '2025-09-10 02:31:14.319442', 'step': 4338, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 3322488817984}, 'timestamp': '2025-09-10 02:31:14.369047', 'step': 4338, 'epoch': 1} {'type': 'loss', 'content': 0.16881246864795685, 'timestamp': '2025-09-10 02:31:14.374031', 'step': 4339, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 3322488817984}, 'timestamp': '2025-09-10 02:31:14.417184', 'step': 4339, 'epoch': 1} {'type': 'loss', 'content': 0.11880569905042648, 'timestamp': '2025-09-10 02:31:14.444288', 'step': 4340, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 144], 'flops': 4271696270016}, 'timestamp': '2025-09-10 02:31:14.487935', 'step': 4340, 'epoch': 1} {'type': 'loss', 'content': 0.13501988351345062, 'timestamp': '2025-09-10 02:31:14.496594', 'step': 4341, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 144], 'flops': 4271696270016}, 'timestamp': '2025-09-10 02:31:14.546633', 'step': 4341, 'epoch': 1} {'type': 'loss', 'content': 0.1528489738702774, 'timestamp': '2025-09-10 02:31:14.553352', 'step': 4342, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 3322488817984}, 'timestamp': '2025-09-10 02:31:14.596677', 'step': 4342, 'epoch': 1} {'type': 'loss', 'content': 0.1658528745174408, 'timestamp': '2025-09-10 02:31:14.604073', 'step': 4343, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 2847885091968}, 'timestamp': '2025-09-10 02:31:14.653567', 'step': 4343, 'epoch': 1} {'type': 'loss', 'content': 0.19183912873268127, 'timestamp': '2025-09-10 02:31:14.682520', 'step': 4344, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 3322488817984}, 'timestamp': '2025-09-10 02:31:14.745279', 'step': 4344, 'epoch': 1} {'type': 'loss', 'content': 0.09440381079912186, 'timestamp': '2025-09-10 02:31:14.754588', 'step': 4345, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 2847885091968}, 'timestamp': '2025-09-10 02:31:14.800057', 'step': 4345, 'epoch': 1} {'type': 'loss', 'content': 0.14075452089309692, 'timestamp': '2025-09-10 02:31:14.811804', 'step': 4346, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 144], 'flops': 4271696270016}, 'timestamp': '2025-09-10 02:31:14.864057', 'step': 4346, 'epoch': 1} {'type': 'loss', 'content': 0.21068184077739716, 'timestamp': '2025-09-10 02:31:14.867882', 'step': 4347, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 2847885091968}, 'timestamp': '2025-09-10 02:31:14.912572', 'step': 4347, 'epoch': 1} {'type': 'loss', 'content': 0.18327341973781586, 'timestamp': '2025-09-10 02:31:14.938517', 'step': 4348, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 128], 'flops': 3797092544000}, 'timestamp': '2025-09-10 02:31:15.239569', 'step': 4348, 'epoch': 1} {'type': 'loss', 'content': 0.16363525390625, 'timestamp': '2025-09-10 02:31:15.243801', 'step': 4349, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 3322488817984}, 'timestamp': '2025-09-10 02:31:15.290911', 'step': 4349, 'epoch': 1} {'type': 'loss', 'content': 0.21569854021072388, 'timestamp': '2025-09-10 02:31:15.299608', 'step': 4350, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 144], 'flops': 4271696270016}, 'timestamp': '2025-09-10 02:31:15.370240', 'step': 4350, 'epoch': 1} {'type': 'loss', 'content': 0.1389399915933609, 'timestamp': '2025-09-10 02:31:15.379321', 'step': 4351, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 2847885091968}, 'timestamp': '2025-09-10 02:31:15.415428', 'step': 4351, 'epoch': 1} {'type': 'loss', 'content': 0.1829020231962204, 'timestamp': '2025-09-10 02:31:15.442624', 'step': 4352, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 2847885091968}, 'timestamp': '2025-09-10 02:31:15.476211', 'step': 4352, 'epoch': 1} {'type': 'loss', 'content': 0.11817081272602081, 'timestamp': '2025-09-10 02:31:15.479489', 'step': 4353, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 3322488817984}, 'timestamp': '2025-09-10 02:31:15.512664', 'step': 4353, 'epoch': 1} {'type': 'loss', 'content': 0.1829960197210312, 'timestamp': '2025-09-10 02:31:15.514772', 'step': 4354, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 2847885091968}, 'timestamp': '2025-09-10 02:31:15.550597', 'step': 4354, 'epoch': 1} {'type': 'loss', 'content': 0.23905688524246216, 'timestamp': '2025-09-10 02:31:15.552627', 'step': 4355, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 128], 'flops': 3797092544000}, 'timestamp': '2025-09-10 02:31:15.582549', 'step': 4355, 'epoch': 1} {'type': 'loss', 'content': 0.058134302496910095, 'timestamp': '2025-09-10 02:31:15.606998', 'step': 4356, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 3322488817984}, 'timestamp': '2025-09-10 02:31:15.649382', 'step': 4356, 'epoch': 1} {'type': 'loss', 'content': 0.24056962132453918, 'timestamp': '2025-09-10 02:31:15.651364', 'step': 4357, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 3322488817984}, 'timestamp': '2025-09-10 02:31:15.681549', 'step': 4357, 'epoch': 1} {'type': 'loss', 'content': 0.18123388290405273, 'timestamp': '2025-09-10 02:31:15.683655', 'step': 4358, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 3322488817984}, 'timestamp': '2025-09-10 02:31:15.716250', 'step': 4358, 'epoch': 1} {'type': 'loss', 'content': 0.23099716007709503, 'timestamp': '2025-09-10 02:31:15.725311', 'step': 4359, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 128], 'flops': 3797092544000}, 'timestamp': '2025-09-10 02:31:15.767820', 'step': 4359, 'epoch': 1} {'type': 'loss', 'content': 0.05227033421397209, 'timestamp': '2025-09-10 02:31:15.793210', 'step': 4360, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 144], 'flops': 4271696270016}, 'timestamp': '2025-09-10 02:31:15.827337', 'step': 4360, 'epoch': 1} {'type': 'loss', 'content': 0.10133743286132812, 'timestamp': '2025-09-10 02:31:15.833148', 'step': 4361, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 128], 'flops': 3797092544000}, 'timestamp': '2025-09-10 02:31:15.872247', 'step': 4361, 'epoch': 1} {'type': 'loss', 'content': 0.23325896263122559, 'timestamp': '2025-09-10 02:31:15.874998', 'step': 4362, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 2847885091968}, 'timestamp': '2025-09-10 02:31:15.905471', 'step': 4362, 'epoch': 1} {'type': 'loss', 'content': 0.08995649218559265, 'timestamp': '2025-09-10 02:31:15.909662', 'step': 4363, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 160], 'flops': 4746299996032}, 'timestamp': '2025-09-10 02:31:15.942802', 'step': 4363, 'epoch': 1} {'type': 'loss', 'content': 0.16808953881263733, 'timestamp': '2025-09-10 02:31:15.966336', 'step': 4364, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 2847885091968}, 'timestamp': '2025-09-10 02:31:16.004452', 'step': 4364, 'epoch': 1} {'type': 'loss', 'content': 0.16772007942199707, 'timestamp': '2025-09-10 02:31:16.008513', 'step': 4365, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 3322488817984}, 'timestamp': '2025-09-10 02:31:16.061297', 'step': 4365, 'epoch': 1} {'type': 'loss', 'content': 0.15552844107151031, 'timestamp': '2025-09-10 02:31:16.067760', 'step': 4366, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 3322488817984}, 'timestamp': '2025-09-10 02:31:16.101562', 'step': 4366, 'epoch': 1} {'type': 'loss', 'content': 0.138884499669075, 'timestamp': '2025-09-10 02:31:16.105705', 'step': 4367, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 144], 'flops': 4271696270016}, 'timestamp': '2025-09-10 02:31:16.158798', 'step': 4367, 'epoch': 1} {'type': 'loss', 'content': 0.2873844802379608, 'timestamp': '2025-09-10 02:31:16.188917', 'step': 4368, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 3322488817984}, 'timestamp': '2025-09-10 02:31:16.231299', 'step': 4368, 'epoch': 1} {'type': 'loss', 'content': 0.13835148513317108, 'timestamp': '2025-09-10 02:31:16.234126', 'step': 4369, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 144], 'flops': 4271696270016}, 'timestamp': '2025-09-10 02:31:16.265769', 'step': 4369, 'epoch': 1} {'type': 'loss', 'content': 0.29418978095054626, 'timestamp': '2025-09-10 02:31:16.268730', 'step': 4370, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 3322488817984}, 'timestamp': '2025-09-10 02:31:16.300172', 'step': 4370, 'epoch': 1} {'type': 'loss', 'content': 0.1701010912656784, 'timestamp': '2025-09-10 02:31:16.302472', 'step': 4371, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 128], 'flops': 3797092544000}, 'timestamp': '2025-09-10 02:31:16.336543', 'step': 4371, 'epoch': 1} {'type': 'loss', 'content': 0.23149797320365906, 'timestamp': '2025-09-10 02:31:16.363663', 'step': 4372, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 3322488817984}, 'timestamp': '2025-09-10 02:31:16.407847', 'step': 4372, 'epoch': 1} {'type': 'loss', 'content': 0.15562684834003448, 'timestamp': '2025-09-10 02:31:16.410992', 'step': 4373, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 128], 'flops': 3797092544000}, 'timestamp': '2025-09-10 02:31:16.443367', 'step': 4373, 'epoch': 1} {'type': 'loss', 'content': 0.25674188137054443, 'timestamp': '2025-09-10 02:31:16.445745', 'step': 4374, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 128], 'flops': 3797092544000}, 'timestamp': '2025-09-10 02:31:16.477462', 'step': 4374, 'epoch': 1} {'type': 'loss', 'content': 0.2632237672805786, 'timestamp': '2025-09-10 02:31:16.483475', 'step': 4375, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 2847885091968}, 'timestamp': '2025-09-10 02:31:16.532178', 'step': 4375, 'epoch': 1} {'type': 'loss', 'content': 0.13591474294662476, 'timestamp': '2025-09-10 02:31:16.558945', 'step': 4376, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 160], 'flops': 4746299996032}, 'timestamp': '2025-09-10 02:31:16.590347', 'step': 4376, 'epoch': 1} {'type': 'loss', 'content': 0.09480602294206619, 'timestamp': '2025-09-10 02:31:16.593001', 'step': 4377, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 3322488817984}, 'timestamp': '2025-09-10 02:31:16.626494', 'step': 4377, 'epoch': 1} {'type': 'loss', 'content': 0.13080134987831116, 'timestamp': '2025-09-10 02:31:16.629841', 'step': 4378, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 144], 'flops': 4271696270016}, 'timestamp': '2025-09-10 02:31:16.663282', 'step': 4378, 'epoch': 1} {'type': 'loss', 'content': 0.1897391676902771, 'timestamp': '2025-09-10 02:31:16.665398', 'step': 4379, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 2847885091968}, 'timestamp': '2025-09-10 02:31:16.695947', 'step': 4379, 'epoch': 1} {'type': 'loss', 'content': 0.12193606793880463, 'timestamp': '2025-09-10 02:31:16.719766', 'step': 4380, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 128], 'flops': 3797092544000}, 'timestamp': '2025-09-10 02:31:16.753958', 'step': 4380, 'epoch': 1} {'type': 'loss', 'content': 0.23924385011196136, 'timestamp': '2025-09-10 02:31:16.756663', 'step': 4381, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 3322488817984}, 'timestamp': '2025-09-10 02:31:16.793729', 'step': 4381, 'epoch': 1} {'type': 'loss', 'content': 0.2335568070411682, 'timestamp': '2025-09-10 02:31:16.795564', 'step': 4382, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 3322488817984}, 'timestamp': '2025-09-10 02:31:16.826164', 'step': 4382, 'epoch': 1} {'type': 'loss', 'content': 0.15707460045814514, 'timestamp': '2025-09-10 02:31:16.830684', 'step': 4383, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 3322488817984}, 'timestamp': '2025-09-10 02:31:16.883564', 'step': 4383, 'epoch': 1} {'type': 'loss', 'content': 0.12523123621940613, 'timestamp': '2025-09-10 02:31:16.911667', 'step': 4384, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 2847885091968}, 'timestamp': '2025-09-10 02:31:17.223933', 'step': 4384, 'epoch': 1} {'type': 'loss', 'content': 0.1889878660440445, 'timestamp': '2025-09-10 02:31:17.238772', 'step': 4385, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 3322488817984}, 'timestamp': '2025-09-10 02:31:17.302486', 'step': 4385, 'epoch': 1} {'type': 'loss', 'content': 0.16744080185890198, 'timestamp': '2025-09-10 02:31:17.315953', 'step': 4386, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 2847885091968}, 'timestamp': '2025-09-10 02:31:17.436278', 'step': 4386, 'epoch': 1} {'type': 'loss', 'content': 0.23073150217533112, 'timestamp': '2025-09-10 02:31:17.451865', 'step': 4387, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 2847885091968}, 'timestamp': '2025-09-10 02:31:17.524040', 'step': 4387, 'epoch': 1} {'type': 'loss', 'content': 0.0888458713889122, 'timestamp': '2025-09-10 02:31:17.575896', 'step': 4388, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 3322488817984}, 'timestamp': '2025-09-10 02:31:17.625847', 'step': 4388, 'epoch': 1} {'type': 'loss', 'content': 0.15058693289756775, 'timestamp': '2025-09-10 02:31:17.638854', 'step': 4389, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 2847885091968}, 'timestamp': '2025-09-10 02:31:17.693866', 'step': 4389, 'epoch': 1} {'type': 'loss', 'content': 0.1530243158340454, 'timestamp': '2025-09-10 02:31:17.715496', 'step': 4390, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 128], 'flops': 3797092544000}, 'timestamp': '2025-09-10 02:31:17.764955', 'step': 4390, 'epoch': 1} {'type': 'loss', 'content': 0.1587502509355545, 'timestamp': '2025-09-10 02:31:17.775464', 'step': 4391, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 128], 'flops': 3797092544000}, 'timestamp': '2025-09-10 02:31:17.827221', 'step': 4391, 'epoch': 1} {'type': 'loss', 'content': 0.17647990584373474, 'timestamp': '2025-09-10 02:31:17.853948', 'step': 4392, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 3322488817984}, 'timestamp': '2025-09-10 02:31:17.902056', 'step': 4392, 'epoch': 1} {'type': 'loss', 'content': 0.10371190309524536, 'timestamp': '2025-09-10 02:31:17.912676', 'step': 4393, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 2847885091968}, 'timestamp': '2025-09-10 02:31:17.967210', 'step': 4393, 'epoch': 1} {'type': 'loss', 'content': 0.15805038809776306, 'timestamp': '2025-09-10 02:31:17.974425', 'step': 4394, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 3322488817984}, 'timestamp': '2025-09-10 02:31:18.025395', 'step': 4394, 'epoch': 1} {'type': 'loss', 'content': 0.1380767524242401, 'timestamp': '2025-09-10 02:31:18.033013', 'step': 4395, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 3322488817984}, 'timestamp': '2025-09-10 02:31:18.084658', 'step': 4395, 'epoch': 1} {'type': 'loss', 'content': 0.206951305270195, 'timestamp': '2025-09-10 02:31:18.115176', 'step': 4396, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 128], 'flops': 3797092544000}, 'timestamp': '2025-09-10 02:31:18.189479', 'step': 4396, 'epoch': 1} {'type': 'loss', 'content': 0.17598600685596466, 'timestamp': '2025-09-10 02:31:18.200511', 'step': 4397, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 3322488817984}, 'timestamp': '2025-09-10 02:31:18.258556', 'step': 4397, 'epoch': 1} {'type': 'loss', 'content': 0.156542107462883, 'timestamp': '2025-09-10 02:31:18.275800', 'step': 4398, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 2847885091968}, 'timestamp': '2025-09-10 02:31:18.346437', 'step': 4398, 'epoch': 1} {'type': 'loss', 'content': 0.12865658104419708, 'timestamp': '2025-09-10 02:31:18.374666', 'step': 4399, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 3322488817984}, 'timestamp': '2025-09-10 02:31:18.458138', 'step': 4399, 'epoch': 1} {'type': 'loss', 'content': 0.17787618935108185, 'timestamp': '2025-09-10 02:31:18.502258', 'step': 4400, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 3322488817984}, 'timestamp': '2025-09-10 02:31:18.592389', 'step': 4400, 'epoch': 1} {'type': 'loss', 'content': 0.1488511860370636, 'timestamp': '2025-09-10 02:31:18.608460', 'step': 4401, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 128], 'flops': 3797092544000}, 'timestamp': '2025-09-10 02:31:18.671469', 'step': 4401, 'epoch': 1} {'type': 'loss', 'content': 0.26451292634010315, 'timestamp': '2025-09-10 02:31:18.675851', 'step': 4402, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 80], 'flops': 2373281365952}, 'timestamp': '2025-09-10 02:31:18.727329', 'step': 4402, 'epoch': 1} {'type': 'loss', 'content': 0.2177564948797226, 'timestamp': '2025-09-10 02:31:18.734156', 'step': 4403, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 3322488817984}, 'timestamp': '2025-09-10 02:31:18.772236', 'step': 4403, 'epoch': 1} {'type': 'loss', 'content': 0.16539372503757477, 'timestamp': '2025-09-10 02:31:18.798212', 'step': 4404, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 2847885091968}, 'timestamp': '2025-09-10 02:31:18.834910', 'step': 4404, 'epoch': 1} {'type': 'loss', 'content': 0.08802267163991928, 'timestamp': '2025-09-10 02:31:18.837194', 'step': 4405, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 128], 'flops': 3797092544000}, 'timestamp': '2025-09-10 02:31:18.868874', 'step': 4405, 'epoch': 1} {'type': 'loss', 'content': 0.1615055948495865, 'timestamp': '2025-09-10 02:31:18.871618', 'step': 4406, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 3322488817984}, 'timestamp': '2025-09-10 02:31:18.906455', 'step': 4406, 'epoch': 1} {'type': 'loss', 'content': 0.08550972491502762, 'timestamp': '2025-09-10 02:31:18.908891', 'step': 4407, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 144], 'flops': 4271696270016}, 'timestamp': '2025-09-10 02:31:18.941285', 'step': 4407, 'epoch': 1} {'type': 'loss', 'content': 0.15117691457271576, 'timestamp': '2025-09-10 02:31:18.965785', 'step': 4408, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 128], 'flops': 3797092544000}, 'timestamp': '2025-09-10 02:31:19.000938', 'step': 4408, 'epoch': 1} {'type': 'loss', 'content': 0.10236814618110657, 'timestamp': '2025-09-10 02:31:19.003858', 'step': 4409, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 2847885091968}, 'timestamp': '2025-09-10 02:31:19.037552', 'step': 4409, 'epoch': 1} {'type': 'loss', 'content': 0.17852672934532166, 'timestamp': '2025-09-10 02:31:19.040266', 'step': 4410, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 160], 'flops': 4746299996032}, 'timestamp': '2025-09-10 02:31:19.077612', 'step': 4410, 'epoch': 1} {'type': 'loss', 'content': 0.2032746821641922, 'timestamp': '2025-09-10 02:31:19.084962', 'step': 4411, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 80], 'flops': 2373281365952}, 'timestamp': '2025-09-10 02:31:19.119936', 'step': 4411, 'epoch': 1} {'type': 'loss', 'content': 0.14489208161830902, 'timestamp': '2025-09-10 02:31:19.144142', 'step': 4412, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 80], 'flops': 2373281365952}, 'timestamp': '2025-09-10 02:31:19.183348', 'step': 4412, 'epoch': 1} {'type': 'loss', 'content': 0.1576092392206192, 'timestamp': '2025-09-10 02:31:19.185987', 'step': 4413, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 3322488817984}, 'timestamp': '2025-09-10 02:31:19.218077', 'step': 4413, 'epoch': 1} {'type': 'loss', 'content': 0.24091662466526031, 'timestamp': '2025-09-10 02:31:19.221973', 'step': 4414, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 2847885091968}, 'timestamp': '2025-09-10 02:31:19.255444', 'step': 4414, 'epoch': 1} {'type': 'loss', 'content': 0.09322074800729752, 'timestamp': '2025-09-10 02:31:19.258101', 'step': 4415, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 3322488817984}, 'timestamp': '2025-09-10 02:31:19.296752', 'step': 4415, 'epoch': 1} {'type': 'loss', 'content': 0.1660929024219513, 'timestamp': '2025-09-10 02:31:19.324809', 'step': 4416, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 3322488817984}, 'timestamp': '2025-09-10 02:31:19.359738', 'step': 4416, 'epoch': 1} {'type': 'loss', 'content': 0.11315928399562836, 'timestamp': '2025-09-10 02:31:19.366393', 'step': 4417, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 3322488817984}, 'timestamp': '2025-09-10 02:31:19.408547', 'step': 4417, 'epoch': 1} {'type': 'loss', 'content': 0.17494001984596252, 'timestamp': '2025-09-10 02:31:19.411968', 'step': 4418, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 2847885091968}, 'timestamp': '2025-09-10 02:31:19.464986', 'step': 4418, 'epoch': 1} {'type': 'loss', 'content': 0.12341292947530746, 'timestamp': '2025-09-10 02:31:19.483286', 'step': 4419, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 3322488817984}, 'timestamp': '2025-09-10 02:31:19.548994', 'step': 4419, 'epoch': 1} {'type': 'loss', 'content': 0.19570687413215637, 'timestamp': '2025-09-10 02:31:19.572869', 'step': 4420, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 3322488817984}, 'timestamp': '2025-09-10 02:31:19.605358', 'step': 4420, 'epoch': 1} {'type': 'loss', 'content': 0.15135008096694946, 'timestamp': '2025-09-10 02:31:19.608597', 'step': 4421, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 160], 'flops': 4746299996032}, 'timestamp': '2025-09-10 02:31:19.640526', 'step': 4421, 'epoch': 1} {'type': 'loss', 'content': 0.1957581490278244, 'timestamp': '2025-09-10 02:31:19.642982', 'step': 4422, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 3322488817984}, 'timestamp': '2025-09-10 02:31:19.674550', 'step': 4422, 'epoch': 1} {'type': 'loss', 'content': 0.2478695511817932, 'timestamp': '2025-09-10 02:31:19.677223', 'step': 4423, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 144], 'flops': 4271696270016}, 'timestamp': '2025-09-10 02:31:19.716410', 'step': 4423, 'epoch': 1} {'type': 'loss', 'content': 0.1441160887479782, 'timestamp': '2025-09-10 02:31:19.742362', 'step': 4424, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 2847885091968}, 'timestamp': '2025-09-10 02:31:19.779068', 'step': 4424, 'epoch': 1} {'type': 'loss', 'content': 0.1827535778284073, 'timestamp': '2025-09-10 02:31:19.781733', 'step': 4425, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 3322488817984}, 'timestamp': '2025-09-10 02:31:19.813724', 'step': 4425, 'epoch': 1} {'type': 'loss', 'content': 0.19121982157230377, 'timestamp': '2025-09-10 02:31:19.816041', 'step': 4426, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 128], 'flops': 3797092544000}, 'timestamp': '2025-09-10 02:31:19.846628', 'step': 4426, 'epoch': 1} {'type': 'loss', 'content': 0.24022190272808075, 'timestamp': '2025-09-10 02:31:19.849260', 'step': 4427, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 3322488817984}, 'timestamp': '2025-09-10 02:31:19.881372', 'step': 4427, 'epoch': 1} {'type': 'loss', 'content': 0.08858761191368103, 'timestamp': '2025-09-10 02:31:19.905932', 'step': 4428, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 2847885091968}, 'timestamp': '2025-09-10 02:31:19.941025', 'step': 4428, 'epoch': 1} {'type': 'loss', 'content': 0.1944870948791504, 'timestamp': '2025-09-10 02:31:19.946523', 'step': 4429, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 2847885091968}, 'timestamp': '2025-09-10 02:31:19.982281', 'step': 4429, 'epoch': 1} {'type': 'loss', 'content': 0.15773437917232513, 'timestamp': '2025-09-10 02:31:19.985667', 'step': 4430, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 128], 'flops': 3797092544000}, 'timestamp': '2025-09-10 02:31:20.020988', 'step': 4430, 'epoch': 1} {'type': 'loss', 'content': 0.16402263939380646, 'timestamp': '2025-09-10 02:31:20.025663', 'step': 4431, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 2847885091968}, 'timestamp': '2025-09-10 02:31:20.063406', 'step': 4431, 'epoch': 1} {'type': 'loss', 'content': 0.24764807522296906, 'timestamp': '2025-09-10 02:31:20.089705', 'step': 4432, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 3322488817984}, 'timestamp': '2025-09-10 02:31:20.122325', 'step': 4432, 'epoch': 1} {'type': 'loss', 'content': 0.18225601315498352, 'timestamp': '2025-09-10 02:31:20.126649', 'step': 4433, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 2847885091968}, 'timestamp': '2025-09-10 02:31:20.162259', 'step': 4433, 'epoch': 1} {'type': 'loss', 'content': 0.15301474928855896, 'timestamp': '2025-09-10 02:31:20.164744', 'step': 4434, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 3322488817984}, 'timestamp': '2025-09-10 02:31:20.196056', 'step': 4434, 'epoch': 1} {'type': 'loss', 'content': 0.26680365204811096, 'timestamp': '2025-09-10 02:31:20.198790', 'step': 4435, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 2847885091968}, 'timestamp': '2025-09-10 02:31:20.230206', 'step': 4435, 'epoch': 1} {'type': 'loss', 'content': 0.07303228229284286, 'timestamp': '2025-09-10 02:31:20.257640', 'step': 4436, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 2847885091968}, 'timestamp': '2025-09-10 02:31:20.289935', 'step': 4436, 'epoch': 1} {'type': 'loss', 'content': 0.22072944045066833, 'timestamp': '2025-09-10 02:31:20.292574', 'step': 4437, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 2847885091968}, 'timestamp': '2025-09-10 02:31:20.330465', 'step': 4437, 'epoch': 1} {'type': 'loss', 'content': 0.11016049236059189, 'timestamp': '2025-09-10 02:31:20.333424', 'step': 4438, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 128], 'flops': 3797092544000}, 'timestamp': '2025-09-10 02:31:20.368586', 'step': 4438, 'epoch': 1} {'type': 'loss', 'content': 0.15461821854114532, 'timestamp': '2025-09-10 02:31:20.371114', 'step': 4439, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 3322488817984}, 'timestamp': '2025-09-10 02:31:20.402202', 'step': 4439, 'epoch': 1} {'type': 'loss', 'content': 0.1399541050195694, 'timestamp': '2025-09-10 02:31:20.426149', 'step': 4440, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 144], 'flops': 4271696270016}, 'timestamp': '2025-09-10 02:31:20.458753', 'step': 4440, 'epoch': 1} {'type': 'loss', 'content': 0.3647075593471527, 'timestamp': '2025-09-10 02:31:20.462069', 'step': 4441, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 3322488817984}, 'timestamp': '2025-09-10 02:31:20.495325', 'step': 4441, 'epoch': 1} {'type': 'loss', 'content': 0.1397463083267212, 'timestamp': '2025-09-10 02:31:20.497936', 'step': 4442, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 3322488817984}, 'timestamp': '2025-09-10 02:31:20.531030', 'step': 4442, 'epoch': 1} {'type': 'loss', 'content': 0.20017793774604797, 'timestamp': '2025-09-10 02:31:20.533224', 'step': 4443, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 3322488817984}, 'timestamp': '2025-09-10 02:31:20.564257', 'step': 4443, 'epoch': 1} {'type': 'loss', 'content': 0.24016623198986053, 'timestamp': '2025-09-10 02:31:20.588541', 'step': 4444, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 128], 'flops': 3797092544000}, 'timestamp': '2025-09-10 02:31:20.621199', 'step': 4444, 'epoch': 1} {'type': 'loss', 'content': 0.1568870097398758, 'timestamp': '2025-09-10 02:31:20.624388', 'step': 4445, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 3322488817984}, 'timestamp': '2025-09-10 02:31:20.656750', 'step': 4445, 'epoch': 1} {'type': 'loss', 'content': 0.12303774803876877, 'timestamp': '2025-09-10 02:31:20.659603', 'step': 4446, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 3322488817984}, 'timestamp': '2025-09-10 02:31:20.692582', 'step': 4446, 'epoch': 1} {'type': 'loss', 'content': 0.0947057381272316, 'timestamp': '2025-09-10 02:31:20.694785', 'step': 4447, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 2847885091968}, 'timestamp': '2025-09-10 02:31:20.726317', 'step': 4447, 'epoch': 1} {'type': 'loss', 'content': 0.21949727833271027, 'timestamp': '2025-09-10 02:31:20.751862', 'step': 4448, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 128], 'flops': 3797092544000}, 'timestamp': '2025-09-10 02:31:20.784989', 'step': 4448, 'epoch': 1} {'type': 'loss', 'content': 0.0965825766324997, 'timestamp': '2025-09-10 02:31:20.787449', 'step': 4449, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 80], 'flops': 2373281365952}, 'timestamp': '2025-09-10 02:31:20.819947', 'step': 4449, 'epoch': 1} {'type': 'loss', 'content': 0.2647029459476471, 'timestamp': '2025-09-10 02:31:20.823871', 'step': 4450, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 2847885091968}, 'timestamp': '2025-09-10 02:31:20.859635', 'step': 4450, 'epoch': 1} {'type': 'loss', 'content': 0.1957150101661682, 'timestamp': '2025-09-10 02:31:20.862189', 'step': 4451, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 160], 'flops': 4746299996032}, 'timestamp': '2025-09-10 02:31:20.894204', 'step': 4451, 'epoch': 1} {'type': 'loss', 'content': 0.15575586259365082, 'timestamp': '2025-09-10 02:31:20.918059', 'step': 4452, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 3322488817984}, 'timestamp': '2025-09-10 02:31:20.954435', 'step': 4452, 'epoch': 1} {'type': 'loss', 'content': 0.1493702530860901, 'timestamp': '2025-09-10 02:31:20.956586', 'step': 4453, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 3322488817984}, 'timestamp': '2025-09-10 02:31:20.992582', 'step': 4453, 'epoch': 1} {'type': 'loss', 'content': 0.15263374149799347, 'timestamp': '2025-09-10 02:31:20.994638', 'step': 4454, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 128], 'flops': 3797092544000}, 'timestamp': '2025-09-10 02:31:21.032081', 'step': 4454, 'epoch': 1} {'type': 'loss', 'content': 0.24522623419761658, 'timestamp': '2025-09-10 02:31:21.034429', 'step': 4455, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 3322488817984}, 'timestamp': '2025-09-10 02:31:21.066187', 'step': 4455, 'epoch': 1} {'type': 'loss', 'content': 0.1177227720618248, 'timestamp': '2025-09-10 02:31:21.090240', 'step': 4456, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 3322488817984}, 'timestamp': '2025-09-10 02:31:21.123768', 'step': 4456, 'epoch': 1} {'type': 'loss', 'content': 0.08853379637002945, 'timestamp': '2025-09-10 02:31:21.127980', 'step': 4457, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 2847885091968}, 'timestamp': '2025-09-10 02:31:21.159298', 'step': 4457, 'epoch': 1} {'type': 'loss', 'content': 0.15914125740528107, 'timestamp': '2025-09-10 02:31:21.161891', 'step': 4458, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 2847885091968}, 'timestamp': '2025-09-10 02:31:21.193296', 'step': 4458, 'epoch': 1} {'type': 'loss', 'content': 0.0886508896946907, 'timestamp': '2025-09-10 02:31:21.197607', 'step': 4459, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 3322488817984}, 'timestamp': '2025-09-10 02:31:21.230002', 'step': 4459, 'epoch': 1} {'type': 'loss', 'content': 0.19760248064994812, 'timestamp': '2025-09-10 02:31:21.255611', 'step': 4460, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 3322488817984}, 'timestamp': '2025-09-10 02:31:21.287233', 'step': 4460, 'epoch': 1} {'type': 'loss', 'content': 0.16034941375255585, 'timestamp': '2025-09-10 02:31:21.290241', 'step': 4461, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 3322488817984}, 'timestamp': '2025-09-10 02:31:21.323115', 'step': 4461, 'epoch': 1} {'type': 'loss', 'content': 0.06892631202936172, 'timestamp': '2025-09-10 02:31:21.326661', 'step': 4462, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 3322488817984}, 'timestamp': '2025-09-10 02:31:21.359862', 'step': 4462, 'epoch': 1} {'type': 'loss', 'content': 0.18982277810573578, 'timestamp': '2025-09-10 02:31:21.362680', 'step': 4463, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 3322488817984}, 'timestamp': '2025-09-10 02:31:21.395294', 'step': 4463, 'epoch': 1} {'type': 'loss', 'content': 0.22137132287025452, 'timestamp': '2025-09-10 02:31:21.419202', 'step': 4464, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 3322488817984}, 'timestamp': '2025-09-10 02:31:21.451529', 'step': 4464, 'epoch': 1} {'type': 'loss', 'content': 0.12191776931285858, 'timestamp': '2025-09-10 02:31:21.454397', 'step': 4465, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 3322488817984}, 'timestamp': '2025-09-10 02:31:21.486568', 'step': 4465, 'epoch': 1} {'type': 'loss', 'content': 0.1941699981689453, 'timestamp': '2025-09-10 02:31:21.489525', 'step': 4466, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 128], 'flops': 3797092544000}, 'timestamp': '2025-09-10 02:31:21.524012', 'step': 4466, 'epoch': 1} {'type': 'loss', 'content': 0.16913551092147827, 'timestamp': '2025-09-10 02:31:21.526932', 'step': 4467, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 208], 'flops': 6170111174080}, 'timestamp': '2025-09-10 02:31:21.560199', 'step': 4467, 'epoch': 1} {'type': 'loss', 'content': 0.134031280875206, 'timestamp': '2025-09-10 02:31:21.587842', 'step': 4468, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 2847885091968}, 'timestamp': '2025-09-10 02:31:21.624605', 'step': 4468, 'epoch': 1} {'type': 'loss', 'content': 0.09919016808271408, 'timestamp': '2025-09-10 02:31:21.629715', 'step': 4469, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 128], 'flops': 3797092544000}, 'timestamp': '2025-09-10 02:31:21.666948', 'step': 4469, 'epoch': 1} {'type': 'loss', 'content': 0.1411837637424469, 'timestamp': '2025-09-10 02:31:21.670151', 'step': 4470, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 2847885091968}, 'timestamp': '2025-09-10 02:31:21.703098', 'step': 4470, 'epoch': 1} {'type': 'loss', 'content': 0.24730442464351654, 'timestamp': '2025-09-10 02:31:21.706357', 'step': 4471, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 2847885091968}, 'timestamp': '2025-09-10 02:31:21.739581', 'step': 4471, 'epoch': 1} {'type': 'loss', 'content': 0.12046559900045395, 'timestamp': '2025-09-10 02:31:21.763376', 'step': 4472, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 3322488817984}, 'timestamp': '2025-09-10 02:31:21.795578', 'step': 4472, 'epoch': 1} {'type': 'loss', 'content': 0.21384111046791077, 'timestamp': '2025-09-10 02:31:21.798235', 'step': 4473, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 80], 'flops': 2373281365952}, 'timestamp': '2025-09-10 02:31:21.831718', 'step': 4473, 'epoch': 1} {'type': 'loss', 'content': 0.07491905242204666, 'timestamp': '2025-09-10 02:31:21.835914', 'step': 4474, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 2847885091968}, 'timestamp': '2025-09-10 02:31:21.869859', 'step': 4474, 'epoch': 1} {'type': 'loss', 'content': 0.21447625756263733, 'timestamp': '2025-09-10 02:31:21.872462', 'step': 4475, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 144], 'flops': 4271696270016}, 'timestamp': '2025-09-10 02:31:21.911654', 'step': 4475, 'epoch': 1} {'type': 'loss', 'content': 0.1613771915435791, 'timestamp': '2025-09-10 02:31:21.935710', 'step': 4476, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 3322488817984}, 'timestamp': '2025-09-10 02:31:21.968772', 'step': 4476, 'epoch': 1} {'type': 'loss', 'content': 0.11769145727157593, 'timestamp': '2025-09-10 02:31:21.972040', 'step': 4477, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 3322488817984}, 'timestamp': '2025-09-10 02:31:22.005104', 'step': 4477, 'epoch': 1} {'type': 'loss', 'content': 0.10001986473798752, 'timestamp': '2025-09-10 02:31:22.007219', 'step': 4478, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 2847885091968}, 'timestamp': '2025-09-10 02:31:22.038944', 'step': 4478, 'epoch': 1} {'type': 'loss', 'content': 0.09131228923797607, 'timestamp': '2025-09-10 02:31:22.041396', 'step': 4479, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 128], 'flops': 3797092544000}, 'timestamp': '2025-09-10 02:31:22.072603', 'step': 4479, 'epoch': 1} {'type': 'loss', 'content': 0.17598016560077667, 'timestamp': '2025-09-10 02:31:22.097280', 'step': 4480, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 3322488817984}, 'timestamp': '2025-09-10 02:31:22.131036', 'step': 4480, 'epoch': 1} {'type': 'loss', 'content': 0.18554896116256714, 'timestamp': '2025-09-10 02:31:22.133981', 'step': 4481, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 3322488817984}, 'timestamp': '2025-09-10 02:31:22.169949', 'step': 4481, 'epoch': 1} {'type': 'loss', 'content': 0.15838271379470825, 'timestamp': '2025-09-10 02:31:22.172678', 'step': 4482, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 3322488817984}, 'timestamp': '2025-09-10 02:31:22.206184', 'step': 4482, 'epoch': 1} {'type': 'loss', 'content': 0.11540411412715912, 'timestamp': '2025-09-10 02:31:22.209106', 'step': 4483, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 3322488817984}, 'timestamp': '2025-09-10 02:31:22.241977', 'step': 4483, 'epoch': 1} {'type': 'loss', 'content': 0.14255103468894958, 'timestamp': '2025-09-10 02:31:22.266042', 'step': 4484, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 2847885091968}, 'timestamp': '2025-09-10 02:31:22.298601', 'step': 4484, 'epoch': 1} {'type': 'loss', 'content': 0.08847085386514664, 'timestamp': '2025-09-10 02:31:22.301578', 'step': 4485, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 2847885091968}, 'timestamp': '2025-09-10 02:31:22.333934', 'step': 4485, 'epoch': 1} {'type': 'loss', 'content': 0.22241730988025665, 'timestamp': '2025-09-10 02:31:22.336323', 'step': 4486, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 128], 'flops': 3797092544000}, 'timestamp': '2025-09-10 02:31:22.369974', 'step': 4486, 'epoch': 1} {'type': 'loss', 'content': 0.16884814202785492, 'timestamp': '2025-09-10 02:31:22.372280', 'step': 4487, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 144], 'flops': 4271696270016}, 'timestamp': '2025-09-10 02:31:22.406953', 'step': 4487, 'epoch': 1} {'type': 'loss', 'content': 0.10450349003076553, 'timestamp': '2025-09-10 02:31:22.431116', 'step': 4488, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 2847885091968}, 'timestamp': '2025-09-10 02:31:22.463275', 'step': 4488, 'epoch': 1} {'type': 'loss', 'content': 0.1444719433784485, 'timestamp': '2025-09-10 02:31:22.465521', 'step': 4489, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 160], 'flops': 4746299996032}, 'timestamp': '2025-09-10 02:31:22.498876', 'step': 4489, 'epoch': 1} {'type': 'loss', 'content': 0.10356561094522476, 'timestamp': '2025-09-10 02:31:22.501258', 'step': 4490, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 2847885091968}, 'timestamp': '2025-09-10 02:31:22.533114', 'step': 4490, 'epoch': 1} {'type': 'loss', 'content': 0.18228597939014435, 'timestamp': '2025-09-10 02:31:22.535813', 'step': 4491, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 2847885091968}, 'timestamp': '2025-09-10 02:31:22.569031', 'step': 4491, 'epoch': 1} {'type': 'loss', 'content': 0.14210869371891022, 'timestamp': '2025-09-10 02:31:22.593394', 'step': 4492, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 3322488817984}, 'timestamp': '2025-09-10 02:31:22.627001', 'step': 4492, 'epoch': 1} {'type': 'loss', 'content': 0.14092746376991272, 'timestamp': '2025-09-10 02:31:22.630029', 'step': 4493, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 128], 'flops': 3797092544000}, 'timestamp': '2025-09-10 02:31:22.665402', 'step': 4493, 'epoch': 1} {'type': 'loss', 'content': 0.10705548524856567, 'timestamp': '2025-09-10 02:31:22.669398', 'step': 4494, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 3322488817984}, 'timestamp': '2025-09-10 02:31:22.703581', 'step': 4494, 'epoch': 1} {'type': 'loss', 'content': 0.1208222508430481, 'timestamp': '2025-09-10 02:31:22.706176', 'step': 4495, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 2847885091968}, 'timestamp': '2025-09-10 02:31:22.739521', 'step': 4495, 'epoch': 1} {'type': 'loss', 'content': 0.15488941967487335, 'timestamp': '2025-09-10 02:31:22.765368', 'step': 4496, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 128], 'flops': 3797092544000}, 'timestamp': '2025-09-10 02:31:22.837167', 'step': 4496, 'epoch': 1} {'type': 'loss', 'content': 0.30327433347702026, 'timestamp': '2025-09-10 02:31:22.845386', 'step': 4497, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 2847885091968}, 'timestamp': '2025-09-10 02:31:22.904204', 'step': 4497, 'epoch': 1} {'type': 'loss', 'content': 0.1472070962190628, 'timestamp': '2025-09-10 02:31:22.923796', 'step': 4498, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 3322488817984}, 'timestamp': '2025-09-10 02:31:22.983534', 'step': 4498, 'epoch': 1} {'type': 'loss', 'content': 0.20776978135108948, 'timestamp': '2025-09-10 02:31:22.996536', 'step': 4499, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 128], 'flops': 3797092544000}, 'timestamp': '2025-09-10 02:31:23.043090', 'step': 4499, 'epoch': 1} {'type': 'loss', 'content': 0.09779617935419083, 'timestamp': '2025-09-10 02:31:23.075374', 'step': 4500, 'epoch': 1} {'type': 'info', 'content': 'Checkpoint saved at step 4500', 'timestamp': '2025-09-10 02:31:27.960933', 'step': 4500, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 2847885091968}, 'timestamp': '2025-09-10 02:31:27.998160', 'step': 4500, 'epoch': 1} {'type': 'loss', 'content': 0.15948912501335144, 'timestamp': '2025-09-10 02:31:28.003445', 'step': 4501, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 128], 'flops': 3797092544000}, 'timestamp': '2025-09-10 02:31:28.043492', 'step': 4501, 'epoch': 1} {'type': 'loss', 'content': 0.10193941742181778, 'timestamp': '2025-09-10 02:31:28.046549', 'step': 4502, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 3322488817984}, 'timestamp': '2025-09-10 02:31:28.078506', 'step': 4502, 'epoch': 1} {'type': 'loss', 'content': 0.2476477324962616, 'timestamp': '2025-09-10 02:31:28.081211', 'step': 4503, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 2847885091968}, 'timestamp': '2025-09-10 02:31:28.112907', 'step': 4503, 'epoch': 1} {'type': 'loss', 'content': 0.153450608253479, 'timestamp': '2025-09-10 02:31:28.138094', 'step': 4504, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 3322488817984}, 'timestamp': '2025-09-10 02:31:28.170774', 'step': 4504, 'epoch': 1} {'type': 'loss', 'content': 0.25747013092041016, 'timestamp': '2025-09-10 02:31:28.174236', 'step': 4505, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 3322488817984}, 'timestamp': '2025-09-10 02:31:28.204741', 'step': 4505, 'epoch': 1} {'type': 'loss', 'content': 0.10033735632896423, 'timestamp': '2025-09-10 02:31:28.207828', 'step': 4506, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 3322488817984}, 'timestamp': '2025-09-10 02:31:28.239222', 'step': 4506, 'epoch': 1} {'type': 'loss', 'content': 0.23243683576583862, 'timestamp': '2025-09-10 02:31:28.244363', 'step': 4507, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 3322488817984}, 'timestamp': '2025-09-10 02:31:28.285154', 'step': 4507, 'epoch': 1} {'type': 'loss', 'content': 0.2038845717906952, 'timestamp': '2025-09-10 02:31:28.310790', 'step': 4508, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 128], 'flops': 3797092544000}, 'timestamp': '2025-09-10 02:31:28.366715', 'step': 4508, 'epoch': 1} {'type': 'loss', 'content': 0.1772260218858719, 'timestamp': '2025-09-10 02:31:28.374669', 'step': 4509, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 2847885091968}, 'timestamp': '2025-09-10 02:31:28.423961', 'step': 4509, 'epoch': 1} {'type': 'loss', 'content': 0.16114287078380585, 'timestamp': '2025-09-10 02:31:28.430637', 'step': 4510, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 128], 'flops': 3797092544000}, 'timestamp': '2025-09-10 02:31:28.468743', 'step': 4510, 'epoch': 1} {'type': 'loss', 'content': 0.16684460639953613, 'timestamp': '2025-09-10 02:31:28.474573', 'step': 4511, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 80], 'flops': 2373281365952}, 'timestamp': '2025-09-10 02:31:28.516208', 'step': 4511, 'epoch': 1} {'type': 'loss', 'content': 0.18994133174419403, 'timestamp': '2025-09-10 02:31:28.540898', 'step': 4512, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 3322488817984}, 'timestamp': '2025-09-10 02:31:28.575148', 'step': 4512, 'epoch': 1} {'type': 'loss', 'content': 0.18293780088424683, 'timestamp': '2025-09-10 02:31:28.578249', 'step': 4513, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 2847885091968}, 'timestamp': '2025-09-10 02:31:28.610326', 'step': 4513, 'epoch': 1} {'type': 'loss', 'content': 0.2167225182056427, 'timestamp': '2025-09-10 02:31:28.614306', 'step': 4514, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 2847885091968}, 'timestamp': '2025-09-10 02:31:28.648223', 'step': 4514, 'epoch': 1} {'type': 'loss', 'content': 0.15777644515037537, 'timestamp': '2025-09-10 02:31:28.650470', 'step': 4515, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 2847885091968}, 'timestamp': '2025-09-10 02:31:28.682605', 'step': 4515, 'epoch': 1} {'type': 'loss', 'content': 0.12630516290664673, 'timestamp': '2025-09-10 02:31:28.707403', 'step': 4516, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 3322488817984}, 'timestamp': '2025-09-10 02:31:28.743281', 'step': 4516, 'epoch': 1} {'type': 'loss', 'content': 0.11134781688451767, 'timestamp': '2025-09-10 02:31:28.746919', 'step': 4517, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 2847885091968}, 'timestamp': '2025-09-10 02:31:28.780626', 'step': 4517, 'epoch': 1} {'type': 'loss', 'content': 0.17877280712127686, 'timestamp': '2025-09-10 02:31:28.784842', 'step': 4518, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 2847885091968}, 'timestamp': '2025-09-10 02:31:28.819031', 'step': 4518, 'epoch': 1} {'type': 'loss', 'content': 0.13348032534122467, 'timestamp': '2025-09-10 02:31:28.822155', 'step': 4519, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 128], 'flops': 3797092544000}, 'timestamp': '2025-09-10 02:31:28.854797', 'step': 4519, 'epoch': 1} {'type': 'loss', 'content': 0.15354642271995544, 'timestamp': '2025-09-10 02:31:28.878305', 'step': 4520, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 128], 'flops': 3797092544000}, 'timestamp': '2025-09-10 02:31:28.909628', 'step': 4520, 'epoch': 1} {'type': 'loss', 'content': 0.1985325813293457, 'timestamp': '2025-09-10 02:31:28.912598', 'step': 4521, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 3322488817984}, 'timestamp': '2025-09-10 02:31:28.943559', 'step': 4521, 'epoch': 1} {'type': 'loss', 'content': 0.17384248971939087, 'timestamp': '2025-09-10 02:31:28.947194', 'step': 4522, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 3322488817984}, 'timestamp': '2025-09-10 02:31:28.977758', 'step': 4522, 'epoch': 1} {'type': 'loss', 'content': 0.10989337414503098, 'timestamp': '2025-09-10 02:31:28.980416', 'step': 4523, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 3322488817984}, 'timestamp': '2025-09-10 02:31:29.012655', 'step': 4523, 'epoch': 1} {'type': 'loss', 'content': 0.1421288400888443, 'timestamp': '2025-09-10 02:31:29.036615', 'step': 4524, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 80], 'flops': 2373281365952}, 'timestamp': '2025-09-10 02:31:29.068380', 'step': 4524, 'epoch': 1} {'type': 'loss', 'content': 0.11953077465295792, 'timestamp': '2025-09-10 02:31:29.070986', 'step': 4525, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 2847885091968}, 'timestamp': '2025-09-10 02:31:29.101346', 'step': 4525, 'epoch': 1} {'type': 'loss', 'content': 0.17892949283123016, 'timestamp': '2025-09-10 02:31:29.104367', 'step': 4526, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 128], 'flops': 3797092544000}, 'timestamp': '2025-09-10 02:31:29.138195', 'step': 4526, 'epoch': 1} {'type': 'loss', 'content': 0.167307510972023, 'timestamp': '2025-09-10 02:31:29.140981', 'step': 4527, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 160], 'flops': 4746299996032}, 'timestamp': '2025-09-10 02:31:29.172365', 'step': 4527, 'epoch': 1} {'type': 'loss', 'content': 0.10647711902856827, 'timestamp': '2025-09-10 02:31:29.196732', 'step': 4528, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 144], 'flops': 4271696270016}, 'timestamp': '2025-09-10 02:31:29.229550', 'step': 4528, 'epoch': 1} {'type': 'loss', 'content': 0.1873786747455597, 'timestamp': '2025-09-10 02:31:29.232356', 'step': 4529, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 128], 'flops': 3797092544000}, 'timestamp': '2025-09-10 02:31:29.263512', 'step': 4529, 'epoch': 1} {'type': 'loss', 'content': 0.13996393978595734, 'timestamp': '2025-09-10 02:31:29.266789', 'step': 4530, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 144], 'flops': 4271696270016}, 'timestamp': '2025-09-10 02:31:29.300024', 'step': 4530, 'epoch': 1} {'type': 'loss', 'content': 0.14970143139362335, 'timestamp': '2025-09-10 02:31:29.303125', 'step': 4531, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 80], 'flops': 2373281365952}, 'timestamp': '2025-09-10 02:31:29.335869', 'step': 4531, 'epoch': 1} {'type': 'loss', 'content': 0.17818444967269897, 'timestamp': '2025-09-10 02:31:29.360575', 'step': 4532, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 144], 'flops': 4271696270016}, 'timestamp': '2025-09-10 02:31:29.392251', 'step': 4532, 'epoch': 1} {'type': 'loss', 'content': 0.1126699298620224, 'timestamp': '2025-09-10 02:31:29.395417', 'step': 4533, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 2847885091968}, 'timestamp': '2025-09-10 02:31:29.426915', 'step': 4533, 'epoch': 1} {'type': 'loss', 'content': 0.324215829372406, 'timestamp': '2025-09-10 02:31:29.430154', 'step': 4534, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 3322488817984}, 'timestamp': '2025-09-10 02:31:29.462157', 'step': 4534, 'epoch': 1} {'type': 'loss', 'content': 0.15570013225078583, 'timestamp': '2025-09-10 02:31:29.464799', 'step': 4535, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 2847885091968}, 'timestamp': '2025-09-10 02:31:29.495827', 'step': 4535, 'epoch': 1} {'type': 'loss', 'content': 0.16436712443828583, 'timestamp': '2025-09-10 02:31:29.519310', 'step': 4536, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 3322488817984}, 'timestamp': '2025-09-10 02:31:29.551094', 'step': 4536, 'epoch': 1} {'type': 'loss', 'content': 0.19351762533187866, 'timestamp': '2025-09-10 02:31:29.553771', 'step': 4537, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 128], 'flops': 3797092544000}, 'timestamp': '2025-09-10 02:31:29.584896', 'step': 4537, 'epoch': 1} {'type': 'loss', 'content': 0.18224303424358368, 'timestamp': '2025-09-10 02:31:29.588168', 'step': 4538, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 3322488817984}, 'timestamp': '2025-09-10 02:31:29.619800', 'step': 4538, 'epoch': 1} {'type': 'loss', 'content': 0.13240177929401398, 'timestamp': '2025-09-10 02:31:29.622172', 'step': 4539, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 2847885091968}, 'timestamp': '2025-09-10 02:31:29.653940', 'step': 4539, 'epoch': 1} {'type': 'loss', 'content': 0.1742239147424698, 'timestamp': '2025-09-10 02:31:29.678413', 'step': 4540, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 2847885091968}, 'timestamp': '2025-09-10 02:31:29.709848', 'step': 4540, 'epoch': 1} {'type': 'loss', 'content': 0.18739411234855652, 'timestamp': '2025-09-10 02:31:29.712334', 'step': 4541, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 176], 'flops': 5220903722048}, 'timestamp': '2025-09-10 02:31:29.747336', 'step': 4541, 'epoch': 1} {'type': 'loss', 'content': 0.11080151051282883, 'timestamp': '2025-09-10 02:31:29.752505', 'step': 4542, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 3322488817984}, 'timestamp': '2025-09-10 02:31:29.789734', 'step': 4542, 'epoch': 1} {'type': 'loss', 'content': 0.23785574734210968, 'timestamp': '2025-09-10 02:31:29.792487', 'step': 4543, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 3322488817984}, 'timestamp': '2025-09-10 02:31:29.823224', 'step': 4543, 'epoch': 1} {'type': 'loss', 'content': 0.08423402160406113, 'timestamp': '2025-09-10 02:31:29.847874', 'step': 4544, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 3322488817984}, 'timestamp': '2025-09-10 02:31:29.881756', 'step': 4544, 'epoch': 1} {'type': 'loss', 'content': 0.27190762758255005, 'timestamp': '2025-09-10 02:31:29.885091', 'step': 4545, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 3322488817984}, 'timestamp': '2025-09-10 02:31:29.915376', 'step': 4545, 'epoch': 1} {'type': 'loss', 'content': 0.09796160459518433, 'timestamp': '2025-09-10 02:31:29.919841', 'step': 4546, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 3322488817984}, 'timestamp': '2025-09-10 02:31:29.952175', 'step': 4546, 'epoch': 1} {'type': 'loss', 'content': 0.13577237725257874, 'timestamp': '2025-09-10 02:31:29.955425', 'step': 4547, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 3322488817984}, 'timestamp': '2025-09-10 02:31:29.985982', 'step': 4547, 'epoch': 1} {'type': 'loss', 'content': 0.1755475550889969, 'timestamp': '2025-09-10 02:31:30.011225', 'step': 4548, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 128], 'flops': 3797092544000}, 'timestamp': '2025-09-10 02:31:30.041865', 'step': 4548, 'epoch': 1} {'type': 'loss', 'content': 0.1407446712255478, 'timestamp': '2025-09-10 02:31:30.044372', 'step': 4549, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 128], 'flops': 3797092544000}, 'timestamp': '2025-09-10 02:31:30.075744', 'step': 4549, 'epoch': 1} {'type': 'loss', 'content': 0.2132328748703003, 'timestamp': '2025-09-10 02:31:30.079088', 'step': 4550, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 2847885091968}, 'timestamp': '2025-09-10 02:31:30.109768', 'step': 4550, 'epoch': 1} {'type': 'loss', 'content': 0.04870808497071266, 'timestamp': '2025-09-10 02:31:30.112769', 'step': 4551, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 128], 'flops': 3797092544000}, 'timestamp': '2025-09-10 02:31:30.143525', 'step': 4551, 'epoch': 1} {'type': 'loss', 'content': 0.13981135189533234, 'timestamp': '2025-09-10 02:31:30.167675', 'step': 4552, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 2847885091968}, 'timestamp': '2025-09-10 02:31:30.199697', 'step': 4552, 'epoch': 1} {'type': 'loss', 'content': 0.1480722874403, 'timestamp': '2025-09-10 02:31:30.202451', 'step': 4553, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 2847885091968}, 'timestamp': '2025-09-10 02:31:30.234380', 'step': 4553, 'epoch': 1} {'type': 'loss', 'content': 0.15192806720733643, 'timestamp': '2025-09-10 02:31:30.237237', 'step': 4554, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 3322488817984}, 'timestamp': '2025-09-10 02:31:30.268206', 'step': 4554, 'epoch': 1} {'type': 'loss', 'content': 0.16576328873634338, 'timestamp': '2025-09-10 02:31:30.273366', 'step': 4555, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 160], 'flops': 4746299996032}, 'timestamp': '2025-09-10 02:31:30.303533', 'step': 4555, 'epoch': 1} {'type': 'loss', 'content': 0.07355558127164841, 'timestamp': '2025-09-10 02:31:30.327848', 'step': 4556, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 3322488817984}, 'timestamp': '2025-09-10 02:31:30.358026', 'step': 4556, 'epoch': 1} {'type': 'loss', 'content': 0.12989924848079681, 'timestamp': '2025-09-10 02:31:30.360553', 'step': 4557, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 144], 'flops': 4271696270016}, 'timestamp': '2025-09-10 02:31:30.391058', 'step': 4557, 'epoch': 1} {'type': 'loss', 'content': 0.11087898164987564, 'timestamp': '2025-09-10 02:31:30.394242', 'step': 4558, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 144], 'flops': 4271696270016}, 'timestamp': '2025-09-10 02:31:30.426235', 'step': 4558, 'epoch': 1} {'type': 'loss', 'content': 0.1298341453075409, 'timestamp': '2025-09-10 02:31:30.428778', 'step': 4559, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 2847885091968}, 'timestamp': '2025-09-10 02:31:30.460189', 'step': 4559, 'epoch': 1} {'type': 'loss', 'content': 0.1396685242652893, 'timestamp': '2025-09-10 02:31:30.484395', 'step': 4560, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 3322488817984}, 'timestamp': '2025-09-10 02:31:30.516024', 'step': 4560, 'epoch': 1} {'type': 'loss', 'content': 0.11439040303230286, 'timestamp': '2025-09-10 02:31:30.518836', 'step': 4561, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 3322488817984}, 'timestamp': '2025-09-10 02:31:30.551613', 'step': 4561, 'epoch': 1} {'type': 'loss', 'content': 0.14042928814888, 'timestamp': '2025-09-10 02:31:30.554487', 'step': 4562, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 3322488817984}, 'timestamp': '2025-09-10 02:31:30.586914', 'step': 4562, 'epoch': 1} {'type': 'loss', 'content': 0.11196539551019669, 'timestamp': '2025-09-10 02:31:30.589494', 'step': 4563, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 2847885091968}, 'timestamp': '2025-09-10 02:31:30.619233', 'step': 4563, 'epoch': 1} {'type': 'loss', 'content': 0.16506358981132507, 'timestamp': '2025-09-10 02:31:30.642372', 'step': 4564, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 2847885091968}, 'timestamp': '2025-09-10 02:31:30.673634', 'step': 4564, 'epoch': 1} {'type': 'loss', 'content': 0.12314142286777496, 'timestamp': '2025-09-10 02:31:30.676237', 'step': 4565, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 3322488817984}, 'timestamp': '2025-09-10 02:31:30.705479', 'step': 4565, 'epoch': 1} {'type': 'loss', 'content': 0.09337971359491348, 'timestamp': '2025-09-10 02:31:30.707899', 'step': 4566, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 2847885091968}, 'timestamp': '2025-09-10 02:31:30.737497', 'step': 4566, 'epoch': 1} {'type': 'loss', 'content': 0.18211549520492554, 'timestamp': '2025-09-10 02:31:30.739588', 'step': 4567, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 2847885091968}, 'timestamp': '2025-09-10 02:31:30.769370', 'step': 4567, 'epoch': 1} {'type': 'loss', 'content': 0.12464383244514465, 'timestamp': '2025-09-10 02:31:30.792730', 'step': 4568, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 2847885091968}, 'timestamp': '2025-09-10 02:31:30.823270', 'step': 4568, 'epoch': 1} {'type': 'loss', 'content': 0.18903134763240814, 'timestamp': '2025-09-10 02:31:30.825342', 'step': 4569, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 3322488817984}, 'timestamp': '2025-09-10 02:31:30.856678', 'step': 4569, 'epoch': 1} {'type': 'loss', 'content': 0.2438025325536728, 'timestamp': '2025-09-10 02:31:30.858698', 'step': 4570, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 2847885091968}, 'timestamp': '2025-09-10 02:31:30.889012', 'step': 4570, 'epoch': 1} {'type': 'loss', 'content': 0.14135374128818512, 'timestamp': '2025-09-10 02:31:30.891629', 'step': 4571, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 3322488817984}, 'timestamp': '2025-09-10 02:31:30.923194', 'step': 4571, 'epoch': 1} {'type': 'loss', 'content': 0.17226950824260712, 'timestamp': '2025-09-10 02:31:30.947052', 'step': 4572, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 144], 'flops': 4271696270016}, 'timestamp': '2025-09-10 02:31:30.979027', 'step': 4572, 'epoch': 1} {'type': 'loss', 'content': 0.18827039003372192, 'timestamp': '2025-09-10 02:31:30.981415', 'step': 4573, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 128], 'flops': 3797092544000}, 'timestamp': '2025-09-10 02:31:31.010986', 'step': 4573, 'epoch': 1} {'type': 'loss', 'content': 0.09169381856918335, 'timestamp': '2025-09-10 02:31:31.013693', 'step': 4574, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 2847885091968}, 'timestamp': '2025-09-10 02:31:31.043500', 'step': 4574, 'epoch': 1} {'type': 'loss', 'content': 0.15440118312835693, 'timestamp': '2025-09-10 02:31:31.045908', 'step': 4575, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 2847885091968}, 'timestamp': '2025-09-10 02:31:31.075985', 'step': 4575, 'epoch': 1} {'type': 'loss', 'content': 0.16923461854457855, 'timestamp': '2025-09-10 02:31:31.099731', 'step': 4576, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 80], 'flops': 2373281365952}, 'timestamp': '2025-09-10 02:31:31.130196', 'step': 4576, 'epoch': 1} {'type': 'loss', 'content': 0.175933375954628, 'timestamp': '2025-09-10 02:31:31.132397', 'step': 4577, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 3322488817984}, 'timestamp': '2025-09-10 02:31:31.163972', 'step': 4577, 'epoch': 1} {'type': 'loss', 'content': 0.1625102162361145, 'timestamp': '2025-09-10 02:31:31.166132', 'step': 4578, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 144], 'flops': 4271696270016}, 'timestamp': '2025-09-10 02:31:31.195878', 'step': 4578, 'epoch': 1} {'type': 'loss', 'content': 0.0967506393790245, 'timestamp': '2025-09-10 02:31:31.198641', 'step': 4579, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 2847885091968}, 'timestamp': '2025-09-10 02:31:31.228435', 'step': 4579, 'epoch': 1} {'type': 'loss', 'content': 0.1331876665353775, 'timestamp': '2025-09-10 02:31:31.251724', 'step': 4580, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 3322488817984}, 'timestamp': '2025-09-10 02:31:31.281495', 'step': 4580, 'epoch': 1} {'type': 'loss', 'content': 0.156575545668602, 'timestamp': '2025-09-10 02:31:31.283683', 'step': 4581, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 144], 'flops': 4271696270016}, 'timestamp': '2025-09-10 02:31:31.313964', 'step': 4581, 'epoch': 1} {'type': 'loss', 'content': 0.15829014778137207, 'timestamp': '2025-09-10 02:31:31.316362', 'step': 4582, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 144], 'flops': 4271696270016}, 'timestamp': '2025-09-10 02:31:31.345848', 'step': 4582, 'epoch': 1} {'type': 'loss', 'content': 0.23574796319007874, 'timestamp': '2025-09-10 02:31:31.348175', 'step': 4583, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 128], 'flops': 3797092544000}, 'timestamp': '2025-09-10 02:31:31.378585', 'step': 4583, 'epoch': 1} {'type': 'loss', 'content': 0.09816570580005646, 'timestamp': '2025-09-10 02:31:31.402363', 'step': 4584, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 2847885091968}, 'timestamp': '2025-09-10 02:31:31.433360', 'step': 4584, 'epoch': 1} {'type': 'loss', 'content': 0.18693892657756805, 'timestamp': '2025-09-10 02:31:31.435521', 'step': 4585, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 2847885091968}, 'timestamp': '2025-09-10 02:31:31.467005', 'step': 4585, 'epoch': 1} {'type': 'loss', 'content': 0.12152044475078583, 'timestamp': '2025-09-10 02:31:31.468874', 'step': 4586, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 2847885091968}, 'timestamp': '2025-09-10 02:31:31.499022', 'step': 4586, 'epoch': 1} {'type': 'loss', 'content': 0.10470995306968689, 'timestamp': '2025-09-10 02:31:31.501361', 'step': 4587, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 3322488817984}, 'timestamp': '2025-09-10 02:31:31.532086', 'step': 4587, 'epoch': 1} {'type': 'loss', 'content': 0.1824367344379425, 'timestamp': '2025-09-10 02:31:31.555703', 'step': 4588, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 144], 'flops': 4271696270016}, 'timestamp': '2025-09-10 02:31:31.586388', 'step': 4588, 'epoch': 1} {'type': 'loss', 'content': 0.12428684532642365, 'timestamp': '2025-09-10 02:31:31.588781', 'step': 4589, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 128], 'flops': 3797092544000}, 'timestamp': '2025-09-10 02:31:31.619277', 'step': 4589, 'epoch': 1} {'type': 'loss', 'content': 0.10545758157968521, 'timestamp': '2025-09-10 02:31:31.621432', 'step': 4590, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 2847885091968}, 'timestamp': '2025-09-10 02:31:31.651420', 'step': 4590, 'epoch': 1} {'type': 'loss', 'content': 0.12576274573802948, 'timestamp': '2025-09-10 02:31:31.653851', 'step': 4591, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 176], 'flops': 5220903722048}, 'timestamp': '2025-09-10 02:31:31.687188', 'step': 4591, 'epoch': 1} {'type': 'loss', 'content': 0.1739763766527176, 'timestamp': '2025-09-10 02:31:31.711851', 'step': 4592, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 3322488817984}, 'timestamp': '2025-09-10 02:31:31.741551', 'step': 4592, 'epoch': 1} {'type': 'loss', 'content': 0.13990916311740875, 'timestamp': '2025-09-10 02:31:31.744479', 'step': 4593, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 128], 'flops': 3797092544000}, 'timestamp': '2025-09-10 02:31:31.774211', 'step': 4593, 'epoch': 1} {'type': 'loss', 'content': 0.22967635095119476, 'timestamp': '2025-09-10 02:31:31.777368', 'step': 4594, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 80], 'flops': 2373281365952}, 'timestamp': '2025-09-10 02:31:31.806769', 'step': 4594, 'epoch': 1} {'type': 'loss', 'content': 0.18656963109970093, 'timestamp': '2025-09-10 02:31:31.809155', 'step': 4595, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 128], 'flops': 3797092544000}, 'timestamp': '2025-09-10 02:31:31.838543', 'step': 4595, 'epoch': 1} {'type': 'loss', 'content': 0.11761507391929626, 'timestamp': '2025-09-10 02:31:31.861893', 'step': 4596, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 128], 'flops': 3797092544000}, 'timestamp': '2025-09-10 02:31:31.893345', 'step': 4596, 'epoch': 1} {'type': 'loss', 'content': 0.17450012266635895, 'timestamp': '2025-09-10 02:31:31.895877', 'step': 4597, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 128], 'flops': 3797092544000}, 'timestamp': '2025-09-10 02:31:31.926178', 'step': 4597, 'epoch': 1} {'type': 'loss', 'content': 0.08997490257024765, 'timestamp': '2025-09-10 02:31:31.928420', 'step': 4598, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 144], 'flops': 4271696270016}, 'timestamp': '2025-09-10 02:31:31.958629', 'step': 4598, 'epoch': 1} {'type': 'loss', 'content': 0.1398494839668274, 'timestamp': '2025-09-10 02:31:31.961710', 'step': 4599, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 128], 'flops': 3797092544000}, 'timestamp': '2025-09-10 02:31:31.993298', 'step': 4599, 'epoch': 1} {'type': 'loss', 'content': 0.18390685319900513, 'timestamp': '2025-09-10 02:31:32.016933', 'step': 4600, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 3322488817984}, 'timestamp': '2025-09-10 02:31:32.048445', 'step': 4600, 'epoch': 1} {'type': 'loss', 'content': 0.22935953736305237, 'timestamp': '2025-09-10 02:31:32.050590', 'step': 4601, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 2847885091968}, 'timestamp': '2025-09-10 02:31:32.082244', 'step': 4601, 'epoch': 1} {'type': 'loss', 'content': 0.11477523297071457, 'timestamp': '2025-09-10 02:31:32.084481', 'step': 4602, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 144], 'flops': 4271696270016}, 'timestamp': '2025-09-10 02:31:32.114851', 'step': 4602, 'epoch': 1} {'type': 'loss', 'content': 0.18722796440124512, 'timestamp': '2025-09-10 02:31:32.117038', 'step': 4603, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 2847885091968}, 'timestamp': '2025-09-10 02:31:32.146665', 'step': 4603, 'epoch': 1} {'type': 'loss', 'content': 0.1373278945684433, 'timestamp': '2025-09-10 02:31:32.170828', 'step': 4604, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 3322488817984}, 'timestamp': '2025-09-10 02:31:32.201913', 'step': 4604, 'epoch': 1} {'type': 'loss', 'content': 0.14976836740970612, 'timestamp': '2025-09-10 02:31:32.204125', 'step': 4605, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 3322488817984}, 'timestamp': '2025-09-10 02:31:32.234554', 'step': 4605, 'epoch': 1} {'type': 'loss', 'content': 0.14589041471481323, 'timestamp': '2025-09-10 02:31:32.236793', 'step': 4606, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 2847885091968}, 'timestamp': '2025-09-10 02:31:32.267249', 'step': 4606, 'epoch': 1} {'type': 'loss', 'content': 0.12810266017913818, 'timestamp': '2025-09-10 02:31:32.269275', 'step': 4607, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 3322488817984}, 'timestamp': '2025-09-10 02:31:32.300366', 'step': 4607, 'epoch': 1} {'type': 'loss', 'content': 0.1459251344203949, 'timestamp': '2025-09-10 02:31:32.323563', 'step': 4608, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 3322488817984}, 'timestamp': '2025-09-10 02:31:32.357735', 'step': 4608, 'epoch': 1} {'type': 'loss', 'content': 0.1336851269006729, 'timestamp': '2025-09-10 02:31:32.360094', 'step': 4609, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 2847885091968}, 'timestamp': '2025-09-10 02:31:32.389871', 'step': 4609, 'epoch': 1} {'type': 'loss', 'content': 0.16692808270454407, 'timestamp': '2025-09-10 02:31:32.391899', 'step': 4610, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 3322488817984}, 'timestamp': '2025-09-10 02:31:32.422315', 'step': 4610, 'epoch': 1} {'type': 'loss', 'content': 0.17608489096164703, 'timestamp': '2025-09-10 02:31:32.424450', 'step': 4611, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 3322488817984}, 'timestamp': '2025-09-10 02:31:32.455968', 'step': 4611, 'epoch': 1} {'type': 'loss', 'content': 0.11538542807102203, 'timestamp': '2025-09-10 02:31:32.479703', 'step': 4612, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 3322488817984}, 'timestamp': '2025-09-10 02:31:32.510313', 'step': 4612, 'epoch': 1} {'type': 'loss', 'content': 0.2874358892440796, 'timestamp': '2025-09-10 02:31:32.512840', 'step': 4613, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 3322488817984}, 'timestamp': '2025-09-10 02:31:32.542992', 'step': 4613, 'epoch': 1} {'type': 'loss', 'content': 0.14495065808296204, 'timestamp': '2025-09-10 02:31:32.544922', 'step': 4614, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 128], 'flops': 3797092544000}, 'timestamp': '2025-09-10 02:31:32.574244', 'step': 4614, 'epoch': 1} {'type': 'loss', 'content': 0.10676942020654678, 'timestamp': '2025-09-10 02:31:32.575981', 'step': 4615, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 2847885091968}, 'timestamp': '2025-09-10 02:31:32.606260', 'step': 4615, 'epoch': 1} {'type': 'loss', 'content': 0.14347513020038605, 'timestamp': '2025-09-10 02:31:32.629774', 'step': 4616, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 128], 'flops': 3797092544000}, 'timestamp': '2025-09-10 02:31:32.660337', 'step': 4616, 'epoch': 1} {'type': 'loss', 'content': 0.12001387029886246, 'timestamp': '2025-09-10 02:31:32.663305', 'step': 4617, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 80], 'flops': 2373281365952}, 'timestamp': '2025-09-10 02:31:32.692741', 'step': 4617, 'epoch': 1} {'type': 'loss', 'content': 0.12325268238782883, 'timestamp': '2025-09-10 02:31:32.694992', 'step': 4618, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 128], 'flops': 3797092544000}, 'timestamp': '2025-09-10 02:31:32.724321', 'step': 4618, 'epoch': 1} {'type': 'loss', 'content': 0.1471690535545349, 'timestamp': '2025-09-10 02:31:32.726524', 'step': 4619, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 3322488817984}, 'timestamp': '2025-09-10 02:31:32.758974', 'step': 4619, 'epoch': 1} {'type': 'loss', 'content': 0.22381335496902466, 'timestamp': '2025-09-10 02:31:32.782648', 'step': 4620, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 3322488817984}, 'timestamp': '2025-09-10 02:31:32.813251', 'step': 4620, 'epoch': 1} {'type': 'loss', 'content': 0.16021820902824402, 'timestamp': '2025-09-10 02:31:32.815513', 'step': 4621, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 80], 'flops': 2373281365952}, 'timestamp': '2025-09-10 02:31:32.845660', 'step': 4621, 'epoch': 1} {'type': 'loss', 'content': 0.1507881134748459, 'timestamp': '2025-09-10 02:31:32.847929', 'step': 4622, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 3322488817984}, 'timestamp': '2025-09-10 02:31:32.877644', 'step': 4622, 'epoch': 1} {'type': 'loss', 'content': 0.12632855772972107, 'timestamp': '2025-09-10 02:31:32.879850', 'step': 4623, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 2847885091968}, 'timestamp': '2025-09-10 02:31:32.910539', 'step': 4623, 'epoch': 1} {'type': 'loss', 'content': 0.1686156988143921, 'timestamp': '2025-09-10 02:31:32.934241', 'step': 4624, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 128], 'flops': 3797092544000}, 'timestamp': '2025-09-10 02:31:32.964426', 'step': 4624, 'epoch': 1} {'type': 'loss', 'content': 0.37906235456466675, 'timestamp': '2025-09-10 02:31:32.967088', 'step': 4625, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 2847885091968}, 'timestamp': '2025-09-10 02:31:32.998678', 'step': 4625, 'epoch': 1} {'type': 'loss', 'content': 0.22578930854797363, 'timestamp': '2025-09-10 02:31:33.001389', 'step': 4626, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 80], 'flops': 2373281365952}, 'timestamp': '2025-09-10 02:31:33.030841', 'step': 4626, 'epoch': 1} {'type': 'loss', 'content': 0.18880794942378998, 'timestamp': '2025-09-10 02:31:33.032928', 'step': 4627, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 3322488817984}, 'timestamp': '2025-09-10 02:31:33.063502', 'step': 4627, 'epoch': 1} {'type': 'loss', 'content': 0.09311595559120178, 'timestamp': '2025-09-10 02:31:33.087073', 'step': 4628, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 128], 'flops': 3797092544000}, 'timestamp': '2025-09-10 02:31:33.118044', 'step': 4628, 'epoch': 1} {'type': 'loss', 'content': 0.19338664412498474, 'timestamp': '2025-09-10 02:31:33.119733', 'step': 4629, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 2847885091968}, 'timestamp': '2025-09-10 02:31:33.148806', 'step': 4629, 'epoch': 1} {'type': 'loss', 'content': 0.16818994283676147, 'timestamp': '2025-09-10 02:31:33.150922', 'step': 4630, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 128], 'flops': 3797092544000}, 'timestamp': '2025-09-10 02:31:33.181057', 'step': 4630, 'epoch': 1} {'type': 'loss', 'content': 0.10694801062345505, 'timestamp': '2025-09-10 02:31:33.183536', 'step': 4631, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 128], 'flops': 3797092544000}, 'timestamp': '2025-09-10 02:31:33.213342', 'step': 4631, 'epoch': 1} {'type': 'loss', 'content': 0.1729419082403183, 'timestamp': '2025-09-10 02:31:33.236745', 'step': 4632, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 3322488817984}, 'timestamp': '2025-09-10 02:31:33.267981', 'step': 4632, 'epoch': 1} {'type': 'loss', 'content': 0.16995511949062347, 'timestamp': '2025-09-10 02:31:33.270139', 'step': 4633, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 128], 'flops': 3797092544000}, 'timestamp': '2025-09-10 02:31:33.301034', 'step': 4633, 'epoch': 1} {'type': 'loss', 'content': 0.1311502605676651, 'timestamp': '2025-09-10 02:31:33.303581', 'step': 4634, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 144], 'flops': 4271696270016}, 'timestamp': '2025-09-10 02:31:33.334681', 'step': 4634, 'epoch': 1} {'type': 'loss', 'content': 0.13462895154953003, 'timestamp': '2025-09-10 02:31:33.336884', 'step': 4635, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 3322488817984}, 'timestamp': '2025-09-10 02:31:33.367567', 'step': 4635, 'epoch': 1} {'type': 'loss', 'content': 0.13064204156398773, 'timestamp': '2025-09-10 02:31:33.391329', 'step': 4636, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 2847885091968}, 'timestamp': '2025-09-10 02:31:33.421810', 'step': 4636, 'epoch': 1} {'type': 'loss', 'content': 0.2112080454826355, 'timestamp': '2025-09-10 02:31:33.424178', 'step': 4637, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 128], 'flops': 3797092544000}, 'timestamp': '2025-09-10 02:31:33.456529', 'step': 4637, 'epoch': 1} {'type': 'loss', 'content': 0.1835213154554367, 'timestamp': '2025-09-10 02:31:33.458800', 'step': 4638, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 128], 'flops': 3797092544000}, 'timestamp': '2025-09-10 02:31:33.492193', 'step': 4638, 'epoch': 1} {'type': 'loss', 'content': 0.26038357615470886, 'timestamp': '2025-09-10 02:31:33.494905', 'step': 4639, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 3322488817984}, 'timestamp': '2025-09-10 02:31:33.527275', 'step': 4639, 'epoch': 1} {'type': 'loss', 'content': 0.22650985419750214, 'timestamp': '2025-09-10 02:31:33.550678', 'step': 4640, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 128], 'flops': 3797092544000}, 'timestamp': '2025-09-10 02:31:33.582759', 'step': 4640, 'epoch': 1} {'type': 'loss', 'content': 0.14317992329597473, 'timestamp': '2025-09-10 02:31:33.585381', 'step': 4641, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 160], 'flops': 4746299996032}, 'timestamp': '2025-09-10 02:31:33.617210', 'step': 4641, 'epoch': 1} {'type': 'loss', 'content': 0.1475464552640915, 'timestamp': '2025-09-10 02:31:33.619666', 'step': 4642, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 128], 'flops': 3797092544000}, 'timestamp': '2025-09-10 02:31:33.650303', 'step': 4642, 'epoch': 1} {'type': 'loss', 'content': 0.0811862200498581, 'timestamp': '2025-09-10 02:31:33.652315', 'step': 4643, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 2847885091968}, 'timestamp': '2025-09-10 02:31:33.682535', 'step': 4643, 'epoch': 1} {'type': 'loss', 'content': 0.2211126834154129, 'timestamp': '2025-09-10 02:31:33.706185', 'step': 4644, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 3322488817984}, 'timestamp': '2025-09-10 02:31:33.737264', 'step': 4644, 'epoch': 1} {'type': 'loss', 'content': 0.18414954841136932, 'timestamp': '2025-09-10 02:31:33.739253', 'step': 4645, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 2847885091968}, 'timestamp': '2025-09-10 02:31:33.768313', 'step': 4645, 'epoch': 1} {'type': 'loss', 'content': 0.13317744433879852, 'timestamp': '2025-09-10 02:31:33.770145', 'step': 4646, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 128], 'flops': 3797092544000}, 'timestamp': '2025-09-10 02:31:33.800019', 'step': 4646, 'epoch': 1} {'type': 'loss', 'content': 0.16122829914093018, 'timestamp': '2025-09-10 02:31:33.803797', 'step': 4647, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 144], 'flops': 4271696270016}, 'timestamp': '2025-09-10 02:31:33.836531', 'step': 4647, 'epoch': 1} {'type': 'loss', 'content': 0.12100043147802353, 'timestamp': '2025-09-10 02:31:33.859967', 'step': 4648, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 144], 'flops': 4271696270016}, 'timestamp': '2025-09-10 02:31:33.890960', 'step': 4648, 'epoch': 1} {'type': 'loss', 'content': 0.1791187971830368, 'timestamp': '2025-09-10 02:31:33.893454', 'step': 4649, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 2847885091968}, 'timestamp': '2025-09-10 02:31:33.923134', 'step': 4649, 'epoch': 1} {'type': 'loss', 'content': 0.16489090025424957, 'timestamp': '2025-09-10 02:31:33.926108', 'step': 4650, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 2847885091968}, 'timestamp': '2025-09-10 02:31:33.957134', 'step': 4650, 'epoch': 1} {'type': 'loss', 'content': 0.24303331971168518, 'timestamp': '2025-09-10 02:31:33.959309', 'step': 4651, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 2847885091968}, 'timestamp': '2025-09-10 02:31:33.989652', 'step': 4651, 'epoch': 1} {'type': 'loss', 'content': 0.1061854213476181, 'timestamp': '2025-09-10 02:31:34.013440', 'step': 4652, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 128], 'flops': 3797092544000}, 'timestamp': '2025-09-10 02:31:34.044209', 'step': 4652, 'epoch': 1} {'type': 'loss', 'content': 0.15549634397029877, 'timestamp': '2025-09-10 02:31:34.046703', 'step': 4653, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 3322488817984}, 'timestamp': '2025-09-10 02:31:34.079626', 'step': 4653, 'epoch': 1} {'type': 'loss', 'content': 0.2817230820655823, 'timestamp': '2025-09-10 02:31:34.083149', 'step': 4654, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 128], 'flops': 3797092544000}, 'timestamp': '2025-09-10 02:31:34.112599', 'step': 4654, 'epoch': 1} {'type': 'loss', 'content': 0.15882772207260132, 'timestamp': '2025-09-10 02:31:34.114550', 'step': 4655, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 128], 'flops': 3797092544000}, 'timestamp': '2025-09-10 02:31:34.143924', 'step': 4655, 'epoch': 1} {'type': 'loss', 'content': 0.13230717182159424, 'timestamp': '2025-09-10 02:31:34.167109', 'step': 4656, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 3322488817984}, 'timestamp': '2025-09-10 02:31:34.198026', 'step': 4656, 'epoch': 1} {'type': 'loss', 'content': 0.19420942664146423, 'timestamp': '2025-09-10 02:31:34.199924', 'step': 4657, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 3322488817984}, 'timestamp': '2025-09-10 02:31:34.229264', 'step': 4657, 'epoch': 1} {'type': 'loss', 'content': 0.18447472155094147, 'timestamp': '2025-09-10 02:31:34.231170', 'step': 4658, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 144], 'flops': 4271696270016}, 'timestamp': '2025-09-10 02:31:34.261556', 'step': 4658, 'epoch': 1} {'type': 'loss', 'content': 0.14366227388381958, 'timestamp': '2025-09-10 02:31:34.264366', 'step': 4659, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 128], 'flops': 3797092544000}, 'timestamp': '2025-09-10 02:31:34.294388', 'step': 4659, 'epoch': 1} {'type': 'loss', 'content': 0.21753312647342682, 'timestamp': '2025-09-10 02:31:34.317978', 'step': 4660, 'epoch': 1} {'type': 'flops', 'content': [{'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1582003754624}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1898404492032}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1898404492032}, {'type': 'perplexity', 'in_batch_dim': [8, 144], 'batch_size': 8, 'flops': 2847606704256}, {'type': 'perplexity', 'in_batch_dim': [8, 64], 'batch_size': 8, 'flops': 1265603017216}, {'type': 'perplexity', 'in_batch_dim': [8, 112], 'batch_size': 8, 'flops': 2214805229440}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1898404492032}, {'type': 'perplexity', 'in_batch_dim': [8, 112], 'batch_size': 8, 'flops': 2214805229440}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1898404492032}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1898404492032}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1898404492032}, {'type': 'perplexity', 'in_batch_dim': [8, 112], 'batch_size': 8, 'flops': 2214805229440}, {'type': 'perplexity', 'in_batch_dim': [8, 112], 'batch_size': 8, 'flops': 2214805229440}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1582003754624}, {'type': 'perplexity', 'in_batch_dim': [8, 144], 'batch_size': 8, 'flops': 2847606704256}, {'type': 'perplexity', 'in_batch_dim': [8, 112], 'batch_size': 8, 'flops': 2214805229440}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1582003754624}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1898404492032}, {'type': 'perplexity', 'in_batch_dim': [8, 64], 'batch_size': 8, 'flops': 1265603017216}, {'type': 'perplexity', 'in_batch_dim': [8, 64], 'batch_size': 8, 'flops': 1265603017216}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1898404492032}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1582003754624}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1582003754624}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1582003754624}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1582003754624}, {'type': 'perplexity', 'in_batch_dim': [8, 64], 'batch_size': 8, 'flops': 1265603017216}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1898404492032}, {'type': 'perplexity', 'in_batch_dim': [8, 64], 'batch_size': 8, 'flops': 1265603017216}, {'type': 'perplexity', 'in_batch_dim': [8, 64], 'batch_size': 8, 'flops': 1265603017216}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1582003754624}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1582003754624}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1582003754624}, {'type': 'perplexity', 'in_batch_dim': [8, 112], 'batch_size': 8, 'flops': 2214805229440}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1582003754624}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1582003754624}, {'type': 'perplexity', 'in_batch_dim': [8, 160], 'batch_size': 8, 'flops': 3164007441664}, {'type': 'perplexity', 'in_batch_dim': [8, 64], 'batch_size': 8, 'flops': 1265603017216}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1898404492032}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1898404492032}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1898404492032}, {'type': 'perplexity', 'in_batch_dim': [8, 64], 'batch_size': 8, 'flops': 1265603017216}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1582003754624}, {'type': 'perplexity', 'in_batch_dim': [8, 64], 'batch_size': 8, 'flops': 1265603017216}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1582003754624}, {'type': 'perplexity', 'in_batch_dim': [8, 64], 'batch_size': 8, 'flops': 1265603017216}, {'type': 'perplexity', 'in_batch_dim': [8, 112], 'batch_size': 8, 'flops': 2214805229440}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1582003754624}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1898404492032}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1898404492032}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1898404492032}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1898404492032}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1898404492032}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1898404492032}, {'type': 'perplexity', 'in_batch_dim': [8, 64], 'batch_size': 8, 'flops': 1265603017216}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1898404492032}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1582003754624}, {'type': 'perplexity', 'in_batch_dim': [8, 64], 'batch_size': 8, 'flops': 1265603017216}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1898404492032}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1898404492032}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1898404492032}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1898404492032}, {'type': 'perplexity', 'in_batch_dim': [8, 128], 'batch_size': 8, 'flops': 2531205966848}, {'type': 'perplexity', 'in_batch_dim': [8, 112], 'batch_size': 8, 'flops': 2214805229440}, {'type': 'perplexity', 'in_batch_dim': [8, 64], 'batch_size': 8, 'flops': 1265603017216}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1582003754624}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1898404492032}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1582003754624}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1898404492032}, {'type': 'perplexity', 'in_batch_dim': [8, 64], 'batch_size': 8, 'flops': 1265603017216}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1898404492032}, {'type': 'perplexity', 'in_batch_dim': [8, 112], 'batch_size': 8, 'flops': 2214805229440}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1898404492032}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1582003754624}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1582003754624}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1582003754624}, {'type': 'perplexity', 'in_batch_dim': [8, 64], 'batch_size': 8, 'flops': 1265603017216}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1582003754624}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1898404492032}, {'type': 'perplexity', 'in_batch_dim': [8, 64], 'batch_size': 8, 'flops': 1265603017216}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1582003754624}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1898404492032}, {'type': 'perplexity', 'in_batch_dim': [8, 64], 'batch_size': 8, 'flops': 1265603017216}, {'type': 'perplexity', 'in_batch_dim': [8, 112], 'batch_size': 8, 'flops': 2214805229440}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1582003754624}, {'type': 'perplexity', 'in_batch_dim': [8, 144], 'batch_size': 8, 'flops': 2847606704256}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1582003754624}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1582003754624}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1582003754624}, {'type': 'perplexity', 'in_batch_dim': [8, 64], 'batch_size': 8, 'flops': 1265603017216}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1898404492032}, {'type': 'perplexity', 'in_batch_dim': [8, 112], 'batch_size': 8, 'flops': 2214805229440}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1898404492032}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1582003754624}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1582003754624}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1582003754624}, {'type': 'perplexity', 'in_batch_dim': [8, 112], 'batch_size': 8, 'flops': 2214805229440}, {'type': 'perplexity', 'in_batch_dim': [8, 64], 'batch_size': 8, 'flops': 1265603017216}, {'type': 'perplexity', 'in_batch_dim': [8, 112], 'batch_size': 8, 'flops': 2214805229440}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1582003754624}, {'type': 'perplexity', 'in_batch_dim': [8, 64], 'batch_size': 8, 'flops': 1265603017216}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1582003754624}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1582003754624}, {'type': 'perplexity', 'in_batch_dim': [8, 64], 'batch_size': 8, 'flops': 1265603017216}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1582003754624}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1582003754624}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1582003754624}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1898404492032}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1582003754624}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1582003754624}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1898404492032}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1898404492032}, {'type': 'perplexity', 'in_batch_dim': [8, 112], 'batch_size': 8, 'flops': 2214805229440}, {'type': 'perplexity', 'in_batch_dim': [8, 64], 'batch_size': 8, 'flops': 1265603017216}, {'type': 'perplexity', 'in_batch_dim': [8, 112], 'batch_size': 8, 'flops': 2214805229440}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1898404492032}, {'type': 'perplexity', 'in_batch_dim': [8, 112], 'batch_size': 8, 'flops': 2214805229440}, {'type': 'perplexity', 'in_batch_dim': [8, 128], 'batch_size': 8, 'flops': 2531205966848}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1582003754624}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1898404492032}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1898404492032}, {'type': 'perplexity', 'in_batch_dim': [8, 192], 'batch_size': 8, 'flops': 3796808916480}, {'type': 'perplexity', 'in_batch_dim': [8, 64], 'batch_size': 8, 'flops': 1265603017216}, {'type': 'perplexity', 'in_batch_dim': [8, 144], 'batch_size': 8, 'flops': 2847606704256}, {'type': 'perplexity', 'in_batch_dim': [8, 64], 'batch_size': 8, 'flops': 1265603017216}, {'type': 'perplexity', 'in_batch_dim': [8, 144], 'batch_size': 8, 'flops': 2847606704256}, {'type': 'perplexity', 'in_batch_dim': [8, 64], 'batch_size': 8, 'flops': 1265603017216}, {'type': 'perplexity', 'in_batch_dim': [8, 64], 'batch_size': 8, 'flops': 1265603017216}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1898404492032}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1898404492032}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1582003754624}, {'type': 'perplexity', 'in_batch_dim': [8, 128], 'batch_size': 8, 'flops': 2531205966848}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1898404492032}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1898404492032}, {'type': 'perplexity', 'in_batch_dim': [8, 112], 'batch_size': 8, 'flops': 2214805229440}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1582003754624}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1582003754624}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1898404492032}, {'type': 'perplexity', 'in_batch_dim': [8, 64], 'batch_size': 8, 'flops': 1265603017216}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1898404492032}, {'type': 'perplexity', 'in_batch_dim': [8, 112], 'batch_size': 8, 'flops': 2214805229440}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1898404492032}, {'type': 'perplexity', 'in_batch_dim': [8, 64], 'batch_size': 8, 'flops': 1265603017216}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1582003754624}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1898404492032}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1898404492032}, {'type': 'perplexity', 'in_batch_dim': [8, 64], 'batch_size': 8, 'flops': 1265603017216}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1898404492032}, {'type': 'perplexity', 'in_batch_dim': [8, 64], 'batch_size': 8, 'flops': 1265603017216}, {'type': 'perplexity', 'in_batch_dim': [8, 112], 'batch_size': 8, 'flops': 2214805229440}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1582003754624}, {'type': 'perplexity', 'in_batch_dim': [8, 128], 'batch_size': 8, 'flops': 2531205966848}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1582003754624}, {'type': 'perplexity', 'in_batch_dim': [8, 112], 'batch_size': 8, 'flops': 2214805229440}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1898404492032}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1582003754624}, {'type': 'perplexity', 'in_batch_dim': [8, 112], 'batch_size': 8, 'flops': 2214805229440}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1582003754624}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1898404492032}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1898404492032}, {'type': 'perplexity', 'in_batch_dim': [8, 128], 'batch_size': 8, 'flops': 2531205966848}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1898404492032}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1898404492032}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1582003754624}, {'type': 'perplexity', 'in_batch_dim': [8, 112], 'batch_size': 8, 'flops': 2214805229440}, {'type': 'perplexity', 'in_batch_dim': [8, 128], 'batch_size': 8, 'flops': 2531205966848}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1898404492032}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1582003754624}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1582003754624}, {'type': 'perplexity', 'in_batch_dim': [8, 128], 'batch_size': 8, 'flops': 2531205966848}, {'type': 'perplexity', 'in_batch_dim': [8, 112], 'batch_size': 8, 'flops': 2214805229440}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1582003754624}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1582003754624}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1898404492032}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1582003754624}, {'type': 'perplexity', 'in_batch_dim': [8, 64], 'batch_size': 8, 'flops': 1265603017216}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1582003754624}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1582003754624}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1582003754624}, {'type': 'perplexity', 'in_batch_dim': [8, 112], 'batch_size': 8, 'flops': 2214805229440}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1582003754624}, {'type': 'perplexity', 'in_batch_dim': [8, 64], 'batch_size': 8, 'flops': 1265603017216}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1582003754624}, {'type': 'perplexity', 'in_batch_dim': [8, 128], 'batch_size': 8, 'flops': 2531205966848}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1898404492032}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1898404492032}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1582003754624}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1898404492032}, {'type': 'perplexity', 'in_batch_dim': [8, 64], 'batch_size': 8, 'flops': 1265603017216}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1898404492032}, {'type': 'perplexity', 'in_batch_dim': [8, 112], 'batch_size': 8, 'flops': 2214805229440}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1898404492032}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1582003754624}, {'type': 'perplexity', 'in_batch_dim': [8, 112], 'batch_size': 8, 'flops': 2214805229440}, {'type': 'perplexity', 'in_batch_dim': [8, 112], 'batch_size': 8, 'flops': 2214805229440}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1898404492032}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1898404492032}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1898404492032}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1898404492032}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1582003754624}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1582003754624}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1582003754624}, {'type': 'perplexity', 'in_batch_dim': [8, 112], 'batch_size': 8, 'flops': 2214805229440}, {'type': 'perplexity', 'in_batch_dim': [8, 64], 'batch_size': 8, 'flops': 1265603017216}, {'type': 'perplexity', 'in_batch_dim': [8, 144], 'batch_size': 8, 'flops': 2847606704256}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1898404492032}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1898404492032}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1898404492032}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1898404492032}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1898404492032}, {'type': 'perplexity', 'in_batch_dim': [8, 112], 'batch_size': 8, 'flops': 2214805229440}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1582003754624}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1582003754624}, {'type': 'perplexity', 'in_batch_dim': [8, 64], 'batch_size': 8, 'flops': 1265603017216}, {'type': 'perplexity', 'in_batch_dim': [8, 112], 'batch_size': 8, 'flops': 2214805229440}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1582003754624}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1898404492032}, {'type': 'perplexity', 'in_batch_dim': [8, 64], 'batch_size': 8, 'flops': 1265603017216}, {'type': 'perplexity', 'in_batch_dim': [8, 112], 'batch_size': 8, 'flops': 2214805229440}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1898404492032}, {'type': 'perplexity', 'in_batch_dim': [8, 64], 'batch_size': 8, 'flops': 1265603017216}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1582003754624}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1582003754624}, {'type': 'perplexity', 'in_batch_dim': [8, 64], 'batch_size': 8, 'flops': 1265603017216}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1898404492032}, {'type': 'perplexity', 'in_batch_dim': [8, 64], 'batch_size': 8, 'flops': 1265603017216}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1582003754624}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1898404492032}, {'type': 'perplexity', 'in_batch_dim': [8, 112], 'batch_size': 8, 'flops': 2214805229440}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1582003754624}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1898404492032}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1582003754624}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1898404492032}, {'type': 'perplexity', 'in_batch_dim': [8, 112], 'batch_size': 8, 'flops': 2214805229440}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1898404492032}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1898404492032}, {'type': 'perplexity', 'in_batch_dim': [8, 64], 'batch_size': 8, 'flops': 1265603017216}, {'type': 'perplexity', 'in_batch_dim': [8, 64], 'batch_size': 8, 'flops': 1265603017216}, {'type': 'perplexity', 'in_batch_dim': [8, 128], 'batch_size': 8, 'flops': 2531205966848}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1582003754624}, {'type': 'perplexity', 'in_batch_dim': [8, 64], 'batch_size': 8, 'flops': 1265603017216}, {'type': 'perplexity', 'in_batch_dim': [8, 112], 'batch_size': 8, 'flops': 2214805229440}, {'type': 'perplexity', 'in_batch_dim': [8, 112], 'batch_size': 8, 'flops': 2214805229440}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1898404492032}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1582003754624}, {'type': 'perplexity', 'in_batch_dim': [8, 128], 'batch_size': 8, 'flops': 2531205966848}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1898404492032}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1582003754624}, {'type': 'perplexity', 'in_batch_dim': [8, 112], 'batch_size': 8, 'flops': 2214805229440}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1898404492032}, {'type': 'perplexity', 'in_batch_dim': [8, 64], 'batch_size': 8, 'flops': 1265603017216}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1582003754624}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1898404492032}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1898404492032}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1582003754624}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1582003754624}, {'type': 'perplexity', 'in_batch_dim': [8, 64], 'batch_size': 8, 'flops': 1265603017216}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1898404492032}, {'type': 'perplexity', 'in_batch_dim': [8, 112], 'batch_size': 8, 'flops': 2214805229440}, {'type': 'perplexity', 'in_batch_dim': [8, 64], 'batch_size': 8, 'flops': 1265603017216}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1898404492032}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1582003754624}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1582003754624}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1898404492032}, {'type': 'perplexity', 'in_batch_dim': [8, 112], 'batch_size': 8, 'flops': 2214805229440}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1898404492032}, {'type': 'perplexity', 'in_batch_dim': [8, 112], 'batch_size': 8, 'flops': 2214805229440}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1582003754624}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1582003754624}, {'type': 'perplexity', 'in_batch_dim': [8, 64], 'batch_size': 8, 'flops': 1265603017216}, {'type': 'perplexity', 'in_batch_dim': [8, 112], 'batch_size': 8, 'flops': 2214805229440}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1582003754624}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1898404492032}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1582003754624}, {'type': 'perplexity', 'in_batch_dim': [8, 112], 'batch_size': 8, 'flops': 2214805229440}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1898404492032}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1898404492032}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1582003754624}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1898404492032}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1582003754624}, {'type': 'perplexity', 'in_batch_dim': [8, 112], 'batch_size': 8, 'flops': 2214805229440}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1582003754624}, {'type': 'perplexity', 'in_batch_dim': [8, 64], 'batch_size': 8, 'flops': 1265603017216}, {'type': 'perplexity', 'in_batch_dim': [8, 64], 'batch_size': 8, 'flops': 1265603017216}, {'type': 'perplexity', 'in_batch_dim': [8, 48], 'batch_size': 8, 'flops': 949202279808}, {'type': 'perplexity', 'in_batch_dim': [8, 112], 'batch_size': 8, 'flops': 2214805229440}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1582003754624}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1898404492032}, {'type': 'perplexity', 'in_batch_dim': [8, 128], 'batch_size': 8, 'flops': 2531205966848}, {'type': 'perplexity', 'in_batch_dim': [8, 112], 'batch_size': 8, 'flops': 2214805229440}, {'type': 'perplexity', 'in_batch_dim': [8, 128], 'batch_size': 8, 'flops': 2531205966848}, {'type': 'perplexity', 'in_batch_dim': [8, 112], 'batch_size': 8, 'flops': 2214805229440}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1582003754624}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1898404492032}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1582003754624}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1582003754624}, {'type': 'perplexity', 'in_batch_dim': [8, 112], 'batch_size': 8, 'flops': 2214805229440}, {'type': 'perplexity', 'in_batch_dim': [8, 112], 'batch_size': 8, 'flops': 2214805229440}, {'type': 'perplexity', 'in_batch_dim': [8, 64], 'batch_size': 8, 'flops': 1265603017216}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1898404492032}, {'type': 'perplexity', 'in_batch_dim': [8, 112], 'batch_size': 8, 'flops': 2214805229440}, {'type': 'perplexity', 'in_batch_dim': [8, 64], 'batch_size': 8, 'flops': 1265603017216}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1898404492032}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1582003754624}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1898404492032}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1898404492032}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1898404492032}, {'type': 'perplexity', 'in_batch_dim': [8, 48], 'batch_size': 8, 'flops': 949202279808}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1898404492032}, {'type': 'perplexity', 'in_batch_dim': [8, 112], 'batch_size': 8, 'flops': 2214805229440}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1582003754624}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1898404492032}, {'type': 'perplexity', 'in_batch_dim': [8, 64], 'batch_size': 8, 'flops': 1265603017216}, {'type': 'perplexity', 'in_batch_dim': [8, 112], 'batch_size': 8, 'flops': 2214805229440}, {'type': 'perplexity', 'in_batch_dim': [8, 64], 'batch_size': 8, 'flops': 1265603017216}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1582003754624}, {'type': 'perplexity', 'in_batch_dim': [8, 64], 'batch_size': 8, 'flops': 1265603017216}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1582003754624}, {'type': 'perplexity', 'in_batch_dim': [8, 112], 'batch_size': 8, 'flops': 2214805229440}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1582003754624}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1582003754624}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1582003754624}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1582003754624}, {'type': 'perplexity', 'in_batch_dim': [8, 112], 'batch_size': 8, 'flops': 2214805229440}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1582003754624}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1898404492032}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1898404492032}, {'type': 'perplexity', 'in_batch_dim': [8, 112], 'batch_size': 8, 'flops': 2214805229440}, {'type': 'perplexity', 'in_batch_dim': [8, 144], 'batch_size': 8, 'flops': 2847606704256}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1582003754624}, {'type': 'perplexity', 'in_batch_dim': [8, 64], 'batch_size': 8, 'flops': 1265603017216}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1898404492032}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1898404492032}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1582003754624}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1898404492032}, {'type': 'perplexity', 'in_batch_dim': [8, 128], 'batch_size': 8, 'flops': 2531205966848}, {'type': 'perplexity', 'in_batch_dim': [8, 112], 'batch_size': 8, 'flops': 2214805229440}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1898404492032}, {'type': 'perplexity', 'in_batch_dim': [8, 64], 'batch_size': 8, 'flops': 1265603017216}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1898404492032}, {'type': 'perplexity', 'in_batch_dim': [8, 160], 'batch_size': 8, 'flops': 3164007441664}, {'type': 'perplexity', 'in_batch_dim': [8, 64], 'batch_size': 8, 'flops': 1265603017216}, {'type': 'perplexity', 'in_batch_dim': [8, 112], 'batch_size': 8, 'flops': 2214805229440}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1898404492032}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1898404492032}, {'type': 'perplexity', 'in_batch_dim': [8, 144], 'batch_size': 8, 'flops': 2847606704256}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1582003754624}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1898404492032}, {'type': 'perplexity', 'in_batch_dim': [8, 64], 'batch_size': 8, 'flops': 1265603017216}, {'type': 'perplexity', 'in_batch_dim': [8, 64], 'batch_size': 8, 'flops': 1265603017216}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1898404492032}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1898404492032}, {'type': 'perplexity', 'in_batch_dim': [8, 112], 'batch_size': 8, 'flops': 2214805229440}, {'type': 'perplexity', 'in_batch_dim': [8, 112], 'batch_size': 8, 'flops': 2214805229440}, {'type': 'perplexity', 'in_batch_dim': [8, 128], 'batch_size': 8, 'flops': 2531205966848}, {'type': 'perplexity', 'in_batch_dim': [8, 112], 'batch_size': 8, 'flops': 2214805229440}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1582003754624}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1582003754624}, {'type': 'perplexity', 'in_batch_dim': [8, 112], 'batch_size': 8, 'flops': 2214805229440}, {'type': 'perplexity', 'in_batch_dim': [8, 128], 'batch_size': 8, 'flops': 2531205966848}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1582003754624}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1582003754624}, {'type': 'perplexity', 'in_batch_dim': [8, 64], 'batch_size': 8, 'flops': 1265603017216}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1898404492032}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1582003754624}, {'type': 'perplexity', 'in_batch_dim': [8, 112], 'batch_size': 8, 'flops': 2214805229440}, {'type': 'perplexity', 'in_batch_dim': [8, 64], 'batch_size': 8, 'flops': 1265603017216}, {'type': 'perplexity', 'in_batch_dim': [8, 64], 'batch_size': 8, 'flops': 1265603017216}, {'type': 'perplexity', 'in_batch_dim': [8, 128], 'batch_size': 8, 'flops': 2531205966848}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1898404492032}, {'type': 'perplexity', 'in_batch_dim': [8, 112], 'batch_size': 8, 'flops': 2214805229440}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1582003754624}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1582003754624}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1582003754624}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1582003754624}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1582003754624}, {'type': 'perplexity', 'in_batch_dim': [8, 112], 'batch_size': 8, 'flops': 2214805229440}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1898404492032}, {'type': 'perplexity', 'in_batch_dim': [8, 112], 'batch_size': 8, 'flops': 2214805229440}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1898404492032}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1582003754624}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1582003754624}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1898404492032}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1898404492032}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1582003754624}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1582003754624}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1582003754624}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1582003754624}, {'type': 'perplexity', 'in_batch_dim': [8, 48], 'batch_size': 8, 'flops': 949202279808}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1582003754624}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1898404492032}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1582003754624}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1898404492032}, {'type': 'perplexity', 'in_batch_dim': [8, 112], 'batch_size': 8, 'flops': 2214805229440}, {'type': 'perplexity', 'in_batch_dim': [8, 128], 'batch_size': 8, 'flops': 2531205966848}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1898404492032}, {'type': 'perplexity', 'in_batch_dim': [8, 64], 'batch_size': 8, 'flops': 1265603017216}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1898404492032}, {'type': 'perplexity', 'in_batch_dim': [8, 64], 'batch_size': 8, 'flops': 1265603017216}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1582003754624}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1582003754624}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1582003754624}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1582003754624}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1898404492032}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1582003754624}, {'type': 'perplexity', 'in_batch_dim': [8, 112], 'batch_size': 8, 'flops': 2214805229440}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1898404492032}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1582003754624}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1898404492032}, {'type': 'perplexity', 'in_batch_dim': [8, 64], 'batch_size': 8, 'flops': 1265603017216}, {'type': 'perplexity', 'in_batch_dim': [8, 64], 'batch_size': 8, 'flops': 1265603017216}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1898404492032}, {'type': 'perplexity', 'in_batch_dim': [8, 64], 'batch_size': 8, 'flops': 1265603017216}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1582003754624}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1582003754624}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1582003754624}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1582003754624}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1582003754624}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1582003754624}, {'type': 'perplexity', 'in_batch_dim': [8, 64], 'batch_size': 8, 'flops': 1265603017216}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1582003754624}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1582003754624}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1898404492032}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1898404492032}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1898404492032}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1898404492032}, {'type': 'perplexity', 'in_batch_dim': [8, 128], 'batch_size': 8, 'flops': 2531205966848}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1582003754624}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1898404492032}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1898404492032}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1898404492032}, {'type': 'perplexity', 'in_batch_dim': [8, 64], 'batch_size': 8, 'flops': 1265603017216}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1898404492032}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1898404492032}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1582003754624}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1898404492032}, {'type': 'perplexity', 'in_batch_dim': [8, 112], 'batch_size': 8, 'flops': 2214805229440}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1898404492032}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1582003754624}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1898404492032}, {'type': 'perplexity', 'in_batch_dim': [8, 64], 'batch_size': 8, 'flops': 1265603017216}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1898404492032}, {'type': 'perplexity', 'in_batch_dim': [8, 112], 'batch_size': 8, 'flops': 2214805229440}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1898404492032}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1898404492032}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1582003754624}, {'type': 'perplexity', 'in_batch_dim': [8, 64], 'batch_size': 8, 'flops': 1265603017216}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1582003754624}, {'type': 'perplexity', 'in_batch_dim': [8, 128], 'batch_size': 8, 'flops': 2531205966848}, {'type': 'perplexity', 'in_batch_dim': [8, 64], 'batch_size': 8, 'flops': 1265603017216}, {'type': 'perplexity', 'in_batch_dim': [8, 128], 'batch_size': 8, 'flops': 2531205966848}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1898404492032}, {'type': 'perplexity', 'in_batch_dim': [8, 112], 'batch_size': 8, 'flops': 2214805229440}, {'type': 'perplexity', 'in_batch_dim': [8, 112], 'batch_size': 8, 'flops': 2214805229440}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1898404492032}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1898404492032}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1582003754624}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1582003754624}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1582003754624}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1898404492032}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1582003754624}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1582003754624}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1898404492032}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1582003754624}, {'type': 'perplexity', 'in_batch_dim': [8, 64], 'batch_size': 8, 'flops': 1265603017216}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1898404492032}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1898404492032}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1898404492032}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1898404492032}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1582003754624}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1898404492032}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1582003754624}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1582003754624}, {'type': 'perplexity', 'in_batch_dim': [8, 112], 'batch_size': 8, 'flops': 2214805229440}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1582003754624}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1898404492032}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1582003754624}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1582003754624}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1582003754624}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1898404492032}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1582003754624}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1898404492032}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1582003754624}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1582003754624}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1898404492032}, {'type': 'perplexity', 'in_batch_dim': [8, 112], 'batch_size': 8, 'flops': 2214805229440}, {'type': 'perplexity', 'in_batch_dim': [8, 128], 'batch_size': 8, 'flops': 2531205966848}, {'type': 'perplexity', 'in_batch_dim': [8, 112], 'batch_size': 8, 'flops': 2214805229440}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1582003754624}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1898404492032}, {'type': 'perplexity', 'in_batch_dim': [8, 64], 'batch_size': 8, 'flops': 1265603017216}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1898404492032}, {'type': 'perplexity', 'in_batch_dim': [8, 64], 'batch_size': 8, 'flops': 1265603017216}, {'type': 'perplexity', 'in_batch_dim': [8, 144], 'batch_size': 8, 'flops': 2847606704256}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1582003754624}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1898404492032}, {'type': 'perplexity', 'in_batch_dim': [8, 112], 'batch_size': 8, 'flops': 2214805229440}, {'type': 'perplexity', 'in_batch_dim': [8, 144], 'batch_size': 8, 'flops': 2847606704256}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1582003754624}, {'type': 'perplexity', 'in_batch_dim': [8, 112], 'batch_size': 8, 'flops': 2214805229440}, {'type': 'perplexity', 'in_batch_dim': [8, 64], 'batch_size': 8, 'flops': 1265603017216}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1582003754624}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1898404492032}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1898404492032}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1582003754624}, {'type': 'perplexity', 'in_batch_dim': [8, 112], 'batch_size': 8, 'flops': 2214805229440}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1582003754624}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1582003754624}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1582003754624}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1582003754624}, {'type': 'perplexity', 'in_batch_dim': [8, 112], 'batch_size': 8, 'flops': 2214805229440}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1898404492032}, {'type': 'perplexity', 'in_batch_dim': [8, 128], 'batch_size': 8, 'flops': 2531205966848}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1582003754624}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1582003754624}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1898404492032}, {'type': 'perplexity', 'in_batch_dim': [8, 64], 'batch_size': 8, 'flops': 1265603017216}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1898404492032}, {'type': 'perplexity', 'in_batch_dim': [8, 64], 'batch_size': 8, 'flops': 1265603017216}, {'type': 'perplexity', 'in_batch_dim': [8, 112], 'batch_size': 8, 'flops': 2214805229440}, {'type': 'perplexity', 'in_batch_dim': [8, 112], 'batch_size': 8, 'flops': 2214805229440}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1582003754624}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1898404492032}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1582003754624}, {'type': 'perplexity', 'in_batch_dim': [8, 64], 'batch_size': 8, 'flops': 1265603017216}, {'type': 'perplexity', 'in_batch_dim': [8, 64], 'batch_size': 8, 'flops': 1265603017216}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1898404492032}, {'type': 'perplexity', 'in_batch_dim': [8, 64], 'batch_size': 8, 'flops': 1265603017216}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1898404492032}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1898404492032}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1582003754624}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1898404492032}, {'type': 'perplexity', 'in_batch_dim': [8, 144], 'batch_size': 8, 'flops': 2847606704256}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1898404492032}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1582003754624}, {'type': 'perplexity', 'in_batch_dim': [8, 128], 'batch_size': 8, 'flops': 2531205966848}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1898404492032}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1898404492032}, {'type': 'perplexity', 'in_batch_dim': [8, 112], 'batch_size': 8, 'flops': 2214805229440}, {'type': 'perplexity', 'in_batch_dim': [8, 112], 'batch_size': 8, 'flops': 2214805229440}, {'type': 'perplexity', 'in_batch_dim': [8, 64], 'batch_size': 8, 'flops': 1265603017216}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1582003754624}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1898404492032}, {'type': 'perplexity', 'in_batch_dim': [8, 128], 'batch_size': 8, 'flops': 2531205966848}, {'type': 'perplexity', 'in_batch_dim': [8, 144], 'batch_size': 8, 'flops': 2847606704256}, {'type': 'perplexity', 'in_batch_dim': [8, 112], 'batch_size': 8, 'flops': 2214805229440}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1582003754624}, {'type': 'perplexity', 'in_batch_dim': [8, 64], 'batch_size': 8, 'flops': 1265603017216}, {'type': 'perplexity', 'in_batch_dim': [8, 64], 'batch_size': 8, 'flops': 1265603017216}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1898404492032}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1582003754624}, {'type': 'perplexity', 'in_batch_dim': [8, 64], 'batch_size': 8, 'flops': 1265603017216}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1898404492032}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1582003754624}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1898404492032}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1898404492032}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1582003754624}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1582003754624}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1582003754624}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1582003754624}, {'type': 'perplexity', 'in_batch_dim': [3, 48], 'batch_size': 8, 'flops': 949202279808}], 'timestamp': '2025-09-10 02:31:42.218413', 'step': 4660, 'epoch': 1} {'type': 'pplx', 'content': 8946.726486620313, 'timestamp': '2025-09-10 02:31:42.221904', 'step': 4660, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 3322488817984}, 'timestamp': '2025-09-10 02:31:42.252585', 'step': 4660, 'epoch': 1} {'type': 'loss', 'content': 0.1372804492712021, 'timestamp': '2025-09-10 02:31:42.255586', 'step': 4661, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 3322488817984}, 'timestamp': '2025-09-10 02:31:42.287955', 'step': 4661, 'epoch': 1} {'type': 'loss', 'content': 0.1624927967786789, 'timestamp': '2025-09-10 02:31:42.290194', 'step': 4662, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 128], 'flops': 3797092544000}, 'timestamp': '2025-09-10 02:31:42.320485', 'step': 4662, 'epoch': 1} {'type': 'loss', 'content': 0.14322835206985474, 'timestamp': '2025-09-10 02:31:42.323032', 'step': 4663, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 3322488817984}, 'timestamp': '2025-09-10 02:31:42.353304', 'step': 4663, 'epoch': 1} {'type': 'loss', 'content': 0.1480698436498642, 'timestamp': '2025-09-10 02:31:42.377103', 'step': 4664, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 3322488817984}, 'timestamp': '2025-09-10 02:31:42.407406', 'step': 4664, 'epoch': 1} {'type': 'loss', 'content': 0.09526447206735611, 'timestamp': '2025-09-10 02:31:42.410078', 'step': 4665, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 3322488817984}, 'timestamp': '2025-09-10 02:31:42.440936', 'step': 4665, 'epoch': 1} {'type': 'loss', 'content': 0.21230123937129974, 'timestamp': '2025-09-10 02:31:42.443408', 'step': 4666, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 144], 'flops': 4271696270016}, 'timestamp': '2025-09-10 02:31:42.473641', 'step': 4666, 'epoch': 1} {'type': 'loss', 'content': 0.13139544427394867, 'timestamp': '2025-09-10 02:31:42.475985', 'step': 4667, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 3322488817984}, 'timestamp': '2025-09-10 02:31:42.505994', 'step': 4667, 'epoch': 1} {'type': 'loss', 'content': 0.22266897559165955, 'timestamp': '2025-09-10 02:31:42.529714', 'step': 4668, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 128], 'flops': 3797092544000}, 'timestamp': '2025-09-10 02:31:42.560010', 'step': 4668, 'epoch': 1} {'type': 'loss', 'content': 0.14611107110977173, 'timestamp': '2025-09-10 02:31:42.562293', 'step': 4669, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 144], 'flops': 4271696270016}, 'timestamp': '2025-09-10 02:31:42.592497', 'step': 4669, 'epoch': 1} {'type': 'loss', 'content': 0.20569342374801636, 'timestamp': '2025-09-10 02:31:42.594827', 'step': 4670, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 3322488817984}, 'timestamp': '2025-09-10 02:31:42.626031', 'step': 4670, 'epoch': 1} {'type': 'loss', 'content': 0.19138652086257935, 'timestamp': '2025-09-10 02:31:42.628197', 'step': 4671, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 3322488817984}, 'timestamp': '2025-09-10 02:31:42.658895', 'step': 4671, 'epoch': 1} {'type': 'loss', 'content': 0.17951801419258118, 'timestamp': '2025-09-10 02:31:42.682417', 'step': 4672, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 144], 'flops': 4271696270016}, 'timestamp': '2025-09-10 02:31:42.714408', 'step': 4672, 'epoch': 1} {'type': 'loss', 'content': 0.14238302409648895, 'timestamp': '2025-09-10 02:31:42.716623', 'step': 4673, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 128], 'flops': 3797092544000}, 'timestamp': '2025-09-10 02:31:42.746903', 'step': 4673, 'epoch': 1} {'type': 'loss', 'content': 0.15278777480125427, 'timestamp': '2025-09-10 02:31:42.749346', 'step': 4674, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 144], 'flops': 4271696270016}, 'timestamp': '2025-09-10 02:31:42.779536', 'step': 4674, 'epoch': 1} {'type': 'loss', 'content': 0.17938023805618286, 'timestamp': '2025-09-10 02:31:42.781881', 'step': 4675, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 3322488817984}, 'timestamp': '2025-09-10 02:31:42.813675', 'step': 4675, 'epoch': 1} {'type': 'loss', 'content': 0.17492680251598358, 'timestamp': '2025-09-10 02:31:42.837261', 'step': 4676, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 2847885091968}, 'timestamp': '2025-09-10 02:31:42.868659', 'step': 4676, 'epoch': 1} {'type': 'loss', 'content': 0.20683026313781738, 'timestamp': '2025-09-10 02:31:42.871234', 'step': 4677, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 3322488817984}, 'timestamp': '2025-09-10 02:31:42.901482', 'step': 4677, 'epoch': 1} {'type': 'loss', 'content': 0.1289990097284317, 'timestamp': '2025-09-10 02:31:42.904427', 'step': 4678, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 3322488817984}, 'timestamp': '2025-09-10 02:31:42.934261', 'step': 4678, 'epoch': 1} {'type': 'loss', 'content': 0.20341406762599945, 'timestamp': '2025-09-10 02:31:42.936837', 'step': 4679, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 128], 'flops': 3797092544000}, 'timestamp': '2025-09-10 02:31:42.967403', 'step': 4679, 'epoch': 1} {'type': 'loss', 'content': 0.18464362621307373, 'timestamp': '2025-09-10 02:31:42.990794', 'step': 4680, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 128], 'flops': 3797092544000}, 'timestamp': '2025-09-10 02:31:43.022223', 'step': 4680, 'epoch': 1} {'type': 'loss', 'content': 0.11528830230236053, 'timestamp': '2025-09-10 02:31:43.024384', 'step': 4681, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 3322488817984}, 'timestamp': '2025-09-10 02:31:43.054099', 'step': 4681, 'epoch': 1} {'type': 'loss', 'content': 0.18662486970424652, 'timestamp': '2025-09-10 02:31:43.056310', 'step': 4682, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 2847885091968}, 'timestamp': '2025-09-10 02:31:43.089408', 'step': 4682, 'epoch': 1} {'type': 'loss', 'content': 0.089424267411232, 'timestamp': '2025-09-10 02:31:43.093069', 'step': 4683, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 2847885091968}, 'timestamp': '2025-09-10 02:31:43.123468', 'step': 4683, 'epoch': 1} {'type': 'loss', 'content': 0.21717414259910583, 'timestamp': '2025-09-10 02:31:43.147155', 'step': 4684, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 128], 'flops': 3797092544000}, 'timestamp': '2025-09-10 02:31:43.177533', 'step': 4684, 'epoch': 1} {'type': 'loss', 'content': 0.16251622140407562, 'timestamp': '2025-09-10 02:31:43.180077', 'step': 4685, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 3322488817984}, 'timestamp': '2025-09-10 02:31:43.210494', 'step': 4685, 'epoch': 1} {'type': 'loss', 'content': 0.18734629452228546, 'timestamp': '2025-09-10 02:31:43.213365', 'step': 4686, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 144], 'flops': 4271696270016}, 'timestamp': '2025-09-10 02:31:43.245819', 'step': 4686, 'epoch': 1} {'type': 'loss', 'content': 0.1646895408630371, 'timestamp': '2025-09-10 02:31:43.248070', 'step': 4687, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 3322488817984}, 'timestamp': '2025-09-10 02:31:43.280077', 'step': 4687, 'epoch': 1} {'type': 'loss', 'content': 0.10449430346488953, 'timestamp': '2025-09-10 02:31:43.304261', 'step': 4688, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 3322488817984}, 'timestamp': '2025-09-10 02:31:43.334973', 'step': 4688, 'epoch': 1} {'type': 'loss', 'content': 0.048661138862371445, 'timestamp': '2025-09-10 02:31:43.337615', 'step': 4689, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 144], 'flops': 4271696270016}, 'timestamp': '2025-09-10 02:31:43.369049', 'step': 4689, 'epoch': 1} {'type': 'loss', 'content': 0.09467069059610367, 'timestamp': '2025-09-10 02:31:43.371382', 'step': 4690, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 2847885091968}, 'timestamp': '2025-09-10 02:31:43.401765', 'step': 4690, 'epoch': 1} {'type': 'loss', 'content': 0.17036846280097961, 'timestamp': '2025-09-10 02:31:43.404153', 'step': 4691, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 2847885091968}, 'timestamp': '2025-09-10 02:31:43.433945', 'step': 4691, 'epoch': 1} {'type': 'loss', 'content': 0.09160999208688736, 'timestamp': '2025-09-10 02:31:43.457410', 'step': 4692, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 2847885091968}, 'timestamp': '2025-09-10 02:31:43.488868', 'step': 4692, 'epoch': 1} {'type': 'loss', 'content': 0.10422160476446152, 'timestamp': '2025-09-10 02:31:43.491372', 'step': 4693, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 3322488817984}, 'timestamp': '2025-09-10 02:31:43.522294', 'step': 4693, 'epoch': 1} {'type': 'loss', 'content': 0.17159298062324524, 'timestamp': '2025-09-10 02:31:43.524656', 'step': 4694, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 3322488817984}, 'timestamp': '2025-09-10 02:31:43.555230', 'step': 4694, 'epoch': 1} {'type': 'loss', 'content': 0.15640093386173248, 'timestamp': '2025-09-10 02:31:43.559160', 'step': 4695, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 160], 'flops': 4746299996032}, 'timestamp': '2025-09-10 02:31:43.590564', 'step': 4695, 'epoch': 1} {'type': 'loss', 'content': 0.13758791983127594, 'timestamp': '2025-09-10 02:31:43.614516', 'step': 4696, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 128], 'flops': 3797092544000}, 'timestamp': '2025-09-10 02:31:43.644870', 'step': 4696, 'epoch': 1} {'type': 'loss', 'content': 0.09201095253229141, 'timestamp': '2025-09-10 02:31:43.647356', 'step': 4697, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 80], 'flops': 2373281365952}, 'timestamp': '2025-09-10 02:31:43.677956', 'step': 4697, 'epoch': 1} {'type': 'loss', 'content': 0.22990551590919495, 'timestamp': '2025-09-10 02:31:43.680382', 'step': 4698, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 2847885091968}, 'timestamp': '2025-09-10 02:31:43.710621', 'step': 4698, 'epoch': 1} {'type': 'loss', 'content': 0.14714622497558594, 'timestamp': '2025-09-10 02:31:43.713255', 'step': 4699, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 3322488817984}, 'timestamp': '2025-09-10 02:31:43.744784', 'step': 4699, 'epoch': 1} {'type': 'loss', 'content': 0.2449292540550232, 'timestamp': '2025-09-10 02:31:43.768386', 'step': 4700, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 2847885091968}, 'timestamp': '2025-09-10 02:31:43.800644', 'step': 4700, 'epoch': 1} {'type': 'loss', 'content': 0.14626485109329224, 'timestamp': '2025-09-10 02:31:43.805673', 'step': 4701, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 3322488817984}, 'timestamp': '2025-09-10 02:31:43.837054', 'step': 4701, 'epoch': 1} {'type': 'loss', 'content': 0.12489905953407288, 'timestamp': '2025-09-10 02:31:43.839584', 'step': 4702, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 128], 'flops': 3797092544000}, 'timestamp': '2025-09-10 02:31:43.870467', 'step': 4702, 'epoch': 1} {'type': 'loss', 'content': 0.11105862259864807, 'timestamp': '2025-09-10 02:31:43.872905', 'step': 4703, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 128], 'flops': 3797092544000}, 'timestamp': '2025-09-10 02:31:43.903199', 'step': 4703, 'epoch': 1} {'type': 'loss', 'content': 0.19483010470867157, 'timestamp': '2025-09-10 02:31:43.927646', 'step': 4704, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 3322488817984}, 'timestamp': '2025-09-10 02:31:43.957681', 'step': 4704, 'epoch': 1} {'type': 'loss', 'content': 0.0935276448726654, 'timestamp': '2025-09-10 02:31:43.960268', 'step': 4705, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 128], 'flops': 3797092544000}, 'timestamp': '2025-09-10 02:31:43.990813', 'step': 4705, 'epoch': 1} {'type': 'loss', 'content': 0.1160135492682457, 'timestamp': '2025-09-10 02:31:43.993339', 'step': 4706, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 3322488817984}, 'timestamp': '2025-09-10 02:31:44.023655', 'step': 4706, 'epoch': 1} {'type': 'loss', 'content': 0.15255731344223022, 'timestamp': '2025-09-10 02:31:44.025843', 'step': 4707, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 2847885091968}, 'timestamp': '2025-09-10 02:31:44.056510', 'step': 4707, 'epoch': 1} {'type': 'loss', 'content': 0.15048977732658386, 'timestamp': '2025-09-10 02:31:44.080387', 'step': 4708, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 160], 'flops': 4746299996032}, 'timestamp': '2025-09-10 02:31:44.112020', 'step': 4708, 'epoch': 1} {'type': 'loss', 'content': 0.11973828077316284, 'timestamp': '2025-09-10 02:31:44.114226', 'step': 4709, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 3322488817984}, 'timestamp': '2025-09-10 02:31:44.144425', 'step': 4709, 'epoch': 1} {'type': 'loss', 'content': 0.1436709612607956, 'timestamp': '2025-09-10 02:31:44.146809', 'step': 4710, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 128], 'flops': 3797092544000}, 'timestamp': '2025-09-10 02:31:44.177682', 'step': 4710, 'epoch': 1} {'type': 'loss', 'content': 0.16559822857379913, 'timestamp': '2025-09-10 02:31:44.180158', 'step': 4711, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 3322488817984}, 'timestamp': '2025-09-10 02:31:44.211020', 'step': 4711, 'epoch': 1} {'type': 'loss', 'content': 0.15127526223659515, 'timestamp': '2025-09-10 02:31:44.234828', 'step': 4712, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 3322488817984}, 'timestamp': '2025-09-10 02:31:44.267171', 'step': 4712, 'epoch': 1} {'type': 'loss', 'content': 0.17185786366462708, 'timestamp': '2025-09-10 02:31:44.269750', 'step': 4713, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 128], 'flops': 3797092544000}, 'timestamp': '2025-09-10 02:31:44.300192', 'step': 4713, 'epoch': 1} {'type': 'loss', 'content': 0.40608492493629456, 'timestamp': '2025-09-10 02:31:44.302817', 'step': 4714, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 128], 'flops': 3797092544000}, 'timestamp': '2025-09-10 02:31:44.335899', 'step': 4714, 'epoch': 1} {'type': 'loss', 'content': 0.24144047498703003, 'timestamp': '2025-09-10 02:31:44.338152', 'step': 4715, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 3322488817984}, 'timestamp': '2025-09-10 02:31:44.368702', 'step': 4715, 'epoch': 1} {'type': 'loss', 'content': 0.16931627690792084, 'timestamp': '2025-09-10 02:31:44.392704', 'step': 4716, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 3322488817984}, 'timestamp': '2025-09-10 02:31:44.423295', 'step': 4716, 'epoch': 1} {'type': 'loss', 'content': 0.17349942028522491, 'timestamp': '2025-09-10 02:31:44.425861', 'step': 4717, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 3322488817984}, 'timestamp': '2025-09-10 02:31:44.456358', 'step': 4717, 'epoch': 1} {'type': 'loss', 'content': 0.11423486471176147, 'timestamp': '2025-09-10 02:31:44.458605', 'step': 4718, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 144], 'flops': 4271696270016}, 'timestamp': '2025-09-10 02:31:44.489881', 'step': 4718, 'epoch': 1} {'type': 'loss', 'content': 0.1545390486717224, 'timestamp': '2025-09-10 02:31:44.492589', 'step': 4719, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 2847885091968}, 'timestamp': '2025-09-10 02:31:44.523740', 'step': 4719, 'epoch': 1} {'type': 'loss', 'content': 0.06708879768848419, 'timestamp': '2025-09-10 02:31:44.547504', 'step': 4720, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 3322488817984}, 'timestamp': '2025-09-10 02:31:44.578030', 'step': 4720, 'epoch': 1} {'type': 'loss', 'content': 0.19888637959957123, 'timestamp': '2025-09-10 02:31:44.580355', 'step': 4721, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 144], 'flops': 4271696270016}, 'timestamp': '2025-09-10 02:31:44.611204', 'step': 4721, 'epoch': 1} {'type': 'loss', 'content': 0.11219745129346848, 'timestamp': '2025-09-10 02:31:44.613869', 'step': 4722, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 3322488817984}, 'timestamp': '2025-09-10 02:31:44.644287', 'step': 4722, 'epoch': 1} {'type': 'loss', 'content': 0.17707310616970062, 'timestamp': '2025-09-10 02:31:44.646632', 'step': 4723, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 2847885091968}, 'timestamp': '2025-09-10 02:31:44.676992', 'step': 4723, 'epoch': 1} {'type': 'loss', 'content': 0.12516023218631744, 'timestamp': '2025-09-10 02:31:44.700781', 'step': 4724, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 3322488817984}, 'timestamp': '2025-09-10 02:31:44.732336', 'step': 4724, 'epoch': 1} {'type': 'loss', 'content': 0.1154434010386467, 'timestamp': '2025-09-10 02:31:44.734715', 'step': 4725, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 2847885091968}, 'timestamp': '2025-09-10 02:31:44.765664', 'step': 4725, 'epoch': 1} {'type': 'loss', 'content': 0.2957890033721924, 'timestamp': '2025-09-10 02:31:44.768325', 'step': 4726, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 3322488817984}, 'timestamp': '2025-09-10 02:31:44.801219', 'step': 4726, 'epoch': 1} {'type': 'loss', 'content': 0.24312752485275269, 'timestamp': '2025-09-10 02:31:44.803687', 'step': 4727, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 80], 'flops': 2373281365952}, 'timestamp': '2025-09-10 02:31:44.834940', 'step': 4727, 'epoch': 1} {'type': 'loss', 'content': 0.23464396595954895, 'timestamp': '2025-09-10 02:31:44.858387', 'step': 4728, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 2847885091968}, 'timestamp': '2025-09-10 02:31:44.889564', 'step': 4728, 'epoch': 1} {'type': 'loss', 'content': 0.06716246902942657, 'timestamp': '2025-09-10 02:31:44.891714', 'step': 4729, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 2847885091968}, 'timestamp': '2025-09-10 02:31:44.921535', 'step': 4729, 'epoch': 1} {'type': 'loss', 'content': 0.1097254827618599, 'timestamp': '2025-09-10 02:31:44.924262', 'step': 4730, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 2847885091968}, 'timestamp': '2025-09-10 02:31:44.954623', 'step': 4730, 'epoch': 1} {'type': 'loss', 'content': 0.15512174367904663, 'timestamp': '2025-09-10 02:31:44.957795', 'step': 4731, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 3322488817984}, 'timestamp': '2025-09-10 02:31:44.988437', 'step': 4731, 'epoch': 1} {'type': 'loss', 'content': 0.16589145362377167, 'timestamp': '2025-09-10 02:31:45.012527', 'step': 4732, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 3322488817984}, 'timestamp': '2025-09-10 02:31:45.055705', 'step': 4732, 'epoch': 1} {'type': 'loss', 'content': 0.14482080936431885, 'timestamp': '2025-09-10 02:31:45.062031', 'step': 4733, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 2847885091968}, 'timestamp': '2025-09-10 02:31:45.105723', 'step': 4733, 'epoch': 1} {'type': 'loss', 'content': 0.15555205941200256, 'timestamp': '2025-09-10 02:31:45.110276', 'step': 4734, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 80], 'flops': 2373281365952}, 'timestamp': '2025-09-10 02:31:45.145228', 'step': 4734, 'epoch': 1} {'type': 'loss', 'content': 0.23677730560302734, 'timestamp': '2025-09-10 02:31:45.147781', 'step': 4735, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 2847885091968}, 'timestamp': '2025-09-10 02:31:45.177705', 'step': 4735, 'epoch': 1} {'type': 'loss', 'content': 0.15499728918075562, 'timestamp': '2025-09-10 02:31:45.201273', 'step': 4736, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 2847885091968}, 'timestamp': '2025-09-10 02:31:45.232773', 'step': 4736, 'epoch': 1} {'type': 'loss', 'content': 0.10943396389484406, 'timestamp': '2025-09-10 02:31:45.235102', 'step': 4737, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 128], 'flops': 3797092544000}, 'timestamp': '2025-09-10 02:31:45.265668', 'step': 4737, 'epoch': 1} {'type': 'loss', 'content': 0.16287046670913696, 'timestamp': '2025-09-10 02:31:45.268494', 'step': 4738, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 2847885091968}, 'timestamp': '2025-09-10 02:31:45.298577', 'step': 4738, 'epoch': 1} {'type': 'loss', 'content': 0.18403440713882446, 'timestamp': '2025-09-10 02:31:45.300973', 'step': 4739, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 3322488817984}, 'timestamp': '2025-09-10 02:31:45.332019', 'step': 4739, 'epoch': 1} {'type': 'loss', 'content': 0.11953599005937576, 'timestamp': '2025-09-10 02:31:45.355608', 'step': 4740, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 3322488817984}, 'timestamp': '2025-09-10 02:31:45.387128', 'step': 4740, 'epoch': 1} {'type': 'loss', 'content': 0.20112332701683044, 'timestamp': '2025-09-10 02:31:45.389345', 'step': 4741, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 2847885091968}, 'timestamp': '2025-09-10 02:31:45.418795', 'step': 4741, 'epoch': 1} {'type': 'loss', 'content': 0.11417653411626816, 'timestamp': '2025-09-10 02:31:45.422436', 'step': 4742, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 2847885091968}, 'timestamp': '2025-09-10 02:31:45.452514', 'step': 4742, 'epoch': 1} {'type': 'loss', 'content': 0.17056581377983093, 'timestamp': '2025-09-10 02:31:45.454740', 'step': 4743, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 80], 'flops': 2373281365952}, 'timestamp': '2025-09-10 02:31:45.485323', 'step': 4743, 'epoch': 1} {'type': 'loss', 'content': 0.08343816548585892, 'timestamp': '2025-09-10 02:31:45.508848', 'step': 4744, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 80], 'flops': 2373281365952}, 'timestamp': '2025-09-10 02:31:45.539676', 'step': 4744, 'epoch': 1} {'type': 'loss', 'content': 0.0838484838604927, 'timestamp': '2025-09-10 02:31:45.542213', 'step': 4745, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 2847885091968}, 'timestamp': '2025-09-10 02:31:45.571801', 'step': 4745, 'epoch': 1} {'type': 'loss', 'content': 0.2506554424762726, 'timestamp': '2025-09-10 02:31:45.574334', 'step': 4746, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 2847885091968}, 'timestamp': '2025-09-10 02:31:45.604279', 'step': 4746, 'epoch': 1} {'type': 'loss', 'content': 0.3063441812992096, 'timestamp': '2025-09-10 02:31:45.606875', 'step': 4747, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 3322488817984}, 'timestamp': '2025-09-10 02:31:45.637157', 'step': 4747, 'epoch': 1} {'type': 'loss', 'content': 0.11051862686872482, 'timestamp': '2025-09-10 02:31:45.661381', 'step': 4748, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 144], 'flops': 4271696270016}, 'timestamp': '2025-09-10 02:31:45.691278', 'step': 4748, 'epoch': 1} {'type': 'loss', 'content': 0.19911164045333862, 'timestamp': '2025-09-10 02:31:45.693229', 'step': 4749, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 2847885091968}, 'timestamp': '2025-09-10 02:31:45.722599', 'step': 4749, 'epoch': 1} {'type': 'loss', 'content': 0.12924633920192719, 'timestamp': '2025-09-10 02:31:45.726521', 'step': 4750, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 128], 'flops': 3797092544000}, 'timestamp': '2025-09-10 02:31:45.756420', 'step': 4750, 'epoch': 1} {'type': 'loss', 'content': 0.21944057941436768, 'timestamp': '2025-09-10 02:31:45.759242', 'step': 4751, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 80], 'flops': 2373281365952}, 'timestamp': '2025-09-10 02:31:45.789042', 'step': 4751, 'epoch': 1} {'type': 'loss', 'content': 0.17201966047286987, 'timestamp': '2025-09-10 02:31:45.812636', 'step': 4752, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 3322488817984}, 'timestamp': '2025-09-10 02:31:45.844028', 'step': 4752, 'epoch': 1} {'type': 'loss', 'content': 0.24284595251083374, 'timestamp': '2025-09-10 02:31:45.846563', 'step': 4753, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 3322488817984}, 'timestamp': '2025-09-10 02:31:45.877225', 'step': 4753, 'epoch': 1} {'type': 'loss', 'content': 0.19201233983039856, 'timestamp': '2025-09-10 02:31:45.879641', 'step': 4754, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 3322488817984}, 'timestamp': '2025-09-10 02:31:45.908967', 'step': 4754, 'epoch': 1} {'type': 'loss', 'content': 0.18256591260433197, 'timestamp': '2025-09-10 02:31:45.911143', 'step': 4755, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 3322488817984}, 'timestamp': '2025-09-10 02:31:45.940918', 'step': 4755, 'epoch': 1} {'type': 'loss', 'content': 0.18576516211032867, 'timestamp': '2025-09-10 02:31:45.964874', 'step': 4756, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 3322488817984}, 'timestamp': '2025-09-10 02:31:45.994895', 'step': 4756, 'epoch': 1} {'type': 'loss', 'content': 0.16592857241630554, 'timestamp': '2025-09-10 02:31:45.997310', 'step': 4757, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 128], 'flops': 3797092544000}, 'timestamp': '2025-09-10 02:31:46.027471', 'step': 4757, 'epoch': 1} {'type': 'loss', 'content': 0.10695678740739822, 'timestamp': '2025-09-10 02:31:46.030672', 'step': 4758, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 80], 'flops': 2373281365952}, 'timestamp': '2025-09-10 02:31:46.060911', 'step': 4758, 'epoch': 1} {'type': 'loss', 'content': 0.17805036902427673, 'timestamp': '2025-09-10 02:31:46.064185', 'step': 4759, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 3322488817984}, 'timestamp': '2025-09-10 02:31:46.095000', 'step': 4759, 'epoch': 1} {'type': 'loss', 'content': 0.1814751774072647, 'timestamp': '2025-09-10 02:31:46.118944', 'step': 4760, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 2847885091968}, 'timestamp': '2025-09-10 02:31:46.151934', 'step': 4760, 'epoch': 1} {'type': 'loss', 'content': 0.20187152922153473, 'timestamp': '2025-09-10 02:31:46.154573', 'step': 4761, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 2847885091968}, 'timestamp': '2025-09-10 02:31:46.183994', 'step': 4761, 'epoch': 1} {'type': 'loss', 'content': 0.0659908875823021, 'timestamp': '2025-09-10 02:31:46.185921', 'step': 4762, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 2847885091968}, 'timestamp': '2025-09-10 02:31:46.215752', 'step': 4762, 'epoch': 1} {'type': 'loss', 'content': 0.06748992949724197, 'timestamp': '2025-09-10 02:31:46.218336', 'step': 4763, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 144], 'flops': 4271696270016}, 'timestamp': '2025-09-10 02:31:46.248913', 'step': 4763, 'epoch': 1} {'type': 'loss', 'content': 0.11930367350578308, 'timestamp': '2025-09-10 02:31:46.272834', 'step': 4764, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 2847885091968}, 'timestamp': '2025-09-10 02:31:46.303356', 'step': 4764, 'epoch': 1} {'type': 'loss', 'content': 0.16561588644981384, 'timestamp': '2025-09-10 02:31:46.305465', 'step': 4765, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 144], 'flops': 4271696270016}, 'timestamp': '2025-09-10 02:31:46.336601', 'step': 4765, 'epoch': 1} {'type': 'loss', 'content': 0.2902252972126007, 'timestamp': '2025-09-10 02:31:46.338825', 'step': 4766, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 2847885091968}, 'timestamp': '2025-09-10 02:31:46.368741', 'step': 4766, 'epoch': 1} {'type': 'loss', 'content': 0.23059988021850586, 'timestamp': '2025-09-10 02:31:46.370821', 'step': 4767, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 144], 'flops': 4271696270016}, 'timestamp': '2025-09-10 02:31:46.400956', 'step': 4767, 'epoch': 1} {'type': 'loss', 'content': 0.24026134610176086, 'timestamp': '2025-09-10 02:31:46.424535', 'step': 4768, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 3322488817984}, 'timestamp': '2025-09-10 02:31:46.455497', 'step': 4768, 'epoch': 1} {'type': 'loss', 'content': 0.14173243939876556, 'timestamp': '2025-09-10 02:31:46.458667', 'step': 4769, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 3322488817984}, 'timestamp': '2025-09-10 02:31:46.488973', 'step': 4769, 'epoch': 1} {'type': 'loss', 'content': 0.21049588918685913, 'timestamp': '2025-09-10 02:31:46.491272', 'step': 4770, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 2847885091968}, 'timestamp': '2025-09-10 02:31:46.521895', 'step': 4770, 'epoch': 1} {'type': 'loss', 'content': 0.11259213835000992, 'timestamp': '2025-09-10 02:31:46.524290', 'step': 4771, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 2847885091968}, 'timestamp': '2025-09-10 02:31:46.555702', 'step': 4771, 'epoch': 1} {'type': 'loss', 'content': 0.0938844159245491, 'timestamp': '2025-09-10 02:31:46.579714', 'step': 4772, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 2847885091968}, 'timestamp': '2025-09-10 02:31:46.611387', 'step': 4772, 'epoch': 1} {'type': 'loss', 'content': 0.20054996013641357, 'timestamp': '2025-09-10 02:31:46.613780', 'step': 4773, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 128], 'flops': 3797092544000}, 'timestamp': '2025-09-10 02:31:46.644500', 'step': 4773, 'epoch': 1} {'type': 'loss', 'content': 0.20823779702186584, 'timestamp': '2025-09-10 02:31:46.646781', 'step': 4774, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 128], 'flops': 3797092544000}, 'timestamp': '2025-09-10 02:31:46.677605', 'step': 4774, 'epoch': 1} {'type': 'loss', 'content': 0.1885583996772766, 'timestamp': '2025-09-10 02:31:46.680198', 'step': 4775, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 2847885091968}, 'timestamp': '2025-09-10 02:31:46.710493', 'step': 4775, 'epoch': 1} {'type': 'loss', 'content': 0.08727478981018066, 'timestamp': '2025-09-10 02:31:46.734915', 'step': 4776, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 128], 'flops': 3797092544000}, 'timestamp': '2025-09-10 02:31:46.768130', 'step': 4776, 'epoch': 1} {'type': 'loss', 'content': 0.1479710042476654, 'timestamp': '2025-09-10 02:31:46.770360', 'step': 4777, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 3322488817984}, 'timestamp': '2025-09-10 02:31:46.800454', 'step': 4777, 'epoch': 1} {'type': 'loss', 'content': 0.11639562249183655, 'timestamp': '2025-09-10 02:31:46.802891', 'step': 4778, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 2847885091968}, 'timestamp': '2025-09-10 02:31:46.832768', 'step': 4778, 'epoch': 1} {'type': 'loss', 'content': 0.1620195358991623, 'timestamp': '2025-09-10 02:31:46.835609', 'step': 4779, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 3322488817984}, 'timestamp': '2025-09-10 02:31:46.868319', 'step': 4779, 'epoch': 1} {'type': 'loss', 'content': 0.162422314286232, 'timestamp': '2025-09-10 02:31:46.892116', 'step': 4780, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 80], 'flops': 2373281365952}, 'timestamp': '2025-09-10 02:31:46.922622', 'step': 4780, 'epoch': 1} {'type': 'loss', 'content': 0.25960054993629456, 'timestamp': '2025-09-10 02:31:46.924989', 'step': 4781, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 3322488817984}, 'timestamp': '2025-09-10 02:31:46.954368', 'step': 4781, 'epoch': 1} {'type': 'loss', 'content': 0.22822719812393188, 'timestamp': '2025-09-10 02:31:46.957221', 'step': 4782, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 3322488817984}, 'timestamp': '2025-09-10 02:31:46.988332', 'step': 4782, 'epoch': 1} {'type': 'loss', 'content': 0.09557618945837021, 'timestamp': '2025-09-10 02:31:46.990917', 'step': 4783, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 2847885091968}, 'timestamp': '2025-09-10 02:31:47.020690', 'step': 4783, 'epoch': 1} {'type': 'loss', 'content': 0.25430727005004883, 'timestamp': '2025-09-10 02:31:47.044119', 'step': 4784, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 3322488817984}, 'timestamp': '2025-09-10 02:31:47.074789', 'step': 4784, 'epoch': 1} {'type': 'loss', 'content': 0.13133519887924194, 'timestamp': '2025-09-10 02:31:47.077357', 'step': 4785, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 2847885091968}, 'timestamp': '2025-09-10 02:31:47.108728', 'step': 4785, 'epoch': 1} {'type': 'loss', 'content': 0.1836898922920227, 'timestamp': '2025-09-10 02:31:47.112916', 'step': 4786, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 2847885091968}, 'timestamp': '2025-09-10 02:31:47.142676', 'step': 4786, 'epoch': 1} {'type': 'loss', 'content': 0.11806686222553253, 'timestamp': '2025-09-10 02:31:47.145497', 'step': 4787, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 2847885091968}, 'timestamp': '2025-09-10 02:31:47.176393', 'step': 4787, 'epoch': 1} {'type': 'loss', 'content': 0.1362612247467041, 'timestamp': '2025-09-10 02:31:47.200288', 'step': 4788, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 128], 'flops': 3797092544000}, 'timestamp': '2025-09-10 02:31:47.232315', 'step': 4788, 'epoch': 1} {'type': 'loss', 'content': 0.14132627844810486, 'timestamp': '2025-09-10 02:31:47.234985', 'step': 4789, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 2847885091968}, 'timestamp': '2025-09-10 02:31:47.265342', 'step': 4789, 'epoch': 1} {'type': 'loss', 'content': 0.16259227693080902, 'timestamp': '2025-09-10 02:31:47.267868', 'step': 4790, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 2847885091968}, 'timestamp': '2025-09-10 02:31:47.298681', 'step': 4790, 'epoch': 1} {'type': 'loss', 'content': 0.13840070366859436, 'timestamp': '2025-09-10 02:31:47.301593', 'step': 4791, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 144], 'flops': 4271696270016}, 'timestamp': '2025-09-10 02:31:47.333679', 'step': 4791, 'epoch': 1} {'type': 'loss', 'content': 0.26948708295822144, 'timestamp': '2025-09-10 02:31:47.357257', 'step': 4792, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 2847885091968}, 'timestamp': '2025-09-10 02:31:47.388036', 'step': 4792, 'epoch': 1} {'type': 'loss', 'content': 0.28264787793159485, 'timestamp': '2025-09-10 02:31:47.390408', 'step': 4793, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 3322488817984}, 'timestamp': '2025-09-10 02:31:47.421100', 'step': 4793, 'epoch': 1} {'type': 'loss', 'content': 0.15222108364105225, 'timestamp': '2025-09-10 02:31:47.423237', 'step': 4794, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 144], 'flops': 4271696270016}, 'timestamp': '2025-09-10 02:31:47.452879', 'step': 4794, 'epoch': 1} {'type': 'loss', 'content': 0.181249737739563, 'timestamp': '2025-09-10 02:31:47.455293', 'step': 4795, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 3322488817984}, 'timestamp': '2025-09-10 02:31:47.485126', 'step': 4795, 'epoch': 1} {'type': 'loss', 'content': 0.10986348241567612, 'timestamp': '2025-09-10 02:31:47.508635', 'step': 4796, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 80], 'flops': 2373281365952}, 'timestamp': '2025-09-10 02:31:47.539920', 'step': 4796, 'epoch': 1} {'type': 'loss', 'content': 0.18393486738204956, 'timestamp': '2025-09-10 02:31:47.542746', 'step': 4797, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 3322488817984}, 'timestamp': '2025-09-10 02:31:47.575049', 'step': 4797, 'epoch': 1} {'type': 'loss', 'content': 0.14437440037727356, 'timestamp': '2025-09-10 02:31:47.577426', 'step': 4798, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 2847885091968}, 'timestamp': '2025-09-10 02:31:47.607880', 'step': 4798, 'epoch': 1} {'type': 'loss', 'content': 0.15608689188957214, 'timestamp': '2025-09-10 02:31:47.610131', 'step': 4799, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 2847885091968}, 'timestamp': '2025-09-10 02:31:47.640321', 'step': 4799, 'epoch': 1} {'type': 'loss', 'content': 0.15594899654388428, 'timestamp': '2025-09-10 02:31:47.663785', 'step': 4800, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 128], 'flops': 3797092544000}, 'timestamp': '2025-09-10 02:31:47.693945', 'step': 4800, 'epoch': 1} {'type': 'loss', 'content': 0.1412508487701416, 'timestamp': '2025-09-10 02:31:47.696101', 'step': 4801, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 3322488817984}, 'timestamp': '2025-09-10 02:31:47.725438', 'step': 4801, 'epoch': 1} {'type': 'loss', 'content': 0.1749826818704605, 'timestamp': '2025-09-10 02:31:47.727780', 'step': 4802, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 3322488817984}, 'timestamp': '2025-09-10 02:31:47.758864', 'step': 4802, 'epoch': 1} {'type': 'loss', 'content': 0.15959614515304565, 'timestamp': '2025-09-10 02:31:47.761609', 'step': 4803, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 128], 'flops': 3797092544000}, 'timestamp': '2025-09-10 02:31:47.792444', 'step': 4803, 'epoch': 1} {'type': 'loss', 'content': 0.18547323346138, 'timestamp': '2025-09-10 02:31:47.816276', 'step': 4804, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 3322488817984}, 'timestamp': '2025-09-10 02:31:47.846443', 'step': 4804, 'epoch': 1} {'type': 'loss', 'content': 0.30239972472190857, 'timestamp': '2025-09-10 02:31:47.849629', 'step': 4805, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 2847885091968}, 'timestamp': '2025-09-10 02:31:47.881448', 'step': 4805, 'epoch': 1} {'type': 'loss', 'content': 0.17090825736522675, 'timestamp': '2025-09-10 02:31:47.883517', 'step': 4806, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 2847885091968}, 'timestamp': '2025-09-10 02:31:47.914935', 'step': 4806, 'epoch': 1} {'type': 'loss', 'content': 0.07860297709703445, 'timestamp': '2025-09-10 02:31:47.917342', 'step': 4807, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 128], 'flops': 3797092544000}, 'timestamp': '2025-09-10 02:31:47.947452', 'step': 4807, 'epoch': 1} {'type': 'loss', 'content': 0.16672120988368988, 'timestamp': '2025-09-10 02:31:47.971280', 'step': 4808, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 128], 'flops': 3797092544000}, 'timestamp': '2025-09-10 02:31:48.001615', 'step': 4808, 'epoch': 1} {'type': 'loss', 'content': 0.1754397600889206, 'timestamp': '2025-09-10 02:31:48.004203', 'step': 4809, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 160], 'flops': 4746299996032}, 'timestamp': '2025-09-10 02:31:48.035213', 'step': 4809, 'epoch': 1} {'type': 'loss', 'content': 0.19085943698883057, 'timestamp': '2025-09-10 02:31:48.038100', 'step': 4810, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 2847885091968}, 'timestamp': '2025-09-10 02:31:48.068610', 'step': 4810, 'epoch': 1} {'type': 'loss', 'content': 0.12553150951862335, 'timestamp': '2025-09-10 02:31:48.071352', 'step': 4811, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 3322488817984}, 'timestamp': '2025-09-10 02:31:48.102791', 'step': 4811, 'epoch': 1} {'type': 'loss', 'content': 0.19607336819171906, 'timestamp': '2025-09-10 02:31:48.126494', 'step': 4812, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 3322488817984}, 'timestamp': '2025-09-10 02:31:48.157192', 'step': 4812, 'epoch': 1} {'type': 'loss', 'content': 0.2590614855289459, 'timestamp': '2025-09-10 02:31:48.159823', 'step': 4813, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 2847885091968}, 'timestamp': '2025-09-10 02:31:48.191040', 'step': 4813, 'epoch': 1} {'type': 'loss', 'content': 0.24312035739421844, 'timestamp': '2025-09-10 02:31:48.193734', 'step': 4814, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 2847885091968}, 'timestamp': '2025-09-10 02:31:48.224564', 'step': 4814, 'epoch': 1} {'type': 'loss', 'content': 0.08832915872335434, 'timestamp': '2025-09-10 02:31:48.228289', 'step': 4815, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 3322488817984}, 'timestamp': '2025-09-10 02:31:48.259887', 'step': 4815, 'epoch': 1} {'type': 'loss', 'content': 0.16609443724155426, 'timestamp': '2025-09-10 02:31:48.283920', 'step': 4816, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 3322488817984}, 'timestamp': '2025-09-10 02:31:48.330128', 'step': 4816, 'epoch': 1} {'type': 'loss', 'content': 0.16487766802310944, 'timestamp': '2025-09-10 02:31:48.332653', 'step': 4817, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 2847885091968}, 'timestamp': '2025-09-10 02:31:48.364185', 'step': 4817, 'epoch': 1} {'type': 'loss', 'content': 0.11996886879205704, 'timestamp': '2025-09-10 02:31:48.366637', 'step': 4818, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 2847885091968}, 'timestamp': '2025-09-10 02:31:48.397849', 'step': 4818, 'epoch': 1} {'type': 'loss', 'content': 0.10046137869358063, 'timestamp': '2025-09-10 02:31:48.400542', 'step': 4819, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 144], 'flops': 4271696270016}, 'timestamp': '2025-09-10 02:31:48.432030', 'step': 4819, 'epoch': 1} {'type': 'loss', 'content': 0.11793233454227448, 'timestamp': '2025-09-10 02:31:48.455411', 'step': 4820, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 2847885091968}, 'timestamp': '2025-09-10 02:31:48.486614', 'step': 4820, 'epoch': 1} {'type': 'loss', 'content': 0.15084151923656464, 'timestamp': '2025-09-10 02:31:48.489082', 'step': 4821, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 3322488817984}, 'timestamp': '2025-09-10 02:31:48.519625', 'step': 4821, 'epoch': 1} {'type': 'loss', 'content': 0.18467432260513306, 'timestamp': '2025-09-10 02:31:48.522208', 'step': 4822, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 80], 'flops': 2373281365952}, 'timestamp': '2025-09-10 02:31:48.552370', 'step': 4822, 'epoch': 1} {'type': 'loss', 'content': 0.06059049814939499, 'timestamp': '2025-09-10 02:31:48.554760', 'step': 4823, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 128], 'flops': 3797092544000}, 'timestamp': '2025-09-10 02:31:48.586758', 'step': 4823, 'epoch': 1} {'type': 'loss', 'content': 0.10317973047494888, 'timestamp': '2025-09-10 02:31:48.610625', 'step': 4824, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 3322488817984}, 'timestamp': '2025-09-10 02:31:48.642680', 'step': 4824, 'epoch': 1} {'type': 'loss', 'content': 0.10460302978754044, 'timestamp': '2025-09-10 02:31:48.645203', 'step': 4825, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 3322488817984}, 'timestamp': '2025-09-10 02:31:48.676132', 'step': 4825, 'epoch': 1} {'type': 'loss', 'content': 0.09705045074224472, 'timestamp': '2025-09-10 02:31:48.678737', 'step': 4826, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 2847885091968}, 'timestamp': '2025-09-10 02:31:48.709202', 'step': 4826, 'epoch': 1} {'type': 'loss', 'content': 0.06743009388446808, 'timestamp': '2025-09-10 02:31:48.711990', 'step': 4827, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 2847885091968}, 'timestamp': '2025-09-10 02:31:48.745114', 'step': 4827, 'epoch': 1} {'type': 'loss', 'content': 0.10980774462223053, 'timestamp': '2025-09-10 02:31:48.770558', 'step': 4828, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 128], 'flops': 3797092544000}, 'timestamp': '2025-09-10 02:31:48.805030', 'step': 4828, 'epoch': 1} {'type': 'loss', 'content': 0.14899039268493652, 'timestamp': '2025-09-10 02:31:48.807942', 'step': 4829, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 3322488817984}, 'timestamp': '2025-09-10 02:31:48.839058', 'step': 4829, 'epoch': 1} {'type': 'loss', 'content': 0.14138108491897583, 'timestamp': '2025-09-10 02:31:48.841606', 'step': 4830, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 128], 'flops': 3797092544000}, 'timestamp': '2025-09-10 02:31:48.873622', 'step': 4830, 'epoch': 1} {'type': 'loss', 'content': 0.13232623040676117, 'timestamp': '2025-09-10 02:31:48.876035', 'step': 4831, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 3322488817984}, 'timestamp': '2025-09-10 02:31:48.906471', 'step': 4831, 'epoch': 1} {'type': 'loss', 'content': 0.20318956673145294, 'timestamp': '2025-09-10 02:31:48.930803', 'step': 4832, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 80], 'flops': 2373281365952}, 'timestamp': '2025-09-10 02:31:48.962319', 'step': 4832, 'epoch': 1} {'type': 'loss', 'content': 0.09786249697208405, 'timestamp': '2025-09-10 02:31:48.964662', 'step': 4833, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 128], 'flops': 3797092544000}, 'timestamp': '2025-09-10 02:31:48.996688', 'step': 4833, 'epoch': 1} {'type': 'loss', 'content': 0.15656106173992157, 'timestamp': '2025-09-10 02:31:48.998993', 'step': 4834, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 128], 'flops': 3797092544000}, 'timestamp': '2025-09-10 02:31:49.028482', 'step': 4834, 'epoch': 1} {'type': 'loss', 'content': 0.10389604419469833, 'timestamp': '2025-09-10 02:31:49.030397', 'step': 4835, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 2847885091968}, 'timestamp': '2025-09-10 02:31:49.060518', 'step': 4835, 'epoch': 1} {'type': 'loss', 'content': 0.13093167543411255, 'timestamp': '2025-09-10 02:31:49.084192', 'step': 4836, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 3322488817984}, 'timestamp': '2025-09-10 02:31:49.115332', 'step': 4836, 'epoch': 1} {'type': 'loss', 'content': 0.06363919377326965, 'timestamp': '2025-09-10 02:31:49.117800', 'step': 4837, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 2847885091968}, 'timestamp': '2025-09-10 02:31:49.150042', 'step': 4837, 'epoch': 1} {'type': 'loss', 'content': 0.18578119575977325, 'timestamp': '2025-09-10 02:31:49.157848', 'step': 4838, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 3322488817984}, 'timestamp': '2025-09-10 02:31:49.194496', 'step': 4838, 'epoch': 1} {'type': 'loss', 'content': 0.24157492816448212, 'timestamp': '2025-09-10 02:31:49.197520', 'step': 4839, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 3322488817984}, 'timestamp': '2025-09-10 02:31:49.227655', 'step': 4839, 'epoch': 1} {'type': 'loss', 'content': 0.1315828561782837, 'timestamp': '2025-09-10 02:31:49.251425', 'step': 4840, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 144], 'flops': 4271696270016}, 'timestamp': '2025-09-10 02:31:49.284643', 'step': 4840, 'epoch': 1} {'type': 'loss', 'content': 0.12971508502960205, 'timestamp': '2025-09-10 02:31:49.287186', 'step': 4841, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 128], 'flops': 3797092544000}, 'timestamp': '2025-09-10 02:31:49.317656', 'step': 4841, 'epoch': 1} {'type': 'loss', 'content': 0.1434217244386673, 'timestamp': '2025-09-10 02:31:49.320252', 'step': 4842, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 144], 'flops': 4271696270016}, 'timestamp': '2025-09-10 02:31:49.353930', 'step': 4842, 'epoch': 1} {'type': 'loss', 'content': 0.19644075632095337, 'timestamp': '2025-09-10 02:31:49.356925', 'step': 4843, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 128], 'flops': 3797092544000}, 'timestamp': '2025-09-10 02:31:49.389113', 'step': 4843, 'epoch': 1} {'type': 'loss', 'content': 0.12043626606464386, 'timestamp': '2025-09-10 02:31:49.414503', 'step': 4844, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 128], 'flops': 3797092544000}, 'timestamp': '2025-09-10 02:31:49.445374', 'step': 4844, 'epoch': 1} {'type': 'loss', 'content': 0.17979344725608826, 'timestamp': '2025-09-10 02:31:49.448102', 'step': 4845, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 128], 'flops': 3797092544000}, 'timestamp': '2025-09-10 02:31:49.478368', 'step': 4845, 'epoch': 1} {'type': 'loss', 'content': 0.07160010188817978, 'timestamp': '2025-09-10 02:31:49.480576', 'step': 4846, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 3322488817984}, 'timestamp': '2025-09-10 02:31:49.514890', 'step': 4846, 'epoch': 1} {'type': 'loss', 'content': 0.15746834874153137, 'timestamp': '2025-09-10 02:31:49.518199', 'step': 4847, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 128], 'flops': 3797092544000}, 'timestamp': '2025-09-10 02:31:49.549893', 'step': 4847, 'epoch': 1} {'type': 'loss', 'content': 0.167817622423172, 'timestamp': '2025-09-10 02:31:49.573970', 'step': 4848, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 3322488817984}, 'timestamp': '2025-09-10 02:31:49.609132', 'step': 4848, 'epoch': 1} {'type': 'loss', 'content': 0.28959640860557556, 'timestamp': '2025-09-10 02:31:49.619285', 'step': 4849, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 2847885091968}, 'timestamp': '2025-09-10 02:31:49.655248', 'step': 4849, 'epoch': 1} {'type': 'loss', 'content': 0.13516880571842194, 'timestamp': '2025-09-10 02:31:49.659659', 'step': 4850, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 2847885091968}, 'timestamp': '2025-09-10 02:31:49.693304', 'step': 4850, 'epoch': 1} {'type': 'loss', 'content': 0.12931369245052338, 'timestamp': '2025-09-10 02:31:49.696153', 'step': 4851, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 144], 'flops': 4271696270016}, 'timestamp': '2025-09-10 02:31:49.728183', 'step': 4851, 'epoch': 1} {'type': 'loss', 'content': 0.19043393433094025, 'timestamp': '2025-09-10 02:31:49.752278', 'step': 4852, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 3322488817984}, 'timestamp': '2025-09-10 02:31:49.785284', 'step': 4852, 'epoch': 1} {'type': 'loss', 'content': 0.1334550678730011, 'timestamp': '2025-09-10 02:31:49.788245', 'step': 4853, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 2847885091968}, 'timestamp': '2025-09-10 02:31:49.823497', 'step': 4853, 'epoch': 1} {'type': 'loss', 'content': 0.15078513324260712, 'timestamp': '2025-09-10 02:31:49.826093', 'step': 4854, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 2847885091968}, 'timestamp': '2025-09-10 02:31:49.859024', 'step': 4854, 'epoch': 1} {'type': 'loss', 'content': 0.16099222004413605, 'timestamp': '2025-09-10 02:31:49.861294', 'step': 4855, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 144], 'flops': 4271696270016}, 'timestamp': '2025-09-10 02:31:49.891648', 'step': 4855, 'epoch': 1} {'type': 'loss', 'content': 0.15564362704753876, 'timestamp': '2025-09-10 02:31:49.915494', 'step': 4856, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 80], 'flops': 2373281365952}, 'timestamp': '2025-09-10 02:31:49.947005', 'step': 4856, 'epoch': 1} {'type': 'loss', 'content': 0.06485830992460251, 'timestamp': '2025-09-10 02:31:49.949258', 'step': 4857, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 128], 'flops': 3797092544000}, 'timestamp': '2025-09-10 02:31:49.979020', 'step': 4857, 'epoch': 1} {'type': 'loss', 'content': 0.2987603545188904, 'timestamp': '2025-09-10 02:31:49.981642', 'step': 4858, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 2847885091968}, 'timestamp': '2025-09-10 02:31:50.011950', 'step': 4858, 'epoch': 1} {'type': 'loss', 'content': 0.20386725664138794, 'timestamp': '2025-09-10 02:31:50.014176', 'step': 4859, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 3322488817984}, 'timestamp': '2025-09-10 02:31:50.045211', 'step': 4859, 'epoch': 1} {'type': 'loss', 'content': 0.10231591016054153, 'timestamp': '2025-09-10 02:31:50.069396', 'step': 4860, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 2847885091968}, 'timestamp': '2025-09-10 02:31:50.100063', 'step': 4860, 'epoch': 1} {'type': 'loss', 'content': 0.2042880654335022, 'timestamp': '2025-09-10 02:31:50.102680', 'step': 4861, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 3322488817984}, 'timestamp': '2025-09-10 02:31:50.133322', 'step': 4861, 'epoch': 1} {'type': 'loss', 'content': 0.1448649764060974, 'timestamp': '2025-09-10 02:31:50.136044', 'step': 4862, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 144], 'flops': 4271696270016}, 'timestamp': '2025-09-10 02:31:50.166648', 'step': 4862, 'epoch': 1} {'type': 'loss', 'content': 0.2979539930820465, 'timestamp': '2025-09-10 02:31:50.171414', 'step': 4863, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 128], 'flops': 3797092544000}, 'timestamp': '2025-09-10 02:31:50.208996', 'step': 4863, 'epoch': 1} {'type': 'loss', 'content': 0.09540629386901855, 'timestamp': '2025-09-10 02:31:50.232803', 'step': 4864, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 128], 'flops': 3797092544000}, 'timestamp': '2025-09-10 02:31:50.263918', 'step': 4864, 'epoch': 1} {'type': 'loss', 'content': 0.14279697835445404, 'timestamp': '2025-09-10 02:31:50.266557', 'step': 4865, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 176], 'flops': 5220903722048}, 'timestamp': '2025-09-10 02:31:50.297774', 'step': 4865, 'epoch': 1} {'type': 'loss', 'content': 0.2033436894416809, 'timestamp': '2025-09-10 02:31:50.301976', 'step': 4866, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 2847885091968}, 'timestamp': '2025-09-10 02:31:50.332211', 'step': 4866, 'epoch': 1} {'type': 'loss', 'content': 0.19076122343540192, 'timestamp': '2025-09-10 02:31:50.334153', 'step': 4867, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 128], 'flops': 3797092544000}, 'timestamp': '2025-09-10 02:31:50.364492', 'step': 4867, 'epoch': 1} {'type': 'loss', 'content': 0.20256008207798004, 'timestamp': '2025-09-10 02:31:50.388238', 'step': 4868, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 3322488817984}, 'timestamp': '2025-09-10 02:31:50.418009', 'step': 4868, 'epoch': 1} {'type': 'loss', 'content': 0.34417620301246643, 'timestamp': '2025-09-10 02:31:50.420624', 'step': 4869, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 3322488817984}, 'timestamp': '2025-09-10 02:31:50.451428', 'step': 4869, 'epoch': 1} {'type': 'loss', 'content': 0.20664703845977783, 'timestamp': '2025-09-10 02:31:50.454184', 'step': 4870, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 3322488817984}, 'timestamp': '2025-09-10 02:31:50.484745', 'step': 4870, 'epoch': 1} {'type': 'loss', 'content': 0.15206748247146606, 'timestamp': '2025-09-10 02:31:50.488360', 'step': 4871, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 2847885091968}, 'timestamp': '2025-09-10 02:31:50.518021', 'step': 4871, 'epoch': 1} {'type': 'loss', 'content': 0.10320805013179779, 'timestamp': '2025-09-10 02:31:50.541813', 'step': 4872, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 2847885091968}, 'timestamp': '2025-09-10 02:31:50.572120', 'step': 4872, 'epoch': 1} {'type': 'loss', 'content': 0.10859449207782745, 'timestamp': '2025-09-10 02:31:50.574481', 'step': 4873, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 2847885091968}, 'timestamp': '2025-09-10 02:31:50.606351', 'step': 4873, 'epoch': 1} {'type': 'loss', 'content': 0.1783338338136673, 'timestamp': '2025-09-10 02:31:50.608785', 'step': 4874, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 2847885091968}, 'timestamp': '2025-09-10 02:31:50.638908', 'step': 4874, 'epoch': 1} {'type': 'loss', 'content': 0.19162677228450775, 'timestamp': '2025-09-10 02:31:50.641113', 'step': 4875, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 3322488817984}, 'timestamp': '2025-09-10 02:31:50.671185', 'step': 4875, 'epoch': 1} {'type': 'loss', 'content': 0.15027399361133575, 'timestamp': '2025-09-10 02:31:50.695124', 'step': 4876, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 128], 'flops': 3797092544000}, 'timestamp': '2025-09-10 02:31:50.726162', 'step': 4876, 'epoch': 1} {'type': 'loss', 'content': 0.18008701503276825, 'timestamp': '2025-09-10 02:31:50.728750', 'step': 4877, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 144], 'flops': 4271696270016}, 'timestamp': '2025-09-10 02:31:50.759393', 'step': 4877, 'epoch': 1} {'type': 'loss', 'content': 0.09342923015356064, 'timestamp': '2025-09-10 02:31:50.761528', 'step': 4878, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 2847885091968}, 'timestamp': '2025-09-10 02:31:50.791320', 'step': 4878, 'epoch': 1} {'type': 'loss', 'content': 0.2601556181907654, 'timestamp': '2025-09-10 02:31:50.796307', 'step': 4879, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 2847885091968}, 'timestamp': '2025-09-10 02:31:50.826353', 'step': 4879, 'epoch': 1} {'type': 'loss', 'content': 0.14158357679843903, 'timestamp': '2025-09-10 02:31:50.849580', 'step': 4880, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 2847885091968}, 'timestamp': '2025-09-10 02:31:50.880022', 'step': 4880, 'epoch': 1} {'type': 'loss', 'content': 0.20716015994548798, 'timestamp': '2025-09-10 02:31:50.882188', 'step': 4881, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 2847885091968}, 'timestamp': '2025-09-10 02:31:50.911467', 'step': 4881, 'epoch': 1} {'type': 'loss', 'content': 0.17420341074466705, 'timestamp': '2025-09-10 02:31:50.914374', 'step': 4882, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 144], 'flops': 4271696270016}, 'timestamp': '2025-09-10 02:31:50.946087', 'step': 4882, 'epoch': 1} {'type': 'loss', 'content': 0.1085926815867424, 'timestamp': '2025-09-10 02:31:50.948190', 'step': 4883, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 128], 'flops': 3797092544000}, 'timestamp': '2025-09-10 02:31:50.977120', 'step': 4883, 'epoch': 1} {'type': 'loss', 'content': 0.16531920433044434, 'timestamp': '2025-09-10 02:31:51.000860', 'step': 4884, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 128], 'flops': 3797092544000}, 'timestamp': '2025-09-10 02:31:51.031032', 'step': 4884, 'epoch': 1} {'type': 'loss', 'content': 0.15809981524944305, 'timestamp': '2025-09-10 02:31:51.033006', 'step': 4885, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 3322488817984}, 'timestamp': '2025-09-10 02:31:51.062399', 'step': 4885, 'epoch': 1} {'type': 'loss', 'content': 0.19027024507522583, 'timestamp': '2025-09-10 02:31:51.064409', 'step': 4886, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 3322488817984}, 'timestamp': '2025-09-10 02:31:51.094041', 'step': 4886, 'epoch': 1} {'type': 'loss', 'content': 0.18856145441532135, 'timestamp': '2025-09-10 02:31:51.096657', 'step': 4887, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 3322488817984}, 'timestamp': '2025-09-10 02:31:51.127148', 'step': 4887, 'epoch': 1} {'type': 'loss', 'content': 0.14248351752758026, 'timestamp': '2025-09-10 02:31:51.150853', 'step': 4888, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 3322488817984}, 'timestamp': '2025-09-10 02:31:51.181165', 'step': 4888, 'epoch': 1} {'type': 'loss', 'content': 0.18304136395454407, 'timestamp': '2025-09-10 02:31:51.183438', 'step': 4889, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 2847885091968}, 'timestamp': '2025-09-10 02:31:51.215118', 'step': 4889, 'epoch': 1} {'type': 'loss', 'content': 0.20454302430152893, 'timestamp': '2025-09-10 02:31:51.217598', 'step': 4890, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 128], 'flops': 3797092544000}, 'timestamp': '2025-09-10 02:31:51.249500', 'step': 4890, 'epoch': 1} {'type': 'loss', 'content': 0.16482430696487427, 'timestamp': '2025-09-10 02:31:51.252133', 'step': 4891, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 3322488817984}, 'timestamp': '2025-09-10 02:31:51.282681', 'step': 4891, 'epoch': 1} {'type': 'loss', 'content': 0.12587392330169678, 'timestamp': '2025-09-10 02:31:51.306354', 'step': 4892, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 2847885091968}, 'timestamp': '2025-09-10 02:31:51.336677', 'step': 4892, 'epoch': 1} {'type': 'loss', 'content': 0.2202121764421463, 'timestamp': '2025-09-10 02:31:51.339320', 'step': 4893, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 2847885091968}, 'timestamp': '2025-09-10 02:31:51.368532', 'step': 4893, 'epoch': 1} {'type': 'loss', 'content': 0.1872679889202118, 'timestamp': '2025-09-10 02:31:51.370419', 'step': 4894, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 144], 'flops': 4271696270016}, 'timestamp': '2025-09-10 02:31:51.400894', 'step': 4894, 'epoch': 1} {'type': 'loss', 'content': 0.2761434018611908, 'timestamp': '2025-09-10 02:31:51.403447', 'step': 4895, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 3322488817984}, 'timestamp': '2025-09-10 02:31:51.433938', 'step': 4895, 'epoch': 1} {'type': 'loss', 'content': 0.11402938514947891, 'timestamp': '2025-09-10 02:31:51.457428', 'step': 4896, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 80], 'flops': 2373281365952}, 'timestamp': '2025-09-10 02:31:51.489851', 'step': 4896, 'epoch': 1} {'type': 'loss', 'content': 0.17389141023159027, 'timestamp': '2025-09-10 02:31:51.492805', 'step': 4897, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 3322488817984}, 'timestamp': '2025-09-10 02:31:51.524096', 'step': 4897, 'epoch': 1} {'type': 'loss', 'content': 0.08778829872608185, 'timestamp': '2025-09-10 02:31:51.526485', 'step': 4898, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 2847885091968}, 'timestamp': '2025-09-10 02:31:51.555861', 'step': 4898, 'epoch': 1} {'type': 'loss', 'content': 0.17728039622306824, 'timestamp': '2025-09-10 02:31:51.558102', 'step': 4899, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 144], 'flops': 4271696270016}, 'timestamp': '2025-09-10 02:31:51.588662', 'step': 4899, 'epoch': 1} {'type': 'loss', 'content': 0.19933238625526428, 'timestamp': '2025-09-10 02:31:51.612363', 'step': 4900, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 2847885091968}, 'timestamp': '2025-09-10 02:31:51.644314', 'step': 4900, 'epoch': 1} {'type': 'loss', 'content': 0.11681056022644043, 'timestamp': '2025-09-10 02:31:51.646750', 'step': 4901, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 144], 'flops': 4271696270016}, 'timestamp': '2025-09-10 02:31:51.677129', 'step': 4901, 'epoch': 1} {'type': 'loss', 'content': 0.16026487946510315, 'timestamp': '2025-09-10 02:31:51.679749', 'step': 4902, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 2847885091968}, 'timestamp': '2025-09-10 02:31:51.710209', 'step': 4902, 'epoch': 1} {'type': 'loss', 'content': 0.17918016016483307, 'timestamp': '2025-09-10 02:31:51.712017', 'step': 4903, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 3322488817984}, 'timestamp': '2025-09-10 02:31:51.740963', 'step': 4903, 'epoch': 1} {'type': 'loss', 'content': 0.12823402881622314, 'timestamp': '2025-09-10 02:31:51.764641', 'step': 4904, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 3322488817984}, 'timestamp': '2025-09-10 02:31:51.795813', 'step': 4904, 'epoch': 1} {'type': 'loss', 'content': 0.12709122896194458, 'timestamp': '2025-09-10 02:31:51.799804', 'step': 4905, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 128], 'flops': 3797092544000}, 'timestamp': '2025-09-10 02:31:51.832357', 'step': 4905, 'epoch': 1} {'type': 'loss', 'content': 0.13804711401462555, 'timestamp': '2025-09-10 02:31:51.834444', 'step': 4906, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 3322488817984}, 'timestamp': '2025-09-10 02:31:51.864451', 'step': 4906, 'epoch': 1} {'type': 'loss', 'content': 0.1316053867340088, 'timestamp': '2025-09-10 02:31:51.867075', 'step': 4907, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 2847885091968}, 'timestamp': '2025-09-10 02:31:51.897313', 'step': 4907, 'epoch': 1} {'type': 'loss', 'content': 0.16327206790447235, 'timestamp': '2025-09-10 02:31:51.921028', 'step': 4908, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 128], 'flops': 3797092544000}, 'timestamp': '2025-09-10 02:31:51.952198', 'step': 4908, 'epoch': 1} {'type': 'loss', 'content': 0.14056581258773804, 'timestamp': '2025-09-10 02:31:51.954543', 'step': 4909, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 128], 'flops': 3797092544000}, 'timestamp': '2025-09-10 02:31:51.984569', 'step': 4909, 'epoch': 1} {'type': 'loss', 'content': 0.10363314300775528, 'timestamp': '2025-09-10 02:31:51.986371', 'step': 4910, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 3322488817984}, 'timestamp': '2025-09-10 02:31:52.016349', 'step': 4910, 'epoch': 1} {'type': 'loss', 'content': 0.26592543721199036, 'timestamp': '2025-09-10 02:31:52.018906', 'step': 4911, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 3322488817984}, 'timestamp': '2025-09-10 02:31:52.049146', 'step': 4911, 'epoch': 1} {'type': 'loss', 'content': 0.18542291224002838, 'timestamp': '2025-09-10 02:31:52.072853', 'step': 4912, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 2847885091968}, 'timestamp': '2025-09-10 02:31:52.102655', 'step': 4912, 'epoch': 1} {'type': 'loss', 'content': 0.12244024872779846, 'timestamp': '2025-09-10 02:31:52.106942', 'step': 4913, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 3322488817984}, 'timestamp': '2025-09-10 02:31:52.138280', 'step': 4913, 'epoch': 1} {'type': 'loss', 'content': 0.15674467384815216, 'timestamp': '2025-09-10 02:31:52.140506', 'step': 4914, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 128], 'flops': 3797092544000}, 'timestamp': '2025-09-10 02:31:52.170294', 'step': 4914, 'epoch': 1} {'type': 'loss', 'content': 0.17689217627048492, 'timestamp': '2025-09-10 02:31:52.172519', 'step': 4915, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 3322488817984}, 'timestamp': '2025-09-10 02:31:52.202735', 'step': 4915, 'epoch': 1} {'type': 'loss', 'content': 0.1351916640996933, 'timestamp': '2025-09-10 02:31:52.227063', 'step': 4916, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 3322488817984}, 'timestamp': '2025-09-10 02:31:52.257900', 'step': 4916, 'epoch': 1} {'type': 'loss', 'content': 0.16115303337574005, 'timestamp': '2025-09-10 02:31:52.259923', 'step': 4917, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 3322488817984}, 'timestamp': '2025-09-10 02:31:52.289257', 'step': 4917, 'epoch': 1} {'type': 'loss', 'content': 0.10916019976139069, 'timestamp': '2025-09-10 02:31:52.291321', 'step': 4918, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 128], 'flops': 3797092544000}, 'timestamp': '2025-09-10 02:31:52.320678', 'step': 4918, 'epoch': 1} {'type': 'loss', 'content': 0.1371019035577774, 'timestamp': '2025-09-10 02:31:52.323473', 'step': 4919, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 3322488817984}, 'timestamp': '2025-09-10 02:31:52.354449', 'step': 4919, 'epoch': 1} {'type': 'loss', 'content': 0.11670427024364471, 'timestamp': '2025-09-10 02:31:52.378117', 'step': 4920, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 3322488817984}, 'timestamp': '2025-09-10 02:31:52.408586', 'step': 4920, 'epoch': 1} {'type': 'loss', 'content': 0.15014003217220306, 'timestamp': '2025-09-10 02:31:52.411851', 'step': 4921, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 2847885091968}, 'timestamp': '2025-09-10 02:31:52.442366', 'step': 4921, 'epoch': 1} {'type': 'loss', 'content': 0.09957917034626007, 'timestamp': '2025-09-10 02:31:52.444982', 'step': 4922, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 2847885091968}, 'timestamp': '2025-09-10 02:31:52.476013', 'step': 4922, 'epoch': 1} {'type': 'loss', 'content': 0.1747857928276062, 'timestamp': '2025-09-10 02:31:52.478062', 'step': 4923, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 128], 'flops': 3797092544000}, 'timestamp': '2025-09-10 02:31:52.507392', 'step': 4923, 'epoch': 1} {'type': 'loss', 'content': 0.11008524149656296, 'timestamp': '2025-09-10 02:31:52.530758', 'step': 4924, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 128], 'flops': 3797092544000}, 'timestamp': '2025-09-10 02:31:52.560592', 'step': 4924, 'epoch': 1} {'type': 'loss', 'content': 0.13211767375469208, 'timestamp': '2025-09-10 02:31:52.562998', 'step': 4925, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 2847885091968}, 'timestamp': '2025-09-10 02:31:52.593296', 'step': 4925, 'epoch': 1} {'type': 'loss', 'content': 0.15870599448680878, 'timestamp': '2025-09-10 02:31:52.595564', 'step': 4926, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 144], 'flops': 4271696270016}, 'timestamp': '2025-09-10 02:31:52.625741', 'step': 4926, 'epoch': 1} {'type': 'loss', 'content': 0.28594323992729187, 'timestamp': '2025-09-10 02:31:52.628185', 'step': 4927, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 128], 'flops': 3797092544000}, 'timestamp': '2025-09-10 02:31:52.660118', 'step': 4927, 'epoch': 1} {'type': 'loss', 'content': 0.16887080669403076, 'timestamp': '2025-09-10 02:31:52.683826', 'step': 4928, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 2847885091968}, 'timestamp': '2025-09-10 02:31:52.714158', 'step': 4928, 'epoch': 1} {'type': 'loss', 'content': 0.1797574907541275, 'timestamp': '2025-09-10 02:31:52.716974', 'step': 4929, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 144], 'flops': 4271696270016}, 'timestamp': '2025-09-10 02:31:52.747923', 'step': 4929, 'epoch': 1} {'type': 'loss', 'content': 0.13249839842319489, 'timestamp': '2025-09-10 02:31:52.750447', 'step': 4930, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 128], 'flops': 3797092544000}, 'timestamp': '2025-09-10 02:31:52.780109', 'step': 4930, 'epoch': 1} {'type': 'loss', 'content': 0.13169164955615997, 'timestamp': '2025-09-10 02:31:52.782847', 'step': 4931, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 128], 'flops': 3797092544000}, 'timestamp': '2025-09-10 02:31:52.813713', 'step': 4931, 'epoch': 1} {'type': 'loss', 'content': 0.12251520156860352, 'timestamp': '2025-09-10 02:31:52.838824', 'step': 4932, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 2847885091968}, 'timestamp': '2025-09-10 02:31:52.869208', 'step': 4932, 'epoch': 1} {'type': 'loss', 'content': 0.08468815684318542, 'timestamp': '2025-09-10 02:31:52.872031', 'step': 4933, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 2847885091968}, 'timestamp': '2025-09-10 02:31:52.902036', 'step': 4933, 'epoch': 1} {'type': 'loss', 'content': 0.1586153358221054, 'timestamp': '2025-09-10 02:31:52.904688', 'step': 4934, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 3322488817984}, 'timestamp': '2025-09-10 02:31:52.935307', 'step': 4934, 'epoch': 1} {'type': 'loss', 'content': 0.14747169613838196, 'timestamp': '2025-09-10 02:31:52.939693', 'step': 4935, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 3322488817984}, 'timestamp': '2025-09-10 02:31:52.972683', 'step': 4935, 'epoch': 1} {'type': 'loss', 'content': 0.16912521421909332, 'timestamp': '2025-09-10 02:31:52.998157', 'step': 4936, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 3322488817984}, 'timestamp': '2025-09-10 02:31:53.028718', 'step': 4936, 'epoch': 1} {'type': 'loss', 'content': 0.1617913842201233, 'timestamp': '2025-09-10 02:31:53.030814', 'step': 4937, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 2847885091968}, 'timestamp': '2025-09-10 02:31:53.061150', 'step': 4937, 'epoch': 1} {'type': 'loss', 'content': 0.15859650075435638, 'timestamp': '2025-09-10 02:31:53.063677', 'step': 4938, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 128], 'flops': 3797092544000}, 'timestamp': '2025-09-10 02:31:53.094916', 'step': 4938, 'epoch': 1} {'type': 'loss', 'content': 0.1529621183872223, 'timestamp': '2025-09-10 02:31:53.097688', 'step': 4939, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 128], 'flops': 3797092544000}, 'timestamp': '2025-09-10 02:31:53.127807', 'step': 4939, 'epoch': 1} {'type': 'loss', 'content': 0.09481991082429886, 'timestamp': '2025-09-10 02:31:53.151550', 'step': 4940, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 2847885091968}, 'timestamp': '2025-09-10 02:31:53.182067', 'step': 4940, 'epoch': 1} {'type': 'loss', 'content': 0.12958121299743652, 'timestamp': '2025-09-10 02:31:53.184494', 'step': 4941, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 3322488817984}, 'timestamp': '2025-09-10 02:31:53.214555', 'step': 4941, 'epoch': 1} {'type': 'loss', 'content': 0.22747080028057098, 'timestamp': '2025-09-10 02:31:53.218145', 'step': 4942, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 2847885091968}, 'timestamp': '2025-09-10 02:31:53.255665', 'step': 4942, 'epoch': 1} {'type': 'loss', 'content': 0.09997285902500153, 'timestamp': '2025-09-10 02:31:53.257611', 'step': 4943, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 128], 'flops': 3797092544000}, 'timestamp': '2025-09-10 02:31:53.286773', 'step': 4943, 'epoch': 1} {'type': 'loss', 'content': 0.25601235032081604, 'timestamp': '2025-09-10 02:31:53.310147', 'step': 4944, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 2847885091968}, 'timestamp': '2025-09-10 02:31:53.340550', 'step': 4944, 'epoch': 1} {'type': 'loss', 'content': 0.15602193772792816, 'timestamp': '2025-09-10 02:31:53.342754', 'step': 4945, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 160], 'flops': 4746299996032}, 'timestamp': '2025-09-10 02:31:53.373190', 'step': 4945, 'epoch': 1} {'type': 'loss', 'content': 0.3342936933040619, 'timestamp': '2025-09-10 02:31:53.375852', 'step': 4946, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 144], 'flops': 4271696270016}, 'timestamp': '2025-09-10 02:31:53.405758', 'step': 4946, 'epoch': 1} {'type': 'loss', 'content': 0.20194241404533386, 'timestamp': '2025-09-10 02:31:53.409088', 'step': 4947, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 128], 'flops': 3797092544000}, 'timestamp': '2025-09-10 02:31:53.441096', 'step': 4947, 'epoch': 1} {'type': 'loss', 'content': 0.16361138224601746, 'timestamp': '2025-09-10 02:31:53.464719', 'step': 4948, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 3322488817984}, 'timestamp': '2025-09-10 02:31:53.503301', 'step': 4948, 'epoch': 1} {'type': 'loss', 'content': 0.191622793674469, 'timestamp': '2025-09-10 02:31:53.505755', 'step': 4949, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 3322488817984}, 'timestamp': '2025-09-10 02:31:53.545373', 'step': 4949, 'epoch': 1} {'type': 'loss', 'content': 0.08550577610731125, 'timestamp': '2025-09-10 02:31:53.547628', 'step': 4950, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 2847885091968}, 'timestamp': '2025-09-10 02:31:53.583492', 'step': 4950, 'epoch': 1} {'type': 'loss', 'content': 0.11824151128530502, 'timestamp': '2025-09-10 02:31:53.585949', 'step': 4951, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 144], 'flops': 4271696270016}, 'timestamp': '2025-09-10 02:31:53.621261', 'step': 4951, 'epoch': 1} {'type': 'loss', 'content': 0.15227344632148743, 'timestamp': '2025-09-10 02:31:53.644826', 'step': 4952, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 128], 'flops': 3797092544000}, 'timestamp': '2025-09-10 02:31:53.679158', 'step': 4952, 'epoch': 1} {'type': 'loss', 'content': 0.17646189033985138, 'timestamp': '2025-09-10 02:31:53.686445', 'step': 4953, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 3322488817984}, 'timestamp': '2025-09-10 02:31:53.718962', 'step': 4953, 'epoch': 1} {'type': 'loss', 'content': 0.12720178067684174, 'timestamp': '2025-09-10 02:31:53.722703', 'step': 4954, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 144], 'flops': 4271696270016}, 'timestamp': '2025-09-10 02:31:53.756234', 'step': 4954, 'epoch': 1} {'type': 'loss', 'content': 0.15324951708316803, 'timestamp': '2025-09-10 02:31:53.758584', 'step': 4955, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 128], 'flops': 3797092544000}, 'timestamp': '2025-09-10 02:31:53.789552', 'step': 4955, 'epoch': 1} {'type': 'loss', 'content': 0.24576790630817413, 'timestamp': '2025-09-10 02:31:53.813172', 'step': 4956, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 160], 'flops': 4746299996032}, 'timestamp': '2025-09-10 02:31:53.844272', 'step': 4956, 'epoch': 1} {'type': 'loss', 'content': 0.11848028749227524, 'timestamp': '2025-09-10 02:31:53.846780', 'step': 4957, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 2847885091968}, 'timestamp': '2025-09-10 02:31:53.878491', 'step': 4957, 'epoch': 1} {'type': 'loss', 'content': 0.1520109325647354, 'timestamp': '2025-09-10 02:31:53.881203', 'step': 4958, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 3322488817984}, 'timestamp': '2025-09-10 02:31:53.911483', 'step': 4958, 'epoch': 1} {'type': 'loss', 'content': 0.20553025603294373, 'timestamp': '2025-09-10 02:31:53.914092', 'step': 4959, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 3322488817984}, 'timestamp': '2025-09-10 02:31:53.944309', 'step': 4959, 'epoch': 1} {'type': 'loss', 'content': 0.14906735718250275, 'timestamp': '2025-09-10 02:31:53.968090', 'step': 4960, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 3322488817984}, 'timestamp': '2025-09-10 02:31:53.999274', 'step': 4960, 'epoch': 1} {'type': 'loss', 'content': 0.13301025331020355, 'timestamp': '2025-09-10 02:31:54.001346', 'step': 4961, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 128], 'flops': 3797092544000}, 'timestamp': '2025-09-10 02:31:54.032078', 'step': 4961, 'epoch': 1} {'type': 'loss', 'content': 0.17717719078063965, 'timestamp': '2025-09-10 02:31:54.034141', 'step': 4962, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 2847885091968}, 'timestamp': '2025-09-10 02:31:54.063939', 'step': 4962, 'epoch': 1} {'type': 'loss', 'content': 0.1396215409040451, 'timestamp': '2025-09-10 02:31:54.066197', 'step': 4963, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 128], 'flops': 3797092544000}, 'timestamp': '2025-09-10 02:31:54.097159', 'step': 4963, 'epoch': 1} {'type': 'loss', 'content': 0.13237953186035156, 'timestamp': '2025-09-10 02:31:54.121148', 'step': 4964, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 128], 'flops': 3797092544000}, 'timestamp': '2025-09-10 02:31:54.152707', 'step': 4964, 'epoch': 1} {'type': 'loss', 'content': 0.10472413897514343, 'timestamp': '2025-09-10 02:31:54.155267', 'step': 4965, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 2847885091968}, 'timestamp': '2025-09-10 02:31:54.184875', 'step': 4965, 'epoch': 1} {'type': 'loss', 'content': 0.20948933064937592, 'timestamp': '2025-09-10 02:31:54.187224', 'step': 4966, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 128], 'flops': 3797092544000}, 'timestamp': '2025-09-10 02:31:54.217641', 'step': 4966, 'epoch': 1} {'type': 'loss', 'content': 0.16680094599723816, 'timestamp': '2025-09-10 02:31:54.220596', 'step': 4967, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 2847885091968}, 'timestamp': '2025-09-10 02:31:54.255301', 'step': 4967, 'epoch': 1} {'type': 'loss', 'content': 0.06772641092538834, 'timestamp': '2025-09-10 02:31:54.279583', 'step': 4968, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 80], 'flops': 2373281365952}, 'timestamp': '2025-09-10 02:31:54.310733', 'step': 4968, 'epoch': 1} {'type': 'loss', 'content': 0.2557661533355713, 'timestamp': '2025-09-10 02:31:54.313150', 'step': 4969, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 2847885091968}, 'timestamp': '2025-09-10 02:31:54.342864', 'step': 4969, 'epoch': 1} {'type': 'loss', 'content': 0.17369556427001953, 'timestamp': '2025-09-10 02:31:54.345390', 'step': 4970, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 2847885091968}, 'timestamp': '2025-09-10 02:31:54.375110', 'step': 4970, 'epoch': 1} {'type': 'loss', 'content': 0.1648862361907959, 'timestamp': '2025-09-10 02:31:54.377755', 'step': 4971, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 2847885091968}, 'timestamp': '2025-09-10 02:31:54.408049', 'step': 4971, 'epoch': 1} {'type': 'loss', 'content': 0.10060514509677887, 'timestamp': '2025-09-10 02:31:54.431855', 'step': 4972, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 144], 'flops': 4271696270016}, 'timestamp': '2025-09-10 02:31:54.464200', 'step': 4972, 'epoch': 1} {'type': 'loss', 'content': 0.13899093866348267, 'timestamp': '2025-09-10 02:31:54.466434', 'step': 4973, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 3322488817984}, 'timestamp': '2025-09-10 02:31:54.496821', 'step': 4973, 'epoch': 1} {'type': 'loss', 'content': 0.1888820379972458, 'timestamp': '2025-09-10 02:31:54.499935', 'step': 4974, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 3322488817984}, 'timestamp': '2025-09-10 02:31:54.532113', 'step': 4974, 'epoch': 1} {'type': 'loss', 'content': 0.09693493694067001, 'timestamp': '2025-09-10 02:31:54.535089', 'step': 4975, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 80], 'flops': 2373281365952}, 'timestamp': '2025-09-10 02:31:54.567886', 'step': 4975, 'epoch': 1} {'type': 'loss', 'content': 0.09619610011577606, 'timestamp': '2025-09-10 02:31:54.591471', 'step': 4976, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 3322488817984}, 'timestamp': '2025-09-10 02:31:54.622578', 'step': 4976, 'epoch': 1} {'type': 'loss', 'content': 0.1606864482164383, 'timestamp': '2025-09-10 02:31:54.625184', 'step': 4977, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 160], 'flops': 4746299996032}, 'timestamp': '2025-09-10 02:31:54.656280', 'step': 4977, 'epoch': 1} {'type': 'loss', 'content': 0.11648702621459961, 'timestamp': '2025-09-10 02:31:54.658951', 'step': 4978, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 144], 'flops': 4271696270016}, 'timestamp': '2025-09-10 02:31:54.689541', 'step': 4978, 'epoch': 1} {'type': 'loss', 'content': 0.2840859889984131, 'timestamp': '2025-09-10 02:31:54.693462', 'step': 4979, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 128], 'flops': 3797092544000}, 'timestamp': '2025-09-10 02:31:54.724165', 'step': 4979, 'epoch': 1} {'type': 'loss', 'content': 0.12162137031555176, 'timestamp': '2025-09-10 02:31:54.748004', 'step': 4980, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 128], 'flops': 3797092544000}, 'timestamp': '2025-09-10 02:31:54.777767', 'step': 4980, 'epoch': 1} {'type': 'loss', 'content': 0.13613784313201904, 'timestamp': '2025-09-10 02:31:54.780198', 'step': 4981, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 160], 'flops': 4746299996032}, 'timestamp': '2025-09-10 02:31:54.815422', 'step': 4981, 'epoch': 1} {'type': 'loss', 'content': 0.10870610177516937, 'timestamp': '2025-09-10 02:31:54.817686', 'step': 4982, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 128], 'flops': 3797092544000}, 'timestamp': '2025-09-10 02:31:54.848911', 'step': 4982, 'epoch': 1} {'type': 'loss', 'content': 0.09869500249624252, 'timestamp': '2025-09-10 02:31:54.851469', 'step': 4983, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 3322488817984}, 'timestamp': '2025-09-10 02:31:54.881659', 'step': 4983, 'epoch': 1} {'type': 'loss', 'content': 0.18501830101013184, 'timestamp': '2025-09-10 02:31:54.905283', 'step': 4984, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 128], 'flops': 3797092544000}, 'timestamp': '2025-09-10 02:31:54.936366', 'step': 4984, 'epoch': 1} {'type': 'loss', 'content': 0.21977393329143524, 'timestamp': '2025-09-10 02:31:54.939527', 'step': 4985, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 144], 'flops': 4271696270016}, 'timestamp': '2025-09-10 02:31:54.971446', 'step': 4985, 'epoch': 1} {'type': 'loss', 'content': 0.1687905639410019, 'timestamp': '2025-09-10 02:31:54.974248', 'step': 4986, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 80], 'flops': 2373281365952}, 'timestamp': '2025-09-10 02:31:55.004472', 'step': 4986, 'epoch': 1} {'type': 'loss', 'content': 0.1461285501718521, 'timestamp': '2025-09-10 02:31:55.007015', 'step': 4987, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 128], 'flops': 3797092544000}, 'timestamp': '2025-09-10 02:31:55.037208', 'step': 4987, 'epoch': 1} {'type': 'loss', 'content': 0.10631004720926285, 'timestamp': '2025-09-10 02:31:55.061337', 'step': 4988, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 2847885091968}, 'timestamp': '2025-09-10 02:31:55.095074', 'step': 4988, 'epoch': 1} {'type': 'loss', 'content': 0.08929295092821121, 'timestamp': '2025-09-10 02:31:55.097745', 'step': 4989, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 2847885091968}, 'timestamp': '2025-09-10 02:31:55.129211', 'step': 4989, 'epoch': 1} {'type': 'loss', 'content': 0.12524332106113434, 'timestamp': '2025-09-10 02:31:55.132126', 'step': 4990, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 2847885091968}, 'timestamp': '2025-09-10 02:31:55.162715', 'step': 4990, 'epoch': 1} {'type': 'loss', 'content': 0.22943951189517975, 'timestamp': '2025-09-10 02:31:55.165800', 'step': 4991, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 80], 'flops': 2373281365952}, 'timestamp': '2025-09-10 02:31:55.196885', 'step': 4991, 'epoch': 1} {'type': 'loss', 'content': 0.23304808139801025, 'timestamp': '2025-09-10 02:31:55.220441', 'step': 4992, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 2847885091968}, 'timestamp': '2025-09-10 02:31:55.251188', 'step': 4992, 'epoch': 1} {'type': 'loss', 'content': 0.12264695763587952, 'timestamp': '2025-09-10 02:31:55.254551', 'step': 4993, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 3322488817984}, 'timestamp': '2025-09-10 02:31:55.291307', 'step': 4993, 'epoch': 1} {'type': 'loss', 'content': 0.24616727232933044, 'timestamp': '2025-09-10 02:31:55.294422', 'step': 4994, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 3322488817984}, 'timestamp': '2025-09-10 02:31:55.324551', 'step': 4994, 'epoch': 1} {'type': 'loss', 'content': 0.15762218832969666, 'timestamp': '2025-09-10 02:31:55.326909', 'step': 4995, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 128], 'flops': 3797092544000}, 'timestamp': '2025-09-10 02:31:55.358920', 'step': 4995, 'epoch': 1} {'type': 'loss', 'content': 0.14286839962005615, 'timestamp': '2025-09-10 02:31:55.382565', 'step': 4996, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 2847885091968}, 'timestamp': '2025-09-10 02:31:55.413274', 'step': 4996, 'epoch': 1} {'type': 'loss', 'content': 0.1326247751712799, 'timestamp': '2025-09-10 02:31:55.415698', 'step': 4997, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 144], 'flops': 4271696270016}, 'timestamp': '2025-09-10 02:31:55.445963', 'step': 4997, 'epoch': 1} {'type': 'loss', 'content': 0.14420491456985474, 'timestamp': '2025-09-10 02:31:55.450074', 'step': 4998, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 2847885091968}, 'timestamp': '2025-09-10 02:31:55.479737', 'step': 4998, 'epoch': 1} {'type': 'loss', 'content': 0.24442611634731293, 'timestamp': '2025-09-10 02:31:55.482277', 'step': 4999, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 192], 'flops': 5695507448064}, 'timestamp': '2025-09-10 02:31:55.513607', 'step': 4999, 'epoch': 1} {'type': 'loss', 'content': 0.12651266157627106, 'timestamp': '2025-09-10 02:31:55.538756', 'step': 5000, 'epoch': 1} {'type': 'info', 'content': 'Checkpoint saved at step 5000', 'timestamp': '2025-09-10 02:32:00.130700', 'step': 5000, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 2847885091968}, 'timestamp': '2025-09-10 02:32:00.185656', 'step': 5000, 'epoch': 1} {'type': 'loss', 'content': 0.13210417330265045, 'timestamp': '2025-09-10 02:32:00.188571', 'step': 5001, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 3322488817984}, 'timestamp': '2025-09-10 02:32:00.222529', 'step': 5001, 'epoch': 1} {'type': 'loss', 'content': 0.12372224777936935, 'timestamp': '2025-09-10 02:32:00.225244', 'step': 5002, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 3322488817984}, 'timestamp': '2025-09-10 02:32:00.257196', 'step': 5002, 'epoch': 1} {'type': 'loss', 'content': 0.15263904631137848, 'timestamp': '2025-09-10 02:32:00.259542', 'step': 5003, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 128], 'flops': 3797092544000}, 'timestamp': '2025-09-10 02:32:00.291138', 'step': 5003, 'epoch': 1} {'type': 'loss', 'content': 0.13617350161075592, 'timestamp': '2025-09-10 02:32:00.315120', 'step': 5004, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 128], 'flops': 3797092544000}, 'timestamp': '2025-09-10 02:32:00.362143', 'step': 5004, 'epoch': 1} {'type': 'loss', 'content': 0.1743643879890442, 'timestamp': '2025-09-10 02:32:00.371012', 'step': 5005, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 3322488817984}, 'timestamp': '2025-09-10 02:32:00.415790', 'step': 5005, 'epoch': 1} {'type': 'loss', 'content': 0.2050764411687851, 'timestamp': '2025-09-10 02:32:00.420710', 'step': 5006, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 128], 'flops': 3797092544000}, 'timestamp': '2025-09-10 02:32:00.452321', 'step': 5006, 'epoch': 1} {'type': 'loss', 'content': 0.1524287462234497, 'timestamp': '2025-09-10 02:32:00.454723', 'step': 5007, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 128], 'flops': 3797092544000}, 'timestamp': '2025-09-10 02:32:00.485179', 'step': 5007, 'epoch': 1} {'type': 'loss', 'content': 0.15765032172203064, 'timestamp': '2025-09-10 02:32:00.509078', 'step': 5008, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 80], 'flops': 2373281365952}, 'timestamp': '2025-09-10 02:32:00.540956', 'step': 5008, 'epoch': 1} {'type': 'loss', 'content': 0.16360202431678772, 'timestamp': '2025-09-10 02:32:00.545096', 'step': 5009, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 3322488817984}, 'timestamp': '2025-09-10 02:32:00.578737', 'step': 5009, 'epoch': 1} {'type': 'loss', 'content': 0.14915676414966583, 'timestamp': '2025-09-10 02:32:00.581350', 'step': 5010, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 128], 'flops': 3797092544000}, 'timestamp': '2025-09-10 02:32:00.611260', 'step': 5010, 'epoch': 1} {'type': 'loss', 'content': 0.21908533573150635, 'timestamp': '2025-09-10 02:32:00.613547', 'step': 5011, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 3322488817984}, 'timestamp': '2025-09-10 02:32:00.645637', 'step': 5011, 'epoch': 1} {'type': 'loss', 'content': 0.1328793168067932, 'timestamp': '2025-09-10 02:32:00.669579', 'step': 5012, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 128], 'flops': 3797092544000}, 'timestamp': '2025-09-10 02:32:00.700251', 'step': 5012, 'epoch': 1} {'type': 'loss', 'content': 0.1599912941455841, 'timestamp': '2025-09-10 02:32:00.704869', 'step': 5013, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 2847885091968}, 'timestamp': '2025-09-10 02:32:00.736396', 'step': 5013, 'epoch': 1} {'type': 'loss', 'content': 0.15362854301929474, 'timestamp': '2025-09-10 02:32:00.738817', 'step': 5014, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 3322488817984}, 'timestamp': '2025-09-10 02:32:00.769904', 'step': 5014, 'epoch': 1} {'type': 'loss', 'content': 0.10295740514993668, 'timestamp': '2025-09-10 02:32:00.771887', 'step': 5015, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 2847885091968}, 'timestamp': '2025-09-10 02:32:00.802615', 'step': 5015, 'epoch': 1} {'type': 'loss', 'content': 0.15108008682727814, 'timestamp': '2025-09-10 02:32:00.826590', 'step': 5016, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 2847885091968}, 'timestamp': '2025-09-10 02:32:00.858383', 'step': 5016, 'epoch': 1} {'type': 'loss', 'content': 0.23435476422309875, 'timestamp': '2025-09-10 02:32:00.860531', 'step': 5017, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 2847885091968}, 'timestamp': '2025-09-10 02:32:00.891837', 'step': 5017, 'epoch': 1} {'type': 'loss', 'content': 0.16827034950256348, 'timestamp': '2025-09-10 02:32:00.894319', 'step': 5018, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 3322488817984}, 'timestamp': '2025-09-10 02:32:00.925122', 'step': 5018, 'epoch': 1} {'type': 'loss', 'content': 0.13121956586837769, 'timestamp': '2025-09-10 02:32:00.927810', 'step': 5019, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 128], 'flops': 3797092544000}, 'timestamp': '2025-09-10 02:32:00.958484', 'step': 5019, 'epoch': 1} {'type': 'loss', 'content': 0.21085204184055328, 'timestamp': '2025-09-10 02:32:00.982656', 'step': 5020, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 128], 'flops': 3797092544000}, 'timestamp': '2025-09-10 02:32:01.014393', 'step': 5020, 'epoch': 1} {'type': 'loss', 'content': 0.09187191724777222, 'timestamp': '2025-09-10 02:32:01.016704', 'step': 5021, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 2847885091968}, 'timestamp': '2025-09-10 02:32:01.048206', 'step': 5021, 'epoch': 1} {'type': 'loss', 'content': 0.13942106068134308, 'timestamp': '2025-09-10 02:32:01.050563', 'step': 5022, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 3322488817984}, 'timestamp': '2025-09-10 02:32:01.081373', 'step': 5022, 'epoch': 1} {'type': 'loss', 'content': 0.1044234111905098, 'timestamp': '2025-09-10 02:32:01.083848', 'step': 5023, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 128], 'flops': 3797092544000}, 'timestamp': '2025-09-10 02:32:01.114128', 'step': 5023, 'epoch': 1} {'type': 'loss', 'content': 0.14383283257484436, 'timestamp': '2025-09-10 02:32:01.137939', 'step': 5024, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 144], 'flops': 4271696270016}, 'timestamp': '2025-09-10 02:32:01.170207', 'step': 5024, 'epoch': 1} {'type': 'loss', 'content': 0.12187788635492325, 'timestamp': '2025-09-10 02:32:01.172871', 'step': 5025, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 160], 'flops': 4746299996032}, 'timestamp': '2025-09-10 02:32:01.204394', 'step': 5025, 'epoch': 1} {'type': 'loss', 'content': 0.18402108550071716, 'timestamp': '2025-09-10 02:32:01.207791', 'step': 5026, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 3322488817984}, 'timestamp': '2025-09-10 02:32:01.239743', 'step': 5026, 'epoch': 1} {'type': 'loss', 'content': 0.22678014636039734, 'timestamp': '2025-09-10 02:32:01.243814', 'step': 5027, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 128], 'flops': 3797092544000}, 'timestamp': '2025-09-10 02:32:01.277841', 'step': 5027, 'epoch': 1} {'type': 'loss', 'content': 0.1861344426870346, 'timestamp': '2025-09-10 02:32:01.301950', 'step': 5028, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 128], 'flops': 3797092544000}, 'timestamp': '2025-09-10 02:32:01.332750', 'step': 5028, 'epoch': 1} {'type': 'loss', 'content': 0.17488740384578705, 'timestamp': '2025-09-10 02:32:01.335320', 'step': 5029, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 2847885091968}, 'timestamp': '2025-09-10 02:32:01.365570', 'step': 5029, 'epoch': 1} {'type': 'loss', 'content': 0.15077486634254456, 'timestamp': '2025-09-10 02:32:01.368200', 'step': 5030, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 128], 'flops': 3797092544000}, 'timestamp': '2025-09-10 02:32:01.402729', 'step': 5030, 'epoch': 1} {'type': 'loss', 'content': 0.12842530012130737, 'timestamp': '2025-09-10 02:32:01.405282', 'step': 5031, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 3322488817984}, 'timestamp': '2025-09-10 02:32:01.436874', 'step': 5031, 'epoch': 1} {'type': 'loss', 'content': 0.13798224925994873, 'timestamp': '2025-09-10 02:32:01.460784', 'step': 5032, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 128], 'flops': 3797092544000}, 'timestamp': '2025-09-10 02:32:01.493046', 'step': 5032, 'epoch': 1} {'type': 'loss', 'content': 0.1882283240556717, 'timestamp': '2025-09-10 02:32:01.495716', 'step': 5033, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 2847885091968}, 'timestamp': '2025-09-10 02:32:01.526600', 'step': 5033, 'epoch': 1} {'type': 'loss', 'content': 0.14873450994491577, 'timestamp': '2025-09-10 02:32:01.529813', 'step': 5034, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 3322488817984}, 'timestamp': '2025-09-10 02:32:01.560741', 'step': 5034, 'epoch': 1} {'type': 'loss', 'content': 0.09245983511209488, 'timestamp': '2025-09-10 02:32:01.563409', 'step': 5035, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 3322488817984}, 'timestamp': '2025-09-10 02:32:01.594284', 'step': 5035, 'epoch': 1} {'type': 'loss', 'content': 0.3035829961299896, 'timestamp': '2025-09-10 02:32:01.618276', 'step': 5036, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 144], 'flops': 4271696270016}, 'timestamp': '2025-09-10 02:32:01.649409', 'step': 5036, 'epoch': 1} {'type': 'loss', 'content': 0.12497662007808685, 'timestamp': '2025-09-10 02:32:01.651875', 'step': 5037, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 128], 'flops': 3797092544000}, 'timestamp': '2025-09-10 02:32:01.682318', 'step': 5037, 'epoch': 1} {'type': 'loss', 'content': 0.20792172849178314, 'timestamp': '2025-09-10 02:32:01.684805', 'step': 5038, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 3322488817984}, 'timestamp': '2025-09-10 02:32:01.715313', 'step': 5038, 'epoch': 1} {'type': 'loss', 'content': 0.2548483610153198, 'timestamp': '2025-09-10 02:32:01.717734', 'step': 5039, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 3322488817984}, 'timestamp': '2025-09-10 02:32:01.749165', 'step': 5039, 'epoch': 1} {'type': 'loss', 'content': 0.17960260808467865, 'timestamp': '2025-09-10 02:32:01.772819', 'step': 5040, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 3322488817984}, 'timestamp': '2025-09-10 02:32:01.803774', 'step': 5040, 'epoch': 1} {'type': 'loss', 'content': 0.13569815456867218, 'timestamp': '2025-09-10 02:32:01.806087', 'step': 5041, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 128], 'flops': 3797092544000}, 'timestamp': '2025-09-10 02:32:01.836049', 'step': 5041, 'epoch': 1} {'type': 'loss', 'content': 0.1485840529203415, 'timestamp': '2025-09-10 02:32:01.838420', 'step': 5042, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 3322488817984}, 'timestamp': '2025-09-10 02:32:01.870565', 'step': 5042, 'epoch': 1} {'type': 'loss', 'content': 0.21236397325992584, 'timestamp': '2025-09-10 02:32:01.873240', 'step': 5043, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 3322488817984}, 'timestamp': '2025-09-10 02:32:01.903660', 'step': 5043, 'epoch': 1} {'type': 'loss', 'content': 0.19520390033721924, 'timestamp': '2025-09-10 02:32:01.927566', 'step': 5044, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 2847885091968}, 'timestamp': '2025-09-10 02:32:01.959450', 'step': 5044, 'epoch': 1} {'type': 'loss', 'content': 0.20660161972045898, 'timestamp': '2025-09-10 02:32:01.968260', 'step': 5045, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 160], 'flops': 4746299996032}, 'timestamp': '2025-09-10 02:32:02.012543', 'step': 5045, 'epoch': 1} {'type': 'loss', 'content': 0.12541596591472626, 'timestamp': '2025-09-10 02:32:02.015404', 'step': 5046, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 2847885091968}, 'timestamp': '2025-09-10 02:32:02.046639', 'step': 5046, 'epoch': 1} {'type': 'loss', 'content': 0.23135070502758026, 'timestamp': '2025-09-10 02:32:02.051987', 'step': 5047, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 2847885091968}, 'timestamp': '2025-09-10 02:32:02.082709', 'step': 5047, 'epoch': 1} {'type': 'loss', 'content': 0.1341364085674286, 'timestamp': '2025-09-10 02:32:02.106137', 'step': 5048, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 176], 'flops': 5220903722048}, 'timestamp': '2025-09-10 02:32:02.137649', 'step': 5048, 'epoch': 1} {'type': 'loss', 'content': 0.13446016609668732, 'timestamp': '2025-09-10 02:32:02.140739', 'step': 5049, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 2847885091968}, 'timestamp': '2025-09-10 02:32:02.172373', 'step': 5049, 'epoch': 1} {'type': 'loss', 'content': 0.15020908415317535, 'timestamp': '2025-09-10 02:32:02.174900', 'step': 5050, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 3322488817984}, 'timestamp': '2025-09-10 02:32:02.205620', 'step': 5050, 'epoch': 1} {'type': 'loss', 'content': 0.19895930588245392, 'timestamp': '2025-09-10 02:32:02.207991', 'step': 5051, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 3322488817984}, 'timestamp': '2025-09-10 02:32:02.239359', 'step': 5051, 'epoch': 1} {'type': 'loss', 'content': 0.2037307322025299, 'timestamp': '2025-09-10 02:32:02.267073', 'step': 5052, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 3322488817984}, 'timestamp': '2025-09-10 02:32:02.307569', 'step': 5052, 'epoch': 1} {'type': 'loss', 'content': 0.13478101789951324, 'timestamp': '2025-09-10 02:32:02.313813', 'step': 5053, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 128], 'flops': 3797092544000}, 'timestamp': '2025-09-10 02:32:02.348796', 'step': 5053, 'epoch': 1} {'type': 'loss', 'content': 0.1399255394935608, 'timestamp': '2025-09-10 02:32:02.352716', 'step': 5054, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 144], 'flops': 4271696270016}, 'timestamp': '2025-09-10 02:32:02.388373', 'step': 5054, 'epoch': 1} {'type': 'loss', 'content': 0.18690694868564606, 'timestamp': '2025-09-10 02:32:02.402976', 'step': 5055, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 2847885091968}, 'timestamp': '2025-09-10 02:32:02.458891', 'step': 5055, 'epoch': 1} {'type': 'loss', 'content': 0.08624740689992905, 'timestamp': '2025-09-10 02:32:02.484167', 'step': 5056, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 80], 'flops': 2373281365952}, 'timestamp': '2025-09-10 02:32:02.518403', 'step': 5056, 'epoch': 1} {'type': 'loss', 'content': 0.3184479773044586, 'timestamp': '2025-09-10 02:32:02.521752', 'step': 5057, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 2847885091968}, 'timestamp': '2025-09-10 02:32:02.558575', 'step': 5057, 'epoch': 1} {'type': 'loss', 'content': 0.10796184837818146, 'timestamp': '2025-09-10 02:32:02.566285', 'step': 5058, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 160], 'flops': 4746299996032}, 'timestamp': '2025-09-10 02:32:02.613485', 'step': 5058, 'epoch': 1} {'type': 'loss', 'content': 0.13991887867450714, 'timestamp': '2025-09-10 02:32:02.621151', 'step': 5059, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 128], 'flops': 3797092544000}, 'timestamp': '2025-09-10 02:32:02.659478', 'step': 5059, 'epoch': 1} {'type': 'loss', 'content': 0.07345616072416306, 'timestamp': '2025-09-10 02:32:02.687561', 'step': 5060, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 3322488817984}, 'timestamp': '2025-09-10 02:32:02.724711', 'step': 5060, 'epoch': 1} {'type': 'loss', 'content': 0.09138066321611404, 'timestamp': '2025-09-10 02:32:02.728815', 'step': 5061, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 2847885091968}, 'timestamp': '2025-09-10 02:32:02.772926', 'step': 5061, 'epoch': 1} {'type': 'loss', 'content': 0.16482199728488922, 'timestamp': '2025-09-10 02:32:02.775598', 'step': 5062, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 2847885091968}, 'timestamp': '2025-09-10 02:32:02.812908', 'step': 5062, 'epoch': 1} {'type': 'loss', 'content': 0.13275426626205444, 'timestamp': '2025-09-10 02:32:02.815522', 'step': 5063, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 2847885091968}, 'timestamp': '2025-09-10 02:32:02.847472', 'step': 5063, 'epoch': 1} {'type': 'loss', 'content': 0.23751050233840942, 'timestamp': '2025-09-10 02:32:02.871681', 'step': 5064, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 2847885091968}, 'timestamp': '2025-09-10 02:32:02.902348', 'step': 5064, 'epoch': 1} {'type': 'loss', 'content': 0.15259751677513123, 'timestamp': '2025-09-10 02:32:02.904959', 'step': 5065, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 3322488817984}, 'timestamp': '2025-09-10 02:32:02.936293', 'step': 5065, 'epoch': 1} {'type': 'loss', 'content': 0.22481544315814972, 'timestamp': '2025-09-10 02:32:02.939207', 'step': 5066, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 2847885091968}, 'timestamp': '2025-09-10 02:32:02.971381', 'step': 5066, 'epoch': 1} {'type': 'loss', 'content': 0.227294459939003, 'timestamp': '2025-09-10 02:32:02.974346', 'step': 5067, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 3322488817984}, 'timestamp': '2025-09-10 02:32:03.003994', 'step': 5067, 'epoch': 1} {'type': 'loss', 'content': 0.12324725836515427, 'timestamp': '2025-09-10 02:32:03.027887', 'step': 5068, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 160], 'flops': 4746299996032}, 'timestamp': '2025-09-10 02:32:03.058302', 'step': 5068, 'epoch': 1} {'type': 'loss', 'content': 0.16379980742931366, 'timestamp': '2025-09-10 02:32:03.060607', 'step': 5069, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 3322488817984}, 'timestamp': '2025-09-10 02:32:03.091783', 'step': 5069, 'epoch': 1} {'type': 'loss', 'content': 0.1958397626876831, 'timestamp': '2025-09-10 02:32:03.094619', 'step': 5070, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 2847885091968}, 'timestamp': '2025-09-10 02:32:03.127244', 'step': 5070, 'epoch': 1} {'type': 'loss', 'content': 0.1393323689699173, 'timestamp': '2025-09-10 02:32:03.130095', 'step': 5071, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 3322488817984}, 'timestamp': '2025-09-10 02:32:03.164025', 'step': 5071, 'epoch': 1} {'type': 'loss', 'content': 0.054638128727674484, 'timestamp': '2025-09-10 02:32:03.191354', 'step': 5072, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 2847885091968}, 'timestamp': '2025-09-10 02:32:03.231404', 'step': 5072, 'epoch': 1} {'type': 'loss', 'content': 0.12426202744245529, 'timestamp': '2025-09-10 02:32:03.234534', 'step': 5073, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 128], 'flops': 3797092544000}, 'timestamp': '2025-09-10 02:32:03.266915', 'step': 5073, 'epoch': 1} {'type': 'loss', 'content': 0.1699405461549759, 'timestamp': '2025-09-10 02:32:03.269727', 'step': 5074, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 2847885091968}, 'timestamp': '2025-09-10 02:32:03.301596', 'step': 5074, 'epoch': 1} {'type': 'loss', 'content': 0.13298314809799194, 'timestamp': '2025-09-10 02:32:03.304581', 'step': 5075, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 2847885091968}, 'timestamp': '2025-09-10 02:32:03.337619', 'step': 5075, 'epoch': 1} {'type': 'loss', 'content': 0.15161404013633728, 'timestamp': '2025-09-10 02:32:03.361648', 'step': 5076, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 2847885091968}, 'timestamp': '2025-09-10 02:32:03.393205', 'step': 5076, 'epoch': 1} {'type': 'loss', 'content': 0.11172261089086533, 'timestamp': '2025-09-10 02:32:03.395813', 'step': 5077, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 3322488817984}, 'timestamp': '2025-09-10 02:32:03.426861', 'step': 5077, 'epoch': 1} {'type': 'loss', 'content': 0.13270463049411774, 'timestamp': '2025-09-10 02:32:03.429109', 'step': 5078, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 144], 'flops': 4271696270016}, 'timestamp': '2025-09-10 02:32:03.462888', 'step': 5078, 'epoch': 1} {'type': 'loss', 'content': 0.19503772258758545, 'timestamp': '2025-09-10 02:32:03.465539', 'step': 5079, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 3322488817984}, 'timestamp': '2025-09-10 02:32:03.495880', 'step': 5079, 'epoch': 1} {'type': 'loss', 'content': 0.09268555045127869, 'timestamp': '2025-09-10 02:32:03.521310', 'step': 5080, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 128], 'flops': 3797092544000}, 'timestamp': '2025-09-10 02:32:03.552270', 'step': 5080, 'epoch': 1} {'type': 'loss', 'content': 0.1684541553258896, 'timestamp': '2025-09-10 02:32:03.555776', 'step': 5081, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 2847885091968}, 'timestamp': '2025-09-10 02:32:03.587659', 'step': 5081, 'epoch': 1} {'type': 'loss', 'content': 0.21374624967575073, 'timestamp': '2025-09-10 02:32:03.590489', 'step': 5082, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 144], 'flops': 4271696270016}, 'timestamp': '2025-09-10 02:32:03.620626', 'step': 5082, 'epoch': 1} {'type': 'loss', 'content': 0.13190752267837524, 'timestamp': '2025-09-10 02:32:03.623499', 'step': 5083, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 3322488817984}, 'timestamp': '2025-09-10 02:32:03.654581', 'step': 5083, 'epoch': 1} {'type': 'loss', 'content': 0.11351281404495239, 'timestamp': '2025-09-10 02:32:03.679760', 'step': 5084, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 3322488817984}, 'timestamp': '2025-09-10 02:32:03.709799', 'step': 5084, 'epoch': 1} {'type': 'loss', 'content': 0.17121931910514832, 'timestamp': '2025-09-10 02:32:03.712262', 'step': 5085, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 2847885091968}, 'timestamp': '2025-09-10 02:32:03.742314', 'step': 5085, 'epoch': 1} {'type': 'loss', 'content': 0.09985668957233429, 'timestamp': '2025-09-10 02:32:03.745019', 'step': 5086, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 128], 'flops': 3797092544000}, 'timestamp': '2025-09-10 02:32:03.775869', 'step': 5086, 'epoch': 1} {'type': 'loss', 'content': 0.22061118483543396, 'timestamp': '2025-09-10 02:32:03.778285', 'step': 5087, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 2847885091968}, 'timestamp': '2025-09-10 02:32:03.808945', 'step': 5087, 'epoch': 1} {'type': 'loss', 'content': 0.11851505935192108, 'timestamp': '2025-09-10 02:32:03.832659', 'step': 5088, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 128], 'flops': 3797092544000}, 'timestamp': '2025-09-10 02:32:03.868126', 'step': 5088, 'epoch': 1} {'type': 'loss', 'content': 0.10093723237514496, 'timestamp': '2025-09-10 02:32:03.870972', 'step': 5089, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 3322488817984}, 'timestamp': '2025-09-10 02:32:03.903076', 'step': 5089, 'epoch': 1} {'type': 'loss', 'content': 0.2190328985452652, 'timestamp': '2025-09-10 02:32:03.905526', 'step': 5090, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 3322488817984}, 'timestamp': '2025-09-10 02:32:03.936207', 'step': 5090, 'epoch': 1} {'type': 'loss', 'content': 0.25234824419021606, 'timestamp': '2025-09-10 02:32:03.938367', 'step': 5091, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 128], 'flops': 3797092544000}, 'timestamp': '2025-09-10 02:32:03.968858', 'step': 5091, 'epoch': 1} {'type': 'loss', 'content': 0.2486724853515625, 'timestamp': '2025-09-10 02:32:03.992435', 'step': 5092, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 2847885091968}, 'timestamp': '2025-09-10 02:32:04.023178', 'step': 5092, 'epoch': 1} {'type': 'loss', 'content': 0.09227843582630157, 'timestamp': '2025-09-10 02:32:04.025436', 'step': 5093, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 2847885091968}, 'timestamp': '2025-09-10 02:32:04.055749', 'step': 5093, 'epoch': 1} {'type': 'loss', 'content': 0.14645519852638245, 'timestamp': '2025-09-10 02:32:04.058477', 'step': 5094, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 2847885091968}, 'timestamp': '2025-09-10 02:32:04.092294', 'step': 5094, 'epoch': 1} {'type': 'loss', 'content': 0.22154591977596283, 'timestamp': '2025-09-10 02:32:04.094725', 'step': 5095, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 2847885091968}, 'timestamp': '2025-09-10 02:32:04.125195', 'step': 5095, 'epoch': 1} {'type': 'loss', 'content': 0.21557269990444183, 'timestamp': '2025-09-10 02:32:04.148828', 'step': 5096, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 2847885091968}, 'timestamp': '2025-09-10 02:32:04.180574', 'step': 5096, 'epoch': 1} {'type': 'loss', 'content': 0.10675985366106033, 'timestamp': '2025-09-10 02:32:04.182974', 'step': 5097, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 3322488817984}, 'timestamp': '2025-09-10 02:32:04.213038', 'step': 5097, 'epoch': 1} {'type': 'loss', 'content': 0.13090604543685913, 'timestamp': '2025-09-10 02:32:04.215324', 'step': 5098, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 2847885091968}, 'timestamp': '2025-09-10 02:32:04.245601', 'step': 5098, 'epoch': 1} {'type': 'loss', 'content': 0.18007569015026093, 'timestamp': '2025-09-10 02:32:04.248108', 'step': 5099, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 144], 'flops': 4271696270016}, 'timestamp': '2025-09-10 02:32:04.279721', 'step': 5099, 'epoch': 1} {'type': 'loss', 'content': 0.19007909297943115, 'timestamp': '2025-09-10 02:32:04.303521', 'step': 5100, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 144], 'flops': 4271696270016}, 'timestamp': '2025-09-10 02:32:04.335492', 'step': 5100, 'epoch': 1} {'type': 'loss', 'content': 0.16569554805755615, 'timestamp': '2025-09-10 02:32:04.337771', 'step': 5101, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 2847885091968}, 'timestamp': '2025-09-10 02:32:04.368106', 'step': 5101, 'epoch': 1} {'type': 'loss', 'content': 0.20026680827140808, 'timestamp': '2025-09-10 02:32:04.370422', 'step': 5102, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 3322488817984}, 'timestamp': '2025-09-10 02:32:04.401845', 'step': 5102, 'epoch': 1} {'type': 'loss', 'content': 0.23379318416118622, 'timestamp': '2025-09-10 02:32:04.404305', 'step': 5103, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 3322488817984}, 'timestamp': '2025-09-10 02:32:04.435592', 'step': 5103, 'epoch': 1} {'type': 'loss', 'content': 0.22243501245975494, 'timestamp': '2025-09-10 02:32:04.460080', 'step': 5104, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 2847885091968}, 'timestamp': '2025-09-10 02:32:04.491330', 'step': 5104, 'epoch': 1} {'type': 'loss', 'content': 0.17974744737148285, 'timestamp': '2025-09-10 02:32:04.493980', 'step': 5105, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 3322488817984}, 'timestamp': '2025-09-10 02:32:04.524394', 'step': 5105, 'epoch': 1} {'type': 'loss', 'content': 0.12375299632549286, 'timestamp': '2025-09-10 02:32:04.526893', 'step': 5106, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 128], 'flops': 3797092544000}, 'timestamp': '2025-09-10 02:32:04.556908', 'step': 5106, 'epoch': 1} {'type': 'loss', 'content': 0.14882424473762512, 'timestamp': '2025-09-10 02:32:04.559112', 'step': 5107, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 2847885091968}, 'timestamp': '2025-09-10 02:32:04.592026', 'step': 5107, 'epoch': 1} {'type': 'loss', 'content': 0.14774635434150696, 'timestamp': '2025-09-10 02:32:04.616040', 'step': 5108, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 3322488817984}, 'timestamp': '2025-09-10 02:32:04.646997', 'step': 5108, 'epoch': 1} {'type': 'loss', 'content': 0.23760391771793365, 'timestamp': '2025-09-10 02:32:04.649816', 'step': 5109, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 2847885091968}, 'timestamp': '2025-09-10 02:32:04.680561', 'step': 5109, 'epoch': 1} {'type': 'loss', 'content': 0.25653505325317383, 'timestamp': '2025-09-10 02:32:04.683157', 'step': 5110, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 3322488817984}, 'timestamp': '2025-09-10 02:32:04.713345', 'step': 5110, 'epoch': 1} {'type': 'loss', 'content': 0.17091453075408936, 'timestamp': '2025-09-10 02:32:04.716197', 'step': 5111, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 3322488817984}, 'timestamp': '2025-09-10 02:32:04.747238', 'step': 5111, 'epoch': 1} {'type': 'loss', 'content': 0.1274115890264511, 'timestamp': '2025-09-10 02:32:04.770897', 'step': 5112, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 128], 'flops': 3797092544000}, 'timestamp': '2025-09-10 02:32:04.802050', 'step': 5112, 'epoch': 1} {'type': 'loss', 'content': 0.16970835626125336, 'timestamp': '2025-09-10 02:32:04.805084', 'step': 5113, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 144], 'flops': 4271696270016}, 'timestamp': '2025-09-10 02:32:04.837691', 'step': 5113, 'epoch': 1} {'type': 'loss', 'content': 0.2597692012786865, 'timestamp': '2025-09-10 02:32:04.841924', 'step': 5114, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 80], 'flops': 2373281365952}, 'timestamp': '2025-09-10 02:32:04.878627', 'step': 5114, 'epoch': 1} {'type': 'loss', 'content': 0.14154364168643951, 'timestamp': '2025-09-10 02:32:04.887527', 'step': 5115, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 3322488817984}, 'timestamp': '2025-09-10 02:32:04.921225', 'step': 5115, 'epoch': 1} {'type': 'loss', 'content': 0.18352782726287842, 'timestamp': '2025-09-10 02:32:04.945041', 'step': 5116, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 80], 'flops': 2373281365952}, 'timestamp': '2025-09-10 02:32:04.979032', 'step': 5116, 'epoch': 1} {'type': 'loss', 'content': 0.12345226109027863, 'timestamp': '2025-09-10 02:32:04.981868', 'step': 5117, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 128], 'flops': 3797092544000}, 'timestamp': '2025-09-10 02:32:05.016872', 'step': 5117, 'epoch': 1} {'type': 'loss', 'content': 0.24185773730278015, 'timestamp': '2025-09-10 02:32:05.020389', 'step': 5118, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 2847885091968}, 'timestamp': '2025-09-10 02:32:05.057197', 'step': 5118, 'epoch': 1} {'type': 'loss', 'content': 0.2081242948770523, 'timestamp': '2025-09-10 02:32:05.063951', 'step': 5119, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 128], 'flops': 3797092544000}, 'timestamp': '2025-09-10 02:32:05.101352', 'step': 5119, 'epoch': 1} {'type': 'loss', 'content': 0.20999443531036377, 'timestamp': '2025-09-10 02:32:05.125022', 'step': 5120, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 2847885091968}, 'timestamp': '2025-09-10 02:32:05.172049', 'step': 5120, 'epoch': 1} {'type': 'loss', 'content': 0.15162122249603271, 'timestamp': '2025-09-10 02:32:05.177595', 'step': 5121, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 2847885091968}, 'timestamp': '2025-09-10 02:32:05.213718', 'step': 5121, 'epoch': 1} {'type': 'loss', 'content': 0.21797263622283936, 'timestamp': '2025-09-10 02:32:05.217579', 'step': 5122, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 3322488817984}, 'timestamp': '2025-09-10 02:32:05.257462', 'step': 5122, 'epoch': 1} {'type': 'loss', 'content': 0.12098419666290283, 'timestamp': '2025-09-10 02:32:05.260492', 'step': 5123, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 128], 'flops': 3797092544000}, 'timestamp': '2025-09-10 02:32:05.294480', 'step': 5123, 'epoch': 1} {'type': 'loss', 'content': 0.11060106754302979, 'timestamp': '2025-09-10 02:32:05.320707', 'step': 5124, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 2847885091968}, 'timestamp': '2025-09-10 02:32:05.358340', 'step': 5124, 'epoch': 1} {'type': 'loss', 'content': 0.13921689987182617, 'timestamp': '2025-09-10 02:32:05.360676', 'step': 5125, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 160], 'flops': 4746299996032}, 'timestamp': '2025-09-10 02:32:05.394598', 'step': 5125, 'epoch': 1} {'type': 'loss', 'content': 0.16492943465709686, 'timestamp': '2025-09-10 02:32:05.398561', 'step': 5126, 'epoch': 1} {'type': 'flops', 'content': [{'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1582003754624}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1898404492032}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1898404492032}, {'type': 'perplexity', 'in_batch_dim': [8, 144], 'batch_size': 8, 'flops': 2847606704256}, {'type': 'perplexity', 'in_batch_dim': [8, 64], 'batch_size': 8, 'flops': 1265603017216}, {'type': 'perplexity', 'in_batch_dim': [8, 112], 'batch_size': 8, 'flops': 2214805229440}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1898404492032}, {'type': 'perplexity', 'in_batch_dim': [8, 112], 'batch_size': 8, 'flops': 2214805229440}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1898404492032}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1898404492032}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1898404492032}, {'type': 'perplexity', 'in_batch_dim': [8, 112], 'batch_size': 8, 'flops': 2214805229440}, {'type': 'perplexity', 'in_batch_dim': [8, 112], 'batch_size': 8, 'flops': 2214805229440}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1582003754624}, {'type': 'perplexity', 'in_batch_dim': [8, 144], 'batch_size': 8, 'flops': 2847606704256}, {'type': 'perplexity', 'in_batch_dim': [8, 112], 'batch_size': 8, 'flops': 2214805229440}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1582003754624}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1898404492032}, {'type': 'perplexity', 'in_batch_dim': [8, 64], 'batch_size': 8, 'flops': 1265603017216}, {'type': 'perplexity', 'in_batch_dim': [8, 64], 'batch_size': 8, 'flops': 1265603017216}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1898404492032}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1582003754624}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1582003754624}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1582003754624}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1582003754624}, {'type': 'perplexity', 'in_batch_dim': [8, 64], 'batch_size': 8, 'flops': 1265603017216}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1898404492032}, {'type': 'perplexity', 'in_batch_dim': [8, 64], 'batch_size': 8, 'flops': 1265603017216}, {'type': 'perplexity', 'in_batch_dim': [8, 64], 'batch_size': 8, 'flops': 1265603017216}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1582003754624}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1582003754624}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1582003754624}, {'type': 'perplexity', 'in_batch_dim': [8, 112], 'batch_size': 8, 'flops': 2214805229440}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1582003754624}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1582003754624}, {'type': 'perplexity', 'in_batch_dim': [8, 160], 'batch_size': 8, 'flops': 3164007441664}, {'type': 'perplexity', 'in_batch_dim': [8, 64], 'batch_size': 8, 'flops': 1265603017216}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1898404492032}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1898404492032}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1898404492032}, {'type': 'perplexity', 'in_batch_dim': [8, 64], 'batch_size': 8, 'flops': 1265603017216}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1582003754624}, {'type': 'perplexity', 'in_batch_dim': [8, 64], 'batch_size': 8, 'flops': 1265603017216}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1582003754624}, {'type': 'perplexity', 'in_batch_dim': [8, 64], 'batch_size': 8, 'flops': 1265603017216}, {'type': 'perplexity', 'in_batch_dim': [8, 112], 'batch_size': 8, 'flops': 2214805229440}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1582003754624}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1898404492032}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1898404492032}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1898404492032}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1898404492032}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1898404492032}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1898404492032}, {'type': 'perplexity', 'in_batch_dim': [8, 64], 'batch_size': 8, 'flops': 1265603017216}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1898404492032}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1582003754624}, {'type': 'perplexity', 'in_batch_dim': [8, 64], 'batch_size': 8, 'flops': 1265603017216}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1898404492032}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1898404492032}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1898404492032}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1898404492032}, {'type': 'perplexity', 'in_batch_dim': [8, 128], 'batch_size': 8, 'flops': 2531205966848}, {'type': 'perplexity', 'in_batch_dim': [8, 112], 'batch_size': 8, 'flops': 2214805229440}, {'type': 'perplexity', 'in_batch_dim': [8, 64], 'batch_size': 8, 'flops': 1265603017216}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1582003754624}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1898404492032}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1582003754624}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1898404492032}, {'type': 'perplexity', 'in_batch_dim': [8, 64], 'batch_size': 8, 'flops': 1265603017216}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1898404492032}, {'type': 'perplexity', 'in_batch_dim': [8, 112], 'batch_size': 8, 'flops': 2214805229440}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1898404492032}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1582003754624}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1582003754624}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1582003754624}, {'type': 'perplexity', 'in_batch_dim': [8, 64], 'batch_size': 8, 'flops': 1265603017216}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1582003754624}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1898404492032}, {'type': 'perplexity', 'in_batch_dim': [8, 64], 'batch_size': 8, 'flops': 1265603017216}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1582003754624}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1898404492032}, {'type': 'perplexity', 'in_batch_dim': [8, 64], 'batch_size': 8, 'flops': 1265603017216}, {'type': 'perplexity', 'in_batch_dim': [8, 112], 'batch_size': 8, 'flops': 2214805229440}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1582003754624}, {'type': 'perplexity', 'in_batch_dim': [8, 144], 'batch_size': 8, 'flops': 2847606704256}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1582003754624}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1582003754624}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1582003754624}, {'type': 'perplexity', 'in_batch_dim': [8, 64], 'batch_size': 8, 'flops': 1265603017216}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1898404492032}, {'type': 'perplexity', 'in_batch_dim': [8, 112], 'batch_size': 8, 'flops': 2214805229440}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1898404492032}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1582003754624}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1582003754624}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1582003754624}, {'type': 'perplexity', 'in_batch_dim': [8, 112], 'batch_size': 8, 'flops': 2214805229440}, {'type': 'perplexity', 'in_batch_dim': [8, 64], 'batch_size': 8, 'flops': 1265603017216}, {'type': 'perplexity', 'in_batch_dim': [8, 112], 'batch_size': 8, 'flops': 2214805229440}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1582003754624}, {'type': 'perplexity', 'in_batch_dim': [8, 64], 'batch_size': 8, 'flops': 1265603017216}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1582003754624}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1582003754624}, {'type': 'perplexity', 'in_batch_dim': [8, 64], 'batch_size': 8, 'flops': 1265603017216}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1582003754624}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1582003754624}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1582003754624}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1898404492032}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1582003754624}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1582003754624}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1898404492032}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1898404492032}, {'type': 'perplexity', 'in_batch_dim': [8, 112], 'batch_size': 8, 'flops': 2214805229440}, {'type': 'perplexity', 'in_batch_dim': [8, 64], 'batch_size': 8, 'flops': 1265603017216}, {'type': 'perplexity', 'in_batch_dim': [8, 112], 'batch_size': 8, 'flops': 2214805229440}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1898404492032}, {'type': 'perplexity', 'in_batch_dim': [8, 112], 'batch_size': 8, 'flops': 2214805229440}, {'type': 'perplexity', 'in_batch_dim': [8, 128], 'batch_size': 8, 'flops': 2531205966848}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1582003754624}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1898404492032}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1898404492032}, {'type': 'perplexity', 'in_batch_dim': [8, 192], 'batch_size': 8, 'flops': 3796808916480}, {'type': 'perplexity', 'in_batch_dim': [8, 64], 'batch_size': 8, 'flops': 1265603017216}, {'type': 'perplexity', 'in_batch_dim': [8, 144], 'batch_size': 8, 'flops': 2847606704256}, {'type': 'perplexity', 'in_batch_dim': [8, 64], 'batch_size': 8, 'flops': 1265603017216}, {'type': 'perplexity', 'in_batch_dim': [8, 144], 'batch_size': 8, 'flops': 2847606704256}, {'type': 'perplexity', 'in_batch_dim': [8, 64], 'batch_size': 8, 'flops': 1265603017216}, {'type': 'perplexity', 'in_batch_dim': [8, 64], 'batch_size': 8, 'flops': 1265603017216}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1898404492032}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1898404492032}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1582003754624}, {'type': 'perplexity', 'in_batch_dim': [8, 128], 'batch_size': 8, 'flops': 2531205966848}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1898404492032}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1898404492032}, {'type': 'perplexity', 'in_batch_dim': [8, 112], 'batch_size': 8, 'flops': 2214805229440}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1582003754624}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1582003754624}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1898404492032}, {'type': 'perplexity', 'in_batch_dim': [8, 64], 'batch_size': 8, 'flops': 1265603017216}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1898404492032}, {'type': 'perplexity', 'in_batch_dim': [8, 112], 'batch_size': 8, 'flops': 2214805229440}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1898404492032}, {'type': 'perplexity', 'in_batch_dim': [8, 64], 'batch_size': 8, 'flops': 1265603017216}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1582003754624}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1898404492032}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1898404492032}, {'type': 'perplexity', 'in_batch_dim': [8, 64], 'batch_size': 8, 'flops': 1265603017216}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1898404492032}, {'type': 'perplexity', 'in_batch_dim': [8, 64], 'batch_size': 8, 'flops': 1265603017216}, {'type': 'perplexity', 'in_batch_dim': [8, 112], 'batch_size': 8, 'flops': 2214805229440}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1582003754624}, {'type': 'perplexity', 'in_batch_dim': [8, 128], 'batch_size': 8, 'flops': 2531205966848}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1582003754624}, {'type': 'perplexity', 'in_batch_dim': [8, 112], 'batch_size': 8, 'flops': 2214805229440}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1898404492032}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1582003754624}, {'type': 'perplexity', 'in_batch_dim': [8, 112], 'batch_size': 8, 'flops': 2214805229440}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1582003754624}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1898404492032}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1898404492032}, {'type': 'perplexity', 'in_batch_dim': [8, 128], 'batch_size': 8, 'flops': 2531205966848}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1898404492032}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1898404492032}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1582003754624}, {'type': 'perplexity', 'in_batch_dim': [8, 112], 'batch_size': 8, 'flops': 2214805229440}, {'type': 'perplexity', 'in_batch_dim': [8, 128], 'batch_size': 8, 'flops': 2531205966848}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1898404492032}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1582003754624}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1582003754624}, {'type': 'perplexity', 'in_batch_dim': [8, 128], 'batch_size': 8, 'flops': 2531205966848}, {'type': 'perplexity', 'in_batch_dim': [8, 112], 'batch_size': 8, 'flops': 2214805229440}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1582003754624}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1582003754624}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1898404492032}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1582003754624}, {'type': 'perplexity', 'in_batch_dim': [8, 64], 'batch_size': 8, 'flops': 1265603017216}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1582003754624}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1582003754624}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1582003754624}, {'type': 'perplexity', 'in_batch_dim': [8, 112], 'batch_size': 8, 'flops': 2214805229440}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1582003754624}, {'type': 'perplexity', 'in_batch_dim': [8, 64], 'batch_size': 8, 'flops': 1265603017216}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1582003754624}, {'type': 'perplexity', 'in_batch_dim': [8, 128], 'batch_size': 8, 'flops': 2531205966848}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1898404492032}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1898404492032}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1582003754624}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1898404492032}, {'type': 'perplexity', 'in_batch_dim': [8, 64], 'batch_size': 8, 'flops': 1265603017216}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1898404492032}, {'type': 'perplexity', 'in_batch_dim': [8, 112], 'batch_size': 8, 'flops': 2214805229440}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1898404492032}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1582003754624}, {'type': 'perplexity', 'in_batch_dim': [8, 112], 'batch_size': 8, 'flops': 2214805229440}, {'type': 'perplexity', 'in_batch_dim': [8, 112], 'batch_size': 8, 'flops': 2214805229440}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1898404492032}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1898404492032}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1898404492032}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1898404492032}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1582003754624}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1582003754624}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1582003754624}, {'type': 'perplexity', 'in_batch_dim': [8, 112], 'batch_size': 8, 'flops': 2214805229440}, {'type': 'perplexity', 'in_batch_dim': [8, 64], 'batch_size': 8, 'flops': 1265603017216}, {'type': 'perplexity', 'in_batch_dim': [8, 144], 'batch_size': 8, 'flops': 2847606704256}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1898404492032}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1898404492032}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1898404492032}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1898404492032}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1898404492032}, {'type': 'perplexity', 'in_batch_dim': [8, 112], 'batch_size': 8, 'flops': 2214805229440}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1582003754624}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1582003754624}, {'type': 'perplexity', 'in_batch_dim': [8, 64], 'batch_size': 8, 'flops': 1265603017216}, {'type': 'perplexity', 'in_batch_dim': [8, 112], 'batch_size': 8, 'flops': 2214805229440}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1582003754624}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1898404492032}, {'type': 'perplexity', 'in_batch_dim': [8, 64], 'batch_size': 8, 'flops': 1265603017216}, {'type': 'perplexity', 'in_batch_dim': [8, 112], 'batch_size': 8, 'flops': 2214805229440}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1898404492032}, {'type': 'perplexity', 'in_batch_dim': [8, 64], 'batch_size': 8, 'flops': 1265603017216}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1582003754624}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1582003754624}, {'type': 'perplexity', 'in_batch_dim': [8, 64], 'batch_size': 8, 'flops': 1265603017216}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1898404492032}, {'type': 'perplexity', 'in_batch_dim': [8, 64], 'batch_size': 8, 'flops': 1265603017216}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1582003754624}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1898404492032}, {'type': 'perplexity', 'in_batch_dim': [8, 112], 'batch_size': 8, 'flops': 2214805229440}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1582003754624}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1898404492032}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1582003754624}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1898404492032}, {'type': 'perplexity', 'in_batch_dim': [8, 112], 'batch_size': 8, 'flops': 2214805229440}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1898404492032}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1898404492032}, {'type': 'perplexity', 'in_batch_dim': [8, 64], 'batch_size': 8, 'flops': 1265603017216}, {'type': 'perplexity', 'in_batch_dim': [8, 64], 'batch_size': 8, 'flops': 1265603017216}, {'type': 'perplexity', 'in_batch_dim': [8, 128], 'batch_size': 8, 'flops': 2531205966848}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1582003754624}, {'type': 'perplexity', 'in_batch_dim': [8, 64], 'batch_size': 8, 'flops': 1265603017216}, {'type': 'perplexity', 'in_batch_dim': [8, 112], 'batch_size': 8, 'flops': 2214805229440}, {'type': 'perplexity', 'in_batch_dim': [8, 112], 'batch_size': 8, 'flops': 2214805229440}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1898404492032}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1582003754624}, {'type': 'perplexity', 'in_batch_dim': [8, 128], 'batch_size': 8, 'flops': 2531205966848}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1898404492032}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1582003754624}, {'type': 'perplexity', 'in_batch_dim': [8, 112], 'batch_size': 8, 'flops': 2214805229440}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1898404492032}, {'type': 'perplexity', 'in_batch_dim': [8, 64], 'batch_size': 8, 'flops': 1265603017216}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1582003754624}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1898404492032}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1898404492032}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1582003754624}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1582003754624}, {'type': 'perplexity', 'in_batch_dim': [8, 64], 'batch_size': 8, 'flops': 1265603017216}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1898404492032}, {'type': 'perplexity', 'in_batch_dim': [8, 112], 'batch_size': 8, 'flops': 2214805229440}, {'type': 'perplexity', 'in_batch_dim': [8, 64], 'batch_size': 8, 'flops': 1265603017216}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1898404492032}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1582003754624}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1582003754624}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1898404492032}, {'type': 'perplexity', 'in_batch_dim': [8, 112], 'batch_size': 8, 'flops': 2214805229440}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1898404492032}, {'type': 'perplexity', 'in_batch_dim': [8, 112], 'batch_size': 8, 'flops': 2214805229440}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1582003754624}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1582003754624}, {'type': 'perplexity', 'in_batch_dim': [8, 64], 'batch_size': 8, 'flops': 1265603017216}, {'type': 'perplexity', 'in_batch_dim': [8, 112], 'batch_size': 8, 'flops': 2214805229440}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1582003754624}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1898404492032}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1582003754624}, {'type': 'perplexity', 'in_batch_dim': [8, 112], 'batch_size': 8, 'flops': 2214805229440}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1898404492032}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1898404492032}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1582003754624}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1898404492032}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1582003754624}, {'type': 'perplexity', 'in_batch_dim': [8, 112], 'batch_size': 8, 'flops': 2214805229440}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1582003754624}, {'type': 'perplexity', 'in_batch_dim': [8, 64], 'batch_size': 8, 'flops': 1265603017216}, {'type': 'perplexity', 'in_batch_dim': [8, 64], 'batch_size': 8, 'flops': 1265603017216}, {'type': 'perplexity', 'in_batch_dim': [8, 48], 'batch_size': 8, 'flops': 949202279808}, {'type': 'perplexity', 'in_batch_dim': [8, 112], 'batch_size': 8, 'flops': 2214805229440}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1582003754624}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1898404492032}, {'type': 'perplexity', 'in_batch_dim': [8, 128], 'batch_size': 8, 'flops': 2531205966848}, {'type': 'perplexity', 'in_batch_dim': [8, 112], 'batch_size': 8, 'flops': 2214805229440}, {'type': 'perplexity', 'in_batch_dim': [8, 128], 'batch_size': 8, 'flops': 2531205966848}, {'type': 'perplexity', 'in_batch_dim': [8, 112], 'batch_size': 8, 'flops': 2214805229440}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1582003754624}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1898404492032}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1582003754624}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1582003754624}, {'type': 'perplexity', 'in_batch_dim': [8, 112], 'batch_size': 8, 'flops': 2214805229440}, {'type': 'perplexity', 'in_batch_dim': [8, 112], 'batch_size': 8, 'flops': 2214805229440}, {'type': 'perplexity', 'in_batch_dim': [8, 64], 'batch_size': 8, 'flops': 1265603017216}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1898404492032}, {'type': 'perplexity', 'in_batch_dim': [8, 112], 'batch_size': 8, 'flops': 2214805229440}, {'type': 'perplexity', 'in_batch_dim': [8, 64], 'batch_size': 8, 'flops': 1265603017216}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1898404492032}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1582003754624}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1898404492032}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1898404492032}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1898404492032}, {'type': 'perplexity', 'in_batch_dim': [8, 48], 'batch_size': 8, 'flops': 949202279808}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1898404492032}, {'type': 'perplexity', 'in_batch_dim': [8, 112], 'batch_size': 8, 'flops': 2214805229440}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1582003754624}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1898404492032}, {'type': 'perplexity', 'in_batch_dim': [8, 64], 'batch_size': 8, 'flops': 1265603017216}, {'type': 'perplexity', 'in_batch_dim': [8, 112], 'batch_size': 8, 'flops': 2214805229440}, {'type': 'perplexity', 'in_batch_dim': [8, 64], 'batch_size': 8, 'flops': 1265603017216}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1582003754624}, {'type': 'perplexity', 'in_batch_dim': [8, 64], 'batch_size': 8, 'flops': 1265603017216}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1582003754624}, {'type': 'perplexity', 'in_batch_dim': [8, 112], 'batch_size': 8, 'flops': 2214805229440}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1582003754624}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1582003754624}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1582003754624}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1582003754624}, {'type': 'perplexity', 'in_batch_dim': [8, 112], 'batch_size': 8, 'flops': 2214805229440}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1582003754624}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1898404492032}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1898404492032}, {'type': 'perplexity', 'in_batch_dim': [8, 112], 'batch_size': 8, 'flops': 2214805229440}, {'type': 'perplexity', 'in_batch_dim': [8, 144], 'batch_size': 8, 'flops': 2847606704256}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1582003754624}, {'type': 'perplexity', 'in_batch_dim': [8, 64], 'batch_size': 8, 'flops': 1265603017216}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1898404492032}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1898404492032}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1582003754624}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1898404492032}, {'type': 'perplexity', 'in_batch_dim': [8, 128], 'batch_size': 8, 'flops': 2531205966848}, {'type': 'perplexity', 'in_batch_dim': [8, 112], 'batch_size': 8, 'flops': 2214805229440}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1898404492032}, {'type': 'perplexity', 'in_batch_dim': [8, 64], 'batch_size': 8, 'flops': 1265603017216}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1898404492032}, {'type': 'perplexity', 'in_batch_dim': [8, 160], 'batch_size': 8, 'flops': 3164007441664}, {'type': 'perplexity', 'in_batch_dim': [8, 64], 'batch_size': 8, 'flops': 1265603017216}, {'type': 'perplexity', 'in_batch_dim': [8, 112], 'batch_size': 8, 'flops': 2214805229440}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1898404492032}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1898404492032}, {'type': 'perplexity', 'in_batch_dim': [8, 144], 'batch_size': 8, 'flops': 2847606704256}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1582003754624}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1898404492032}, {'type': 'perplexity', 'in_batch_dim': [8, 64], 'batch_size': 8, 'flops': 1265603017216}, {'type': 'perplexity', 'in_batch_dim': [8, 64], 'batch_size': 8, 'flops': 1265603017216}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1898404492032}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1898404492032}, {'type': 'perplexity', 'in_batch_dim': [8, 112], 'batch_size': 8, 'flops': 2214805229440}, {'type': 'perplexity', 'in_batch_dim': [8, 112], 'batch_size': 8, 'flops': 2214805229440}, {'type': 'perplexity', 'in_batch_dim': [8, 128], 'batch_size': 8, 'flops': 2531205966848}, {'type': 'perplexity', 'in_batch_dim': [8, 112], 'batch_size': 8, 'flops': 2214805229440}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1582003754624}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1582003754624}, {'type': 'perplexity', 'in_batch_dim': [8, 112], 'batch_size': 8, 'flops': 2214805229440}, {'type': 'perplexity', 'in_batch_dim': [8, 128], 'batch_size': 8, 'flops': 2531205966848}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1582003754624}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1582003754624}, {'type': 'perplexity', 'in_batch_dim': [8, 64], 'batch_size': 8, 'flops': 1265603017216}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1898404492032}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1582003754624}, {'type': 'perplexity', 'in_batch_dim': [8, 112], 'batch_size': 8, 'flops': 2214805229440}, {'type': 'perplexity', 'in_batch_dim': [8, 64], 'batch_size': 8, 'flops': 1265603017216}, {'type': 'perplexity', 'in_batch_dim': [8, 64], 'batch_size': 8, 'flops': 1265603017216}, {'type': 'perplexity', 'in_batch_dim': [8, 128], 'batch_size': 8, 'flops': 2531205966848}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1898404492032}, {'type': 'perplexity', 'in_batch_dim': [8, 112], 'batch_size': 8, 'flops': 2214805229440}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1582003754624}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1582003754624}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1582003754624}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1582003754624}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1582003754624}, {'type': 'perplexity', 'in_batch_dim': [8, 112], 'batch_size': 8, 'flops': 2214805229440}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1898404492032}, {'type': 'perplexity', 'in_batch_dim': [8, 112], 'batch_size': 8, 'flops': 2214805229440}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1898404492032}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1582003754624}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1582003754624}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1898404492032}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1898404492032}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1582003754624}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1582003754624}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1582003754624}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1582003754624}, {'type': 'perplexity', 'in_batch_dim': [8, 48], 'batch_size': 8, 'flops': 949202279808}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1582003754624}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1898404492032}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1582003754624}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1898404492032}, {'type': 'perplexity', 'in_batch_dim': [8, 112], 'batch_size': 8, 'flops': 2214805229440}, {'type': 'perplexity', 'in_batch_dim': [8, 128], 'batch_size': 8, 'flops': 2531205966848}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1898404492032}, {'type': 'perplexity', 'in_batch_dim': [8, 64], 'batch_size': 8, 'flops': 1265603017216}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1898404492032}, {'type': 'perplexity', 'in_batch_dim': [8, 64], 'batch_size': 8, 'flops': 1265603017216}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1582003754624}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1582003754624}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1582003754624}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1582003754624}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1898404492032}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1582003754624}, {'type': 'perplexity', 'in_batch_dim': [8, 112], 'batch_size': 8, 'flops': 2214805229440}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1898404492032}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1582003754624}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1898404492032}, {'type': 'perplexity', 'in_batch_dim': [8, 64], 'batch_size': 8, 'flops': 1265603017216}, {'type': 'perplexity', 'in_batch_dim': [8, 64], 'batch_size': 8, 'flops': 1265603017216}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1898404492032}, {'type': 'perplexity', 'in_batch_dim': [8, 64], 'batch_size': 8, 'flops': 1265603017216}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1582003754624}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1582003754624}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1582003754624}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1582003754624}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1582003754624}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1582003754624}, {'type': 'perplexity', 'in_batch_dim': [8, 64], 'batch_size': 8, 'flops': 1265603017216}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1582003754624}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1582003754624}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1898404492032}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1898404492032}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1898404492032}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1898404492032}, {'type': 'perplexity', 'in_batch_dim': [8, 128], 'batch_size': 8, 'flops': 2531205966848}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1582003754624}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1898404492032}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1898404492032}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1898404492032}, {'type': 'perplexity', 'in_batch_dim': [8, 64], 'batch_size': 8, 'flops': 1265603017216}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1898404492032}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1898404492032}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1582003754624}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1898404492032}, {'type': 'perplexity', 'in_batch_dim': [8, 112], 'batch_size': 8, 'flops': 2214805229440}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1898404492032}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1582003754624}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1898404492032}, {'type': 'perplexity', 'in_batch_dim': [8, 64], 'batch_size': 8, 'flops': 1265603017216}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1898404492032}, {'type': 'perplexity', 'in_batch_dim': [8, 112], 'batch_size': 8, 'flops': 2214805229440}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1898404492032}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1898404492032}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1582003754624}, {'type': 'perplexity', 'in_batch_dim': [8, 64], 'batch_size': 8, 'flops': 1265603017216}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1582003754624}, {'type': 'perplexity', 'in_batch_dim': [8, 128], 'batch_size': 8, 'flops': 2531205966848}, {'type': 'perplexity', 'in_batch_dim': [8, 64], 'batch_size': 8, 'flops': 1265603017216}, {'type': 'perplexity', 'in_batch_dim': [8, 128], 'batch_size': 8, 'flops': 2531205966848}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1898404492032}, {'type': 'perplexity', 'in_batch_dim': [8, 112], 'batch_size': 8, 'flops': 2214805229440}, {'type': 'perplexity', 'in_batch_dim': [8, 112], 'batch_size': 8, 'flops': 2214805229440}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1898404492032}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1898404492032}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1582003754624}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1582003754624}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1582003754624}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1898404492032}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1582003754624}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1582003754624}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1898404492032}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1582003754624}, {'type': 'perplexity', 'in_batch_dim': [8, 64], 'batch_size': 8, 'flops': 1265603017216}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1898404492032}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1898404492032}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1898404492032}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1898404492032}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1582003754624}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1898404492032}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1582003754624}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1582003754624}, {'type': 'perplexity', 'in_batch_dim': [8, 112], 'batch_size': 8, 'flops': 2214805229440}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1582003754624}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1898404492032}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1582003754624}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1582003754624}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1582003754624}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1898404492032}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1582003754624}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1898404492032}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1582003754624}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1582003754624}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1898404492032}, {'type': 'perplexity', 'in_batch_dim': [8, 112], 'batch_size': 8, 'flops': 2214805229440}, {'type': 'perplexity', 'in_batch_dim': [8, 128], 'batch_size': 8, 'flops': 2531205966848}, {'type': 'perplexity', 'in_batch_dim': [8, 112], 'batch_size': 8, 'flops': 2214805229440}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1582003754624}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1898404492032}, {'type': 'perplexity', 'in_batch_dim': [8, 64], 'batch_size': 8, 'flops': 1265603017216}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1898404492032}, {'type': 'perplexity', 'in_batch_dim': [8, 64], 'batch_size': 8, 'flops': 1265603017216}, {'type': 'perplexity', 'in_batch_dim': [8, 144], 'batch_size': 8, 'flops': 2847606704256}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1582003754624}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1898404492032}, {'type': 'perplexity', 'in_batch_dim': [8, 112], 'batch_size': 8, 'flops': 2214805229440}, {'type': 'perplexity', 'in_batch_dim': [8, 144], 'batch_size': 8, 'flops': 2847606704256}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1582003754624}, {'type': 'perplexity', 'in_batch_dim': [8, 112], 'batch_size': 8, 'flops': 2214805229440}, {'type': 'perplexity', 'in_batch_dim': [8, 64], 'batch_size': 8, 'flops': 1265603017216}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1582003754624}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1898404492032}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1898404492032}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1582003754624}, {'type': 'perplexity', 'in_batch_dim': [8, 112], 'batch_size': 8, 'flops': 2214805229440}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1582003754624}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1582003754624}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1582003754624}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1582003754624}, {'type': 'perplexity', 'in_batch_dim': [8, 112], 'batch_size': 8, 'flops': 2214805229440}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1898404492032}, {'type': 'perplexity', 'in_batch_dim': [8, 128], 'batch_size': 8, 'flops': 2531205966848}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1582003754624}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1582003754624}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1898404492032}, {'type': 'perplexity', 'in_batch_dim': [8, 64], 'batch_size': 8, 'flops': 1265603017216}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1898404492032}, {'type': 'perplexity', 'in_batch_dim': [8, 64], 'batch_size': 8, 'flops': 1265603017216}, {'type': 'perplexity', 'in_batch_dim': [8, 112], 'batch_size': 8, 'flops': 2214805229440}, {'type': 'perplexity', 'in_batch_dim': [8, 112], 'batch_size': 8, 'flops': 2214805229440}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1582003754624}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1898404492032}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1582003754624}, {'type': 'perplexity', 'in_batch_dim': [8, 64], 'batch_size': 8, 'flops': 1265603017216}, {'type': 'perplexity', 'in_batch_dim': [8, 64], 'batch_size': 8, 'flops': 1265603017216}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1898404492032}, {'type': 'perplexity', 'in_batch_dim': [8, 64], 'batch_size': 8, 'flops': 1265603017216}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1898404492032}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1898404492032}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1582003754624}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1898404492032}, {'type': 'perplexity', 'in_batch_dim': [8, 144], 'batch_size': 8, 'flops': 2847606704256}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1898404492032}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1582003754624}, {'type': 'perplexity', 'in_batch_dim': [8, 128], 'batch_size': 8, 'flops': 2531205966848}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1898404492032}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1898404492032}, {'type': 'perplexity', 'in_batch_dim': [8, 112], 'batch_size': 8, 'flops': 2214805229440}, {'type': 'perplexity', 'in_batch_dim': [8, 112], 'batch_size': 8, 'flops': 2214805229440}, {'type': 'perplexity', 'in_batch_dim': [8, 64], 'batch_size': 8, 'flops': 1265603017216}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1582003754624}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1898404492032}, {'type': 'perplexity', 'in_batch_dim': [8, 128], 'batch_size': 8, 'flops': 2531205966848}, {'type': 'perplexity', 'in_batch_dim': [8, 144], 'batch_size': 8, 'flops': 2847606704256}, {'type': 'perplexity', 'in_batch_dim': [8, 112], 'batch_size': 8, 'flops': 2214805229440}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1582003754624}, {'type': 'perplexity', 'in_batch_dim': [8, 64], 'batch_size': 8, 'flops': 1265603017216}, {'type': 'perplexity', 'in_batch_dim': [8, 64], 'batch_size': 8, 'flops': 1265603017216}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1898404492032}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1582003754624}, {'type': 'perplexity', 'in_batch_dim': [8, 64], 'batch_size': 8, 'flops': 1265603017216}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1898404492032}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1582003754624}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1898404492032}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1898404492032}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1582003754624}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1582003754624}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1582003754624}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1582003754624}, {'type': 'perplexity', 'in_batch_dim': [3, 48], 'batch_size': 8, 'flops': 949202279808}], 'timestamp': '2025-09-10 02:32:13.249709', 'step': 5126, 'epoch': 1} {'type': 'pplx', 'content': 9649.34808932568, 'timestamp': '2025-09-10 02:32:13.254067', 'step': 5126, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 2847885091968}, 'timestamp': '2025-09-10 02:32:13.284274', 'step': 5126, 'epoch': 1} {'type': 'loss', 'content': 0.09727950394153595, 'timestamp': '2025-09-10 02:32:13.286513', 'step': 5127, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 128], 'flops': 3797092544000}, 'timestamp': '2025-09-10 02:32:13.318634', 'step': 5127, 'epoch': 1} {'type': 'loss', 'content': 0.2445407658815384, 'timestamp': '2025-09-10 02:32:13.342267', 'step': 5128, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 128], 'flops': 3797092544000}, 'timestamp': '2025-09-10 02:32:13.373402', 'step': 5128, 'epoch': 1} {'type': 'loss', 'content': 0.11146856844425201, 'timestamp': '2025-09-10 02:32:13.376002', 'step': 5129, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 144], 'flops': 4271696270016}, 'timestamp': '2025-09-10 02:32:13.408033', 'step': 5129, 'epoch': 1} {'type': 'loss', 'content': 0.1268576830625534, 'timestamp': '2025-09-10 02:32:13.410697', 'step': 5130, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 128], 'flops': 3797092544000}, 'timestamp': '2025-09-10 02:32:13.442120', 'step': 5130, 'epoch': 1} {'type': 'loss', 'content': 0.12104804813861847, 'timestamp': '2025-09-10 02:32:13.444277', 'step': 5131, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 128], 'flops': 3797092544000}, 'timestamp': '2025-09-10 02:32:13.474719', 'step': 5131, 'epoch': 1} {'type': 'loss', 'content': 0.11120448261499405, 'timestamp': '2025-09-10 02:32:13.498466', 'step': 5132, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 2847885091968}, 'timestamp': '2025-09-10 02:32:13.529286', 'step': 5132, 'epoch': 1} {'type': 'loss', 'content': 0.17914031445980072, 'timestamp': '2025-09-10 02:32:13.531694', 'step': 5133, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 3322488817984}, 'timestamp': '2025-09-10 02:32:13.562057', 'step': 5133, 'epoch': 1} {'type': 'loss', 'content': 0.121137335896492, 'timestamp': '2025-09-10 02:32:13.564306', 'step': 5134, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 3322488817984}, 'timestamp': '2025-09-10 02:32:13.594951', 'step': 5134, 'epoch': 1} {'type': 'loss', 'content': 0.16194453835487366, 'timestamp': '2025-09-10 02:32:13.597251', 'step': 5135, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 128], 'flops': 3797092544000}, 'timestamp': '2025-09-10 02:32:13.627147', 'step': 5135, 'epoch': 1} {'type': 'loss', 'content': 0.14539781212806702, 'timestamp': '2025-09-10 02:32:13.650712', 'step': 5136, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 128], 'flops': 3797092544000}, 'timestamp': '2025-09-10 02:32:13.681642', 'step': 5136, 'epoch': 1} {'type': 'loss', 'content': 0.12229818850755692, 'timestamp': '2025-09-10 02:32:13.684252', 'step': 5137, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 2847885091968}, 'timestamp': '2025-09-10 02:32:13.717333', 'step': 5137, 'epoch': 1} {'type': 'loss', 'content': 0.24166962504386902, 'timestamp': '2025-09-10 02:32:13.719536', 'step': 5138, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 2847885091968}, 'timestamp': '2025-09-10 02:32:13.749890', 'step': 5138, 'epoch': 1} {'type': 'loss', 'content': 0.20480318367481232, 'timestamp': '2025-09-10 02:32:13.752092', 'step': 5139, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 144], 'flops': 4271696270016}, 'timestamp': '2025-09-10 02:32:13.782883', 'step': 5139, 'epoch': 1} {'type': 'loss', 'content': 0.13366976380348206, 'timestamp': '2025-09-10 02:32:13.806606', 'step': 5140, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 128], 'flops': 3797092544000}, 'timestamp': '2025-09-10 02:32:13.839369', 'step': 5140, 'epoch': 1} {'type': 'loss', 'content': 0.14675191044807434, 'timestamp': '2025-09-10 02:32:13.841981', 'step': 5141, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 3322488817984}, 'timestamp': '2025-09-10 02:32:13.872447', 'step': 5141, 'epoch': 1} {'type': 'loss', 'content': 0.13884396851062775, 'timestamp': '2025-09-10 02:32:13.876178', 'step': 5142, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 3322488817984}, 'timestamp': '2025-09-10 02:32:13.906776', 'step': 5142, 'epoch': 1} {'type': 'loss', 'content': 0.11568156629800797, 'timestamp': '2025-09-10 02:32:13.909073', 'step': 5143, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 128], 'flops': 3797092544000}, 'timestamp': '2025-09-10 02:32:13.939403', 'step': 5143, 'epoch': 1} {'type': 'loss', 'content': 0.14075185358524323, 'timestamp': '2025-09-10 02:32:13.963037', 'step': 5144, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 128], 'flops': 3797092544000}, 'timestamp': '2025-09-10 02:32:13.993371', 'step': 5144, 'epoch': 1} {'type': 'loss', 'content': 0.12235578894615173, 'timestamp': '2025-09-10 02:32:13.995870', 'step': 5145, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 3322488817984}, 'timestamp': '2025-09-10 02:32:14.025512', 'step': 5145, 'epoch': 1} {'type': 'loss', 'content': 0.11531078815460205, 'timestamp': '2025-09-10 02:32:14.027892', 'step': 5146, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 2847885091968}, 'timestamp': '2025-09-10 02:32:14.059187', 'step': 5146, 'epoch': 1} {'type': 'loss', 'content': 0.11459387093782425, 'timestamp': '2025-09-10 02:32:14.061762', 'step': 5147, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 2847885091968}, 'timestamp': '2025-09-10 02:32:14.093366', 'step': 5147, 'epoch': 1} {'type': 'loss', 'content': 0.19357728958129883, 'timestamp': '2025-09-10 02:32:14.117449', 'step': 5148, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 2847885091968}, 'timestamp': '2025-09-10 02:32:14.148654', 'step': 5148, 'epoch': 1} {'type': 'loss', 'content': 0.04012349247932434, 'timestamp': '2025-09-10 02:32:14.150766', 'step': 5149, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 128], 'flops': 3797092544000}, 'timestamp': '2025-09-10 02:32:14.181895', 'step': 5149, 'epoch': 1} {'type': 'loss', 'content': 0.21462564170360565, 'timestamp': '2025-09-10 02:32:14.184097', 'step': 5150, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 80], 'flops': 2373281365952}, 'timestamp': '2025-09-10 02:32:14.214023', 'step': 5150, 'epoch': 1} {'type': 'loss', 'content': 0.0771341472864151, 'timestamp': '2025-09-10 02:32:14.216281', 'step': 5151, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 144], 'flops': 4271696270016}, 'timestamp': '2025-09-10 02:32:14.246309', 'step': 5151, 'epoch': 1} {'type': 'loss', 'content': 0.13944853842258453, 'timestamp': '2025-09-10 02:32:14.270089', 'step': 5152, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 144], 'flops': 4271696270016}, 'timestamp': '2025-09-10 02:32:14.300777', 'step': 5152, 'epoch': 1} {'type': 'loss', 'content': 0.16027745604515076, 'timestamp': '2025-09-10 02:32:14.302768', 'step': 5153, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 2847885091968}, 'timestamp': '2025-09-10 02:32:14.332972', 'step': 5153, 'epoch': 1} {'type': 'loss', 'content': 0.15003876388072968, 'timestamp': '2025-09-10 02:32:14.334967', 'step': 5154, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 80], 'flops': 2373281365952}, 'timestamp': '2025-09-10 02:32:14.365429', 'step': 5154, 'epoch': 1} {'type': 'loss', 'content': 0.1458250880241394, 'timestamp': '2025-09-10 02:32:14.367637', 'step': 5155, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 3322488817984}, 'timestamp': '2025-09-10 02:32:14.398787', 'step': 5155, 'epoch': 1} {'type': 'loss', 'content': 0.10568998754024506, 'timestamp': '2025-09-10 02:32:14.422376', 'step': 5156, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 128], 'flops': 3797092544000}, 'timestamp': '2025-09-10 02:32:14.452583', 'step': 5156, 'epoch': 1} {'type': 'loss', 'content': 0.12075581401586533, 'timestamp': '2025-09-10 02:32:14.454973', 'step': 5157, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 128], 'flops': 3797092544000}, 'timestamp': '2025-09-10 02:32:14.485086', 'step': 5157, 'epoch': 1} {'type': 'loss', 'content': 0.13451777398586273, 'timestamp': '2025-09-10 02:32:14.487481', 'step': 5158, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 128], 'flops': 3797092544000}, 'timestamp': '2025-09-10 02:32:14.518413', 'step': 5158, 'epoch': 1} {'type': 'loss', 'content': 0.14242202043533325, 'timestamp': '2025-09-10 02:32:14.520625', 'step': 5159, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 128], 'flops': 3797092544000}, 'timestamp': '2025-09-10 02:32:14.550782', 'step': 5159, 'epoch': 1} {'type': 'loss', 'content': 0.1501256376504898, 'timestamp': '2025-09-10 02:32:14.574638', 'step': 5160, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 128], 'flops': 3797092544000}, 'timestamp': '2025-09-10 02:32:14.605322', 'step': 5160, 'epoch': 1} {'type': 'loss', 'content': 0.11545155942440033, 'timestamp': '2025-09-10 02:32:14.607854', 'step': 5161, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 128], 'flops': 3797092544000}, 'timestamp': '2025-09-10 02:32:14.638007', 'step': 5161, 'epoch': 1} {'type': 'loss', 'content': 0.18217206001281738, 'timestamp': '2025-09-10 02:32:14.640499', 'step': 5162, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 144], 'flops': 4271696270016}, 'timestamp': '2025-09-10 02:32:14.670953', 'step': 5162, 'epoch': 1} {'type': 'loss', 'content': 0.22904615104198456, 'timestamp': '2025-09-10 02:32:14.673375', 'step': 5163, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 2847885091968}, 'timestamp': '2025-09-10 02:32:14.704389', 'step': 5163, 'epoch': 1} {'type': 'loss', 'content': 0.1743173897266388, 'timestamp': '2025-09-10 02:32:14.727719', 'step': 5164, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 80], 'flops': 2373281365952}, 'timestamp': '2025-09-10 02:32:14.758021', 'step': 5164, 'epoch': 1} {'type': 'loss', 'content': 0.17358098924160004, 'timestamp': '2025-09-10 02:32:14.760235', 'step': 5165, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 3322488817984}, 'timestamp': '2025-09-10 02:32:14.791133', 'step': 5165, 'epoch': 1} {'type': 'loss', 'content': 0.11654432862997055, 'timestamp': '2025-09-10 02:32:14.793678', 'step': 5166, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 128], 'flops': 3797092544000}, 'timestamp': '2025-09-10 02:32:14.827027', 'step': 5166, 'epoch': 1} {'type': 'loss', 'content': 0.19684287905693054, 'timestamp': '2025-09-10 02:32:14.829068', 'step': 5167, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 2847885091968}, 'timestamp': '2025-09-10 02:32:14.859549', 'step': 5167, 'epoch': 1} {'type': 'loss', 'content': 0.09949494898319244, 'timestamp': '2025-09-10 02:32:14.882850', 'step': 5168, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 80], 'flops': 2373281365952}, 'timestamp': '2025-09-10 02:32:14.913726', 'step': 5168, 'epoch': 1} {'type': 'loss', 'content': 0.23260006308555603, 'timestamp': '2025-09-10 02:32:14.917298', 'step': 5169, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 2847885091968}, 'timestamp': '2025-09-10 02:32:14.949521', 'step': 5169, 'epoch': 1} {'type': 'loss', 'content': 0.19271469116210938, 'timestamp': '2025-09-10 02:32:14.951696', 'step': 5170, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 128], 'flops': 3797092544000}, 'timestamp': '2025-09-10 02:32:14.982662', 'step': 5170, 'epoch': 1} {'type': 'loss', 'content': 0.206914022564888, 'timestamp': '2025-09-10 02:32:14.984734', 'step': 5171, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 3322488817984}, 'timestamp': '2025-09-10 02:32:15.015549', 'step': 5171, 'epoch': 1} {'type': 'loss', 'content': 0.1729174107313156, 'timestamp': '2025-09-10 02:32:15.039128', 'step': 5172, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 2847885091968}, 'timestamp': '2025-09-10 02:32:15.070760', 'step': 5172, 'epoch': 1} {'type': 'loss', 'content': 0.12033922225236893, 'timestamp': '2025-09-10 02:32:15.072727', 'step': 5173, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 3322488817984}, 'timestamp': '2025-09-10 02:32:15.105023', 'step': 5173, 'epoch': 1} {'type': 'loss', 'content': 0.14470821619033813, 'timestamp': '2025-09-10 02:32:15.107194', 'step': 5174, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 160], 'flops': 4746299996032}, 'timestamp': '2025-09-10 02:32:15.137212', 'step': 5174, 'epoch': 1} {'type': 'loss', 'content': 0.19304406642913818, 'timestamp': '2025-09-10 02:32:15.139994', 'step': 5175, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 3322488817984}, 'timestamp': '2025-09-10 02:32:15.170381', 'step': 5175, 'epoch': 1} {'type': 'loss', 'content': 0.11485455930233002, 'timestamp': '2025-09-10 02:32:15.193866', 'step': 5176, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 192], 'flops': 5695507448064}, 'timestamp': '2025-09-10 02:32:15.224908', 'step': 5176, 'epoch': 1} {'type': 'loss', 'content': 0.13258449733257294, 'timestamp': '2025-09-10 02:32:15.227126', 'step': 5177, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 80], 'flops': 2373281365952}, 'timestamp': '2025-09-10 02:32:15.257308', 'step': 5177, 'epoch': 1} {'type': 'loss', 'content': 0.1366487443447113, 'timestamp': '2025-09-10 02:32:15.259858', 'step': 5178, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 128], 'flops': 3797092544000}, 'timestamp': '2025-09-10 02:32:15.289747', 'step': 5178, 'epoch': 1} {'type': 'loss', 'content': 0.14449551701545715, 'timestamp': '2025-09-10 02:32:15.292322', 'step': 5179, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 128], 'flops': 3797092544000}, 'timestamp': '2025-09-10 02:32:15.322441', 'step': 5179, 'epoch': 1} {'type': 'loss', 'content': 0.2202267050743103, 'timestamp': '2025-09-10 02:32:15.345822', 'step': 5180, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 3322488817984}, 'timestamp': '2025-09-10 02:32:15.377251', 'step': 5180, 'epoch': 1} {'type': 'loss', 'content': 0.17583630979061127, 'timestamp': '2025-09-10 02:32:15.379576', 'step': 5181, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 128], 'flops': 3797092544000}, 'timestamp': '2025-09-10 02:32:15.409946', 'step': 5181, 'epoch': 1} {'type': 'loss', 'content': 0.16166500747203827, 'timestamp': '2025-09-10 02:32:15.412160', 'step': 5182, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 3322488817984}, 'timestamp': '2025-09-10 02:32:15.442296', 'step': 5182, 'epoch': 1} {'type': 'loss', 'content': 0.15089352428913116, 'timestamp': '2025-09-10 02:32:15.444265', 'step': 5183, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 3322488817984}, 'timestamp': '2025-09-10 02:32:15.474184', 'step': 5183, 'epoch': 1} {'type': 'loss', 'content': 0.19105961918830872, 'timestamp': '2025-09-10 02:32:15.497709', 'step': 5184, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 2847885091968}, 'timestamp': '2025-09-10 02:32:15.528135', 'step': 5184, 'epoch': 1} {'type': 'loss', 'content': 0.21256987750530243, 'timestamp': '2025-09-10 02:32:15.531177', 'step': 5185, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 3322488817984}, 'timestamp': '2025-09-10 02:32:15.564229', 'step': 5185, 'epoch': 1} {'type': 'loss', 'content': 0.1768798977136612, 'timestamp': '2025-09-10 02:32:15.566408', 'step': 5186, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 2847885091968}, 'timestamp': '2025-09-10 02:32:15.596531', 'step': 5186, 'epoch': 1} {'type': 'loss', 'content': 0.12826202809810638, 'timestamp': '2025-09-10 02:32:15.598431', 'step': 5187, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 2847885091968}, 'timestamp': '2025-09-10 02:32:15.627638', 'step': 5187, 'epoch': 1} {'type': 'loss', 'content': 0.22384516894817352, 'timestamp': '2025-09-10 02:32:15.651683', 'step': 5188, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 2847885091968}, 'timestamp': '2025-09-10 02:32:15.682208', 'step': 5188, 'epoch': 1} {'type': 'loss', 'content': 0.19196319580078125, 'timestamp': '2025-09-10 02:32:15.684547', 'step': 5189, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 3322488817984}, 'timestamp': '2025-09-10 02:32:15.714944', 'step': 5189, 'epoch': 1} {'type': 'loss', 'content': 0.13626360893249512, 'timestamp': '2025-09-10 02:32:15.717375', 'step': 5190, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 2847885091968}, 'timestamp': '2025-09-10 02:32:15.747313', 'step': 5190, 'epoch': 1} {'type': 'loss', 'content': 0.11868435144424438, 'timestamp': '2025-09-10 02:32:15.749664', 'step': 5191, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 128], 'flops': 3797092544000}, 'timestamp': '2025-09-10 02:32:15.780313', 'step': 5191, 'epoch': 1} {'type': 'loss', 'content': 0.19178225100040436, 'timestamp': '2025-09-10 02:32:15.804004', 'step': 5192, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 80], 'flops': 2373281365952}, 'timestamp': '2025-09-10 02:32:15.834131', 'step': 5192, 'epoch': 1} {'type': 'loss', 'content': 0.1536199003458023, 'timestamp': '2025-09-10 02:32:15.836641', 'step': 5193, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 2847885091968}, 'timestamp': '2025-09-10 02:32:15.867265', 'step': 5193, 'epoch': 1} {'type': 'loss', 'content': 0.19164390861988068, 'timestamp': '2025-09-10 02:32:15.869580', 'step': 5194, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 144], 'flops': 4271696270016}, 'timestamp': '2025-09-10 02:32:15.900598', 'step': 5194, 'epoch': 1} {'type': 'loss', 'content': 0.1084694042801857, 'timestamp': '2025-09-10 02:32:15.902985', 'step': 5195, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 2847885091968}, 'timestamp': '2025-09-10 02:32:15.932988', 'step': 5195, 'epoch': 1} {'type': 'loss', 'content': 0.1337590366601944, 'timestamp': '2025-09-10 02:32:15.956443', 'step': 5196, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 144], 'flops': 4271696270016}, 'timestamp': '2025-09-10 02:32:15.986863', 'step': 5196, 'epoch': 1} {'type': 'loss', 'content': 0.2847418785095215, 'timestamp': '2025-09-10 02:32:15.989351', 'step': 5197, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 2847885091968}, 'timestamp': '2025-09-10 02:32:16.019544', 'step': 5197, 'epoch': 1} {'type': 'loss', 'content': 0.23172318935394287, 'timestamp': '2025-09-10 02:32:16.021684', 'step': 5198, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 128], 'flops': 3797092544000}, 'timestamp': '2025-09-10 02:32:16.051922', 'step': 5198, 'epoch': 1} {'type': 'loss', 'content': 0.13535510003566742, 'timestamp': '2025-09-10 02:32:16.054415', 'step': 5199, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 3322488817984}, 'timestamp': '2025-09-10 02:32:16.085576', 'step': 5199, 'epoch': 1} {'type': 'loss', 'content': 0.1973300576210022, 'timestamp': '2025-09-10 02:32:16.109000', 'step': 5200, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 2847885091968}, 'timestamp': '2025-09-10 02:32:16.141723', 'step': 5200, 'epoch': 1} {'type': 'loss', 'content': 0.2528015673160553, 'timestamp': '2025-09-10 02:32:16.144423', 'step': 5201, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 3322488817984}, 'timestamp': '2025-09-10 02:32:16.175401', 'step': 5201, 'epoch': 1} {'type': 'loss', 'content': 0.19298264384269714, 'timestamp': '2025-09-10 02:32:16.177907', 'step': 5202, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 176], 'flops': 5220903722048}, 'timestamp': '2025-09-10 02:32:16.214607', 'step': 5202, 'epoch': 1} {'type': 'loss', 'content': 0.14241454005241394, 'timestamp': '2025-09-10 02:32:16.218870', 'step': 5203, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 2847885091968}, 'timestamp': '2025-09-10 02:32:16.250421', 'step': 5203, 'epoch': 1} {'type': 'loss', 'content': 0.11735929548740387, 'timestamp': '2025-09-10 02:32:16.273886', 'step': 5204, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 3322488817984}, 'timestamp': '2025-09-10 02:32:16.304578', 'step': 5204, 'epoch': 1} {'type': 'loss', 'content': 0.1159374788403511, 'timestamp': '2025-09-10 02:32:16.306801', 'step': 5205, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 144], 'flops': 4271696270016}, 'timestamp': '2025-09-10 02:32:16.337180', 'step': 5205, 'epoch': 1} {'type': 'loss', 'content': 0.17683376371860504, 'timestamp': '2025-09-10 02:32:16.341276', 'step': 5206, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 128], 'flops': 3797092544000}, 'timestamp': '2025-09-10 02:32:16.371792', 'step': 5206, 'epoch': 1} {'type': 'loss', 'content': 0.14484216272830963, 'timestamp': '2025-09-10 02:32:16.374115', 'step': 5207, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 144], 'flops': 4271696270016}, 'timestamp': '2025-09-10 02:32:16.404801', 'step': 5207, 'epoch': 1} {'type': 'loss', 'content': 0.17902857065200806, 'timestamp': '2025-09-10 02:32:16.428210', 'step': 5208, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 3322488817984}, 'timestamp': '2025-09-10 02:32:16.458556', 'step': 5208, 'epoch': 1} {'type': 'loss', 'content': 0.1834082007408142, 'timestamp': '2025-09-10 02:32:16.462121', 'step': 5209, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 3322488817984}, 'timestamp': '2025-09-10 02:32:16.496276', 'step': 5209, 'epoch': 1} {'type': 'loss', 'content': 0.18707695603370667, 'timestamp': '2025-09-10 02:32:16.498727', 'step': 5210, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 128], 'flops': 3797092544000}, 'timestamp': '2025-09-10 02:32:16.535599', 'step': 5210, 'epoch': 1} {'type': 'loss', 'content': 0.12808582186698914, 'timestamp': '2025-09-10 02:32:16.538724', 'step': 5211, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 3322488817984}, 'timestamp': '2025-09-10 02:32:16.570693', 'step': 5211, 'epoch': 1} {'type': 'loss', 'content': 0.15770982205867767, 'timestamp': '2025-09-10 02:32:16.598232', 'step': 5212, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 3322488817984}, 'timestamp': '2025-09-10 02:32:16.638527', 'step': 5212, 'epoch': 1} {'type': 'loss', 'content': 0.14668530225753784, 'timestamp': '2025-09-10 02:32:16.643098', 'step': 5213, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 128], 'flops': 3797092544000}, 'timestamp': '2025-09-10 02:32:16.674130', 'step': 5213, 'epoch': 1} {'type': 'loss', 'content': 0.21204398572444916, 'timestamp': '2025-09-10 02:32:16.676960', 'step': 5214, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 128], 'flops': 3797092544000}, 'timestamp': '2025-09-10 02:32:16.708889', 'step': 5214, 'epoch': 1} {'type': 'loss', 'content': 0.1260332465171814, 'timestamp': '2025-09-10 02:32:16.711417', 'step': 5215, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 3322488817984}, 'timestamp': '2025-09-10 02:32:16.748579', 'step': 5215, 'epoch': 1} {'type': 'loss', 'content': 0.1306794136762619, 'timestamp': '2025-09-10 02:32:16.772166', 'step': 5216, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 3322488817984}, 'timestamp': '2025-09-10 02:32:16.808516', 'step': 5216, 'epoch': 1} {'type': 'loss', 'content': 0.16333238780498505, 'timestamp': '2025-09-10 02:32:16.810714', 'step': 5217, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 128], 'flops': 3797092544000}, 'timestamp': '2025-09-10 02:32:16.841191', 'step': 5217, 'epoch': 1} {'type': 'loss', 'content': 0.16847410798072815, 'timestamp': '2025-09-10 02:32:16.843622', 'step': 5218, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 3322488817984}, 'timestamp': '2025-09-10 02:32:16.881659', 'step': 5218, 'epoch': 1} {'type': 'loss', 'content': 0.1158808022737503, 'timestamp': '2025-09-10 02:32:16.886028', 'step': 5219, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 128], 'flops': 3797092544000}, 'timestamp': '2025-09-10 02:32:16.916607', 'step': 5219, 'epoch': 1} {'type': 'loss', 'content': 0.18661625683307648, 'timestamp': '2025-09-10 02:32:16.940932', 'step': 5220, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 144], 'flops': 4271696270016}, 'timestamp': '2025-09-10 02:32:16.971604', 'step': 5220, 'epoch': 1} {'type': 'loss', 'content': 0.23129616677761078, 'timestamp': '2025-09-10 02:32:16.973603', 'step': 5221, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 128], 'flops': 3797092544000}, 'timestamp': '2025-09-10 02:32:17.003716', 'step': 5221, 'epoch': 1} {'type': 'loss', 'content': 0.2754497230052948, 'timestamp': '2025-09-10 02:32:17.007227', 'step': 5222, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 2847885091968}, 'timestamp': '2025-09-10 02:32:17.038730', 'step': 5222, 'epoch': 1} {'type': 'loss', 'content': 0.09351546317338943, 'timestamp': '2025-09-10 02:32:17.040911', 'step': 5223, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 2847885091968}, 'timestamp': '2025-09-10 02:32:17.071131', 'step': 5223, 'epoch': 1} {'type': 'loss', 'content': 0.14865614473819733, 'timestamp': '2025-09-10 02:32:17.095804', 'step': 5224, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 128], 'flops': 3797092544000}, 'timestamp': '2025-09-10 02:32:17.128643', 'step': 5224, 'epoch': 1} {'type': 'loss', 'content': 0.1218128651380539, 'timestamp': '2025-09-10 02:32:17.132714', 'step': 5225, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 2847885091968}, 'timestamp': '2025-09-10 02:32:17.166121', 'step': 5225, 'epoch': 1} {'type': 'loss', 'content': 0.11206243932247162, 'timestamp': '2025-09-10 02:32:17.168862', 'step': 5226, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 128], 'flops': 3797092544000}, 'timestamp': '2025-09-10 02:32:17.199179', 'step': 5226, 'epoch': 1} {'type': 'loss', 'content': 0.16167505085468292, 'timestamp': '2025-09-10 02:32:17.201760', 'step': 5227, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 3322488817984}, 'timestamp': '2025-09-10 02:32:17.232781', 'step': 5227, 'epoch': 1} {'type': 'loss', 'content': 0.08745352178812027, 'timestamp': '2025-09-10 02:32:17.256150', 'step': 5228, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 2847885091968}, 'timestamp': '2025-09-10 02:32:17.287207', 'step': 5228, 'epoch': 1} {'type': 'loss', 'content': 0.1950361430644989, 'timestamp': '2025-09-10 02:32:17.289569', 'step': 5229, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 2847885091968}, 'timestamp': '2025-09-10 02:32:17.320045', 'step': 5229, 'epoch': 1} {'type': 'loss', 'content': 0.1880377233028412, 'timestamp': '2025-09-10 02:32:17.322694', 'step': 5230, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 3322488817984}, 'timestamp': '2025-09-10 02:32:17.353059', 'step': 5230, 'epoch': 1} {'type': 'loss', 'content': 0.07403909415006638, 'timestamp': '2025-09-10 02:32:17.355904', 'step': 5231, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 80], 'flops': 2373281365952}, 'timestamp': '2025-09-10 02:32:17.386385', 'step': 5231, 'epoch': 1} {'type': 'loss', 'content': 0.1936468631029129, 'timestamp': '2025-09-10 02:32:17.409874', 'step': 5232, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 2847885091968}, 'timestamp': '2025-09-10 02:32:17.439960', 'step': 5232, 'epoch': 1} {'type': 'loss', 'content': 0.14277240633964539, 'timestamp': '2025-09-10 02:32:17.442449', 'step': 5233, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 128], 'flops': 3797092544000}, 'timestamp': '2025-09-10 02:32:17.473517', 'step': 5233, 'epoch': 1} {'type': 'loss', 'content': 0.2676067650318146, 'timestamp': '2025-09-10 02:32:17.476126', 'step': 5234, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 2847885091968}, 'timestamp': '2025-09-10 02:32:17.507653', 'step': 5234, 'epoch': 1} {'type': 'loss', 'content': 0.06499197334051132, 'timestamp': '2025-09-10 02:32:17.510394', 'step': 5235, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 128], 'flops': 3797092544000}, 'timestamp': '2025-09-10 02:32:17.541542', 'step': 5235, 'epoch': 1} {'type': 'loss', 'content': 0.14789584279060364, 'timestamp': '2025-09-10 02:32:17.565135', 'step': 5236, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 3322488817984}, 'timestamp': '2025-09-10 02:32:17.595750', 'step': 5236, 'epoch': 1} {'type': 'loss', 'content': 0.11872093379497528, 'timestamp': '2025-09-10 02:32:17.597950', 'step': 5237, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 80], 'flops': 2373281365952}, 'timestamp': '2025-09-10 02:32:17.628537', 'step': 5237, 'epoch': 1} {'type': 'loss', 'content': 0.2053399384021759, 'timestamp': '2025-09-10 02:32:17.630693', 'step': 5238, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 2847885091968}, 'timestamp': '2025-09-10 02:32:17.661924', 'step': 5238, 'epoch': 1} {'type': 'loss', 'content': 0.12305314093828201, 'timestamp': '2025-09-10 02:32:17.664516', 'step': 5239, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 3322488817984}, 'timestamp': '2025-09-10 02:32:17.694938', 'step': 5239, 'epoch': 1} {'type': 'loss', 'content': 0.20629656314849854, 'timestamp': '2025-09-10 02:32:17.718396', 'step': 5240, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 3322488817984}, 'timestamp': '2025-09-10 02:32:17.748936', 'step': 5240, 'epoch': 1} {'type': 'loss', 'content': 0.1438732147216797, 'timestamp': '2025-09-10 02:32:17.751273', 'step': 5241, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 2847885091968}, 'timestamp': '2025-09-10 02:32:17.781694', 'step': 5241, 'epoch': 1} {'type': 'loss', 'content': 0.15771791338920593, 'timestamp': '2025-09-10 02:32:17.784182', 'step': 5242, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 2847885091968}, 'timestamp': '2025-09-10 02:32:17.813630', 'step': 5242, 'epoch': 1} {'type': 'loss', 'content': 0.11769536882638931, 'timestamp': '2025-09-10 02:32:17.815781', 'step': 5243, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 80], 'flops': 2373281365952}, 'timestamp': '2025-09-10 02:32:17.845742', 'step': 5243, 'epoch': 1} {'type': 'loss', 'content': 0.14666491746902466, 'timestamp': '2025-09-10 02:32:17.869155', 'step': 5244, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 2847885091968}, 'timestamp': '2025-09-10 02:32:17.900134', 'step': 5244, 'epoch': 1} {'type': 'loss', 'content': 0.07384470105171204, 'timestamp': '2025-09-10 02:32:17.902553', 'step': 5245, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 3322488817984}, 'timestamp': '2025-09-10 02:32:17.933054', 'step': 5245, 'epoch': 1} {'type': 'loss', 'content': 0.13451772928237915, 'timestamp': '2025-09-10 02:32:17.935576', 'step': 5246, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 128], 'flops': 3797092544000}, 'timestamp': '2025-09-10 02:32:17.966859', 'step': 5246, 'epoch': 1} {'type': 'loss', 'content': 0.19386129081249237, 'timestamp': '2025-09-10 02:32:17.969089', 'step': 5247, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 144], 'flops': 4271696270016}, 'timestamp': '2025-09-10 02:32:17.999236', 'step': 5247, 'epoch': 1} {'type': 'loss', 'content': 0.12897494435310364, 'timestamp': '2025-09-10 02:32:18.022491', 'step': 5248, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 3322488817984}, 'timestamp': '2025-09-10 02:32:18.053195', 'step': 5248, 'epoch': 1} {'type': 'loss', 'content': 0.12132445722818375, 'timestamp': '2025-09-10 02:32:18.055802', 'step': 5249, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 128], 'flops': 3797092544000}, 'timestamp': '2025-09-10 02:32:18.086516', 'step': 5249, 'epoch': 1} {'type': 'loss', 'content': 0.19014690816402435, 'timestamp': '2025-09-10 02:32:18.088888', 'step': 5250, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 3322488817984}, 'timestamp': '2025-09-10 02:32:18.120017', 'step': 5250, 'epoch': 1} {'type': 'loss', 'content': 0.09929690510034561, 'timestamp': '2025-09-10 02:32:18.122532', 'step': 5251, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 2847885091968}, 'timestamp': '2025-09-10 02:32:18.152537', 'step': 5251, 'epoch': 1} {'type': 'loss', 'content': 0.14547233283519745, 'timestamp': '2025-09-10 02:32:18.176716', 'step': 5252, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 3322488817984}, 'timestamp': '2025-09-10 02:32:18.208199', 'step': 5252, 'epoch': 1} {'type': 'loss', 'content': 0.191587433218956, 'timestamp': '2025-09-10 02:32:18.210550', 'step': 5253, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 3322488817984}, 'timestamp': '2025-09-10 02:32:18.240449', 'step': 5253, 'epoch': 1} {'type': 'loss', 'content': 0.19486530125141144, 'timestamp': '2025-09-10 02:32:18.243790', 'step': 5254, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 128], 'flops': 3797092544000}, 'timestamp': '2025-09-10 02:32:18.274638', 'step': 5254, 'epoch': 1} {'type': 'loss', 'content': 0.18656429648399353, 'timestamp': '2025-09-10 02:32:18.277029', 'step': 5255, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 3322488817984}, 'timestamp': '2025-09-10 02:32:18.307027', 'step': 5255, 'epoch': 1} {'type': 'loss', 'content': 0.1589706987142563, 'timestamp': '2025-09-10 02:32:18.330973', 'step': 5256, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 80], 'flops': 2373281365952}, 'timestamp': '2025-09-10 02:32:18.361725', 'step': 5256, 'epoch': 1} {'type': 'loss', 'content': 0.30583643913269043, 'timestamp': '2025-09-10 02:32:18.364401', 'step': 5257, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 128], 'flops': 3797092544000}, 'timestamp': '2025-09-10 02:32:18.394893', 'step': 5257, 'epoch': 1} {'type': 'loss', 'content': 0.14298449456691742, 'timestamp': '2025-09-10 02:32:18.397257', 'step': 5258, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 2847885091968}, 'timestamp': '2025-09-10 02:32:18.427313', 'step': 5258, 'epoch': 1} {'type': 'loss', 'content': 0.058349888771772385, 'timestamp': '2025-09-10 02:32:18.429633', 'step': 5259, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 3322488817984}, 'timestamp': '2025-09-10 02:32:18.459969', 'step': 5259, 'epoch': 1} {'type': 'loss', 'content': 0.11537210643291473, 'timestamp': '2025-09-10 02:32:18.483203', 'step': 5260, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 144], 'flops': 4271696270016}, 'timestamp': '2025-09-10 02:32:18.515298', 'step': 5260, 'epoch': 1} {'type': 'loss', 'content': 0.09790585935115814, 'timestamp': '2025-09-10 02:32:18.517558', 'step': 5261, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 3322488817984}, 'timestamp': '2025-09-10 02:32:18.548921', 'step': 5261, 'epoch': 1} {'type': 'loss', 'content': 0.09255301207304001, 'timestamp': '2025-09-10 02:32:18.551124', 'step': 5262, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 128], 'flops': 3797092544000}, 'timestamp': '2025-09-10 02:32:18.582201', 'step': 5262, 'epoch': 1} {'type': 'loss', 'content': 0.07618280500173569, 'timestamp': '2025-09-10 02:32:18.584251', 'step': 5263, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 128], 'flops': 3797092544000}, 'timestamp': '2025-09-10 02:32:18.615252', 'step': 5263, 'epoch': 1} {'type': 'loss', 'content': 0.1385880559682846, 'timestamp': '2025-09-10 02:32:18.638744', 'step': 5264, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 2847885091968}, 'timestamp': '2025-09-10 02:32:18.670339', 'step': 5264, 'epoch': 1} {'type': 'loss', 'content': 0.2751060724258423, 'timestamp': '2025-09-10 02:32:18.672460', 'step': 5265, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 2847885091968}, 'timestamp': '2025-09-10 02:32:18.702497', 'step': 5265, 'epoch': 1} {'type': 'loss', 'content': 0.19507697224617004, 'timestamp': '2025-09-10 02:32:18.704700', 'step': 5266, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 80], 'flops': 2373281365952}, 'timestamp': '2025-09-10 02:32:18.735936', 'step': 5266, 'epoch': 1} {'type': 'loss', 'content': 0.15440475940704346, 'timestamp': '2025-09-10 02:32:18.738355', 'step': 5267, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 3322488817984}, 'timestamp': '2025-09-10 02:32:18.768762', 'step': 5267, 'epoch': 1} {'type': 'loss', 'content': 0.11082274466753006, 'timestamp': '2025-09-10 02:32:18.792278', 'step': 5268, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 144], 'flops': 4271696270016}, 'timestamp': '2025-09-10 02:32:18.824693', 'step': 5268, 'epoch': 1} {'type': 'loss', 'content': 0.17406044900417328, 'timestamp': '2025-09-10 02:32:18.827200', 'step': 5269, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 3322488817984}, 'timestamp': '2025-09-10 02:32:18.856735', 'step': 5269, 'epoch': 1} {'type': 'loss', 'content': 0.12986929714679718, 'timestamp': '2025-09-10 02:32:18.859023', 'step': 5270, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 3322488817984}, 'timestamp': '2025-09-10 02:32:18.889241', 'step': 5270, 'epoch': 1} {'type': 'loss', 'content': 0.1965973824262619, 'timestamp': '2025-09-10 02:32:18.893262', 'step': 5271, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 3322488817984}, 'timestamp': '2025-09-10 02:32:18.923442', 'step': 5271, 'epoch': 1} {'type': 'loss', 'content': 0.1014554426074028, 'timestamp': '2025-09-10 02:32:18.947261', 'step': 5272, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 144], 'flops': 4271696270016}, 'timestamp': '2025-09-10 02:32:18.979663', 'step': 5272, 'epoch': 1} {'type': 'loss', 'content': 0.17747844755649567, 'timestamp': '2025-09-10 02:32:18.981973', 'step': 5273, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 128], 'flops': 3797092544000}, 'timestamp': '2025-09-10 02:32:19.013300', 'step': 5273, 'epoch': 1} {'type': 'loss', 'content': 0.25976383686065674, 'timestamp': '2025-09-10 02:32:19.015752', 'step': 5274, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 80], 'flops': 2373281365952}, 'timestamp': '2025-09-10 02:32:19.046459', 'step': 5274, 'epoch': 1} {'type': 'loss', 'content': 0.23700857162475586, 'timestamp': '2025-09-10 02:32:19.048815', 'step': 5275, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 128], 'flops': 3797092544000}, 'timestamp': '2025-09-10 02:32:19.079890', 'step': 5275, 'epoch': 1} {'type': 'loss', 'content': 0.1355230212211609, 'timestamp': '2025-09-10 02:32:19.103582', 'step': 5276, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 128], 'flops': 3797092544000}, 'timestamp': '2025-09-10 02:32:19.134201', 'step': 5276, 'epoch': 1} {'type': 'loss', 'content': 0.10497967898845673, 'timestamp': '2025-09-10 02:32:19.136088', 'step': 5277, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 3322488817984}, 'timestamp': '2025-09-10 02:32:19.165893', 'step': 5277, 'epoch': 1} {'type': 'loss', 'content': 0.14915095269680023, 'timestamp': '2025-09-10 02:32:19.168186', 'step': 5278, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 2847885091968}, 'timestamp': '2025-09-10 02:32:19.199301', 'step': 5278, 'epoch': 1} {'type': 'loss', 'content': 0.29776379466056824, 'timestamp': '2025-09-10 02:32:19.202003', 'step': 5279, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 3322488817984}, 'timestamp': '2025-09-10 02:32:19.233426', 'step': 5279, 'epoch': 1} {'type': 'loss', 'content': 0.11806356906890869, 'timestamp': '2025-09-10 02:32:19.257220', 'step': 5280, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 3322488817984}, 'timestamp': '2025-09-10 02:32:19.286885', 'step': 5280, 'epoch': 1} {'type': 'loss', 'content': 0.18011939525604248, 'timestamp': '2025-09-10 02:32:19.288781', 'step': 5281, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 80], 'flops': 2373281365952}, 'timestamp': '2025-09-10 02:32:19.318375', 'step': 5281, 'epoch': 1} {'type': 'loss', 'content': 0.17364543676376343, 'timestamp': '2025-09-10 02:32:19.320765', 'step': 5282, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 144], 'flops': 4271696270016}, 'timestamp': '2025-09-10 02:32:19.352541', 'step': 5282, 'epoch': 1} {'type': 'loss', 'content': 0.13278503715991974, 'timestamp': '2025-09-10 02:32:19.354885', 'step': 5283, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 2847885091968}, 'timestamp': '2025-09-10 02:32:19.386789', 'step': 5283, 'epoch': 1} {'type': 'loss', 'content': 0.1229356899857521, 'timestamp': '2025-09-10 02:32:19.412058', 'step': 5284, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 3322488817984}, 'timestamp': '2025-09-10 02:32:19.443028', 'step': 5284, 'epoch': 1} {'type': 'loss', 'content': 0.18655447661876678, 'timestamp': '2025-09-10 02:32:19.445776', 'step': 5285, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 80], 'flops': 2373281365952}, 'timestamp': '2025-09-10 02:32:19.476477', 'step': 5285, 'epoch': 1} {'type': 'loss', 'content': 0.2648703455924988, 'timestamp': '2025-09-10 02:32:19.478867', 'step': 5286, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 144], 'flops': 4271696270016}, 'timestamp': '2025-09-10 02:32:19.509377', 'step': 5286, 'epoch': 1} {'type': 'loss', 'content': 0.12968948483467102, 'timestamp': '2025-09-10 02:32:19.511768', 'step': 5287, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 3322488817984}, 'timestamp': '2025-09-10 02:32:19.542235', 'step': 5287, 'epoch': 1} {'type': 'loss', 'content': 0.10905081033706665, 'timestamp': '2025-09-10 02:32:19.565662', 'step': 5288, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 2847885091968}, 'timestamp': '2025-09-10 02:32:19.600117', 'step': 5288, 'epoch': 1} {'type': 'loss', 'content': 0.2503541111946106, 'timestamp': '2025-09-10 02:32:19.602580', 'step': 5289, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 128], 'flops': 3797092544000}, 'timestamp': '2025-09-10 02:32:19.633186', 'step': 5289, 'epoch': 1} {'type': 'loss', 'content': 0.1235765665769577, 'timestamp': '2025-09-10 02:32:19.635263', 'step': 5290, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 80], 'flops': 2373281365952}, 'timestamp': '2025-09-10 02:32:19.664712', 'step': 5290, 'epoch': 1} {'type': 'loss', 'content': 0.23357337713241577, 'timestamp': '2025-09-10 02:32:19.667485', 'step': 5291, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 128], 'flops': 3797092544000}, 'timestamp': '2025-09-10 02:32:19.698245', 'step': 5291, 'epoch': 1} {'type': 'loss', 'content': 0.17713198065757751, 'timestamp': '2025-09-10 02:32:19.721950', 'step': 5292, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 128], 'flops': 3797092544000}, 'timestamp': '2025-09-10 02:32:19.754739', 'step': 5292, 'epoch': 1} {'type': 'loss', 'content': 0.22123277187347412, 'timestamp': '2025-09-10 02:32:19.757148', 'step': 5293, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 3322488817984}, 'timestamp': '2025-09-10 02:32:19.788400', 'step': 5293, 'epoch': 1} {'type': 'loss', 'content': 0.15316516160964966, 'timestamp': '2025-09-10 02:32:19.790650', 'step': 5294, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 2847885091968}, 'timestamp': '2025-09-10 02:32:19.821566', 'step': 5294, 'epoch': 1} {'type': 'loss', 'content': 0.10002712160348892, 'timestamp': '2025-09-10 02:32:19.824050', 'step': 5295, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 144], 'flops': 4271696270016}, 'timestamp': '2025-09-10 02:32:19.854902', 'step': 5295, 'epoch': 1} {'type': 'loss', 'content': 0.12109841406345367, 'timestamp': '2025-09-10 02:32:19.878470', 'step': 5296, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 3322488817984}, 'timestamp': '2025-09-10 02:32:19.913290', 'step': 5296, 'epoch': 1} {'type': 'loss', 'content': 0.18667744100093842, 'timestamp': '2025-09-10 02:32:19.915957', 'step': 5297, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 3322488817984}, 'timestamp': '2025-09-10 02:32:19.947579', 'step': 5297, 'epoch': 1} {'type': 'loss', 'content': 0.17034943401813507, 'timestamp': '2025-09-10 02:32:19.950710', 'step': 5298, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 2847885091968}, 'timestamp': '2025-09-10 02:32:19.983171', 'step': 5298, 'epoch': 1} {'type': 'loss', 'content': 0.1615680754184723, 'timestamp': '2025-09-10 02:32:19.986340', 'step': 5299, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 2847885091968}, 'timestamp': '2025-09-10 02:32:20.019632', 'step': 5299, 'epoch': 1} {'type': 'loss', 'content': 0.143331840634346, 'timestamp': '2025-09-10 02:32:20.043342', 'step': 5300, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 3322488817984}, 'timestamp': '2025-09-10 02:32:20.075663', 'step': 5300, 'epoch': 1} {'type': 'loss', 'content': 0.13249611854553223, 'timestamp': '2025-09-10 02:32:20.078218', 'step': 5301, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 144], 'flops': 4271696270016}, 'timestamp': '2025-09-10 02:32:20.109010', 'step': 5301, 'epoch': 1} {'type': 'loss', 'content': 0.1820460557937622, 'timestamp': '2025-09-10 02:32:20.111337', 'step': 5302, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 3322488817984}, 'timestamp': '2025-09-10 02:32:20.141506', 'step': 5302, 'epoch': 1} {'type': 'loss', 'content': 0.235279381275177, 'timestamp': '2025-09-10 02:32:20.143664', 'step': 5303, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 3322488817984}, 'timestamp': '2025-09-10 02:32:20.174111', 'step': 5303, 'epoch': 1} {'type': 'loss', 'content': 0.13407860696315765, 'timestamp': '2025-09-10 02:32:20.197700', 'step': 5304, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 3322488817984}, 'timestamp': '2025-09-10 02:32:20.228177', 'step': 5304, 'epoch': 1} {'type': 'loss', 'content': 0.09811755269765854, 'timestamp': '2025-09-10 02:32:20.230535', 'step': 5305, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 128], 'flops': 3797092544000}, 'timestamp': '2025-09-10 02:32:20.264000', 'step': 5305, 'epoch': 1} {'type': 'loss', 'content': 0.17630928754806519, 'timestamp': '2025-09-10 02:32:20.266281', 'step': 5306, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 3322488817984}, 'timestamp': '2025-09-10 02:32:20.298895', 'step': 5306, 'epoch': 1} {'type': 'loss', 'content': 0.13513599336147308, 'timestamp': '2025-09-10 02:32:20.300913', 'step': 5307, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 128], 'flops': 3797092544000}, 'timestamp': '2025-09-10 02:32:20.331433', 'step': 5307, 'epoch': 1} {'type': 'loss', 'content': 0.15464605391025543, 'timestamp': '2025-09-10 02:32:20.355009', 'step': 5308, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 3322488817984}, 'timestamp': '2025-09-10 02:32:20.385777', 'step': 5308, 'epoch': 1} {'type': 'loss', 'content': 0.15772971510887146, 'timestamp': '2025-09-10 02:32:20.388382', 'step': 5309, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 144], 'flops': 4271696270016}, 'timestamp': '2025-09-10 02:32:20.420969', 'step': 5309, 'epoch': 1} {'type': 'loss', 'content': 0.1532164067029953, 'timestamp': '2025-09-10 02:32:20.423183', 'step': 5310, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 3322488817984}, 'timestamp': '2025-09-10 02:32:20.455063', 'step': 5310, 'epoch': 1} {'type': 'loss', 'content': 0.10332857072353363, 'timestamp': '2025-09-10 02:32:20.457411', 'step': 5311, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 128], 'flops': 3797092544000}, 'timestamp': '2025-09-10 02:32:20.487418', 'step': 5311, 'epoch': 1} {'type': 'loss', 'content': 0.12499936670064926, 'timestamp': '2025-09-10 02:32:20.511640', 'step': 5312, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 128], 'flops': 3797092544000}, 'timestamp': '2025-09-10 02:32:20.542419', 'step': 5312, 'epoch': 1} {'type': 'loss', 'content': 0.20450632274150848, 'timestamp': '2025-09-10 02:32:20.545370', 'step': 5313, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 3322488817984}, 'timestamp': '2025-09-10 02:32:20.576204', 'step': 5313, 'epoch': 1} {'type': 'loss', 'content': 0.27031654119491577, 'timestamp': '2025-09-10 02:32:20.579596', 'step': 5314, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 2847885091968}, 'timestamp': '2025-09-10 02:32:20.613972', 'step': 5314, 'epoch': 1} {'type': 'loss', 'content': 0.11990207433700562, 'timestamp': '2025-09-10 02:32:20.616407', 'step': 5315, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 2847885091968}, 'timestamp': '2025-09-10 02:32:20.647230', 'step': 5315, 'epoch': 1} {'type': 'loss', 'content': 0.18614809215068817, 'timestamp': '2025-09-10 02:32:20.671065', 'step': 5316, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 80], 'flops': 2373281365952}, 'timestamp': '2025-09-10 02:32:20.702629', 'step': 5316, 'epoch': 1} {'type': 'loss', 'content': 0.099340058863163, 'timestamp': '2025-09-10 02:32:20.704802', 'step': 5317, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 3322488817984}, 'timestamp': '2025-09-10 02:32:20.735654', 'step': 5317, 'epoch': 1} {'type': 'loss', 'content': 0.12234886735677719, 'timestamp': '2025-09-10 02:32:20.738007', 'step': 5318, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 3322488817984}, 'timestamp': '2025-09-10 02:32:20.768174', 'step': 5318, 'epoch': 1} {'type': 'loss', 'content': 0.14639517664909363, 'timestamp': '2025-09-10 02:32:20.770361', 'step': 5319, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 3322488817984}, 'timestamp': '2025-09-10 02:32:20.801758', 'step': 5319, 'epoch': 1} {'type': 'loss', 'content': 0.1523393988609314, 'timestamp': '2025-09-10 02:32:20.825194', 'step': 5320, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 2847885091968}, 'timestamp': '2025-09-10 02:32:20.856263', 'step': 5320, 'epoch': 1} {'type': 'loss', 'content': 0.12556378543376923, 'timestamp': '2025-09-10 02:32:20.858755', 'step': 5321, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 144], 'flops': 4271696270016}, 'timestamp': '2025-09-10 02:32:20.890134', 'step': 5321, 'epoch': 1} {'type': 'loss', 'content': 0.1011049747467041, 'timestamp': '2025-09-10 02:32:20.892802', 'step': 5322, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 2847885091968}, 'timestamp': '2025-09-10 02:32:20.925458', 'step': 5322, 'epoch': 1} {'type': 'loss', 'content': 0.1694502979516983, 'timestamp': '2025-09-10 02:32:20.927476', 'step': 5323, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 2847885091968}, 'timestamp': '2025-09-10 02:32:20.958222', 'step': 5323, 'epoch': 1} {'type': 'loss', 'content': 0.255473792552948, 'timestamp': '2025-09-10 02:32:20.981493', 'step': 5324, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 2847885091968}, 'timestamp': '2025-09-10 02:32:21.012532', 'step': 5324, 'epoch': 1} {'type': 'loss', 'content': 0.17108413577079773, 'timestamp': '2025-09-10 02:32:21.014954', 'step': 5325, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 3322488817984}, 'timestamp': '2025-09-10 02:32:21.045317', 'step': 5325, 'epoch': 1} {'type': 'loss', 'content': 0.1180422231554985, 'timestamp': '2025-09-10 02:32:21.047669', 'step': 5326, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 3322488817984}, 'timestamp': '2025-09-10 02:32:21.078972', 'step': 5326, 'epoch': 1} {'type': 'loss', 'content': 0.1244615763425827, 'timestamp': '2025-09-10 02:32:21.082109', 'step': 5327, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 3322488817984}, 'timestamp': '2025-09-10 02:32:21.111933', 'step': 5327, 'epoch': 1} {'type': 'loss', 'content': 0.12192387878894806, 'timestamp': '2025-09-10 02:32:21.135630', 'step': 5328, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 128], 'flops': 3797092544000}, 'timestamp': '2025-09-10 02:32:21.165546', 'step': 5328, 'epoch': 1} {'type': 'loss', 'content': 0.20072028040885925, 'timestamp': '2025-09-10 02:32:21.167738', 'step': 5329, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 192], 'flops': 5695507448064}, 'timestamp': '2025-09-10 02:32:21.198740', 'step': 5329, 'epoch': 1} {'type': 'loss', 'content': 0.1535114347934723, 'timestamp': '2025-09-10 02:32:21.203378', 'step': 5330, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 3322488817984}, 'timestamp': '2025-09-10 02:32:21.233335', 'step': 5330, 'epoch': 1} {'type': 'loss', 'content': 0.16620419919490814, 'timestamp': '2025-09-10 02:32:21.235542', 'step': 5331, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 128], 'flops': 3797092544000}, 'timestamp': '2025-09-10 02:32:21.265826', 'step': 5331, 'epoch': 1} {'type': 'loss', 'content': 0.18591274321079254, 'timestamp': '2025-09-10 02:32:21.288983', 'step': 5332, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 3322488817984}, 'timestamp': '2025-09-10 02:32:21.320699', 'step': 5332, 'epoch': 1} {'type': 'loss', 'content': 0.18125784397125244, 'timestamp': '2025-09-10 02:32:21.323038', 'step': 5333, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 2847885091968}, 'timestamp': '2025-09-10 02:32:21.353627', 'step': 5333, 'epoch': 1} {'type': 'loss', 'content': 0.07666237652301788, 'timestamp': '2025-09-10 02:32:21.355910', 'step': 5334, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 3322488817984}, 'timestamp': '2025-09-10 02:32:21.386960', 'step': 5334, 'epoch': 1} {'type': 'loss', 'content': 0.22411417961120605, 'timestamp': '2025-09-10 02:32:21.389254', 'step': 5335, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 128], 'flops': 3797092544000}, 'timestamp': '2025-09-10 02:32:21.420035', 'step': 5335, 'epoch': 1} {'type': 'loss', 'content': 0.17213791608810425, 'timestamp': '2025-09-10 02:32:21.443644', 'step': 5336, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 3322488817984}, 'timestamp': '2025-09-10 02:32:21.474584', 'step': 5336, 'epoch': 1} {'type': 'loss', 'content': 0.15897515416145325, 'timestamp': '2025-09-10 02:32:21.477086', 'step': 5337, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 3322488817984}, 'timestamp': '2025-09-10 02:32:21.507039', 'step': 5337, 'epoch': 1} {'type': 'loss', 'content': 0.23086000978946686, 'timestamp': '2025-09-10 02:32:21.509432', 'step': 5338, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 144], 'flops': 4271696270016}, 'timestamp': '2025-09-10 02:32:21.540316', 'step': 5338, 'epoch': 1} {'type': 'loss', 'content': 0.156746968626976, 'timestamp': '2025-09-10 02:32:21.542782', 'step': 5339, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 2847885091968}, 'timestamp': '2025-09-10 02:32:21.572867', 'step': 5339, 'epoch': 1} {'type': 'loss', 'content': 0.15134760737419128, 'timestamp': '2025-09-10 02:32:21.596401', 'step': 5340, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 3322488817984}, 'timestamp': '2025-09-10 02:32:21.627277', 'step': 5340, 'epoch': 1} {'type': 'loss', 'content': 0.07040563225746155, 'timestamp': '2025-09-10 02:32:21.629495', 'step': 5341, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 2847885091968}, 'timestamp': '2025-09-10 02:32:21.660126', 'step': 5341, 'epoch': 1} {'type': 'loss', 'content': 0.05855857580900192, 'timestamp': '2025-09-10 02:32:21.662639', 'step': 5342, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 2847885091968}, 'timestamp': '2025-09-10 02:32:21.693166', 'step': 5342, 'epoch': 1} {'type': 'loss', 'content': 0.16589674353599548, 'timestamp': '2025-09-10 02:32:21.695364', 'step': 5343, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 2847885091968}, 'timestamp': '2025-09-10 02:32:21.725411', 'step': 5343, 'epoch': 1} {'type': 'loss', 'content': 0.17097865045070648, 'timestamp': '2025-09-10 02:32:21.748861', 'step': 5344, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 128], 'flops': 3797092544000}, 'timestamp': '2025-09-10 02:32:21.780132', 'step': 5344, 'epoch': 1} {'type': 'loss', 'content': 0.19584141671657562, 'timestamp': '2025-09-10 02:32:21.782406', 'step': 5345, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 128], 'flops': 3797092544000}, 'timestamp': '2025-09-10 02:32:21.813651', 'step': 5345, 'epoch': 1} {'type': 'loss', 'content': 0.2256344109773636, 'timestamp': '2025-09-10 02:32:21.815578', 'step': 5346, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 128], 'flops': 3797092544000}, 'timestamp': '2025-09-10 02:32:21.845608', 'step': 5346, 'epoch': 1} {'type': 'loss', 'content': 0.18643413484096527, 'timestamp': '2025-09-10 02:32:21.847748', 'step': 5347, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 3322488817984}, 'timestamp': '2025-09-10 02:32:21.877210', 'step': 5347, 'epoch': 1} {'type': 'loss', 'content': 0.2223806232213974, 'timestamp': '2025-09-10 02:32:21.900674', 'step': 5348, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 2847885091968}, 'timestamp': '2025-09-10 02:32:21.931557', 'step': 5348, 'epoch': 1} {'type': 'loss', 'content': 0.15923817455768585, 'timestamp': '2025-09-10 02:32:21.933901', 'step': 5349, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 144], 'flops': 4271696270016}, 'timestamp': '2025-09-10 02:32:21.964044', 'step': 5349, 'epoch': 1} {'type': 'loss', 'content': 0.20323115587234497, 'timestamp': '2025-09-10 02:32:21.966629', 'step': 5350, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 2847885091968}, 'timestamp': '2025-09-10 02:32:21.998129', 'step': 5350, 'epoch': 1} {'type': 'loss', 'content': 0.20080912113189697, 'timestamp': '2025-09-10 02:32:22.000725', 'step': 5351, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 3322488817984}, 'timestamp': '2025-09-10 02:32:22.031063', 'step': 5351, 'epoch': 1} {'type': 'loss', 'content': 0.21070365607738495, 'timestamp': '2025-09-10 02:32:22.054451', 'step': 5352, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 3322488817984}, 'timestamp': '2025-09-10 02:32:22.085488', 'step': 5352, 'epoch': 1} {'type': 'loss', 'content': 0.09876199811697006, 'timestamp': '2025-09-10 02:32:22.088274', 'step': 5353, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 144], 'flops': 4271696270016}, 'timestamp': '2025-09-10 02:32:22.122943', 'step': 5353, 'epoch': 1} {'type': 'loss', 'content': 0.10953465849161148, 'timestamp': '2025-09-10 02:32:22.125490', 'step': 5354, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 128], 'flops': 3797092544000}, 'timestamp': '2025-09-10 02:32:22.157686', 'step': 5354, 'epoch': 1} {'type': 'loss', 'content': 0.2215511053800583, 'timestamp': '2025-09-10 02:32:22.160805', 'step': 5355, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 2847885091968}, 'timestamp': '2025-09-10 02:32:22.191792', 'step': 5355, 'epoch': 1} {'type': 'loss', 'content': 0.17095565795898438, 'timestamp': '2025-09-10 02:32:22.218231', 'step': 5356, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 3322488817984}, 'timestamp': '2025-09-10 02:32:22.248590', 'step': 5356, 'epoch': 1} {'type': 'loss', 'content': 0.14313533902168274, 'timestamp': '2025-09-10 02:32:22.250748', 'step': 5357, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 144], 'flops': 4271696270016}, 'timestamp': '2025-09-10 02:32:22.282533', 'step': 5357, 'epoch': 1} {'type': 'loss', 'content': 0.19935470819473267, 'timestamp': '2025-09-10 02:32:22.284738', 'step': 5358, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 3322488817984}, 'timestamp': '2025-09-10 02:32:22.316207', 'step': 5358, 'epoch': 1} {'type': 'loss', 'content': 0.11147332191467285, 'timestamp': '2025-09-10 02:32:22.318306', 'step': 5359, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 128], 'flops': 3797092544000}, 'timestamp': '2025-09-10 02:32:22.348481', 'step': 5359, 'epoch': 1} {'type': 'loss', 'content': 0.15511885285377502, 'timestamp': '2025-09-10 02:32:22.371958', 'step': 5360, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 3322488817984}, 'timestamp': '2025-09-10 02:32:22.404633', 'step': 5360, 'epoch': 1} {'type': 'loss', 'content': 0.13692623376846313, 'timestamp': '2025-09-10 02:32:22.406821', 'step': 5361, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 3322488817984}, 'timestamp': '2025-09-10 02:32:22.437207', 'step': 5361, 'epoch': 1} {'type': 'loss', 'content': 0.13065201044082642, 'timestamp': '2025-09-10 02:32:22.439189', 'step': 5362, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 3322488817984}, 'timestamp': '2025-09-10 02:32:22.469245', 'step': 5362, 'epoch': 1} {'type': 'loss', 'content': 0.15097641944885254, 'timestamp': '2025-09-10 02:32:22.471343', 'step': 5363, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 3322488817984}, 'timestamp': '2025-09-10 02:32:22.501342', 'step': 5363, 'epoch': 1} {'type': 'loss', 'content': 0.11626046150922775, 'timestamp': '2025-09-10 02:32:22.524837', 'step': 5364, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 144], 'flops': 4271696270016}, 'timestamp': '2025-09-10 02:32:22.555567', 'step': 5364, 'epoch': 1} {'type': 'loss', 'content': 0.10450733453035355, 'timestamp': '2025-09-10 02:32:22.557998', 'step': 5365, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 2847885091968}, 'timestamp': '2025-09-10 02:32:22.604338', 'step': 5365, 'epoch': 1} {'type': 'loss', 'content': 0.15674467384815216, 'timestamp': '2025-09-10 02:32:22.607177', 'step': 5366, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 3322488817984}, 'timestamp': '2025-09-10 02:32:22.637110', 'step': 5366, 'epoch': 1} {'type': 'loss', 'content': 0.09094266593456268, 'timestamp': '2025-09-10 02:32:22.639372', 'step': 5367, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 3322488817984}, 'timestamp': '2025-09-10 02:32:22.670484', 'step': 5367, 'epoch': 1} {'type': 'loss', 'content': 0.1043737381696701, 'timestamp': '2025-09-10 02:32:22.694764', 'step': 5368, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 2847885091968}, 'timestamp': '2025-09-10 02:32:22.727453', 'step': 5368, 'epoch': 1} {'type': 'loss', 'content': 0.2011273056268692, 'timestamp': '2025-09-10 02:32:22.729670', 'step': 5369, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 2847885091968}, 'timestamp': '2025-09-10 02:32:22.761407', 'step': 5369, 'epoch': 1} {'type': 'loss', 'content': 0.11266371607780457, 'timestamp': '2025-09-10 02:32:22.764403', 'step': 5370, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 80], 'flops': 2373281365952}, 'timestamp': '2025-09-10 02:32:22.807638', 'step': 5370, 'epoch': 1} {'type': 'loss', 'content': 0.14670567214488983, 'timestamp': '2025-09-10 02:32:22.814224', 'step': 5371, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 3322488817984}, 'timestamp': '2025-09-10 02:32:22.850782', 'step': 5371, 'epoch': 1} {'type': 'loss', 'content': 0.1627412885427475, 'timestamp': '2025-09-10 02:32:22.874291', 'step': 5372, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 144], 'flops': 4271696270016}, 'timestamp': '2025-09-10 02:32:22.906205', 'step': 5372, 'epoch': 1} {'type': 'loss', 'content': 0.1358482986688614, 'timestamp': '2025-09-10 02:32:22.908307', 'step': 5373, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 3322488817984}, 'timestamp': '2025-09-10 02:32:22.939129', 'step': 5373, 'epoch': 1} {'type': 'loss', 'content': 0.1274407058954239, 'timestamp': '2025-09-10 02:32:22.941570', 'step': 5374, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 128], 'flops': 3797092544000}, 'timestamp': '2025-09-10 02:32:22.971882', 'step': 5374, 'epoch': 1} {'type': 'loss', 'content': 0.08533117175102234, 'timestamp': '2025-09-10 02:32:22.974276', 'step': 5375, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 3322488817984}, 'timestamp': '2025-09-10 02:32:23.004104', 'step': 5375, 'epoch': 1} {'type': 'loss', 'content': 0.1114574447274208, 'timestamp': '2025-09-10 02:32:23.027912', 'step': 5376, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 3322488817984}, 'timestamp': '2025-09-10 02:32:23.058463', 'step': 5376, 'epoch': 1} {'type': 'loss', 'content': 0.12231026589870453, 'timestamp': '2025-09-10 02:32:23.060650', 'step': 5377, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 2847885091968}, 'timestamp': '2025-09-10 02:32:23.091098', 'step': 5377, 'epoch': 1} {'type': 'loss', 'content': 0.12178096175193787, 'timestamp': '2025-09-10 02:32:23.092941', 'step': 5378, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 2847885091968}, 'timestamp': '2025-09-10 02:32:23.123656', 'step': 5378, 'epoch': 1} {'type': 'loss', 'content': 0.19964398443698883, 'timestamp': '2025-09-10 02:32:23.126426', 'step': 5379, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 2847885091968}, 'timestamp': '2025-09-10 02:32:23.159042', 'step': 5379, 'epoch': 1} {'type': 'loss', 'content': 0.1784030795097351, 'timestamp': '2025-09-10 02:32:23.182790', 'step': 5380, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 144], 'flops': 4271696270016}, 'timestamp': '2025-09-10 02:32:23.212918', 'step': 5380, 'epoch': 1} {'type': 'loss', 'content': 0.13202226161956787, 'timestamp': '2025-09-10 02:32:23.215537', 'step': 5381, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 3322488817984}, 'timestamp': '2025-09-10 02:32:23.246751', 'step': 5381, 'epoch': 1} {'type': 'loss', 'content': 0.13115274906158447, 'timestamp': '2025-09-10 02:32:23.248927', 'step': 5382, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 2847885091968}, 'timestamp': '2025-09-10 02:32:23.279725', 'step': 5382, 'epoch': 1} {'type': 'loss', 'content': 0.09250333160161972, 'timestamp': '2025-09-10 02:32:23.282021', 'step': 5383, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 3322488817984}, 'timestamp': '2025-09-10 02:32:23.312202', 'step': 5383, 'epoch': 1} {'type': 'loss', 'content': 0.13818350434303284, 'timestamp': '2025-09-10 02:32:23.336013', 'step': 5384, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 128], 'flops': 3797092544000}, 'timestamp': '2025-09-10 02:32:23.365657', 'step': 5384, 'epoch': 1} {'type': 'loss', 'content': 0.1224868968129158, 'timestamp': '2025-09-10 02:32:23.367882', 'step': 5385, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 3322488817984}, 'timestamp': '2025-09-10 02:32:23.398546', 'step': 5385, 'epoch': 1} {'type': 'loss', 'content': 0.09175363928079605, 'timestamp': '2025-09-10 02:32:23.400664', 'step': 5386, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 2847885091968}, 'timestamp': '2025-09-10 02:32:23.432224', 'step': 5386, 'epoch': 1} {'type': 'loss', 'content': 0.16309906542301178, 'timestamp': '2025-09-10 02:32:23.434983', 'step': 5387, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 144], 'flops': 4271696270016}, 'timestamp': '2025-09-10 02:32:23.465963', 'step': 5387, 'epoch': 1} {'type': 'loss', 'content': 0.14974172413349152, 'timestamp': '2025-09-10 02:32:23.489281', 'step': 5388, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 128], 'flops': 3797092544000}, 'timestamp': '2025-09-10 02:32:23.520586', 'step': 5388, 'epoch': 1} {'type': 'loss', 'content': 0.11598458141088486, 'timestamp': '2025-09-10 02:32:23.522869', 'step': 5389, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 2847885091968}, 'timestamp': '2025-09-10 02:32:23.553720', 'step': 5389, 'epoch': 1} {'type': 'loss', 'content': 0.19047610461711884, 'timestamp': '2025-09-10 02:32:23.555958', 'step': 5390, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 128], 'flops': 3797092544000}, 'timestamp': '2025-09-10 02:32:23.586275', 'step': 5390, 'epoch': 1} {'type': 'loss', 'content': 0.17484980821609497, 'timestamp': '2025-09-10 02:32:23.588570', 'step': 5391, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 2847885091968}, 'timestamp': '2025-09-10 02:32:23.618899', 'step': 5391, 'epoch': 1} {'type': 'loss', 'content': 0.22358918190002441, 'timestamp': '2025-09-10 02:32:23.642590', 'step': 5392, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 128], 'flops': 3797092544000}, 'timestamp': '2025-09-10 02:32:23.672737', 'step': 5392, 'epoch': 1} {'type': 'loss', 'content': 0.07892069220542908, 'timestamp': '2025-09-10 02:32:23.674926', 'step': 5393, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 2847885091968}, 'timestamp': '2025-09-10 02:32:23.705506', 'step': 5393, 'epoch': 1} {'type': 'loss', 'content': 0.10792262107133865, 'timestamp': '2025-09-10 02:32:23.707880', 'step': 5394, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 3322488817984}, 'timestamp': '2025-09-10 02:32:23.738911', 'step': 5394, 'epoch': 1} {'type': 'loss', 'content': 0.20447872579097748, 'timestamp': '2025-09-10 02:32:23.741215', 'step': 5395, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 128], 'flops': 3797092544000}, 'timestamp': '2025-09-10 02:32:23.771405', 'step': 5395, 'epoch': 1} {'type': 'loss', 'content': 0.19282495975494385, 'timestamp': '2025-09-10 02:32:23.794923', 'step': 5396, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 3322488817984}, 'timestamp': '2025-09-10 02:32:23.826010', 'step': 5396, 'epoch': 1} {'type': 'loss', 'content': 0.13288907706737518, 'timestamp': '2025-09-10 02:32:23.828242', 'step': 5397, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 3322488817984}, 'timestamp': '2025-09-10 02:32:23.859351', 'step': 5397, 'epoch': 1} {'type': 'loss', 'content': 0.19369277358055115, 'timestamp': '2025-09-10 02:32:23.862927', 'step': 5398, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 2847885091968}, 'timestamp': '2025-09-10 02:32:23.894466', 'step': 5398, 'epoch': 1} {'type': 'loss', 'content': 0.20106594264507294, 'timestamp': '2025-09-10 02:32:23.897074', 'step': 5399, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 128], 'flops': 3797092544000}, 'timestamp': '2025-09-10 02:32:23.927493', 'step': 5399, 'epoch': 1} {'type': 'loss', 'content': 0.17292217910289764, 'timestamp': '2025-09-10 02:32:23.950881', 'step': 5400, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 3322488817984}, 'timestamp': '2025-09-10 02:32:23.981214', 'step': 5400, 'epoch': 1} {'type': 'loss', 'content': 0.1200261265039444, 'timestamp': '2025-09-10 02:32:23.983576', 'step': 5401, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 2847885091968}, 'timestamp': '2025-09-10 02:32:24.013182', 'step': 5401, 'epoch': 1} {'type': 'loss', 'content': 0.14390012621879578, 'timestamp': '2025-09-10 02:32:24.015633', 'step': 5402, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 144], 'flops': 4271696270016}, 'timestamp': '2025-09-10 02:32:24.049745', 'step': 5402, 'epoch': 1} {'type': 'loss', 'content': 0.16989454627037048, 'timestamp': '2025-09-10 02:32:24.052155', 'step': 5403, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 2847885091968}, 'timestamp': '2025-09-10 02:32:24.081903', 'step': 5403, 'epoch': 1} {'type': 'loss', 'content': 0.10600557923316956, 'timestamp': '2025-09-10 02:32:24.105411', 'step': 5404, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 3322488817984}, 'timestamp': '2025-09-10 02:32:24.136465', 'step': 5404, 'epoch': 1} {'type': 'loss', 'content': 0.11497233808040619, 'timestamp': '2025-09-10 02:32:24.138824', 'step': 5405, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 3322488817984}, 'timestamp': '2025-09-10 02:32:24.169474', 'step': 5405, 'epoch': 1} {'type': 'loss', 'content': 0.21411606669425964, 'timestamp': '2025-09-10 02:32:24.171533', 'step': 5406, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 3322488817984}, 'timestamp': '2025-09-10 02:32:24.201540', 'step': 5406, 'epoch': 1} {'type': 'loss', 'content': 0.15586969256401062, 'timestamp': '2025-09-10 02:32:24.203757', 'step': 5407, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 128], 'flops': 3797092544000}, 'timestamp': '2025-09-10 02:32:24.233723', 'step': 5407, 'epoch': 1} {'type': 'loss', 'content': 0.1324460804462433, 'timestamp': '2025-09-10 02:32:24.257183', 'step': 5408, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 2847885091968}, 'timestamp': '2025-09-10 02:32:24.287695', 'step': 5408, 'epoch': 1} {'type': 'loss', 'content': 0.06733729690313339, 'timestamp': '2025-09-10 02:32:24.289905', 'step': 5409, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 128], 'flops': 3797092544000}, 'timestamp': '2025-09-10 02:32:24.319956', 'step': 5409, 'epoch': 1} {'type': 'loss', 'content': 0.1256936937570572, 'timestamp': '2025-09-10 02:32:24.323503', 'step': 5410, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 3322488817984}, 'timestamp': '2025-09-10 02:32:24.354386', 'step': 5410, 'epoch': 1} {'type': 'loss', 'content': 0.14937476813793182, 'timestamp': '2025-09-10 02:32:24.356248', 'step': 5411, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 160], 'flops': 4746299996032}, 'timestamp': '2025-09-10 02:32:24.385343', 'step': 5411, 'epoch': 1} {'type': 'loss', 'content': 0.21290138363838196, 'timestamp': '2025-09-10 02:32:24.409782', 'step': 5412, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 3322488817984}, 'timestamp': '2025-09-10 02:32:24.440141', 'step': 5412, 'epoch': 1} {'type': 'loss', 'content': 0.12192793935537338, 'timestamp': '2025-09-10 02:32:24.443446', 'step': 5413, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 2847885091968}, 'timestamp': '2025-09-10 02:32:24.477517', 'step': 5413, 'epoch': 1} {'type': 'loss', 'content': 0.09906361997127533, 'timestamp': '2025-09-10 02:32:24.479632', 'step': 5414, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 3322488817984}, 'timestamp': '2025-09-10 02:32:24.508963', 'step': 5414, 'epoch': 1} {'type': 'loss', 'content': 0.14588260650634766, 'timestamp': '2025-09-10 02:32:24.511248', 'step': 5415, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 3322488817984}, 'timestamp': '2025-09-10 02:32:24.541295', 'step': 5415, 'epoch': 1} {'type': 'loss', 'content': 0.24378982186317444, 'timestamp': '2025-09-10 02:32:24.564339', 'step': 5416, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 128], 'flops': 3797092544000}, 'timestamp': '2025-09-10 02:32:24.595063', 'step': 5416, 'epoch': 1} {'type': 'loss', 'content': 0.15177036821842194, 'timestamp': '2025-09-10 02:32:24.597267', 'step': 5417, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 144], 'flops': 4271696270016}, 'timestamp': '2025-09-10 02:32:24.628134', 'step': 5417, 'epoch': 1} {'type': 'loss', 'content': 0.19548757374286652, 'timestamp': '2025-09-10 02:32:24.631363', 'step': 5418, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 3322488817984}, 'timestamp': '2025-09-10 02:32:24.665414', 'step': 5418, 'epoch': 1} {'type': 'loss', 'content': 0.102386973798275, 'timestamp': '2025-09-10 02:32:24.667519', 'step': 5419, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 2847885091968}, 'timestamp': '2025-09-10 02:32:24.700408', 'step': 5419, 'epoch': 1} {'type': 'loss', 'content': 0.13151976466178894, 'timestamp': '2025-09-10 02:32:24.723928', 'step': 5420, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 3322488817984}, 'timestamp': '2025-09-10 02:32:24.755597', 'step': 5420, 'epoch': 1} {'type': 'loss', 'content': 0.130001500248909, 'timestamp': '2025-09-10 02:32:24.757863', 'step': 5421, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 3322488817984}, 'timestamp': '2025-09-10 02:32:24.788194', 'step': 5421, 'epoch': 1} {'type': 'loss', 'content': 0.22008833289146423, 'timestamp': '2025-09-10 02:32:24.790321', 'step': 5422, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 3322488817984}, 'timestamp': '2025-09-10 02:32:24.822950', 'step': 5422, 'epoch': 1} {'type': 'loss', 'content': 0.11312240362167358, 'timestamp': '2025-09-10 02:32:24.825381', 'step': 5423, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 128], 'flops': 3797092544000}, 'timestamp': '2025-09-10 02:32:24.856871', 'step': 5423, 'epoch': 1} {'type': 'loss', 'content': 0.18120981752872467, 'timestamp': '2025-09-10 02:32:24.880491', 'step': 5424, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 144], 'flops': 4271696270016}, 'timestamp': '2025-09-10 02:32:24.911984', 'step': 5424, 'epoch': 1} {'type': 'loss', 'content': 0.1542927771806717, 'timestamp': '2025-09-10 02:32:24.914304', 'step': 5425, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 3322488817984}, 'timestamp': '2025-09-10 02:32:24.944155', 'step': 5425, 'epoch': 1} {'type': 'loss', 'content': 0.10493889451026917, 'timestamp': '2025-09-10 02:32:24.946569', 'step': 5426, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 2847885091968}, 'timestamp': '2025-09-10 02:32:24.977465', 'step': 5426, 'epoch': 1} {'type': 'loss', 'content': 0.09131050109863281, 'timestamp': '2025-09-10 02:32:24.979811', 'step': 5427, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 2847885091968}, 'timestamp': '2025-09-10 02:32:25.010042', 'step': 5427, 'epoch': 1} {'type': 'loss', 'content': 0.22102239727973938, 'timestamp': '2025-09-10 02:32:25.033533', 'step': 5428, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 2847885091968}, 'timestamp': '2025-09-10 02:32:25.063764', 'step': 5428, 'epoch': 1} {'type': 'loss', 'content': 0.0856405720114708, 'timestamp': '2025-09-10 02:32:25.065964', 'step': 5429, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 3322488817984}, 'timestamp': '2025-09-10 02:32:25.095897', 'step': 5429, 'epoch': 1} {'type': 'loss', 'content': 0.09155663102865219, 'timestamp': '2025-09-10 02:32:25.098087', 'step': 5430, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 2847885091968}, 'timestamp': '2025-09-10 02:32:25.130640', 'step': 5430, 'epoch': 1} {'type': 'loss', 'content': 0.13340023159980774, 'timestamp': '2025-09-10 02:32:25.133779', 'step': 5431, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 128], 'flops': 3797092544000}, 'timestamp': '2025-09-10 02:32:25.164449', 'step': 5431, 'epoch': 1} {'type': 'loss', 'content': 0.09944690763950348, 'timestamp': '2025-09-10 02:32:25.188054', 'step': 5432, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 3322488817984}, 'timestamp': '2025-09-10 02:32:25.217853', 'step': 5432, 'epoch': 1} {'type': 'loss', 'content': 0.13746881484985352, 'timestamp': '2025-09-10 02:32:25.220268', 'step': 5433, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 2847885091968}, 'timestamp': '2025-09-10 02:32:25.250610', 'step': 5433, 'epoch': 1} {'type': 'loss', 'content': 0.21834154427051544, 'timestamp': '2025-09-10 02:32:25.252893', 'step': 5434, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 128], 'flops': 3797092544000}, 'timestamp': '2025-09-10 02:32:25.283488', 'step': 5434, 'epoch': 1} {'type': 'loss', 'content': 0.28285714983940125, 'timestamp': '2025-09-10 02:32:25.285667', 'step': 5435, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 128], 'flops': 3797092544000}, 'timestamp': '2025-09-10 02:32:25.315881', 'step': 5435, 'epoch': 1} {'type': 'loss', 'content': 0.12829142808914185, 'timestamp': '2025-09-10 02:32:25.339684', 'step': 5436, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 128], 'flops': 3797092544000}, 'timestamp': '2025-09-10 02:32:25.370019', 'step': 5436, 'epoch': 1} {'type': 'loss', 'content': 0.1541973501443863, 'timestamp': '2025-09-10 02:32:25.372380', 'step': 5437, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 3322488817984}, 'timestamp': '2025-09-10 02:32:25.402396', 'step': 5437, 'epoch': 1} {'type': 'loss', 'content': 0.16863374412059784, 'timestamp': '2025-09-10 02:32:25.405259', 'step': 5438, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 128], 'flops': 3797092544000}, 'timestamp': '2025-09-10 02:32:25.435844', 'step': 5438, 'epoch': 1} {'type': 'loss', 'content': 0.1407601684331894, 'timestamp': '2025-09-10 02:32:25.438683', 'step': 5439, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 128], 'flops': 3797092544000}, 'timestamp': '2025-09-10 02:32:25.469445', 'step': 5439, 'epoch': 1} {'type': 'loss', 'content': 0.15623678267002106, 'timestamp': '2025-09-10 02:32:25.493111', 'step': 5440, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 3322488817984}, 'timestamp': '2025-09-10 02:32:25.522925', 'step': 5440, 'epoch': 1} {'type': 'loss', 'content': 0.20517924427986145, 'timestamp': '2025-09-10 02:32:25.525119', 'step': 5441, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 3322488817984}, 'timestamp': '2025-09-10 02:32:25.554744', 'step': 5441, 'epoch': 1} {'type': 'loss', 'content': 0.14626681804656982, 'timestamp': '2025-09-10 02:32:25.556907', 'step': 5442, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 2847885091968}, 'timestamp': '2025-09-10 02:32:25.586485', 'step': 5442, 'epoch': 1} {'type': 'loss', 'content': 0.15863749384880066, 'timestamp': '2025-09-10 02:32:25.588675', 'step': 5443, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 128], 'flops': 3797092544000}, 'timestamp': '2025-09-10 02:32:25.618614', 'step': 5443, 'epoch': 1} {'type': 'loss', 'content': 0.11397416144609451, 'timestamp': '2025-09-10 02:32:25.642212', 'step': 5444, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 128], 'flops': 3797092544000}, 'timestamp': '2025-09-10 02:32:25.677916', 'step': 5444, 'epoch': 1} {'type': 'loss', 'content': 0.11582006514072418, 'timestamp': '2025-09-10 02:32:25.679921', 'step': 5445, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 3322488817984}, 'timestamp': '2025-09-10 02:32:25.709935', 'step': 5445, 'epoch': 1} {'type': 'loss', 'content': 0.14886856079101562, 'timestamp': '2025-09-10 02:32:25.711770', 'step': 5446, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 3322488817984}, 'timestamp': '2025-09-10 02:32:25.741505', 'step': 5446, 'epoch': 1} {'type': 'loss', 'content': 0.14588119089603424, 'timestamp': '2025-09-10 02:32:25.743665', 'step': 5447, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 2847885091968}, 'timestamp': '2025-09-10 02:32:25.773874', 'step': 5447, 'epoch': 1} {'type': 'loss', 'content': 0.12496784329414368, 'timestamp': '2025-09-10 02:32:25.797265', 'step': 5448, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 3322488817984}, 'timestamp': '2025-09-10 02:32:25.828854', 'step': 5448, 'epoch': 1} {'type': 'loss', 'content': 0.20380078256130219, 'timestamp': '2025-09-10 02:32:25.831146', 'step': 5449, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 144], 'flops': 4271696270016}, 'timestamp': '2025-09-10 02:32:25.861703', 'step': 5449, 'epoch': 1} {'type': 'loss', 'content': 0.12618222832679749, 'timestamp': '2025-09-10 02:32:25.867117', 'step': 5450, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 2847885091968}, 'timestamp': '2025-09-10 02:32:25.909079', 'step': 5450, 'epoch': 1} {'type': 'loss', 'content': 0.25308093428611755, 'timestamp': '2025-09-10 02:32:25.912395', 'step': 5451, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 2847885091968}, 'timestamp': '2025-09-10 02:32:25.944589', 'step': 5451, 'epoch': 1} {'type': 'loss', 'content': 0.12593840062618256, 'timestamp': '2025-09-10 02:32:25.968104', 'step': 5452, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 160], 'flops': 4746299996032}, 'timestamp': '2025-09-10 02:32:25.998521', 'step': 5452, 'epoch': 1} {'type': 'loss', 'content': 0.1618281751871109, 'timestamp': '2025-09-10 02:32:26.001175', 'step': 5453, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 3322488817984}, 'timestamp': '2025-09-10 02:32:26.032966', 'step': 5453, 'epoch': 1} {'type': 'loss', 'content': 0.23726893961429596, 'timestamp': '2025-09-10 02:32:26.034985', 'step': 5454, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 3322488817984}, 'timestamp': '2025-09-10 02:32:26.067478', 'step': 5454, 'epoch': 1} {'type': 'loss', 'content': 0.13478520512580872, 'timestamp': '2025-09-10 02:32:26.069866', 'step': 5455, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 3322488817984}, 'timestamp': '2025-09-10 02:32:26.099456', 'step': 5455, 'epoch': 1} {'type': 'loss', 'content': 0.09856456518173218, 'timestamp': '2025-09-10 02:32:26.123023', 'step': 5456, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 128], 'flops': 3797092544000}, 'timestamp': '2025-09-10 02:32:26.153275', 'step': 5456, 'epoch': 1} {'type': 'loss', 'content': 0.11156002432107925, 'timestamp': '2025-09-10 02:32:26.155153', 'step': 5457, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 128], 'flops': 3797092544000}, 'timestamp': '2025-09-10 02:32:26.184988', 'step': 5457, 'epoch': 1} {'type': 'loss', 'content': 0.2561490833759308, 'timestamp': '2025-09-10 02:32:26.187255', 'step': 5458, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 3322488817984}, 'timestamp': '2025-09-10 02:32:26.217181', 'step': 5458, 'epoch': 1} {'type': 'loss', 'content': 0.12974147498607635, 'timestamp': '2025-09-10 02:32:26.219232', 'step': 5459, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 2847885091968}, 'timestamp': '2025-09-10 02:32:26.249252', 'step': 5459, 'epoch': 1} {'type': 'loss', 'content': 0.18997056782245636, 'timestamp': '2025-09-10 02:32:26.272730', 'step': 5460, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 144], 'flops': 4271696270016}, 'timestamp': '2025-09-10 02:32:26.304946', 'step': 5460, 'epoch': 1} {'type': 'loss', 'content': 0.14207452535629272, 'timestamp': '2025-09-10 02:32:26.308288', 'step': 5461, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 3322488817984}, 'timestamp': '2025-09-10 02:32:26.338869', 'step': 5461, 'epoch': 1} {'type': 'loss', 'content': 0.11861515045166016, 'timestamp': '2025-09-10 02:32:26.340584', 'step': 5462, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 2847885091968}, 'timestamp': '2025-09-10 02:32:26.377264', 'step': 5462, 'epoch': 1} {'type': 'loss', 'content': 0.1334177553653717, 'timestamp': '2025-09-10 02:32:26.378808', 'step': 5463, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 3322488817984}, 'timestamp': '2025-09-10 02:32:26.409852', 'step': 5463, 'epoch': 1} {'type': 'loss', 'content': 0.08068292587995529, 'timestamp': '2025-09-10 02:32:26.433413', 'step': 5464, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 2847885091968}, 'timestamp': '2025-09-10 02:32:26.463384', 'step': 5464, 'epoch': 1} {'type': 'loss', 'content': 0.07937930524349213, 'timestamp': '2025-09-10 02:32:26.465555', 'step': 5465, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 2847885091968}, 'timestamp': '2025-09-10 02:32:26.494969', 'step': 5465, 'epoch': 1} {'type': 'loss', 'content': 0.18755154311656952, 'timestamp': '2025-09-10 02:32:26.496665', 'step': 5466, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 128], 'flops': 3797092544000}, 'timestamp': '2025-09-10 02:32:26.526530', 'step': 5466, 'epoch': 1} {'type': 'loss', 'content': 0.15082316100597382, 'timestamp': '2025-09-10 02:32:26.528611', 'step': 5467, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 2847885091968}, 'timestamp': '2025-09-10 02:32:26.559161', 'step': 5467, 'epoch': 1} {'type': 'loss', 'content': 0.21195350587368011, 'timestamp': '2025-09-10 02:32:26.587946', 'step': 5468, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 144], 'flops': 4271696270016}, 'timestamp': '2025-09-10 02:32:26.620210', 'step': 5468, 'epoch': 1} {'type': 'loss', 'content': 0.10526899993419647, 'timestamp': '2025-09-10 02:32:26.622160', 'step': 5469, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 2847885091968}, 'timestamp': '2025-09-10 02:32:26.652857', 'step': 5469, 'epoch': 1} {'type': 'loss', 'content': 0.21976757049560547, 'timestamp': '2025-09-10 02:32:26.655463', 'step': 5470, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 128], 'flops': 3797092544000}, 'timestamp': '2025-09-10 02:32:26.688615', 'step': 5470, 'epoch': 1} {'type': 'loss', 'content': 0.13264316320419312, 'timestamp': '2025-09-10 02:32:26.690790', 'step': 5471, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 2847885091968}, 'timestamp': '2025-09-10 02:32:26.720669', 'step': 5471, 'epoch': 1} {'type': 'loss', 'content': 0.13546350598335266, 'timestamp': '2025-09-10 02:32:26.744206', 'step': 5472, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 128], 'flops': 3797092544000}, 'timestamp': '2025-09-10 02:32:26.778227', 'step': 5472, 'epoch': 1} {'type': 'loss', 'content': 0.15011169016361237, 'timestamp': '2025-09-10 02:32:26.782298', 'step': 5473, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 2847885091968}, 'timestamp': '2025-09-10 02:32:26.812284', 'step': 5473, 'epoch': 1} {'type': 'loss', 'content': 0.18308162689208984, 'timestamp': '2025-09-10 02:32:26.814178', 'step': 5474, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 3322488817984}, 'timestamp': '2025-09-10 02:32:26.845007', 'step': 5474, 'epoch': 1} {'type': 'loss', 'content': 0.09494849294424057, 'timestamp': '2025-09-10 02:32:26.846990', 'step': 5475, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 128], 'flops': 3797092544000}, 'timestamp': '2025-09-10 02:32:26.878928', 'step': 5475, 'epoch': 1} {'type': 'loss', 'content': 0.10309014469385147, 'timestamp': '2025-09-10 02:32:26.902243', 'step': 5476, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 128], 'flops': 3797092544000}, 'timestamp': '2025-09-10 02:32:26.942096', 'step': 5476, 'epoch': 1} {'type': 'loss', 'content': 0.21964630484580994, 'timestamp': '2025-09-10 02:32:26.944332', 'step': 5477, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 128], 'flops': 3797092544000}, 'timestamp': '2025-09-10 02:32:26.974465', 'step': 5477, 'epoch': 1} {'type': 'loss', 'content': 0.23829062283039093, 'timestamp': '2025-09-10 02:32:26.976586', 'step': 5478, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 3322488817984}, 'timestamp': '2025-09-10 02:32:27.007195', 'step': 5478, 'epoch': 1} {'type': 'loss', 'content': 0.15947185456752777, 'timestamp': '2025-09-10 02:32:27.009235', 'step': 5479, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 3322488817984}, 'timestamp': '2025-09-10 02:32:27.039314', 'step': 5479, 'epoch': 1} {'type': 'loss', 'content': 0.11464107036590576, 'timestamp': '2025-09-10 02:32:27.062695', 'step': 5480, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 3322488817984}, 'timestamp': '2025-09-10 02:32:27.096829', 'step': 5480, 'epoch': 1} {'type': 'loss', 'content': 0.2273910641670227, 'timestamp': '2025-09-10 02:32:27.103333', 'step': 5481, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 2847885091968}, 'timestamp': '2025-09-10 02:32:27.137673', 'step': 5481, 'epoch': 1} {'type': 'loss', 'content': 0.11478732526302338, 'timestamp': '2025-09-10 02:32:27.139911', 'step': 5482, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 2847885091968}, 'timestamp': '2025-09-10 02:32:27.169971', 'step': 5482, 'epoch': 1} {'type': 'loss', 'content': 0.13476771116256714, 'timestamp': '2025-09-10 02:32:27.173150', 'step': 5483, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 128], 'flops': 3797092544000}, 'timestamp': '2025-09-10 02:32:27.205415', 'step': 5483, 'epoch': 1} {'type': 'loss', 'content': 0.19112804532051086, 'timestamp': '2025-09-10 02:32:27.231001', 'step': 5484, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 2847885091968}, 'timestamp': '2025-09-10 02:32:27.260939', 'step': 5484, 'epoch': 1} {'type': 'loss', 'content': 0.10825655609369278, 'timestamp': '2025-09-10 02:32:27.263104', 'step': 5485, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 128], 'flops': 3797092544000}, 'timestamp': '2025-09-10 02:32:27.293264', 'step': 5485, 'epoch': 1} {'type': 'loss', 'content': 0.21128787100315094, 'timestamp': '2025-09-10 02:32:27.295511', 'step': 5486, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 128], 'flops': 3797092544000}, 'timestamp': '2025-09-10 02:32:27.325864', 'step': 5486, 'epoch': 1} {'type': 'loss', 'content': 0.13644257187843323, 'timestamp': '2025-09-10 02:32:27.328621', 'step': 5487, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 3322488817984}, 'timestamp': '2025-09-10 02:32:27.359586', 'step': 5487, 'epoch': 1} {'type': 'loss', 'content': 0.14995568990707397, 'timestamp': '2025-09-10 02:32:27.382974', 'step': 5488, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 3322488817984}, 'timestamp': '2025-09-10 02:32:27.413628', 'step': 5488, 'epoch': 1} {'type': 'loss', 'content': 0.09350764751434326, 'timestamp': '2025-09-10 02:32:27.416090', 'step': 5489, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 128], 'flops': 3797092544000}, 'timestamp': '2025-09-10 02:32:27.447819', 'step': 5489, 'epoch': 1} {'type': 'loss', 'content': 0.16214637458324432, 'timestamp': '2025-09-10 02:32:27.450516', 'step': 5490, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 2847885091968}, 'timestamp': '2025-09-10 02:32:27.482819', 'step': 5490, 'epoch': 1} {'type': 'loss', 'content': 0.07200122624635696, 'timestamp': '2025-09-10 02:32:27.485876', 'step': 5491, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 3322488817984}, 'timestamp': '2025-09-10 02:32:27.517967', 'step': 5491, 'epoch': 1} {'type': 'loss', 'content': 0.10066942125558853, 'timestamp': '2025-09-10 02:32:27.542678', 'step': 5492, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 3322488817984}, 'timestamp': '2025-09-10 02:32:27.586054', 'step': 5492, 'epoch': 1} {'type': 'loss', 'content': 0.1790463775396347, 'timestamp': '2025-09-10 02:32:27.589255', 'step': 5493, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 144], 'flops': 4271696270016}, 'timestamp': '2025-09-10 02:32:27.620987', 'step': 5493, 'epoch': 1} {'type': 'loss', 'content': 0.12559272348880768, 'timestamp': '2025-09-10 02:32:27.623646', 'step': 5494, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 2847885091968}, 'timestamp': '2025-09-10 02:32:27.655587', 'step': 5494, 'epoch': 1} {'type': 'loss', 'content': 0.15275900065898895, 'timestamp': '2025-09-10 02:32:27.658238', 'step': 5495, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 3322488817984}, 'timestamp': '2025-09-10 02:32:27.694304', 'step': 5495, 'epoch': 1} {'type': 'loss', 'content': 0.22604788839817047, 'timestamp': '2025-09-10 02:32:27.717890', 'step': 5496, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 3322488817984}, 'timestamp': '2025-09-10 02:32:27.749010', 'step': 5496, 'epoch': 1} {'type': 'loss', 'content': 0.13395726680755615, 'timestamp': '2025-09-10 02:32:27.751273', 'step': 5497, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 2847885091968}, 'timestamp': '2025-09-10 02:32:27.781027', 'step': 5497, 'epoch': 1} {'type': 'loss', 'content': 0.09793227165937424, 'timestamp': '2025-09-10 02:32:27.783379', 'step': 5498, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 3322488817984}, 'timestamp': '2025-09-10 02:32:27.813714', 'step': 5498, 'epoch': 1} {'type': 'loss', 'content': 0.17998693883419037, 'timestamp': '2025-09-10 02:32:27.816095', 'step': 5499, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 160], 'flops': 4746299996032}, 'timestamp': '2025-09-10 02:32:27.846384', 'step': 5499, 'epoch': 1} {'type': 'loss', 'content': 0.1867612898349762, 'timestamp': '2025-09-10 02:32:27.869982', 'step': 5500, 'epoch': 1} {'type': 'info', 'content': 'Checkpoint saved at step 5500', 'timestamp': '2025-09-10 02:32:32.472739', 'step': 5500, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 3322488817984}, 'timestamp': '2025-09-10 02:32:32.504866', 'step': 5500, 'epoch': 1} {'type': 'loss', 'content': 0.18050813674926758, 'timestamp': '2025-09-10 02:32:32.507231', 'step': 5501, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 3322488817984}, 'timestamp': '2025-09-10 02:32:32.538398', 'step': 5501, 'epoch': 1} {'type': 'loss', 'content': 0.2577119767665863, 'timestamp': '2025-09-10 02:32:32.540620', 'step': 5502, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 2847885091968}, 'timestamp': '2025-09-10 02:32:32.571168', 'step': 5502, 'epoch': 1} {'type': 'loss', 'content': 0.02397868037223816, 'timestamp': '2025-09-10 02:32:32.573468', 'step': 5503, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 2847885091968}, 'timestamp': '2025-09-10 02:32:32.604376', 'step': 5503, 'epoch': 1} {'type': 'loss', 'content': 0.12035542726516724, 'timestamp': '2025-09-10 02:32:32.628432', 'step': 5504, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 160], 'flops': 4746299996032}, 'timestamp': '2025-09-10 02:32:32.659741', 'step': 5504, 'epoch': 1} {'type': 'loss', 'content': 0.12073953449726105, 'timestamp': '2025-09-10 02:32:32.662339', 'step': 5505, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 176], 'flops': 5220903722048}, 'timestamp': '2025-09-10 02:32:32.693136', 'step': 5505, 'epoch': 1} {'type': 'loss', 'content': 0.19796563684940338, 'timestamp': '2025-09-10 02:32:32.697256', 'step': 5506, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 3322488817984}, 'timestamp': '2025-09-10 02:32:32.727554', 'step': 5506, 'epoch': 1} {'type': 'loss', 'content': 0.19643829762935638, 'timestamp': '2025-09-10 02:32:32.730570', 'step': 5507, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 2847885091968}, 'timestamp': '2025-09-10 02:32:32.761201', 'step': 5507, 'epoch': 1} {'type': 'loss', 'content': 0.15505540370941162, 'timestamp': '2025-09-10 02:32:32.784709', 'step': 5508, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 2847885091968}, 'timestamp': '2025-09-10 02:32:32.815177', 'step': 5508, 'epoch': 1} {'type': 'loss', 'content': 0.16365370154380798, 'timestamp': '2025-09-10 02:32:32.817648', 'step': 5509, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 2847885091968}, 'timestamp': '2025-09-10 02:32:32.848201', 'step': 5509, 'epoch': 1} {'type': 'loss', 'content': 0.18021391332149506, 'timestamp': '2025-09-10 02:32:32.850533', 'step': 5510, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 128], 'flops': 3797092544000}, 'timestamp': '2025-09-10 02:32:32.881489', 'step': 5510, 'epoch': 1} {'type': 'loss', 'content': 0.16912591457366943, 'timestamp': '2025-09-10 02:32:32.884230', 'step': 5511, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 128], 'flops': 3797092544000}, 'timestamp': '2025-09-10 02:32:32.914408', 'step': 5511, 'epoch': 1} {'type': 'loss', 'content': 0.11992253363132477, 'timestamp': '2025-09-10 02:32:32.937989', 'step': 5512, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 80], 'flops': 2373281365952}, 'timestamp': '2025-09-10 02:32:32.969333', 'step': 5512, 'epoch': 1} {'type': 'loss', 'content': 0.20895396173000336, 'timestamp': '2025-09-10 02:32:32.971620', 'step': 5513, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 2847885091968}, 'timestamp': '2025-09-10 02:32:33.002270', 'step': 5513, 'epoch': 1} {'type': 'loss', 'content': 0.13635845482349396, 'timestamp': '2025-09-10 02:32:33.004693', 'step': 5514, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 144], 'flops': 4271696270016}, 'timestamp': '2025-09-10 02:32:33.035772', 'step': 5514, 'epoch': 1} {'type': 'loss', 'content': 0.14414405822753906, 'timestamp': '2025-09-10 02:32:33.038270', 'step': 5515, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 2847885091968}, 'timestamp': '2025-09-10 02:32:33.069154', 'step': 5515, 'epoch': 1} {'type': 'loss', 'content': 0.07793629169464111, 'timestamp': '2025-09-10 02:32:33.092757', 'step': 5516, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 144], 'flops': 4271696270016}, 'timestamp': '2025-09-10 02:32:33.123468', 'step': 5516, 'epoch': 1} {'type': 'loss', 'content': 0.12331706285476685, 'timestamp': '2025-09-10 02:32:33.125691', 'step': 5517, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 128], 'flops': 3797092544000}, 'timestamp': '2025-09-10 02:32:33.158335', 'step': 5517, 'epoch': 1} {'type': 'loss', 'content': 0.18030309677124023, 'timestamp': '2025-09-10 02:32:33.160946', 'step': 5518, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 3322488817984}, 'timestamp': '2025-09-10 02:32:33.192721', 'step': 5518, 'epoch': 1} {'type': 'loss', 'content': 0.10280487686395645, 'timestamp': '2025-09-10 02:32:33.195110', 'step': 5519, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 2847885091968}, 'timestamp': '2025-09-10 02:32:33.225207', 'step': 5519, 'epoch': 1} {'type': 'loss', 'content': 0.09790704399347305, 'timestamp': '2025-09-10 02:32:33.248552', 'step': 5520, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 128], 'flops': 3797092544000}, 'timestamp': '2025-09-10 02:32:33.279758', 'step': 5520, 'epoch': 1} {'type': 'loss', 'content': 0.17062462866306305, 'timestamp': '2025-09-10 02:32:33.281676', 'step': 5521, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 3322488817984}, 'timestamp': '2025-09-10 02:32:33.311428', 'step': 5521, 'epoch': 1} {'type': 'loss', 'content': 0.09813455492258072, 'timestamp': '2025-09-10 02:32:33.313344', 'step': 5522, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 80], 'flops': 2373281365952}, 'timestamp': '2025-09-10 02:32:33.343065', 'step': 5522, 'epoch': 1} {'type': 'loss', 'content': 0.14003585278987885, 'timestamp': '2025-09-10 02:32:33.345465', 'step': 5523, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 3322488817984}, 'timestamp': '2025-09-10 02:32:33.375981', 'step': 5523, 'epoch': 1} {'type': 'loss', 'content': 0.13419461250305176, 'timestamp': '2025-09-10 02:32:33.399747', 'step': 5524, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 3322488817984}, 'timestamp': '2025-09-10 02:32:33.430084', 'step': 5524, 'epoch': 1} {'type': 'loss', 'content': 0.0975419133901596, 'timestamp': '2025-09-10 02:32:33.433425', 'step': 5525, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 208], 'flops': 6170111174080}, 'timestamp': '2025-09-10 02:32:33.470012', 'step': 5525, 'epoch': 1} {'type': 'loss', 'content': 0.20923788845539093, 'timestamp': '2025-09-10 02:32:33.476641', 'step': 5526, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 176], 'flops': 5220903722048}, 'timestamp': '2025-09-10 02:32:33.511719', 'step': 5526, 'epoch': 1} {'type': 'loss', 'content': 0.1437995433807373, 'timestamp': '2025-09-10 02:32:33.516069', 'step': 5527, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 128], 'flops': 3797092544000}, 'timestamp': '2025-09-10 02:32:33.550985', 'step': 5527, 'epoch': 1} {'type': 'loss', 'content': 0.15593788027763367, 'timestamp': '2025-09-10 02:32:33.575232', 'step': 5528, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 2847885091968}, 'timestamp': '2025-09-10 02:32:33.615430', 'step': 5528, 'epoch': 1} {'type': 'loss', 'content': 0.08112671226263046, 'timestamp': '2025-09-10 02:32:33.619095', 'step': 5529, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 3322488817984}, 'timestamp': '2025-09-10 02:32:33.654524', 'step': 5529, 'epoch': 1} {'type': 'loss', 'content': 0.14072489738464355, 'timestamp': '2025-09-10 02:32:33.657819', 'step': 5530, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 2847885091968}, 'timestamp': '2025-09-10 02:32:33.692435', 'step': 5530, 'epoch': 1} {'type': 'loss', 'content': 0.1443309634923935, 'timestamp': '2025-09-10 02:32:33.695460', 'step': 5531, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 2847885091968}, 'timestamp': '2025-09-10 02:32:33.725491', 'step': 5531, 'epoch': 1} {'type': 'loss', 'content': 0.2047986090183258, 'timestamp': '2025-09-10 02:32:33.748990', 'step': 5532, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 3322488817984}, 'timestamp': '2025-09-10 02:32:33.780880', 'step': 5532, 'epoch': 1} {'type': 'loss', 'content': 0.1582900583744049, 'timestamp': '2025-09-10 02:32:33.784102', 'step': 5533, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 3322488817984}, 'timestamp': '2025-09-10 02:32:33.814528', 'step': 5533, 'epoch': 1} {'type': 'loss', 'content': 0.2571098804473877, 'timestamp': '2025-09-10 02:32:33.816659', 'step': 5534, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 192], 'flops': 5695507448064}, 'timestamp': '2025-09-10 02:32:33.847129', 'step': 5534, 'epoch': 1} {'type': 'loss', 'content': 0.16414447128772736, 'timestamp': '2025-09-10 02:32:33.851634', 'step': 5535, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 2847885091968}, 'timestamp': '2025-09-10 02:32:33.882284', 'step': 5535, 'epoch': 1} {'type': 'loss', 'content': 0.15683485567569733, 'timestamp': '2025-09-10 02:32:33.905601', 'step': 5536, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 2847885091968}, 'timestamp': '2025-09-10 02:32:33.936180', 'step': 5536, 'epoch': 1} {'type': 'loss', 'content': 0.14211149513721466, 'timestamp': '2025-09-10 02:32:33.938556', 'step': 5537, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 3322488817984}, 'timestamp': '2025-09-10 02:32:33.973275', 'step': 5537, 'epoch': 1} {'type': 'loss', 'content': 0.20900003612041473, 'timestamp': '2025-09-10 02:32:33.975544', 'step': 5538, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 160], 'flops': 4746299996032}, 'timestamp': '2025-09-10 02:32:34.005734', 'step': 5538, 'epoch': 1} {'type': 'loss', 'content': 0.18705199658870697, 'timestamp': '2025-09-10 02:32:34.008858', 'step': 5539, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 144], 'flops': 4271696270016}, 'timestamp': '2025-09-10 02:32:34.042814', 'step': 5539, 'epoch': 1} {'type': 'loss', 'content': 0.15964333713054657, 'timestamp': '2025-09-10 02:32:34.066630', 'step': 5540, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 160], 'flops': 4746299996032}, 'timestamp': '2025-09-10 02:32:34.097789', 'step': 5540, 'epoch': 1} {'type': 'loss', 'content': 0.10785768926143646, 'timestamp': '2025-09-10 02:32:34.102353', 'step': 5541, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 2847885091968}, 'timestamp': '2025-09-10 02:32:34.134022', 'step': 5541, 'epoch': 1} {'type': 'loss', 'content': 0.14542995393276215, 'timestamp': '2025-09-10 02:32:34.136242', 'step': 5542, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 128], 'flops': 3797092544000}, 'timestamp': '2025-09-10 02:32:34.166755', 'step': 5542, 'epoch': 1} {'type': 'loss', 'content': 0.11958662420511246, 'timestamp': '2025-09-10 02:32:34.169239', 'step': 5543, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 3322488817984}, 'timestamp': '2025-09-10 02:32:34.200499', 'step': 5543, 'epoch': 1} {'type': 'loss', 'content': 0.17705677449703217, 'timestamp': '2025-09-10 02:32:34.224280', 'step': 5544, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 128], 'flops': 3797092544000}, 'timestamp': '2025-09-10 02:32:34.257314', 'step': 5544, 'epoch': 1} {'type': 'loss', 'content': 0.10412374883890152, 'timestamp': '2025-09-10 02:32:34.259702', 'step': 5545, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 3322488817984}, 'timestamp': '2025-09-10 02:32:34.289839', 'step': 5545, 'epoch': 1} {'type': 'loss', 'content': 0.20989707112312317, 'timestamp': '2025-09-10 02:32:34.291817', 'step': 5546, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 3322488817984}, 'timestamp': '2025-09-10 02:32:34.323682', 'step': 5546, 'epoch': 1} {'type': 'loss', 'content': 0.13575784862041473, 'timestamp': '2025-09-10 02:32:34.327691', 'step': 5547, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 2847885091968}, 'timestamp': '2025-09-10 02:32:34.360318', 'step': 5547, 'epoch': 1} {'type': 'loss', 'content': 0.19734999537467957, 'timestamp': '2025-09-10 02:32:34.383873', 'step': 5548, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 128], 'flops': 3797092544000}, 'timestamp': '2025-09-10 02:32:34.415085', 'step': 5548, 'epoch': 1} {'type': 'loss', 'content': 0.16031426191329956, 'timestamp': '2025-09-10 02:32:34.417184', 'step': 5549, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 3322488817984}, 'timestamp': '2025-09-10 02:32:34.451392', 'step': 5549, 'epoch': 1} {'type': 'loss', 'content': 0.1678810566663742, 'timestamp': '2025-09-10 02:32:34.454471', 'step': 5550, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 3322488817984}, 'timestamp': '2025-09-10 02:32:34.492878', 'step': 5550, 'epoch': 1} {'type': 'loss', 'content': 0.29923179745674133, 'timestamp': '2025-09-10 02:32:34.498398', 'step': 5551, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 2847885091968}, 'timestamp': '2025-09-10 02:32:34.545158', 'step': 5551, 'epoch': 1} {'type': 'loss', 'content': 0.22418798506259918, 'timestamp': '2025-09-10 02:32:34.570857', 'step': 5552, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 3322488817984}, 'timestamp': '2025-09-10 02:32:34.611092', 'step': 5552, 'epoch': 1} {'type': 'loss', 'content': 0.2511746883392334, 'timestamp': '2025-09-10 02:32:34.613928', 'step': 5553, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 2847885091968}, 'timestamp': '2025-09-10 02:32:34.649898', 'step': 5553, 'epoch': 1} {'type': 'loss', 'content': 0.16910330951213837, 'timestamp': '2025-09-10 02:32:34.654769', 'step': 5554, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 128], 'flops': 3797092544000}, 'timestamp': '2025-09-10 02:32:34.689931', 'step': 5554, 'epoch': 1} {'type': 'loss', 'content': 0.12196037918329239, 'timestamp': '2025-09-10 02:32:34.693813', 'step': 5555, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 128], 'flops': 3797092544000}, 'timestamp': '2025-09-10 02:32:34.734962', 'step': 5555, 'epoch': 1} {'type': 'loss', 'content': 0.13065116107463837, 'timestamp': '2025-09-10 02:32:34.758979', 'step': 5556, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 3322488817984}, 'timestamp': '2025-09-10 02:32:34.797863', 'step': 5556, 'epoch': 1} {'type': 'loss', 'content': 0.21682094037532806, 'timestamp': '2025-09-10 02:32:34.809587', 'step': 5557, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 144], 'flops': 4271696270016}, 'timestamp': '2025-09-10 02:32:34.863223', 'step': 5557, 'epoch': 1} {'type': 'loss', 'content': 0.17866507172584534, 'timestamp': '2025-09-10 02:32:34.872515', 'step': 5558, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 128], 'flops': 3797092544000}, 'timestamp': '2025-09-10 02:32:34.908844', 'step': 5558, 'epoch': 1} {'type': 'loss', 'content': 0.1332315057516098, 'timestamp': '2025-09-10 02:32:34.911756', 'step': 5559, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 3322488817984}, 'timestamp': '2025-09-10 02:32:34.950054', 'step': 5559, 'epoch': 1} {'type': 'loss', 'content': 0.07789010554552078, 'timestamp': '2025-09-10 02:32:34.976894', 'step': 5560, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 128], 'flops': 3797092544000}, 'timestamp': '2025-09-10 02:32:35.012079', 'step': 5560, 'epoch': 1} {'type': 'loss', 'content': 0.09441297501325607, 'timestamp': '2025-09-10 02:32:35.025503', 'step': 5561, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 2847885091968}, 'timestamp': '2025-09-10 02:32:35.079161', 'step': 5561, 'epoch': 1} {'type': 'loss', 'content': 0.12033476680517197, 'timestamp': '2025-09-10 02:32:35.083572', 'step': 5562, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 176], 'flops': 5220903722048}, 'timestamp': '2025-09-10 02:32:35.121805', 'step': 5562, 'epoch': 1} {'type': 'loss', 'content': 0.24791602790355682, 'timestamp': '2025-09-10 02:32:35.129819', 'step': 5563, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 192], 'flops': 5695507448064}, 'timestamp': '2025-09-10 02:32:35.162409', 'step': 5563, 'epoch': 1} {'type': 'loss', 'content': 0.138741135597229, 'timestamp': '2025-09-10 02:32:35.187867', 'step': 5564, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 176], 'flops': 5220903722048}, 'timestamp': '2025-09-10 02:32:35.221851', 'step': 5564, 'epoch': 1} {'type': 'loss', 'content': 0.15477952361106873, 'timestamp': '2025-09-10 02:32:35.227866', 'step': 5565, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 3322488817984}, 'timestamp': '2025-09-10 02:32:35.260797', 'step': 5565, 'epoch': 1} {'type': 'loss', 'content': 0.26799654960632324, 'timestamp': '2025-09-10 02:32:35.267245', 'step': 5566, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 3322488817984}, 'timestamp': '2025-09-10 02:32:35.300431', 'step': 5566, 'epoch': 1} {'type': 'loss', 'content': 0.15009860694408417, 'timestamp': '2025-09-10 02:32:35.305396', 'step': 5567, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 144], 'flops': 4271696270016}, 'timestamp': '2025-09-10 02:32:35.342466', 'step': 5567, 'epoch': 1} {'type': 'loss', 'content': 0.16547176241874695, 'timestamp': '2025-09-10 02:32:35.367430', 'step': 5568, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 3322488817984}, 'timestamp': '2025-09-10 02:32:35.406397', 'step': 5568, 'epoch': 1} {'type': 'loss', 'content': 0.23756083846092224, 'timestamp': '2025-09-10 02:32:35.409429', 'step': 5569, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 128], 'flops': 3797092544000}, 'timestamp': '2025-09-10 02:32:35.447242', 'step': 5569, 'epoch': 1} {'type': 'loss', 'content': 0.1394704282283783, 'timestamp': '2025-09-10 02:32:35.452766', 'step': 5570, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 128], 'flops': 3797092544000}, 'timestamp': '2025-09-10 02:32:35.488679', 'step': 5570, 'epoch': 1} {'type': 'loss', 'content': 0.17900952696800232, 'timestamp': '2025-09-10 02:32:35.491123', 'step': 5571, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 160], 'flops': 4746299996032}, 'timestamp': '2025-09-10 02:32:35.525427', 'step': 5571, 'epoch': 1} {'type': 'loss', 'content': 0.10950663685798645, 'timestamp': '2025-09-10 02:32:35.549900', 'step': 5572, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 3322488817984}, 'timestamp': '2025-09-10 02:32:35.584312', 'step': 5572, 'epoch': 1} {'type': 'loss', 'content': 0.08140315860509872, 'timestamp': '2025-09-10 02:32:35.588199', 'step': 5573, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 2847885091968}, 'timestamp': '2025-09-10 02:32:35.620018', 'step': 5573, 'epoch': 1} {'type': 'loss', 'content': 0.1437874138355255, 'timestamp': '2025-09-10 02:32:35.622345', 'step': 5574, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 176], 'flops': 5220903722048}, 'timestamp': '2025-09-10 02:32:35.657480', 'step': 5574, 'epoch': 1} {'type': 'loss', 'content': 0.11079186946153641, 'timestamp': '2025-09-10 02:32:35.661928', 'step': 5575, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 128], 'flops': 3797092544000}, 'timestamp': '2025-09-10 02:32:35.698586', 'step': 5575, 'epoch': 1} {'type': 'loss', 'content': 0.10754451900720596, 'timestamp': '2025-09-10 02:32:35.725947', 'step': 5576, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 128], 'flops': 3797092544000}, 'timestamp': '2025-09-10 02:32:35.779617', 'step': 5576, 'epoch': 1} {'type': 'loss', 'content': 0.1411438286304474, 'timestamp': '2025-09-10 02:32:35.791348', 'step': 5577, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 2847885091968}, 'timestamp': '2025-09-10 02:32:35.845523', 'step': 5577, 'epoch': 1} {'type': 'loss', 'content': 0.13162477314472198, 'timestamp': '2025-09-10 02:32:35.851993', 'step': 5578, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 128], 'flops': 3797092544000}, 'timestamp': '2025-09-10 02:32:35.896096', 'step': 5578, 'epoch': 1} {'type': 'loss', 'content': 0.11992056667804718, 'timestamp': '2025-09-10 02:32:35.900678', 'step': 5579, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 80], 'flops': 2373281365952}, 'timestamp': '2025-09-10 02:32:35.937315', 'step': 5579, 'epoch': 1} {'type': 'loss', 'content': 0.16762672364711761, 'timestamp': '2025-09-10 02:32:35.963398', 'step': 5580, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 3322488817984}, 'timestamp': '2025-09-10 02:32:36.005365', 'step': 5580, 'epoch': 1} {'type': 'loss', 'content': 0.10156523436307907, 'timestamp': '2025-09-10 02:32:36.012673', 'step': 5581, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 2847885091968}, 'timestamp': '2025-09-10 02:32:36.054810', 'step': 5581, 'epoch': 1} {'type': 'loss', 'content': 0.1367892622947693, 'timestamp': '2025-09-10 02:32:36.070846', 'step': 5582, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 3322488817984}, 'timestamp': '2025-09-10 02:32:36.113233', 'step': 5582, 'epoch': 1} {'type': 'loss', 'content': 0.12208042293787003, 'timestamp': '2025-09-10 02:32:36.118805', 'step': 5583, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 3322488817984}, 'timestamp': '2025-09-10 02:32:36.158553', 'step': 5583, 'epoch': 1} {'type': 'loss', 'content': 0.15942302346229553, 'timestamp': '2025-09-10 02:32:36.184203', 'step': 5584, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 128], 'flops': 3797092544000}, 'timestamp': '2025-09-10 02:32:36.220971', 'step': 5584, 'epoch': 1} {'type': 'loss', 'content': 0.2521514892578125, 'timestamp': '2025-09-10 02:32:36.227379', 'step': 5585, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 3322488817984}, 'timestamp': '2025-09-10 02:32:36.265917', 'step': 5585, 'epoch': 1} {'type': 'loss', 'content': 0.06934542953968048, 'timestamp': '2025-09-10 02:32:36.270894', 'step': 5586, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 80], 'flops': 2373281365952}, 'timestamp': '2025-09-10 02:32:36.304808', 'step': 5586, 'epoch': 1} {'type': 'loss', 'content': 0.09268844127655029, 'timestamp': '2025-09-10 02:32:36.318529', 'step': 5587, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 2847885091968}, 'timestamp': '2025-09-10 02:32:36.365661', 'step': 5587, 'epoch': 1} {'type': 'loss', 'content': 0.1495865434408188, 'timestamp': '2025-09-10 02:32:36.399431', 'step': 5588, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 3322488817984}, 'timestamp': '2025-09-10 02:32:36.460536', 'step': 5588, 'epoch': 1} {'type': 'loss', 'content': 0.13731162250041962, 'timestamp': '2025-09-10 02:32:36.463554', 'step': 5589, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 2847885091968}, 'timestamp': '2025-09-10 02:32:36.497781', 'step': 5589, 'epoch': 1} {'type': 'loss', 'content': 0.1004517674446106, 'timestamp': '2025-09-10 02:32:36.502434', 'step': 5590, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 3322488817984}, 'timestamp': '2025-09-10 02:32:36.537904', 'step': 5590, 'epoch': 1} {'type': 'loss', 'content': 0.21044397354125977, 'timestamp': '2025-09-10 02:32:36.540920', 'step': 5591, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 144], 'flops': 4271696270016}, 'timestamp': '2025-09-10 02:32:36.577859', 'step': 5591, 'epoch': 1} {'type': 'loss', 'content': 0.07552650570869446, 'timestamp': '2025-09-10 02:32:36.602712', 'step': 5592, 'epoch': 1} {'type': 'flops', 'content': [{'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1582003754624}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1898404492032}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1898404492032}, {'type': 'perplexity', 'in_batch_dim': [8, 144], 'batch_size': 8, 'flops': 2847606704256}, {'type': 'perplexity', 'in_batch_dim': [8, 64], 'batch_size': 8, 'flops': 1265603017216}, {'type': 'perplexity', 'in_batch_dim': [8, 112], 'batch_size': 8, 'flops': 2214805229440}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1898404492032}, {'type': 'perplexity', 'in_batch_dim': [8, 112], 'batch_size': 8, 'flops': 2214805229440}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1898404492032}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1898404492032}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1898404492032}, {'type': 'perplexity', 'in_batch_dim': [8, 112], 'batch_size': 8, 'flops': 2214805229440}, {'type': 'perplexity', 'in_batch_dim': [8, 112], 'batch_size': 8, 'flops': 2214805229440}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1582003754624}, {'type': 'perplexity', 'in_batch_dim': [8, 144], 'batch_size': 8, 'flops': 2847606704256}, {'type': 'perplexity', 'in_batch_dim': [8, 112], 'batch_size': 8, 'flops': 2214805229440}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1582003754624}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1898404492032}, {'type': 'perplexity', 'in_batch_dim': [8, 64], 'batch_size': 8, 'flops': 1265603017216}, {'type': 'perplexity', 'in_batch_dim': [8, 64], 'batch_size': 8, 'flops': 1265603017216}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1898404492032}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1582003754624}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1582003754624}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1582003754624}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1582003754624}, {'type': 'perplexity', 'in_batch_dim': [8, 64], 'batch_size': 8, 'flops': 1265603017216}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1898404492032}, {'type': 'perplexity', 'in_batch_dim': [8, 64], 'batch_size': 8, 'flops': 1265603017216}, {'type': 'perplexity', 'in_batch_dim': [8, 64], 'batch_size': 8, 'flops': 1265603017216}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1582003754624}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1582003754624}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1582003754624}, {'type': 'perplexity', 'in_batch_dim': [8, 112], 'batch_size': 8, 'flops': 2214805229440}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1582003754624}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1582003754624}, {'type': 'perplexity', 'in_batch_dim': [8, 160], 'batch_size': 8, 'flops': 3164007441664}, {'type': 'perplexity', 'in_batch_dim': [8, 64], 'batch_size': 8, 'flops': 1265603017216}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1898404492032}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1898404492032}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1898404492032}, {'type': 'perplexity', 'in_batch_dim': [8, 64], 'batch_size': 8, 'flops': 1265603017216}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1582003754624}, {'type': 'perplexity', 'in_batch_dim': [8, 64], 'batch_size': 8, 'flops': 1265603017216}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1582003754624}, {'type': 'perplexity', 'in_batch_dim': [8, 64], 'batch_size': 8, 'flops': 1265603017216}, {'type': 'perplexity', 'in_batch_dim': [8, 112], 'batch_size': 8, 'flops': 2214805229440}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1582003754624}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1898404492032}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1898404492032}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1898404492032}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1898404492032}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1898404492032}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1898404492032}, {'type': 'perplexity', 'in_batch_dim': [8, 64], 'batch_size': 8, 'flops': 1265603017216}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1898404492032}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1582003754624}, {'type': 'perplexity', 'in_batch_dim': [8, 64], 'batch_size': 8, 'flops': 1265603017216}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1898404492032}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1898404492032}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1898404492032}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1898404492032}, {'type': 'perplexity', 'in_batch_dim': [8, 128], 'batch_size': 8, 'flops': 2531205966848}, {'type': 'perplexity', 'in_batch_dim': [8, 112], 'batch_size': 8, 'flops': 2214805229440}, {'type': 'perplexity', 'in_batch_dim': [8, 64], 'batch_size': 8, 'flops': 1265603017216}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1582003754624}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1898404492032}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1582003754624}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1898404492032}, {'type': 'perplexity', 'in_batch_dim': [8, 64], 'batch_size': 8, 'flops': 1265603017216}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1898404492032}, {'type': 'perplexity', 'in_batch_dim': [8, 112], 'batch_size': 8, 'flops': 2214805229440}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1898404492032}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1582003754624}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1582003754624}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1582003754624}, {'type': 'perplexity', 'in_batch_dim': [8, 64], 'batch_size': 8, 'flops': 1265603017216}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1582003754624}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1898404492032}, {'type': 'perplexity', 'in_batch_dim': [8, 64], 'batch_size': 8, 'flops': 1265603017216}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1582003754624}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1898404492032}, {'type': 'perplexity', 'in_batch_dim': [8, 64], 'batch_size': 8, 'flops': 1265603017216}, {'type': 'perplexity', 'in_batch_dim': [8, 112], 'batch_size': 8, 'flops': 2214805229440}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1582003754624}, {'type': 'perplexity', 'in_batch_dim': [8, 144], 'batch_size': 8, 'flops': 2847606704256}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1582003754624}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1582003754624}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1582003754624}, {'type': 'perplexity', 'in_batch_dim': [8, 64], 'batch_size': 8, 'flops': 1265603017216}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1898404492032}, {'type': 'perplexity', 'in_batch_dim': [8, 112], 'batch_size': 8, 'flops': 2214805229440}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1898404492032}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1582003754624}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1582003754624}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1582003754624}, {'type': 'perplexity', 'in_batch_dim': [8, 112], 'batch_size': 8, 'flops': 2214805229440}, {'type': 'perplexity', 'in_batch_dim': [8, 64], 'batch_size': 8, 'flops': 1265603017216}, {'type': 'perplexity', 'in_batch_dim': [8, 112], 'batch_size': 8, 'flops': 2214805229440}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1582003754624}, {'type': 'perplexity', 'in_batch_dim': [8, 64], 'batch_size': 8, 'flops': 1265603017216}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1582003754624}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1582003754624}, {'type': 'perplexity', 'in_batch_dim': [8, 64], 'batch_size': 8, 'flops': 1265603017216}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1582003754624}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1582003754624}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1582003754624}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1898404492032}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1582003754624}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1582003754624}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1898404492032}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1898404492032}, {'type': 'perplexity', 'in_batch_dim': [8, 112], 'batch_size': 8, 'flops': 2214805229440}, {'type': 'perplexity', 'in_batch_dim': [8, 64], 'batch_size': 8, 'flops': 1265603017216}, {'type': 'perplexity', 'in_batch_dim': [8, 112], 'batch_size': 8, 'flops': 2214805229440}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1898404492032}, {'type': 'perplexity', 'in_batch_dim': [8, 112], 'batch_size': 8, 'flops': 2214805229440}, {'type': 'perplexity', 'in_batch_dim': [8, 128], 'batch_size': 8, 'flops': 2531205966848}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1582003754624}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1898404492032}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1898404492032}, {'type': 'perplexity', 'in_batch_dim': [8, 192], 'batch_size': 8, 'flops': 3796808916480}, {'type': 'perplexity', 'in_batch_dim': [8, 64], 'batch_size': 8, 'flops': 1265603017216}, {'type': 'perplexity', 'in_batch_dim': [8, 144], 'batch_size': 8, 'flops': 2847606704256}, {'type': 'perplexity', 'in_batch_dim': [8, 64], 'batch_size': 8, 'flops': 1265603017216}, {'type': 'perplexity', 'in_batch_dim': [8, 144], 'batch_size': 8, 'flops': 2847606704256}, {'type': 'perplexity', 'in_batch_dim': [8, 64], 'batch_size': 8, 'flops': 1265603017216}, {'type': 'perplexity', 'in_batch_dim': [8, 64], 'batch_size': 8, 'flops': 1265603017216}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1898404492032}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1898404492032}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1582003754624}, {'type': 'perplexity', 'in_batch_dim': [8, 128], 'batch_size': 8, 'flops': 2531205966848}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1898404492032}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1898404492032}, {'type': 'perplexity', 'in_batch_dim': [8, 112], 'batch_size': 8, 'flops': 2214805229440}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1582003754624}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1582003754624}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1898404492032}, {'type': 'perplexity', 'in_batch_dim': [8, 64], 'batch_size': 8, 'flops': 1265603017216}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1898404492032}, {'type': 'perplexity', 'in_batch_dim': [8, 112], 'batch_size': 8, 'flops': 2214805229440}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1898404492032}, {'type': 'perplexity', 'in_batch_dim': [8, 64], 'batch_size': 8, 'flops': 1265603017216}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1582003754624}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1898404492032}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1898404492032}, {'type': 'perplexity', 'in_batch_dim': [8, 64], 'batch_size': 8, 'flops': 1265603017216}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1898404492032}, {'type': 'perplexity', 'in_batch_dim': [8, 64], 'batch_size': 8, 'flops': 1265603017216}, {'type': 'perplexity', 'in_batch_dim': [8, 112], 'batch_size': 8, 'flops': 2214805229440}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1582003754624}, {'type': 'perplexity', 'in_batch_dim': [8, 128], 'batch_size': 8, 'flops': 2531205966848}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1582003754624}, {'type': 'perplexity', 'in_batch_dim': [8, 112], 'batch_size': 8, 'flops': 2214805229440}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1898404492032}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1582003754624}, {'type': 'perplexity', 'in_batch_dim': [8, 112], 'batch_size': 8, 'flops': 2214805229440}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1582003754624}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1898404492032}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1898404492032}, {'type': 'perplexity', 'in_batch_dim': [8, 128], 'batch_size': 8, 'flops': 2531205966848}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1898404492032}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1898404492032}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1582003754624}, {'type': 'perplexity', 'in_batch_dim': [8, 112], 'batch_size': 8, 'flops': 2214805229440}, {'type': 'perplexity', 'in_batch_dim': [8, 128], 'batch_size': 8, 'flops': 2531205966848}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1898404492032}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1582003754624}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1582003754624}, {'type': 'perplexity', 'in_batch_dim': [8, 128], 'batch_size': 8, 'flops': 2531205966848}, {'type': 'perplexity', 'in_batch_dim': [8, 112], 'batch_size': 8, 'flops': 2214805229440}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1582003754624}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1582003754624}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1898404492032}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1582003754624}, {'type': 'perplexity', 'in_batch_dim': [8, 64], 'batch_size': 8, 'flops': 1265603017216}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1582003754624}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1582003754624}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1582003754624}, {'type': 'perplexity', 'in_batch_dim': [8, 112], 'batch_size': 8, 'flops': 2214805229440}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1582003754624}, {'type': 'perplexity', 'in_batch_dim': [8, 64], 'batch_size': 8, 'flops': 1265603017216}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1582003754624}, {'type': 'perplexity', 'in_batch_dim': [8, 128], 'batch_size': 8, 'flops': 2531205966848}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1898404492032}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1898404492032}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1582003754624}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1898404492032}, {'type': 'perplexity', 'in_batch_dim': [8, 64], 'batch_size': 8, 'flops': 1265603017216}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1898404492032}, {'type': 'perplexity', 'in_batch_dim': [8, 112], 'batch_size': 8, 'flops': 2214805229440}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1898404492032}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1582003754624}, {'type': 'perplexity', 'in_batch_dim': [8, 112], 'batch_size': 8, 'flops': 2214805229440}, {'type': 'perplexity', 'in_batch_dim': [8, 112], 'batch_size': 8, 'flops': 2214805229440}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1898404492032}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1898404492032}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1898404492032}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1898404492032}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1582003754624}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1582003754624}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1582003754624}, {'type': 'perplexity', 'in_batch_dim': [8, 112], 'batch_size': 8, 'flops': 2214805229440}, {'type': 'perplexity', 'in_batch_dim': [8, 64], 'batch_size': 8, 'flops': 1265603017216}, {'type': 'perplexity', 'in_batch_dim': [8, 144], 'batch_size': 8, 'flops': 2847606704256}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1898404492032}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1898404492032}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1898404492032}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1898404492032}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1898404492032}, {'type': 'perplexity', 'in_batch_dim': [8, 112], 'batch_size': 8, 'flops': 2214805229440}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1582003754624}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1582003754624}, {'type': 'perplexity', 'in_batch_dim': [8, 64], 'batch_size': 8, 'flops': 1265603017216}, {'type': 'perplexity', 'in_batch_dim': [8, 112], 'batch_size': 8, 'flops': 2214805229440}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1582003754624}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1898404492032}, {'type': 'perplexity', 'in_batch_dim': [8, 64], 'batch_size': 8, 'flops': 1265603017216}, {'type': 'perplexity', 'in_batch_dim': [8, 112], 'batch_size': 8, 'flops': 2214805229440}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1898404492032}, {'type': 'perplexity', 'in_batch_dim': [8, 64], 'batch_size': 8, 'flops': 1265603017216}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1582003754624}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1582003754624}, {'type': 'perplexity', 'in_batch_dim': [8, 64], 'batch_size': 8, 'flops': 1265603017216}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1898404492032}, {'type': 'perplexity', 'in_batch_dim': [8, 64], 'batch_size': 8, 'flops': 1265603017216}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1582003754624}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1898404492032}, {'type': 'perplexity', 'in_batch_dim': [8, 112], 'batch_size': 8, 'flops': 2214805229440}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1582003754624}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1898404492032}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1582003754624}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1898404492032}, {'type': 'perplexity', 'in_batch_dim': [8, 112], 'batch_size': 8, 'flops': 2214805229440}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1898404492032}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1898404492032}, {'type': 'perplexity', 'in_batch_dim': [8, 64], 'batch_size': 8, 'flops': 1265603017216}, {'type': 'perplexity', 'in_batch_dim': [8, 64], 'batch_size': 8, 'flops': 1265603017216}, {'type': 'perplexity', 'in_batch_dim': [8, 128], 'batch_size': 8, 'flops': 2531205966848}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1582003754624}, {'type': 'perplexity', 'in_batch_dim': [8, 64], 'batch_size': 8, 'flops': 1265603017216}, {'type': 'perplexity', 'in_batch_dim': [8, 112], 'batch_size': 8, 'flops': 2214805229440}, {'type': 'perplexity', 'in_batch_dim': [8, 112], 'batch_size': 8, 'flops': 2214805229440}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1898404492032}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1582003754624}, {'type': 'perplexity', 'in_batch_dim': [8, 128], 'batch_size': 8, 'flops': 2531205966848}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1898404492032}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1582003754624}, {'type': 'perplexity', 'in_batch_dim': [8, 112], 'batch_size': 8, 'flops': 2214805229440}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1898404492032}, {'type': 'perplexity', 'in_batch_dim': [8, 64], 'batch_size': 8, 'flops': 1265603017216}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1582003754624}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1898404492032}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1898404492032}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1582003754624}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1582003754624}, {'type': 'perplexity', 'in_batch_dim': [8, 64], 'batch_size': 8, 'flops': 1265603017216}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1898404492032}, {'type': 'perplexity', 'in_batch_dim': [8, 112], 'batch_size': 8, 'flops': 2214805229440}, {'type': 'perplexity', 'in_batch_dim': [8, 64], 'batch_size': 8, 'flops': 1265603017216}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1898404492032}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1582003754624}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1582003754624}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1898404492032}, {'type': 'perplexity', 'in_batch_dim': [8, 112], 'batch_size': 8, 'flops': 2214805229440}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1898404492032}, {'type': 'perplexity', 'in_batch_dim': [8, 112], 'batch_size': 8, 'flops': 2214805229440}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1582003754624}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1582003754624}, {'type': 'perplexity', 'in_batch_dim': [8, 64], 'batch_size': 8, 'flops': 1265603017216}, {'type': 'perplexity', 'in_batch_dim': [8, 112], 'batch_size': 8, 'flops': 2214805229440}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1582003754624}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1898404492032}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1582003754624}, {'type': 'perplexity', 'in_batch_dim': [8, 112], 'batch_size': 8, 'flops': 2214805229440}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1898404492032}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1898404492032}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1582003754624}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1898404492032}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1582003754624}, {'type': 'perplexity', 'in_batch_dim': [8, 112], 'batch_size': 8, 'flops': 2214805229440}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1582003754624}, {'type': 'perplexity', 'in_batch_dim': [8, 64], 'batch_size': 8, 'flops': 1265603017216}, {'type': 'perplexity', 'in_batch_dim': [8, 64], 'batch_size': 8, 'flops': 1265603017216}, {'type': 'perplexity', 'in_batch_dim': [8, 48], 'batch_size': 8, 'flops': 949202279808}, {'type': 'perplexity', 'in_batch_dim': [8, 112], 'batch_size': 8, 'flops': 2214805229440}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1582003754624}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1898404492032}, {'type': 'perplexity', 'in_batch_dim': [8, 128], 'batch_size': 8, 'flops': 2531205966848}, {'type': 'perplexity', 'in_batch_dim': [8, 112], 'batch_size': 8, 'flops': 2214805229440}, {'type': 'perplexity', 'in_batch_dim': [8, 128], 'batch_size': 8, 'flops': 2531205966848}, {'type': 'perplexity', 'in_batch_dim': [8, 112], 'batch_size': 8, 'flops': 2214805229440}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1582003754624}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1898404492032}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1582003754624}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1582003754624}, {'type': 'perplexity', 'in_batch_dim': [8, 112], 'batch_size': 8, 'flops': 2214805229440}, {'type': 'perplexity', 'in_batch_dim': [8, 112], 'batch_size': 8, 'flops': 2214805229440}, {'type': 'perplexity', 'in_batch_dim': [8, 64], 'batch_size': 8, 'flops': 1265603017216}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1898404492032}, {'type': 'perplexity', 'in_batch_dim': [8, 112], 'batch_size': 8, 'flops': 2214805229440}, {'type': 'perplexity', 'in_batch_dim': [8, 64], 'batch_size': 8, 'flops': 1265603017216}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1898404492032}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1582003754624}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1898404492032}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1898404492032}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1898404492032}, {'type': 'perplexity', 'in_batch_dim': [8, 48], 'batch_size': 8, 'flops': 949202279808}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1898404492032}, {'type': 'perplexity', 'in_batch_dim': [8, 112], 'batch_size': 8, 'flops': 2214805229440}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1582003754624}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1898404492032}, {'type': 'perplexity', 'in_batch_dim': [8, 64], 'batch_size': 8, 'flops': 1265603017216}, {'type': 'perplexity', 'in_batch_dim': [8, 112], 'batch_size': 8, 'flops': 2214805229440}, {'type': 'perplexity', 'in_batch_dim': [8, 64], 'batch_size': 8, 'flops': 1265603017216}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1582003754624}, {'type': 'perplexity', 'in_batch_dim': [8, 64], 'batch_size': 8, 'flops': 1265603017216}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1582003754624}, {'type': 'perplexity', 'in_batch_dim': [8, 112], 'batch_size': 8, 'flops': 2214805229440}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1582003754624}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1582003754624}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1582003754624}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1582003754624}, {'type': 'perplexity', 'in_batch_dim': [8, 112], 'batch_size': 8, 'flops': 2214805229440}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1582003754624}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1898404492032}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1898404492032}, {'type': 'perplexity', 'in_batch_dim': [8, 112], 'batch_size': 8, 'flops': 2214805229440}, {'type': 'perplexity', 'in_batch_dim': [8, 144], 'batch_size': 8, 'flops': 2847606704256}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1582003754624}, {'type': 'perplexity', 'in_batch_dim': [8, 64], 'batch_size': 8, 'flops': 1265603017216}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1898404492032}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1898404492032}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1582003754624}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1898404492032}, {'type': 'perplexity', 'in_batch_dim': [8, 128], 'batch_size': 8, 'flops': 2531205966848}, {'type': 'perplexity', 'in_batch_dim': [8, 112], 'batch_size': 8, 'flops': 2214805229440}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1898404492032}, {'type': 'perplexity', 'in_batch_dim': [8, 64], 'batch_size': 8, 'flops': 1265603017216}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1898404492032}, {'type': 'perplexity', 'in_batch_dim': [8, 160], 'batch_size': 8, 'flops': 3164007441664}, {'type': 'perplexity', 'in_batch_dim': [8, 64], 'batch_size': 8, 'flops': 1265603017216}, {'type': 'perplexity', 'in_batch_dim': [8, 112], 'batch_size': 8, 'flops': 2214805229440}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1898404492032}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1898404492032}, {'type': 'perplexity', 'in_batch_dim': [8, 144], 'batch_size': 8, 'flops': 2847606704256}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1582003754624}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1898404492032}, {'type': 'perplexity', 'in_batch_dim': [8, 64], 'batch_size': 8, 'flops': 1265603017216}, {'type': 'perplexity', 'in_batch_dim': [8, 64], 'batch_size': 8, 'flops': 1265603017216}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1898404492032}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1898404492032}, {'type': 'perplexity', 'in_batch_dim': [8, 112], 'batch_size': 8, 'flops': 2214805229440}, {'type': 'perplexity', 'in_batch_dim': [8, 112], 'batch_size': 8, 'flops': 2214805229440}, {'type': 'perplexity', 'in_batch_dim': [8, 128], 'batch_size': 8, 'flops': 2531205966848}, {'type': 'perplexity', 'in_batch_dim': [8, 112], 'batch_size': 8, 'flops': 2214805229440}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1582003754624}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1582003754624}, {'type': 'perplexity', 'in_batch_dim': [8, 112], 'batch_size': 8, 'flops': 2214805229440}, {'type': 'perplexity', 'in_batch_dim': [8, 128], 'batch_size': 8, 'flops': 2531205966848}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1582003754624}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1582003754624}, {'type': 'perplexity', 'in_batch_dim': [8, 64], 'batch_size': 8, 'flops': 1265603017216}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1898404492032}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1582003754624}, {'type': 'perplexity', 'in_batch_dim': [8, 112], 'batch_size': 8, 'flops': 2214805229440}, {'type': 'perplexity', 'in_batch_dim': [8, 64], 'batch_size': 8, 'flops': 1265603017216}, {'type': 'perplexity', 'in_batch_dim': [8, 64], 'batch_size': 8, 'flops': 1265603017216}, {'type': 'perplexity', 'in_batch_dim': [8, 128], 'batch_size': 8, 'flops': 2531205966848}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1898404492032}, {'type': 'perplexity', 'in_batch_dim': [8, 112], 'batch_size': 8, 'flops': 2214805229440}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1582003754624}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1582003754624}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1582003754624}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1582003754624}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1582003754624}, {'type': 'perplexity', 'in_batch_dim': [8, 112], 'batch_size': 8, 'flops': 2214805229440}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1898404492032}, {'type': 'perplexity', 'in_batch_dim': [8, 112], 'batch_size': 8, 'flops': 2214805229440}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1898404492032}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1582003754624}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1582003754624}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1898404492032}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1898404492032}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1582003754624}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1582003754624}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1582003754624}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1582003754624}, {'type': 'perplexity', 'in_batch_dim': [8, 48], 'batch_size': 8, 'flops': 949202279808}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1582003754624}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1898404492032}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1582003754624}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1898404492032}, {'type': 'perplexity', 'in_batch_dim': [8, 112], 'batch_size': 8, 'flops': 2214805229440}, {'type': 'perplexity', 'in_batch_dim': [8, 128], 'batch_size': 8, 'flops': 2531205966848}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1898404492032}, {'type': 'perplexity', 'in_batch_dim': [8, 64], 'batch_size': 8, 'flops': 1265603017216}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1898404492032}, {'type': 'perplexity', 'in_batch_dim': [8, 64], 'batch_size': 8, 'flops': 1265603017216}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1582003754624}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1582003754624}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1582003754624}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1582003754624}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1898404492032}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1582003754624}, {'type': 'perplexity', 'in_batch_dim': [8, 112], 'batch_size': 8, 'flops': 2214805229440}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1898404492032}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1582003754624}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1898404492032}, {'type': 'perplexity', 'in_batch_dim': [8, 64], 'batch_size': 8, 'flops': 1265603017216}, {'type': 'perplexity', 'in_batch_dim': [8, 64], 'batch_size': 8, 'flops': 1265603017216}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1898404492032}, {'type': 'perplexity', 'in_batch_dim': [8, 64], 'batch_size': 8, 'flops': 1265603017216}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1582003754624}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1582003754624}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1582003754624}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1582003754624}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1582003754624}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1582003754624}, {'type': 'perplexity', 'in_batch_dim': [8, 64], 'batch_size': 8, 'flops': 1265603017216}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1582003754624}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1582003754624}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1898404492032}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1898404492032}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1898404492032}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1898404492032}, {'type': 'perplexity', 'in_batch_dim': [8, 128], 'batch_size': 8, 'flops': 2531205966848}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1582003754624}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1898404492032}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1898404492032}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1898404492032}, {'type': 'perplexity', 'in_batch_dim': [8, 64], 'batch_size': 8, 'flops': 1265603017216}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1898404492032}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1898404492032}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1582003754624}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1898404492032}, {'type': 'perplexity', 'in_batch_dim': [8, 112], 'batch_size': 8, 'flops': 2214805229440}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1898404492032}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1582003754624}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1898404492032}, {'type': 'perplexity', 'in_batch_dim': [8, 64], 'batch_size': 8, 'flops': 1265603017216}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1898404492032}, {'type': 'perplexity', 'in_batch_dim': [8, 112], 'batch_size': 8, 'flops': 2214805229440}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1898404492032}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1898404492032}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1582003754624}, {'type': 'perplexity', 'in_batch_dim': [8, 64], 'batch_size': 8, 'flops': 1265603017216}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1582003754624}, {'type': 'perplexity', 'in_batch_dim': [8, 128], 'batch_size': 8, 'flops': 2531205966848}, {'type': 'perplexity', 'in_batch_dim': [8, 64], 'batch_size': 8, 'flops': 1265603017216}, {'type': 'perplexity', 'in_batch_dim': [8, 128], 'batch_size': 8, 'flops': 2531205966848}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1898404492032}, {'type': 'perplexity', 'in_batch_dim': [8, 112], 'batch_size': 8, 'flops': 2214805229440}, {'type': 'perplexity', 'in_batch_dim': [8, 112], 'batch_size': 8, 'flops': 2214805229440}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1898404492032}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1898404492032}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1582003754624}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1582003754624}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1582003754624}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1898404492032}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1582003754624}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1582003754624}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1898404492032}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1582003754624}, {'type': 'perplexity', 'in_batch_dim': [8, 64], 'batch_size': 8, 'flops': 1265603017216}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1898404492032}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1898404492032}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1898404492032}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1898404492032}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1582003754624}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1898404492032}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1582003754624}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1582003754624}, {'type': 'perplexity', 'in_batch_dim': [8, 112], 'batch_size': 8, 'flops': 2214805229440}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1582003754624}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1898404492032}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1582003754624}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1582003754624}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1582003754624}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1898404492032}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1582003754624}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1898404492032}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1582003754624}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1582003754624}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1898404492032}, {'type': 'perplexity', 'in_batch_dim': [8, 112], 'batch_size': 8, 'flops': 2214805229440}, {'type': 'perplexity', 'in_batch_dim': [8, 128], 'batch_size': 8, 'flops': 2531205966848}, {'type': 'perplexity', 'in_batch_dim': [8, 112], 'batch_size': 8, 'flops': 2214805229440}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1582003754624}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1898404492032}, {'type': 'perplexity', 'in_batch_dim': [8, 64], 'batch_size': 8, 'flops': 1265603017216}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1898404492032}, {'type': 'perplexity', 'in_batch_dim': [8, 64], 'batch_size': 8, 'flops': 1265603017216}, {'type': 'perplexity', 'in_batch_dim': [8, 144], 'batch_size': 8, 'flops': 2847606704256}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1582003754624}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1898404492032}, {'type': 'perplexity', 'in_batch_dim': [8, 112], 'batch_size': 8, 'flops': 2214805229440}, {'type': 'perplexity', 'in_batch_dim': [8, 144], 'batch_size': 8, 'flops': 2847606704256}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1582003754624}, {'type': 'perplexity', 'in_batch_dim': [8, 112], 'batch_size': 8, 'flops': 2214805229440}, {'type': 'perplexity', 'in_batch_dim': [8, 64], 'batch_size': 8, 'flops': 1265603017216}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1582003754624}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1898404492032}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1898404492032}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1582003754624}, {'type': 'perplexity', 'in_batch_dim': [8, 112], 'batch_size': 8, 'flops': 2214805229440}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1582003754624}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1582003754624}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1582003754624}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1582003754624}, {'type': 'perplexity', 'in_batch_dim': [8, 112], 'batch_size': 8, 'flops': 2214805229440}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1898404492032}, {'type': 'perplexity', 'in_batch_dim': [8, 128], 'batch_size': 8, 'flops': 2531205966848}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1582003754624}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1582003754624}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1898404492032}, {'type': 'perplexity', 'in_batch_dim': [8, 64], 'batch_size': 8, 'flops': 1265603017216}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1898404492032}, {'type': 'perplexity', 'in_batch_dim': [8, 64], 'batch_size': 8, 'flops': 1265603017216}, {'type': 'perplexity', 'in_batch_dim': [8, 112], 'batch_size': 8, 'flops': 2214805229440}, {'type': 'perplexity', 'in_batch_dim': [8, 112], 'batch_size': 8, 'flops': 2214805229440}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1582003754624}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1898404492032}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1582003754624}, {'type': 'perplexity', 'in_batch_dim': [8, 64], 'batch_size': 8, 'flops': 1265603017216}, {'type': 'perplexity', 'in_batch_dim': [8, 64], 'batch_size': 8, 'flops': 1265603017216}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1898404492032}, {'type': 'perplexity', 'in_batch_dim': [8, 64], 'batch_size': 8, 'flops': 1265603017216}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1898404492032}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1898404492032}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1582003754624}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1898404492032}, {'type': 'perplexity', 'in_batch_dim': [8, 144], 'batch_size': 8, 'flops': 2847606704256}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1898404492032}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1582003754624}, {'type': 'perplexity', 'in_batch_dim': [8, 128], 'batch_size': 8, 'flops': 2531205966848}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1898404492032}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1898404492032}, {'type': 'perplexity', 'in_batch_dim': [8, 112], 'batch_size': 8, 'flops': 2214805229440}, {'type': 'perplexity', 'in_batch_dim': [8, 112], 'batch_size': 8, 'flops': 2214805229440}, {'type': 'perplexity', 'in_batch_dim': [8, 64], 'batch_size': 8, 'flops': 1265603017216}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1582003754624}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1898404492032}, {'type': 'perplexity', 'in_batch_dim': [8, 128], 'batch_size': 8, 'flops': 2531205966848}, {'type': 'perplexity', 'in_batch_dim': [8, 144], 'batch_size': 8, 'flops': 2847606704256}, {'type': 'perplexity', 'in_batch_dim': [8, 112], 'batch_size': 8, 'flops': 2214805229440}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1582003754624}, {'type': 'perplexity', 'in_batch_dim': [8, 64], 'batch_size': 8, 'flops': 1265603017216}, {'type': 'perplexity', 'in_batch_dim': [8, 64], 'batch_size': 8, 'flops': 1265603017216}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1898404492032}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1582003754624}, {'type': 'perplexity', 'in_batch_dim': [8, 64], 'batch_size': 8, 'flops': 1265603017216}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1898404492032}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1582003754624}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1898404492032}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1898404492032}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1582003754624}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1582003754624}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1582003754624}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1582003754624}, {'type': 'perplexity', 'in_batch_dim': [3, 48], 'batch_size': 8, 'flops': 949202279808}], 'timestamp': '2025-09-10 02:32:45.543901', 'step': 5592, 'epoch': 1} {'type': 'pplx', 'content': 8017.508861860097, 'timestamp': '2025-09-10 02:32:45.546553', 'step': 5592, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 2847885091968}, 'timestamp': '2025-09-10 02:32:45.575931', 'step': 5592, 'epoch': 1} {'type': 'loss', 'content': 0.13234558701515198, 'timestamp': '2025-09-10 02:32:45.578197', 'step': 5593, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 3322488817984}, 'timestamp': '2025-09-10 02:32:45.608374', 'step': 5593, 'epoch': 1} {'type': 'loss', 'content': 0.12888194620609283, 'timestamp': '2025-09-10 02:32:45.610569', 'step': 5594, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 2847885091968}, 'timestamp': '2025-09-10 02:32:45.641132', 'step': 5594, 'epoch': 1} {'type': 'loss', 'content': 0.17078836262226105, 'timestamp': '2025-09-10 02:32:45.643387', 'step': 5595, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 144], 'flops': 4271696270016}, 'timestamp': '2025-09-10 02:32:45.673988', 'step': 5595, 'epoch': 1} {'type': 'loss', 'content': 0.16204270720481873, 'timestamp': '2025-09-10 02:32:45.697237', 'step': 5596, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 3322488817984}, 'timestamp': '2025-09-10 02:32:45.727205', 'step': 5596, 'epoch': 1} {'type': 'loss', 'content': 0.29505231976509094, 'timestamp': '2025-09-10 02:32:45.729471', 'step': 5597, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 3322488817984}, 'timestamp': '2025-09-10 02:32:45.760940', 'step': 5597, 'epoch': 1} {'type': 'loss', 'content': 0.11448840796947479, 'timestamp': '2025-09-10 02:32:45.762927', 'step': 5598, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 2847885091968}, 'timestamp': '2025-09-10 02:32:45.792971', 'step': 5598, 'epoch': 1} {'type': 'loss', 'content': 0.1432320475578308, 'timestamp': '2025-09-10 02:32:45.795325', 'step': 5599, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 128], 'flops': 3797092544000}, 'timestamp': '2025-09-10 02:32:45.826033', 'step': 5599, 'epoch': 1} {'type': 'loss', 'content': 0.14020368456840515, 'timestamp': '2025-09-10 02:32:45.849637', 'step': 5600, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 3322488817984}, 'timestamp': '2025-09-10 02:32:45.882587', 'step': 5600, 'epoch': 1} {'type': 'loss', 'content': 0.1781342476606369, 'timestamp': '2025-09-10 02:32:45.884542', 'step': 5601, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 128], 'flops': 3797092544000}, 'timestamp': '2025-09-10 02:32:45.915670', 'step': 5601, 'epoch': 1} {'type': 'loss', 'content': 0.15920989215373993, 'timestamp': '2025-09-10 02:32:45.917812', 'step': 5602, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 3322488817984}, 'timestamp': '2025-09-10 02:32:45.947722', 'step': 5602, 'epoch': 1} {'type': 'loss', 'content': 0.19845888018608093, 'timestamp': '2025-09-10 02:32:45.950299', 'step': 5603, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 3322488817984}, 'timestamp': '2025-09-10 02:32:45.980083', 'step': 5603, 'epoch': 1} {'type': 'loss', 'content': 0.14232955873012543, 'timestamp': '2025-09-10 02:32:46.003691', 'step': 5604, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 3322488817984}, 'timestamp': '2025-09-10 02:32:46.034620', 'step': 5604, 'epoch': 1} {'type': 'loss', 'content': 0.1383083164691925, 'timestamp': '2025-09-10 02:32:46.037153', 'step': 5605, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 2847885091968}, 'timestamp': '2025-09-10 02:32:46.068077', 'step': 5605, 'epoch': 1} {'type': 'loss', 'content': 0.06117326766252518, 'timestamp': '2025-09-10 02:32:46.070661', 'step': 5606, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 144], 'flops': 4271696270016}, 'timestamp': '2025-09-10 02:32:46.102104', 'step': 5606, 'epoch': 1} {'type': 'loss', 'content': 0.15682153403759003, 'timestamp': '2025-09-10 02:32:46.104269', 'step': 5607, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 128], 'flops': 3797092544000}, 'timestamp': '2025-09-10 02:32:46.134418', 'step': 5607, 'epoch': 1} {'type': 'loss', 'content': 0.2107246369123459, 'timestamp': '2025-09-10 02:32:46.157930', 'step': 5608, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 3322488817984}, 'timestamp': '2025-09-10 02:32:46.192503', 'step': 5608, 'epoch': 1} {'type': 'loss', 'content': 0.17142663896083832, 'timestamp': '2025-09-10 02:32:46.194808', 'step': 5609, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 144], 'flops': 4271696270016}, 'timestamp': '2025-09-10 02:32:46.225702', 'step': 5609, 'epoch': 1} {'type': 'loss', 'content': 0.1182541474699974, 'timestamp': '2025-09-10 02:32:46.227943', 'step': 5610, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 3322488817984}, 'timestamp': '2025-09-10 02:32:46.258312', 'step': 5610, 'epoch': 1} {'type': 'loss', 'content': 0.08325305581092834, 'timestamp': '2025-09-10 02:32:46.260474', 'step': 5611, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 80], 'flops': 2373281365952}, 'timestamp': '2025-09-10 02:32:46.290671', 'step': 5611, 'epoch': 1} {'type': 'loss', 'content': 0.11810991168022156, 'timestamp': '2025-09-10 02:32:46.314505', 'step': 5612, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 128], 'flops': 3797092544000}, 'timestamp': '2025-09-10 02:32:46.346299', 'step': 5612, 'epoch': 1} {'type': 'loss', 'content': 0.15325221419334412, 'timestamp': '2025-09-10 02:32:46.348342', 'step': 5613, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 3322488817984}, 'timestamp': '2025-09-10 02:32:46.378655', 'step': 5613, 'epoch': 1} {'type': 'loss', 'content': 0.1702997088432312, 'timestamp': '2025-09-10 02:32:46.381207', 'step': 5614, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 3322488817984}, 'timestamp': '2025-09-10 02:32:46.411704', 'step': 5614, 'epoch': 1} {'type': 'loss', 'content': 0.13536539673805237, 'timestamp': '2025-09-10 02:32:46.413984', 'step': 5615, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 128], 'flops': 3797092544000}, 'timestamp': '2025-09-10 02:32:46.444607', 'step': 5615, 'epoch': 1} {'type': 'loss', 'content': 0.11628779768943787, 'timestamp': '2025-09-10 02:32:46.468047', 'step': 5616, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 3322488817984}, 'timestamp': '2025-09-10 02:32:46.499578', 'step': 5616, 'epoch': 1} {'type': 'loss', 'content': 0.12041396647691727, 'timestamp': '2025-09-10 02:32:46.501900', 'step': 5617, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 3322488817984}, 'timestamp': '2025-09-10 02:32:46.532701', 'step': 5617, 'epoch': 1} {'type': 'loss', 'content': 0.19098810851573944, 'timestamp': '2025-09-10 02:32:46.535082', 'step': 5618, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 80], 'flops': 2373281365952}, 'timestamp': '2025-09-10 02:32:46.565013', 'step': 5618, 'epoch': 1} {'type': 'loss', 'content': 0.11052040010690689, 'timestamp': '2025-09-10 02:32:46.567210', 'step': 5619, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 3322488817984}, 'timestamp': '2025-09-10 02:32:46.597645', 'step': 5619, 'epoch': 1} {'type': 'loss', 'content': 0.1616506725549698, 'timestamp': '2025-09-10 02:32:46.621264', 'step': 5620, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 2847885091968}, 'timestamp': '2025-09-10 02:32:46.651677', 'step': 5620, 'epoch': 1} {'type': 'loss', 'content': 0.26637783646583557, 'timestamp': '2025-09-10 02:32:46.653975', 'step': 5621, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 144], 'flops': 4271696270016}, 'timestamp': '2025-09-10 02:32:46.684558', 'step': 5621, 'epoch': 1} {'type': 'loss', 'content': 0.15897640585899353, 'timestamp': '2025-09-10 02:32:46.687237', 'step': 5622, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 128], 'flops': 3797092544000}, 'timestamp': '2025-09-10 02:32:46.717542', 'step': 5622, 'epoch': 1} {'type': 'loss', 'content': 0.1337219923734665, 'timestamp': '2025-09-10 02:32:46.719776', 'step': 5623, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 2847885091968}, 'timestamp': '2025-09-10 02:32:46.752400', 'step': 5623, 'epoch': 1} {'type': 'loss', 'content': 0.16627714037895203, 'timestamp': '2025-09-10 02:32:46.775635', 'step': 5624, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 3322488817984}, 'timestamp': '2025-09-10 02:32:46.806146', 'step': 5624, 'epoch': 1} {'type': 'loss', 'content': 0.19984756410121918, 'timestamp': '2025-09-10 02:32:46.808580', 'step': 5625, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 3322488817984}, 'timestamp': '2025-09-10 02:32:46.839381', 'step': 5625, 'epoch': 1} {'type': 'loss', 'content': 0.20068521797657013, 'timestamp': '2025-09-10 02:32:46.841494', 'step': 5626, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 3322488817984}, 'timestamp': '2025-09-10 02:32:46.872701', 'step': 5626, 'epoch': 1} {'type': 'loss', 'content': 0.3177243769168854, 'timestamp': '2025-09-10 02:32:46.875165', 'step': 5627, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 3322488817984}, 'timestamp': '2025-09-10 02:32:46.905142', 'step': 5627, 'epoch': 1} {'type': 'loss', 'content': 0.1578599363565445, 'timestamp': '2025-09-10 02:32:46.928473', 'step': 5628, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 3322488817984}, 'timestamp': '2025-09-10 02:32:46.958677', 'step': 5628, 'epoch': 1} {'type': 'loss', 'content': 0.18552933633327484, 'timestamp': '2025-09-10 02:32:46.960827', 'step': 5629, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 3322488817984}, 'timestamp': '2025-09-10 02:32:46.991017', 'step': 5629, 'epoch': 1} {'type': 'loss', 'content': 0.23735885322093964, 'timestamp': '2025-09-10 02:32:46.993089', 'step': 5630, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 128], 'flops': 3797092544000}, 'timestamp': '2025-09-10 02:32:47.023637', 'step': 5630, 'epoch': 1} {'type': 'loss', 'content': 0.16661153733730316, 'timestamp': '2025-09-10 02:32:47.027117', 'step': 5631, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 128], 'flops': 3797092544000}, 'timestamp': '2025-09-10 02:32:47.057568', 'step': 5631, 'epoch': 1} {'type': 'loss', 'content': 0.18119147419929504, 'timestamp': '2025-09-10 02:32:47.081215', 'step': 5632, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 128], 'flops': 3797092544000}, 'timestamp': '2025-09-10 02:32:47.112748', 'step': 5632, 'epoch': 1} {'type': 'loss', 'content': 0.1613892763853073, 'timestamp': '2025-09-10 02:32:47.114790', 'step': 5633, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 3322488817984}, 'timestamp': '2025-09-10 02:32:47.144647', 'step': 5633, 'epoch': 1} {'type': 'loss', 'content': 0.15022434294223785, 'timestamp': '2025-09-10 02:32:47.146996', 'step': 5634, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 128], 'flops': 3797092544000}, 'timestamp': '2025-09-10 02:32:47.179804', 'step': 5634, 'epoch': 1} {'type': 'loss', 'content': 0.20703984797000885, 'timestamp': '2025-09-10 02:32:47.182138', 'step': 5635, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 2847885091968}, 'timestamp': '2025-09-10 02:32:47.213595', 'step': 5635, 'epoch': 1} {'type': 'loss', 'content': 0.19911694526672363, 'timestamp': '2025-09-10 02:32:47.238007', 'step': 5636, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 3322488817984}, 'timestamp': '2025-09-10 02:32:47.268218', 'step': 5636, 'epoch': 1} {'type': 'loss', 'content': 0.16049522161483765, 'timestamp': '2025-09-10 02:32:47.270186', 'step': 5637, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 176], 'flops': 5220903722048}, 'timestamp': '2025-09-10 02:32:47.300076', 'step': 5637, 'epoch': 1} {'type': 'loss', 'content': 0.13503846526145935, 'timestamp': '2025-09-10 02:32:47.304599', 'step': 5638, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 128], 'flops': 3797092544000}, 'timestamp': '2025-09-10 02:32:47.335253', 'step': 5638, 'epoch': 1} {'type': 'loss', 'content': 0.18600915372371674, 'timestamp': '2025-09-10 02:32:47.337347', 'step': 5639, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 144], 'flops': 4271696270016}, 'timestamp': '2025-09-10 02:32:47.367599', 'step': 5639, 'epoch': 1} {'type': 'loss', 'content': 0.17545676231384277, 'timestamp': '2025-09-10 02:32:47.391049', 'step': 5640, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 128], 'flops': 3797092544000}, 'timestamp': '2025-09-10 02:32:47.422031', 'step': 5640, 'epoch': 1} {'type': 'loss', 'content': 0.14353229105472565, 'timestamp': '2025-09-10 02:32:47.424264', 'step': 5641, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 144], 'flops': 4271696270016}, 'timestamp': '2025-09-10 02:32:47.456230', 'step': 5641, 'epoch': 1} {'type': 'loss', 'content': 0.24207499623298645, 'timestamp': '2025-09-10 02:32:47.458536', 'step': 5642, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 80], 'flops': 2373281365952}, 'timestamp': '2025-09-10 02:32:47.489390', 'step': 5642, 'epoch': 1} {'type': 'loss', 'content': 0.24242421984672546, 'timestamp': '2025-09-10 02:32:47.491877', 'step': 5643, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 128], 'flops': 3797092544000}, 'timestamp': '2025-09-10 02:32:47.523199', 'step': 5643, 'epoch': 1} {'type': 'loss', 'content': 0.16367259621620178, 'timestamp': '2025-09-10 02:32:47.546719', 'step': 5644, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 128], 'flops': 3797092544000}, 'timestamp': '2025-09-10 02:32:47.576449', 'step': 5644, 'epoch': 1} {'type': 'loss', 'content': 0.15458324551582336, 'timestamp': '2025-09-10 02:32:47.578579', 'step': 5645, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 128], 'flops': 3797092544000}, 'timestamp': '2025-09-10 02:32:47.607889', 'step': 5645, 'epoch': 1} {'type': 'loss', 'content': 0.1778489649295807, 'timestamp': '2025-09-10 02:32:47.610125', 'step': 5646, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 3322488817984}, 'timestamp': '2025-09-10 02:32:47.640326', 'step': 5646, 'epoch': 1} {'type': 'loss', 'content': 0.08616703003644943, 'timestamp': '2025-09-10 02:32:47.642498', 'step': 5647, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 176], 'flops': 5220903722048}, 'timestamp': '2025-09-10 02:32:47.672934', 'step': 5647, 'epoch': 1} {'type': 'loss', 'content': 0.2171112596988678, 'timestamp': '2025-09-10 02:32:47.697934', 'step': 5648, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 2847885091968}, 'timestamp': '2025-09-10 02:32:47.728706', 'step': 5648, 'epoch': 1} {'type': 'loss', 'content': 0.1590648740530014, 'timestamp': '2025-09-10 02:32:47.731448', 'step': 5649, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 3322488817984}, 'timestamp': '2025-09-10 02:32:47.763903', 'step': 5649, 'epoch': 1} {'type': 'loss', 'content': 0.15099908411502838, 'timestamp': '2025-09-10 02:32:47.766125', 'step': 5650, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 2847885091968}, 'timestamp': '2025-09-10 02:32:47.795688', 'step': 5650, 'epoch': 1} {'type': 'loss', 'content': 0.12590286135673523, 'timestamp': '2025-09-10 02:32:47.798204', 'step': 5651, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 128], 'flops': 3797092544000}, 'timestamp': '2025-09-10 02:32:47.828046', 'step': 5651, 'epoch': 1} {'type': 'loss', 'content': 0.04567098245024681, 'timestamp': '2025-09-10 02:32:47.851116', 'step': 5652, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 128], 'flops': 3797092544000}, 'timestamp': '2025-09-10 02:32:47.881595', 'step': 5652, 'epoch': 1} {'type': 'loss', 'content': 0.11041825264692307, 'timestamp': '2025-09-10 02:32:47.883919', 'step': 5653, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 144], 'flops': 4271696270016}, 'timestamp': '2025-09-10 02:32:47.914585', 'step': 5653, 'epoch': 1} {'type': 'loss', 'content': 0.2001861184835434, 'timestamp': '2025-09-10 02:32:47.916900', 'step': 5654, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 128], 'flops': 3797092544000}, 'timestamp': '2025-09-10 02:32:47.947380', 'step': 5654, 'epoch': 1} {'type': 'loss', 'content': 0.15305140614509583, 'timestamp': '2025-09-10 02:32:47.949504', 'step': 5655, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 2847885091968}, 'timestamp': '2025-09-10 02:32:47.979556', 'step': 5655, 'epoch': 1} {'type': 'loss', 'content': 0.1764320731163025, 'timestamp': '2025-09-10 02:32:48.003002', 'step': 5656, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 2847885091968}, 'timestamp': '2025-09-10 02:32:48.033054', 'step': 5656, 'epoch': 1} {'type': 'loss', 'content': 0.1502121090888977, 'timestamp': '2025-09-10 02:32:48.035495', 'step': 5657, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 128], 'flops': 3797092544000}, 'timestamp': '2025-09-10 02:32:48.066379', 'step': 5657, 'epoch': 1} {'type': 'loss', 'content': 0.2509908676147461, 'timestamp': '2025-09-10 02:32:48.068546', 'step': 5658, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 3322488817984}, 'timestamp': '2025-09-10 02:32:48.098427', 'step': 5658, 'epoch': 1} {'type': 'loss', 'content': 0.17471742630004883, 'timestamp': '2025-09-10 02:32:48.100626', 'step': 5659, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 3322488817984}, 'timestamp': '2025-09-10 02:32:48.131490', 'step': 5659, 'epoch': 1} {'type': 'loss', 'content': 0.09873877465724945, 'timestamp': '2025-09-10 02:32:48.155461', 'step': 5660, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 2847885091968}, 'timestamp': '2025-09-10 02:32:48.186260', 'step': 5660, 'epoch': 1} {'type': 'loss', 'content': 0.16286887228488922, 'timestamp': '2025-09-10 02:32:48.188218', 'step': 5661, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 3322488817984}, 'timestamp': '2025-09-10 02:32:48.218390', 'step': 5661, 'epoch': 1} {'type': 'loss', 'content': 0.16593019664287567, 'timestamp': '2025-09-10 02:32:48.221130', 'step': 5662, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 3322488817984}, 'timestamp': '2025-09-10 02:32:48.250846', 'step': 5662, 'epoch': 1} {'type': 'loss', 'content': 0.10522012412548065, 'timestamp': '2025-09-10 02:32:48.253295', 'step': 5663, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 2847885091968}, 'timestamp': '2025-09-10 02:32:48.283371', 'step': 5663, 'epoch': 1} {'type': 'loss', 'content': 0.12182627618312836, 'timestamp': '2025-09-10 02:32:48.306744', 'step': 5664, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 128], 'flops': 3797092544000}, 'timestamp': '2025-09-10 02:32:48.336620', 'step': 5664, 'epoch': 1} {'type': 'loss', 'content': 0.11273741722106934, 'timestamp': '2025-09-10 02:32:48.338944', 'step': 5665, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 80], 'flops': 2373281365952}, 'timestamp': '2025-09-10 02:32:48.370727', 'step': 5665, 'epoch': 1} {'type': 'loss', 'content': 0.07226935774087906, 'timestamp': '2025-09-10 02:32:48.372918', 'step': 5666, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 128], 'flops': 3797092544000}, 'timestamp': '2025-09-10 02:32:48.402916', 'step': 5666, 'epoch': 1} {'type': 'loss', 'content': 0.10041820257902145, 'timestamp': '2025-09-10 02:32:48.405230', 'step': 5667, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 2847885091968}, 'timestamp': '2025-09-10 02:32:48.435030', 'step': 5667, 'epoch': 1} {'type': 'loss', 'content': 0.13387015461921692, 'timestamp': '2025-09-10 02:32:48.458366', 'step': 5668, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 176], 'flops': 5220903722048}, 'timestamp': '2025-09-10 02:32:48.489757', 'step': 5668, 'epoch': 1} {'type': 'loss', 'content': 0.1298370510339737, 'timestamp': '2025-09-10 02:32:48.492079', 'step': 5669, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 128], 'flops': 3797092544000}, 'timestamp': '2025-09-10 02:32:48.522500', 'step': 5669, 'epoch': 1} {'type': 'loss', 'content': 0.07154247909784317, 'timestamp': '2025-09-10 02:32:48.524293', 'step': 5670, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 3322488817984}, 'timestamp': '2025-09-10 02:32:48.553980', 'step': 5670, 'epoch': 1} {'type': 'loss', 'content': 0.15140502154827118, 'timestamp': '2025-09-10 02:32:48.556444', 'step': 5671, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 128], 'flops': 3797092544000}, 'timestamp': '2025-09-10 02:32:48.585752', 'step': 5671, 'epoch': 1} {'type': 'loss', 'content': 0.17572832107543945, 'timestamp': '2025-09-10 02:32:48.609172', 'step': 5672, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 160], 'flops': 4746299996032}, 'timestamp': '2025-09-10 02:32:48.639584', 'step': 5672, 'epoch': 1} {'type': 'loss', 'content': 0.24391686916351318, 'timestamp': '2025-09-10 02:32:48.641841', 'step': 5673, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 128], 'flops': 3797092544000}, 'timestamp': '2025-09-10 02:32:48.672976', 'step': 5673, 'epoch': 1} {'type': 'loss', 'content': 0.12772823870182037, 'timestamp': '2025-09-10 02:32:48.675304', 'step': 5674, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 3322488817984}, 'timestamp': '2025-09-10 02:32:48.705342', 'step': 5674, 'epoch': 1} {'type': 'loss', 'content': 0.17166924476623535, 'timestamp': '2025-09-10 02:32:48.707695', 'step': 5675, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 128], 'flops': 3797092544000}, 'timestamp': '2025-09-10 02:32:48.738129', 'step': 5675, 'epoch': 1} {'type': 'loss', 'content': 0.17984771728515625, 'timestamp': '2025-09-10 02:32:48.761669', 'step': 5676, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 2847885091968}, 'timestamp': '2025-09-10 02:32:48.791656', 'step': 5676, 'epoch': 1} {'type': 'loss', 'content': 0.1355038285255432, 'timestamp': '2025-09-10 02:32:48.793695', 'step': 5677, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 2847885091968}, 'timestamp': '2025-09-10 02:32:48.823235', 'step': 5677, 'epoch': 1} {'type': 'loss', 'content': 0.11153997480869293, 'timestamp': '2025-09-10 02:32:48.825344', 'step': 5678, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 128], 'flops': 3797092544000}, 'timestamp': '2025-09-10 02:32:48.855268', 'step': 5678, 'epoch': 1} {'type': 'loss', 'content': 0.0778593197464943, 'timestamp': '2025-09-10 02:32:48.858138', 'step': 5679, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 2847885091968}, 'timestamp': '2025-09-10 02:32:48.887990', 'step': 5679, 'epoch': 1} {'type': 'loss', 'content': 0.14556393027305603, 'timestamp': '2025-09-10 02:32:48.912613', 'step': 5680, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 3322488817984}, 'timestamp': '2025-09-10 02:32:48.942529', 'step': 5680, 'epoch': 1} {'type': 'loss', 'content': 0.19189172983169556, 'timestamp': '2025-09-10 02:32:48.944752', 'step': 5681, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 2847885091968}, 'timestamp': '2025-09-10 02:32:48.975186', 'step': 5681, 'epoch': 1} {'type': 'loss', 'content': 0.15649469196796417, 'timestamp': '2025-09-10 02:32:48.977556', 'step': 5682, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 80], 'flops': 2373281365952}, 'timestamp': '2025-09-10 02:32:49.007684', 'step': 5682, 'epoch': 1} {'type': 'loss', 'content': 0.15616238117218018, 'timestamp': '2025-09-10 02:32:49.010120', 'step': 5683, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 2847885091968}, 'timestamp': '2025-09-10 02:32:49.040309', 'step': 5683, 'epoch': 1} {'type': 'loss', 'content': 0.2537979483604431, 'timestamp': '2025-09-10 02:32:49.063442', 'step': 5684, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 2847885091968}, 'timestamp': '2025-09-10 02:32:49.092670', 'step': 5684, 'epoch': 1} {'type': 'loss', 'content': 0.15268345177173615, 'timestamp': '2025-09-10 02:32:49.094980', 'step': 5685, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 3322488817984}, 'timestamp': '2025-09-10 02:32:49.125301', 'step': 5685, 'epoch': 1} {'type': 'loss', 'content': 0.19394877552986145, 'timestamp': '2025-09-10 02:32:49.127499', 'step': 5686, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 2847885091968}, 'timestamp': '2025-09-10 02:32:49.158411', 'step': 5686, 'epoch': 1} {'type': 'loss', 'content': 0.14210322499275208, 'timestamp': '2025-09-10 02:32:49.161277', 'step': 5687, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 128], 'flops': 3797092544000}, 'timestamp': '2025-09-10 02:32:49.192521', 'step': 5687, 'epoch': 1} {'type': 'loss', 'content': 0.21012485027313232, 'timestamp': '2025-09-10 02:32:49.216402', 'step': 5688, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 144], 'flops': 4271696270016}, 'timestamp': '2025-09-10 02:32:49.247278', 'step': 5688, 'epoch': 1} {'type': 'loss', 'content': 0.1460772156715393, 'timestamp': '2025-09-10 02:32:49.249481', 'step': 5689, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 3322488817984}, 'timestamp': '2025-09-10 02:32:49.280608', 'step': 5689, 'epoch': 1} {'type': 'loss', 'content': 0.20182135701179504, 'timestamp': '2025-09-10 02:32:49.282696', 'step': 5690, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 144], 'flops': 4271696270016}, 'timestamp': '2025-09-10 02:32:49.311647', 'step': 5690, 'epoch': 1} {'type': 'loss', 'content': 0.21801921725273132, 'timestamp': '2025-09-10 02:32:49.313995', 'step': 5691, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 144], 'flops': 4271696270016}, 'timestamp': '2025-09-10 02:32:49.344039', 'step': 5691, 'epoch': 1} {'type': 'loss', 'content': 0.14402705430984497, 'timestamp': '2025-09-10 02:32:49.367565', 'step': 5692, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 160], 'flops': 4746299996032}, 'timestamp': '2025-09-10 02:32:49.398246', 'step': 5692, 'epoch': 1} {'type': 'loss', 'content': 0.1160341426730156, 'timestamp': '2025-09-10 02:32:49.400596', 'step': 5693, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 128], 'flops': 3797092544000}, 'timestamp': '2025-09-10 02:32:49.430664', 'step': 5693, 'epoch': 1} {'type': 'loss', 'content': 0.24919989705085754, 'timestamp': '2025-09-10 02:32:49.433125', 'step': 5694, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 144], 'flops': 4271696270016}, 'timestamp': '2025-09-10 02:32:49.464194', 'step': 5694, 'epoch': 1} {'type': 'loss', 'content': 0.13283994793891907, 'timestamp': '2025-09-10 02:32:49.466811', 'step': 5695, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 3322488817984}, 'timestamp': '2025-09-10 02:32:49.497207', 'step': 5695, 'epoch': 1} {'type': 'loss', 'content': 0.2533894181251526, 'timestamp': '2025-09-10 02:32:49.520900', 'step': 5696, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 128], 'flops': 3797092544000}, 'timestamp': '2025-09-10 02:32:49.551250', 'step': 5696, 'epoch': 1} {'type': 'loss', 'content': 0.1136392131447792, 'timestamp': '2025-09-10 02:32:49.553307', 'step': 5697, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 128], 'flops': 3797092544000}, 'timestamp': '2025-09-10 02:32:49.584299', 'step': 5697, 'epoch': 1} {'type': 'loss', 'content': 0.1867501139640808, 'timestamp': '2025-09-10 02:32:49.586459', 'step': 5698, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 2847885091968}, 'timestamp': '2025-09-10 02:32:49.616255', 'step': 5698, 'epoch': 1} {'type': 'loss', 'content': 0.12479399144649506, 'timestamp': '2025-09-10 02:32:49.618191', 'step': 5699, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 3322488817984}, 'timestamp': '2025-09-10 02:32:49.647290', 'step': 5699, 'epoch': 1} {'type': 'loss', 'content': 0.07847437262535095, 'timestamp': '2025-09-10 02:32:49.670966', 'step': 5700, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 128], 'flops': 3797092544000}, 'timestamp': '2025-09-10 02:32:49.701275', 'step': 5700, 'epoch': 1} {'type': 'loss', 'content': 0.18982920050621033, 'timestamp': '2025-09-10 02:32:49.703685', 'step': 5701, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 3322488817984}, 'timestamp': '2025-09-10 02:32:49.734120', 'step': 5701, 'epoch': 1} {'type': 'loss', 'content': 0.13019871711730957, 'timestamp': '2025-09-10 02:32:49.736469', 'step': 5702, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 2847885091968}, 'timestamp': '2025-09-10 02:32:49.766902', 'step': 5702, 'epoch': 1} {'type': 'loss', 'content': 0.17655541002750397, 'timestamp': '2025-09-10 02:32:49.769134', 'step': 5703, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 2847885091968}, 'timestamp': '2025-09-10 02:32:49.798871', 'step': 5703, 'epoch': 1} {'type': 'loss', 'content': 0.11646367609500885, 'timestamp': '2025-09-10 02:32:49.822560', 'step': 5704, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 3322488817984}, 'timestamp': '2025-09-10 02:32:49.852343', 'step': 5704, 'epoch': 1} {'type': 'loss', 'content': 0.15885472297668457, 'timestamp': '2025-09-10 02:32:49.854671', 'step': 5705, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 128], 'flops': 3797092544000}, 'timestamp': '2025-09-10 02:32:49.887074', 'step': 5705, 'epoch': 1} {'type': 'loss', 'content': 0.1547345519065857, 'timestamp': '2025-09-10 02:32:49.889367', 'step': 5706, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 160], 'flops': 4746299996032}, 'timestamp': '2025-09-10 02:32:49.919945', 'step': 5706, 'epoch': 1} {'type': 'loss', 'content': 0.136062890291214, 'timestamp': '2025-09-10 02:32:49.922231', 'step': 5707, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 144], 'flops': 4271696270016}, 'timestamp': '2025-09-10 02:32:49.952370', 'step': 5707, 'epoch': 1} {'type': 'loss', 'content': 0.14876365661621094, 'timestamp': '2025-09-10 02:32:49.975925', 'step': 5708, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 2847885091968}, 'timestamp': '2025-09-10 02:32:50.005752', 'step': 5708, 'epoch': 1} {'type': 'loss', 'content': 0.23045015335083008, 'timestamp': '2025-09-10 02:32:50.007670', 'step': 5709, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 3322488817984}, 'timestamp': '2025-09-10 02:32:50.036972', 'step': 5709, 'epoch': 1} {'type': 'loss', 'content': 0.20885436236858368, 'timestamp': '2025-09-10 02:32:50.039345', 'step': 5710, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 128], 'flops': 3797092544000}, 'timestamp': '2025-09-10 02:32:50.068681', 'step': 5710, 'epoch': 1} {'type': 'loss', 'content': 0.15825249254703522, 'timestamp': '2025-09-10 02:32:50.070887', 'step': 5711, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 2847885091968}, 'timestamp': '2025-09-10 02:32:50.100910', 'step': 5711, 'epoch': 1} {'type': 'loss', 'content': 0.07463380694389343, 'timestamp': '2025-09-10 02:32:50.124750', 'step': 5712, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 128], 'flops': 3797092544000}, 'timestamp': '2025-09-10 02:32:50.155214', 'step': 5712, 'epoch': 1} {'type': 'loss', 'content': 0.13594667613506317, 'timestamp': '2025-09-10 02:32:50.157647', 'step': 5713, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 3322488817984}, 'timestamp': '2025-09-10 02:32:50.191428', 'step': 5713, 'epoch': 1} {'type': 'loss', 'content': 0.18670813739299774, 'timestamp': '2025-09-10 02:32:50.194245', 'step': 5714, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 144], 'flops': 4271696270016}, 'timestamp': '2025-09-10 02:32:50.228024', 'step': 5714, 'epoch': 1} {'type': 'loss', 'content': 0.16285786032676697, 'timestamp': '2025-09-10 02:32:50.230335', 'step': 5715, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 144], 'flops': 4271696270016}, 'timestamp': '2025-09-10 02:32:50.260072', 'step': 5715, 'epoch': 1} {'type': 'loss', 'content': 0.10862777382135391, 'timestamp': '2025-09-10 02:32:50.284262', 'step': 5716, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 2847885091968}, 'timestamp': '2025-09-10 02:32:50.315347', 'step': 5716, 'epoch': 1} {'type': 'loss', 'content': 0.1901552677154541, 'timestamp': '2025-09-10 02:32:50.317603', 'step': 5717, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 144], 'flops': 4271696270016}, 'timestamp': '2025-09-10 02:32:50.347484', 'step': 5717, 'epoch': 1} {'type': 'loss', 'content': 0.11061986535787582, 'timestamp': '2025-09-10 02:32:50.349914', 'step': 5718, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 3322488817984}, 'timestamp': '2025-09-10 02:32:50.380794', 'step': 5718, 'epoch': 1} {'type': 'loss', 'content': 0.10860849916934967, 'timestamp': '2025-09-10 02:32:50.382979', 'step': 5719, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 2847885091968}, 'timestamp': '2025-09-10 02:32:50.412928', 'step': 5719, 'epoch': 1} {'type': 'loss', 'content': 0.2510855495929718, 'timestamp': '2025-09-10 02:32:50.436691', 'step': 5720, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 144], 'flops': 4271696270016}, 'timestamp': '2025-09-10 02:32:50.466840', 'step': 5720, 'epoch': 1} {'type': 'loss', 'content': 0.12834499776363373, 'timestamp': '2025-09-10 02:32:50.470838', 'step': 5721, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 3322488817984}, 'timestamp': '2025-09-10 02:32:50.500873', 'step': 5721, 'epoch': 1} {'type': 'loss', 'content': 0.13762134313583374, 'timestamp': '2025-09-10 02:32:50.502868', 'step': 5722, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 3322488817984}, 'timestamp': '2025-09-10 02:32:50.532657', 'step': 5722, 'epoch': 1} {'type': 'loss', 'content': 0.1517520397901535, 'timestamp': '2025-09-10 02:32:50.535104', 'step': 5723, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 3322488817984}, 'timestamp': '2025-09-10 02:32:50.565507', 'step': 5723, 'epoch': 1} {'type': 'loss', 'content': 0.09936004877090454, 'timestamp': '2025-09-10 02:32:50.589324', 'step': 5724, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 2847885091968}, 'timestamp': '2025-09-10 02:32:50.618701', 'step': 5724, 'epoch': 1} {'type': 'loss', 'content': 0.1347414255142212, 'timestamp': '2025-09-10 02:32:50.620841', 'step': 5725, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 144], 'flops': 4271696270016}, 'timestamp': '2025-09-10 02:32:50.650612', 'step': 5725, 'epoch': 1} {'type': 'loss', 'content': 0.1858583688735962, 'timestamp': '2025-09-10 02:32:50.652916', 'step': 5726, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 144], 'flops': 4271696270016}, 'timestamp': '2025-09-10 02:32:50.682704', 'step': 5726, 'epoch': 1} {'type': 'loss', 'content': 0.11437499523162842, 'timestamp': '2025-09-10 02:32:50.685371', 'step': 5727, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 3322488817984}, 'timestamp': '2025-09-10 02:32:50.717026', 'step': 5727, 'epoch': 1} {'type': 'loss', 'content': 0.10708877444267273, 'timestamp': '2025-09-10 02:32:50.740357', 'step': 5728, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 2847885091968}, 'timestamp': '2025-09-10 02:32:50.770710', 'step': 5728, 'epoch': 1} {'type': 'loss', 'content': 0.14240486919879913, 'timestamp': '2025-09-10 02:32:50.774751', 'step': 5729, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 2847885091968}, 'timestamp': '2025-09-10 02:32:50.804978', 'step': 5729, 'epoch': 1} {'type': 'loss', 'content': 0.13778066635131836, 'timestamp': '2025-09-10 02:32:50.807096', 'step': 5730, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 144], 'flops': 4271696270016}, 'timestamp': '2025-09-10 02:32:50.837015', 'step': 5730, 'epoch': 1} {'type': 'loss', 'content': 0.07946239411830902, 'timestamp': '2025-09-10 02:32:50.839412', 'step': 5731, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 3322488817984}, 'timestamp': '2025-09-10 02:32:50.869522', 'step': 5731, 'epoch': 1} {'type': 'loss', 'content': 0.12805341184139252, 'timestamp': '2025-09-10 02:32:50.893076', 'step': 5732, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 2847885091968}, 'timestamp': '2025-09-10 02:32:50.924040', 'step': 5732, 'epoch': 1} {'type': 'loss', 'content': 0.1673341542482376, 'timestamp': '2025-09-10 02:32:50.926247', 'step': 5733, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 3322488817984}, 'timestamp': '2025-09-10 02:32:50.955862', 'step': 5733, 'epoch': 1} {'type': 'loss', 'content': 0.10045431554317474, 'timestamp': '2025-09-10 02:32:50.958716', 'step': 5734, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 3322488817984}, 'timestamp': '2025-09-10 02:32:50.988625', 'step': 5734, 'epoch': 1} {'type': 'loss', 'content': 0.12005914747714996, 'timestamp': '2025-09-10 02:32:50.990686', 'step': 5735, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 144], 'flops': 4271696270016}, 'timestamp': '2025-09-10 02:32:51.020501', 'step': 5735, 'epoch': 1} {'type': 'loss', 'content': 0.13839830458164215, 'timestamp': '2025-09-10 02:32:51.043931', 'step': 5736, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 144], 'flops': 4271696270016}, 'timestamp': '2025-09-10 02:32:51.074481', 'step': 5736, 'epoch': 1} {'type': 'loss', 'content': 0.18263235688209534, 'timestamp': '2025-09-10 02:32:51.078791', 'step': 5737, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 128], 'flops': 3797092544000}, 'timestamp': '2025-09-10 02:32:51.108993', 'step': 5737, 'epoch': 1} {'type': 'loss', 'content': 0.1115623489022255, 'timestamp': '2025-09-10 02:32:51.111360', 'step': 5738, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 2847885091968}, 'timestamp': '2025-09-10 02:32:51.144807', 'step': 5738, 'epoch': 1} {'type': 'loss', 'content': 0.12241039425134659, 'timestamp': '2025-09-10 02:32:51.147303', 'step': 5739, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 3322488817984}, 'timestamp': '2025-09-10 02:32:51.176662', 'step': 5739, 'epoch': 1} {'type': 'loss', 'content': 0.13921235501766205, 'timestamp': '2025-09-10 02:32:51.201497', 'step': 5740, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 2847885091968}, 'timestamp': '2025-09-10 02:32:51.233357', 'step': 5740, 'epoch': 1} {'type': 'loss', 'content': 0.10770746320486069, 'timestamp': '2025-09-10 02:32:51.235704', 'step': 5741, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 160], 'flops': 4746299996032}, 'timestamp': '2025-09-10 02:32:51.265484', 'step': 5741, 'epoch': 1} {'type': 'loss', 'content': 0.15281705558300018, 'timestamp': '2025-09-10 02:32:51.268295', 'step': 5742, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 2847885091968}, 'timestamp': '2025-09-10 02:32:51.298055', 'step': 5742, 'epoch': 1} {'type': 'loss', 'content': 0.10443077981472015, 'timestamp': '2025-09-10 02:32:51.300366', 'step': 5743, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 128], 'flops': 3797092544000}, 'timestamp': '2025-09-10 02:32:51.330245', 'step': 5743, 'epoch': 1} {'type': 'loss', 'content': 0.11393328756093979, 'timestamp': '2025-09-10 02:32:51.353959', 'step': 5744, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 2847885091968}, 'timestamp': '2025-09-10 02:32:51.384298', 'step': 5744, 'epoch': 1} {'type': 'loss', 'content': 0.10746170580387115, 'timestamp': '2025-09-10 02:32:51.386293', 'step': 5745, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 2847885091968}, 'timestamp': '2025-09-10 02:32:51.416511', 'step': 5745, 'epoch': 1} {'type': 'loss', 'content': 0.21906878054141998, 'timestamp': '2025-09-10 02:32:51.418909', 'step': 5746, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 2847885091968}, 'timestamp': '2025-09-10 02:32:51.448210', 'step': 5746, 'epoch': 1} {'type': 'loss', 'content': 0.15718980133533478, 'timestamp': '2025-09-10 02:32:51.450510', 'step': 5747, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 3322488817984}, 'timestamp': '2025-09-10 02:32:51.480942', 'step': 5747, 'epoch': 1} {'type': 'loss', 'content': 0.1546034961938858, 'timestamp': '2025-09-10 02:32:51.505551', 'step': 5748, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 2847885091968}, 'timestamp': '2025-09-10 02:32:51.536327', 'step': 5748, 'epoch': 1} {'type': 'loss', 'content': 0.21583528816699982, 'timestamp': '2025-09-10 02:32:51.538548', 'step': 5749, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 2847885091968}, 'timestamp': '2025-09-10 02:32:51.568653', 'step': 5749, 'epoch': 1} {'type': 'loss', 'content': 0.13375218212604523, 'timestamp': '2025-09-10 02:32:51.571090', 'step': 5750, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 2847885091968}, 'timestamp': '2025-09-10 02:32:51.601176', 'step': 5750, 'epoch': 1} {'type': 'loss', 'content': 0.14999152719974518, 'timestamp': '2025-09-10 02:32:51.603403', 'step': 5751, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 2847885091968}, 'timestamp': '2025-09-10 02:32:51.633795', 'step': 5751, 'epoch': 1} {'type': 'loss', 'content': 0.1070929691195488, 'timestamp': '2025-09-10 02:32:51.657459', 'step': 5752, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 144], 'flops': 4271696270016}, 'timestamp': '2025-09-10 02:32:51.687834', 'step': 5752, 'epoch': 1} {'type': 'loss', 'content': 0.07383694499731064, 'timestamp': '2025-09-10 02:32:51.690091', 'step': 5753, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 3322488817984}, 'timestamp': '2025-09-10 02:32:51.719635', 'step': 5753, 'epoch': 1} {'type': 'loss', 'content': 0.09335771203041077, 'timestamp': '2025-09-10 02:32:51.721819', 'step': 5754, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 128], 'flops': 3797092544000}, 'timestamp': '2025-09-10 02:32:51.751738', 'step': 5754, 'epoch': 1} {'type': 'loss', 'content': 0.12487731873989105, 'timestamp': '2025-09-10 02:32:51.754643', 'step': 5755, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 2847885091968}, 'timestamp': '2025-09-10 02:32:51.784506', 'step': 5755, 'epoch': 1} {'type': 'loss', 'content': 0.11542459577322006, 'timestamp': '2025-09-10 02:32:51.809499', 'step': 5756, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 3322488817984}, 'timestamp': '2025-09-10 02:32:51.839959', 'step': 5756, 'epoch': 1} {'type': 'loss', 'content': 0.15317276120185852, 'timestamp': '2025-09-10 02:32:51.842097', 'step': 5757, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 144], 'flops': 4271696270016}, 'timestamp': '2025-09-10 02:32:51.872214', 'step': 5757, 'epoch': 1} {'type': 'loss', 'content': 0.16988340020179749, 'timestamp': '2025-09-10 02:32:51.874343', 'step': 5758, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 3322488817984}, 'timestamp': '2025-09-10 02:32:51.904356', 'step': 5758, 'epoch': 1} {'type': 'loss', 'content': 0.1155187264084816, 'timestamp': '2025-09-10 02:32:51.906564', 'step': 5759, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 2847885091968}, 'timestamp': '2025-09-10 02:32:51.936758', 'step': 5759, 'epoch': 1} {'type': 'loss', 'content': 0.09801401197910309, 'timestamp': '2025-09-10 02:32:51.960455', 'step': 5760, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 2847885091968}, 'timestamp': '2025-09-10 02:32:51.991537', 'step': 5760, 'epoch': 1} {'type': 'loss', 'content': 0.14574208855628967, 'timestamp': '2025-09-10 02:32:51.993632', 'step': 5761, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 128], 'flops': 3797092544000}, 'timestamp': '2025-09-10 02:32:52.024398', 'step': 5761, 'epoch': 1} {'type': 'loss', 'content': 0.15251678228378296, 'timestamp': '2025-09-10 02:32:52.026485', 'step': 5762, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 2847885091968}, 'timestamp': '2025-09-10 02:32:52.056391', 'step': 5762, 'epoch': 1} {'type': 'loss', 'content': 0.06287969648838043, 'timestamp': '2025-09-10 02:32:52.058481', 'step': 5763, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 2847885091968}, 'timestamp': '2025-09-10 02:32:52.088651', 'step': 5763, 'epoch': 1} {'type': 'loss', 'content': 0.12486550211906433, 'timestamp': '2025-09-10 02:32:52.113202', 'step': 5764, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 2847885091968}, 'timestamp': '2025-09-10 02:32:52.144377', 'step': 5764, 'epoch': 1} {'type': 'loss', 'content': 0.326953649520874, 'timestamp': '2025-09-10 02:32:52.146664', 'step': 5765, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 160], 'flops': 4746299996032}, 'timestamp': '2025-09-10 02:32:52.176571', 'step': 5765, 'epoch': 1} {'type': 'loss', 'content': 0.17291224002838135, 'timestamp': '2025-09-10 02:32:52.179442', 'step': 5766, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 144], 'flops': 4271696270016}, 'timestamp': '2025-09-10 02:32:52.210576', 'step': 5766, 'epoch': 1} {'type': 'loss', 'content': 0.16589795053005219, 'timestamp': '2025-09-10 02:32:52.213515', 'step': 5767, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 128], 'flops': 3797092544000}, 'timestamp': '2025-09-10 02:32:52.245195', 'step': 5767, 'epoch': 1} {'type': 'loss', 'content': 0.17047685384750366, 'timestamp': '2025-09-10 02:32:52.268782', 'step': 5768, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 3322488817984}, 'timestamp': '2025-09-10 02:32:52.299177', 'step': 5768, 'epoch': 1} {'type': 'loss', 'content': 0.13059350848197937, 'timestamp': '2025-09-10 02:32:52.301752', 'step': 5769, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 80], 'flops': 2373281365952}, 'timestamp': '2025-09-10 02:32:52.332345', 'step': 5769, 'epoch': 1} {'type': 'loss', 'content': 0.1600550413131714, 'timestamp': '2025-09-10 02:32:52.334599', 'step': 5770, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 128], 'flops': 3797092544000}, 'timestamp': '2025-09-10 02:32:52.364122', 'step': 5770, 'epoch': 1} {'type': 'loss', 'content': 0.22118334472179413, 'timestamp': '2025-09-10 02:32:52.366646', 'step': 5771, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 2847885091968}, 'timestamp': '2025-09-10 02:32:52.396552', 'step': 5771, 'epoch': 1} {'type': 'loss', 'content': 0.3159196972846985, 'timestamp': '2025-09-10 02:32:52.420091', 'step': 5772, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 3322488817984}, 'timestamp': '2025-09-10 02:32:52.449749', 'step': 5772, 'epoch': 1} {'type': 'loss', 'content': 0.22971846163272858, 'timestamp': '2025-09-10 02:32:52.452044', 'step': 5773, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 2847885091968}, 'timestamp': '2025-09-10 02:32:52.481525', 'step': 5773, 'epoch': 1} {'type': 'loss', 'content': 0.1536039412021637, 'timestamp': '2025-09-10 02:32:52.483765', 'step': 5774, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 128], 'flops': 3797092544000}, 'timestamp': '2025-09-10 02:32:52.513120', 'step': 5774, 'epoch': 1} {'type': 'loss', 'content': 0.20328931510448456, 'timestamp': '2025-09-10 02:32:52.515616', 'step': 5775, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 2847885091968}, 'timestamp': '2025-09-10 02:32:52.545363', 'step': 5775, 'epoch': 1} {'type': 'loss', 'content': 0.13249246776103973, 'timestamp': '2025-09-10 02:32:52.568972', 'step': 5776, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 2847885091968}, 'timestamp': '2025-09-10 02:32:52.599714', 'step': 5776, 'epoch': 1} {'type': 'loss', 'content': 0.11362718045711517, 'timestamp': '2025-09-10 02:32:52.601999', 'step': 5777, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 2847885091968}, 'timestamp': '2025-09-10 02:32:52.631863', 'step': 5777, 'epoch': 1} {'type': 'loss', 'content': 0.14254848659038544, 'timestamp': '2025-09-10 02:32:52.634096', 'step': 5778, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 128], 'flops': 3797092544000}, 'timestamp': '2025-09-10 02:32:52.663687', 'step': 5778, 'epoch': 1} {'type': 'loss', 'content': 0.21582283079624176, 'timestamp': '2025-09-10 02:32:52.666379', 'step': 5779, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 144], 'flops': 4271696270016}, 'timestamp': '2025-09-10 02:32:52.696577', 'step': 5779, 'epoch': 1} {'type': 'loss', 'content': 0.193581685423851, 'timestamp': '2025-09-10 02:32:52.721367', 'step': 5780, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 128], 'flops': 3797092544000}, 'timestamp': '2025-09-10 02:32:52.755052', 'step': 5780, 'epoch': 1} {'type': 'loss', 'content': 0.16241930425167084, 'timestamp': '2025-09-10 02:32:52.757862', 'step': 5781, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 3322488817984}, 'timestamp': '2025-09-10 02:32:52.788351', 'step': 5781, 'epoch': 1} {'type': 'loss', 'content': 0.09314537048339844, 'timestamp': '2025-09-10 02:32:52.790765', 'step': 5782, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 3322488817984}, 'timestamp': '2025-09-10 02:32:52.823049', 'step': 5782, 'epoch': 1} {'type': 'loss', 'content': 0.19211915135383606, 'timestamp': '2025-09-10 02:32:52.825227', 'step': 5783, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 128], 'flops': 3797092544000}, 'timestamp': '2025-09-10 02:32:52.854596', 'step': 5783, 'epoch': 1} {'type': 'loss', 'content': 0.13255125284194946, 'timestamp': '2025-09-10 02:32:52.878004', 'step': 5784, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 128], 'flops': 3797092544000}, 'timestamp': '2025-09-10 02:32:52.908275', 'step': 5784, 'epoch': 1} {'type': 'loss', 'content': 0.18648268282413483, 'timestamp': '2025-09-10 02:32:52.910651', 'step': 5785, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 2847885091968}, 'timestamp': '2025-09-10 02:32:52.940585', 'step': 5785, 'epoch': 1} {'type': 'loss', 'content': 0.20159181952476501, 'timestamp': '2025-09-10 02:32:52.942936', 'step': 5786, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 3322488817984}, 'timestamp': '2025-09-10 02:32:52.972357', 'step': 5786, 'epoch': 1} {'type': 'loss', 'content': 0.21326524019241333, 'timestamp': '2025-09-10 02:32:52.974670', 'step': 5787, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 3322488817984}, 'timestamp': '2025-09-10 02:32:53.005166', 'step': 5787, 'epoch': 1} {'type': 'loss', 'content': 0.072846919298172, 'timestamp': '2025-09-10 02:32:53.028448', 'step': 5788, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 3322488817984}, 'timestamp': '2025-09-10 02:32:53.058454', 'step': 5788, 'epoch': 1} {'type': 'loss', 'content': 0.16817668080329895, 'timestamp': '2025-09-10 02:32:53.060805', 'step': 5789, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 3322488817984}, 'timestamp': '2025-09-10 02:32:53.091072', 'step': 5789, 'epoch': 1} {'type': 'loss', 'content': 0.19535022974014282, 'timestamp': '2025-09-10 02:32:53.093420', 'step': 5790, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 3322488817984}, 'timestamp': '2025-09-10 02:32:53.123575', 'step': 5790, 'epoch': 1} {'type': 'loss', 'content': 0.14742666482925415, 'timestamp': '2025-09-10 02:32:53.125909', 'step': 5791, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 3322488817984}, 'timestamp': '2025-09-10 02:32:53.155966', 'step': 5791, 'epoch': 1} {'type': 'loss', 'content': 0.1457383632659912, 'timestamp': '2025-09-10 02:32:53.179396', 'step': 5792, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 128], 'flops': 3797092544000}, 'timestamp': '2025-09-10 02:32:53.210345', 'step': 5792, 'epoch': 1} {'type': 'loss', 'content': 0.17145639657974243, 'timestamp': '2025-09-10 02:32:53.212679', 'step': 5793, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 3322488817984}, 'timestamp': '2025-09-10 02:32:53.243216', 'step': 5793, 'epoch': 1} {'type': 'loss', 'content': 0.17862339317798615, 'timestamp': '2025-09-10 02:32:53.245792', 'step': 5794, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 2847885091968}, 'timestamp': '2025-09-10 02:32:53.275988', 'step': 5794, 'epoch': 1} {'type': 'loss', 'content': 0.17687508463859558, 'timestamp': '2025-09-10 02:32:53.278460', 'step': 5795, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 128], 'flops': 3797092544000}, 'timestamp': '2025-09-10 02:32:53.308366', 'step': 5795, 'epoch': 1} {'type': 'loss', 'content': 0.22915837168693542, 'timestamp': '2025-09-10 02:32:53.332147', 'step': 5796, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 2847885091968}, 'timestamp': '2025-09-10 02:32:53.362735', 'step': 5796, 'epoch': 1} {'type': 'loss', 'content': 0.12175628542900085, 'timestamp': '2025-09-10 02:32:53.365120', 'step': 5797, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 3322488817984}, 'timestamp': '2025-09-10 02:32:53.395261', 'step': 5797, 'epoch': 1} {'type': 'loss', 'content': 0.07340841740369797, 'timestamp': '2025-09-10 02:32:53.397550', 'step': 5798, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 2847885091968}, 'timestamp': '2025-09-10 02:32:53.427894', 'step': 5798, 'epoch': 1} {'type': 'loss', 'content': 0.11523938924074173, 'timestamp': '2025-09-10 02:32:53.430252', 'step': 5799, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 144], 'flops': 4271696270016}, 'timestamp': '2025-09-10 02:32:53.460489', 'step': 5799, 'epoch': 1} {'type': 'loss', 'content': 0.13096362352371216, 'timestamp': '2025-09-10 02:32:53.484027', 'step': 5800, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 2847885091968}, 'timestamp': '2025-09-10 02:32:53.514547', 'step': 5800, 'epoch': 1} {'type': 'loss', 'content': 0.258846253156662, 'timestamp': '2025-09-10 02:32:53.516582', 'step': 5801, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 144], 'flops': 4271696270016}, 'timestamp': '2025-09-10 02:32:53.546544', 'step': 5801, 'epoch': 1} {'type': 'loss', 'content': 0.25029850006103516, 'timestamp': '2025-09-10 02:32:53.548834', 'step': 5802, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 3322488817984}, 'timestamp': '2025-09-10 02:32:53.578392', 'step': 5802, 'epoch': 1} {'type': 'loss', 'content': 0.04168233647942543, 'timestamp': '2025-09-10 02:32:53.580572', 'step': 5803, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 2847885091968}, 'timestamp': '2025-09-10 02:32:53.610992', 'step': 5803, 'epoch': 1} {'type': 'loss', 'content': 0.1170482262969017, 'timestamp': '2025-09-10 02:32:53.634458', 'step': 5804, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 2847885091968}, 'timestamp': '2025-09-10 02:32:53.664604', 'step': 5804, 'epoch': 1} {'type': 'loss', 'content': 0.11624391376972198, 'timestamp': '2025-09-10 02:32:53.666655', 'step': 5805, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 144], 'flops': 4271696270016}, 'timestamp': '2025-09-10 02:32:53.697453', 'step': 5805, 'epoch': 1} {'type': 'loss', 'content': 0.20401588082313538, 'timestamp': '2025-09-10 02:32:53.699664', 'step': 5806, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 128], 'flops': 3797092544000}, 'timestamp': '2025-09-10 02:32:53.729706', 'step': 5806, 'epoch': 1} {'type': 'loss', 'content': 0.1436782330274582, 'timestamp': '2025-09-10 02:32:53.731812', 'step': 5807, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 3322488817984}, 'timestamp': '2025-09-10 02:32:53.761507', 'step': 5807, 'epoch': 1} {'type': 'loss', 'content': 0.19517798721790314, 'timestamp': '2025-09-10 02:32:53.785168', 'step': 5808, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 144], 'flops': 4271696270016}, 'timestamp': '2025-09-10 02:32:53.815417', 'step': 5808, 'epoch': 1} {'type': 'loss', 'content': 0.14378415048122406, 'timestamp': '2025-09-10 02:32:53.817566', 'step': 5809, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 2847885091968}, 'timestamp': '2025-09-10 02:32:53.847740', 'step': 5809, 'epoch': 1} {'type': 'loss', 'content': 0.0946691557765007, 'timestamp': '2025-09-10 02:32:53.850068', 'step': 5810, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 2847885091968}, 'timestamp': '2025-09-10 02:32:53.879899', 'step': 5810, 'epoch': 1} {'type': 'loss', 'content': 0.05925869941711426, 'timestamp': '2025-09-10 02:32:53.882760', 'step': 5811, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 80], 'flops': 2373281365952}, 'timestamp': '2025-09-10 02:32:53.913554', 'step': 5811, 'epoch': 1} {'type': 'loss', 'content': 0.11240234971046448, 'timestamp': '2025-09-10 02:32:53.937023', 'step': 5812, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 3322488817984}, 'timestamp': '2025-09-10 02:32:53.966770', 'step': 5812, 'epoch': 1} {'type': 'loss', 'content': 0.16323915123939514, 'timestamp': '2025-09-10 02:32:53.969116', 'step': 5813, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 2847885091968}, 'timestamp': '2025-09-10 02:32:53.999157', 'step': 5813, 'epoch': 1} {'type': 'loss', 'content': 0.16099552810192108, 'timestamp': '2025-09-10 02:32:54.001369', 'step': 5814, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 128], 'flops': 3797092544000}, 'timestamp': '2025-09-10 02:32:54.031557', 'step': 5814, 'epoch': 1} {'type': 'loss', 'content': 0.1646791249513626, 'timestamp': '2025-09-10 02:32:54.033949', 'step': 5815, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 2847885091968}, 'timestamp': '2025-09-10 02:32:54.063492', 'step': 5815, 'epoch': 1} {'type': 'loss', 'content': 0.13975702226161957, 'timestamp': '2025-09-10 02:32:54.087008', 'step': 5816, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 3322488817984}, 'timestamp': '2025-09-10 02:32:54.117214', 'step': 5816, 'epoch': 1} {'type': 'loss', 'content': 0.131212517619133, 'timestamp': '2025-09-10 02:32:54.119467', 'step': 5817, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 2847885091968}, 'timestamp': '2025-09-10 02:32:54.149181', 'step': 5817, 'epoch': 1} {'type': 'loss', 'content': 0.14959315955638885, 'timestamp': '2025-09-10 02:32:54.156863', 'step': 5818, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 2847885091968}, 'timestamp': '2025-09-10 02:32:54.186878', 'step': 5818, 'epoch': 1} {'type': 'loss', 'content': 0.07478346675634384, 'timestamp': '2025-09-10 02:32:54.188933', 'step': 5819, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 2847885091968}, 'timestamp': '2025-09-10 02:32:54.228030', 'step': 5819, 'epoch': 1} {'type': 'loss', 'content': 0.17994831502437592, 'timestamp': '2025-09-10 02:32:54.251456', 'step': 5820, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 3322488817984}, 'timestamp': '2025-09-10 02:32:54.281858', 'step': 5820, 'epoch': 1} {'type': 'loss', 'content': 0.1308569312095642, 'timestamp': '2025-09-10 02:32:54.284099', 'step': 5821, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 128], 'flops': 3797092544000}, 'timestamp': '2025-09-10 02:32:54.313846', 'step': 5821, 'epoch': 1} {'type': 'loss', 'content': 0.21317610144615173, 'timestamp': '2025-09-10 02:32:54.316280', 'step': 5822, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 128], 'flops': 3797092544000}, 'timestamp': '2025-09-10 02:32:54.347596', 'step': 5822, 'epoch': 1} {'type': 'loss', 'content': 0.12365012615919113, 'timestamp': '2025-09-10 02:32:54.349680', 'step': 5823, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 3322488817984}, 'timestamp': '2025-09-10 02:32:54.379789', 'step': 5823, 'epoch': 1} {'type': 'loss', 'content': 0.134015753865242, 'timestamp': '2025-09-10 02:32:54.403254', 'step': 5824, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 2847885091968}, 'timestamp': '2025-09-10 02:32:54.434673', 'step': 5824, 'epoch': 1} {'type': 'loss', 'content': 0.14200952649116516, 'timestamp': '2025-09-10 02:32:54.436866', 'step': 5825, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 144], 'flops': 4271696270016}, 'timestamp': '2025-09-10 02:32:54.466730', 'step': 5825, 'epoch': 1} {'type': 'loss', 'content': 0.18528775870800018, 'timestamp': '2025-09-10 02:32:54.469328', 'step': 5826, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 128], 'flops': 3797092544000}, 'timestamp': '2025-09-10 02:32:54.500774', 'step': 5826, 'epoch': 1} {'type': 'loss', 'content': 0.14100952446460724, 'timestamp': '2025-09-10 02:32:54.503118', 'step': 5827, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 3322488817984}, 'timestamp': '2025-09-10 02:32:54.533046', 'step': 5827, 'epoch': 1} {'type': 'loss', 'content': 0.09236739575862885, 'timestamp': '2025-09-10 02:32:54.557112', 'step': 5828, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 2847885091968}, 'timestamp': '2025-09-10 02:32:54.586976', 'step': 5828, 'epoch': 1} {'type': 'loss', 'content': 0.15197907388210297, 'timestamp': '2025-09-10 02:32:54.589255', 'step': 5829, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 3322488817984}, 'timestamp': '2025-09-10 02:32:54.619189', 'step': 5829, 'epoch': 1} {'type': 'loss', 'content': 0.14232344925403595, 'timestamp': '2025-09-10 02:32:54.622205', 'step': 5830, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 3322488817984}, 'timestamp': '2025-09-10 02:32:54.653069', 'step': 5830, 'epoch': 1} {'type': 'loss', 'content': 0.18918530642986298, 'timestamp': '2025-09-10 02:32:54.655573', 'step': 5831, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 128], 'flops': 3797092544000}, 'timestamp': '2025-09-10 02:32:54.686070', 'step': 5831, 'epoch': 1} {'type': 'loss', 'content': 0.14117442071437836, 'timestamp': '2025-09-10 02:32:54.709551', 'step': 5832, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 192], 'flops': 5695507448064}, 'timestamp': '2025-09-10 02:32:54.740262', 'step': 5832, 'epoch': 1} {'type': 'loss', 'content': 0.2178574651479721, 'timestamp': '2025-09-10 02:32:54.742309', 'step': 5833, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 3322488817984}, 'timestamp': '2025-09-10 02:32:54.772778', 'step': 5833, 'epoch': 1} {'type': 'loss', 'content': 0.12952230870723724, 'timestamp': '2025-09-10 02:32:54.774988', 'step': 5834, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 3322488817984}, 'timestamp': '2025-09-10 02:32:54.810882', 'step': 5834, 'epoch': 1} {'type': 'loss', 'content': 0.07346127927303314, 'timestamp': '2025-09-10 02:32:54.813031', 'step': 5835, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 128], 'flops': 3797092544000}, 'timestamp': '2025-09-10 02:32:54.842660', 'step': 5835, 'epoch': 1} {'type': 'loss', 'content': 0.19237974286079407, 'timestamp': '2025-09-10 02:32:54.866149', 'step': 5836, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 3322488817984}, 'timestamp': '2025-09-10 02:32:54.896948', 'step': 5836, 'epoch': 1} {'type': 'loss', 'content': 0.10517240315675735, 'timestamp': '2025-09-10 02:32:54.899174', 'step': 5837, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 3322488817984}, 'timestamp': '2025-09-10 02:32:54.929415', 'step': 5837, 'epoch': 1} {'type': 'loss', 'content': 0.15758921205997467, 'timestamp': '2025-09-10 02:32:54.931343', 'step': 5838, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 3322488817984}, 'timestamp': '2025-09-10 02:32:54.961022', 'step': 5838, 'epoch': 1} {'type': 'loss', 'content': 0.14691470563411713, 'timestamp': '2025-09-10 02:32:54.963341', 'step': 5839, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 80], 'flops': 2373281365952}, 'timestamp': '2025-09-10 02:32:54.994903', 'step': 5839, 'epoch': 1} {'type': 'loss', 'content': 0.1435762345790863, 'timestamp': '2025-09-10 02:32:55.018515', 'step': 5840, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 144], 'flops': 4271696270016}, 'timestamp': '2025-09-10 02:32:55.049034', 'step': 5840, 'epoch': 1} {'type': 'loss', 'content': 0.10719712823629379, 'timestamp': '2025-09-10 02:32:55.051158', 'step': 5841, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 3322488817984}, 'timestamp': '2025-09-10 02:32:55.080657', 'step': 5841, 'epoch': 1} {'type': 'loss', 'content': 0.15617331862449646, 'timestamp': '2025-09-10 02:32:55.082918', 'step': 5842, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 2847885091968}, 'timestamp': '2025-09-10 02:32:55.113811', 'step': 5842, 'epoch': 1} {'type': 'loss', 'content': 0.07508457452058792, 'timestamp': '2025-09-10 02:32:55.115971', 'step': 5843, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 3322488817984}, 'timestamp': '2025-09-10 02:32:55.146035', 'step': 5843, 'epoch': 1} {'type': 'loss', 'content': 0.1160520613193512, 'timestamp': '2025-09-10 02:32:55.169524', 'step': 5844, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 144], 'flops': 4271696270016}, 'timestamp': '2025-09-10 02:32:55.202021', 'step': 5844, 'epoch': 1} {'type': 'loss', 'content': 0.13596361875534058, 'timestamp': '2025-09-10 02:32:55.204090', 'step': 5845, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 80], 'flops': 2373281365952}, 'timestamp': '2025-09-10 02:32:55.235026', 'step': 5845, 'epoch': 1} {'type': 'loss', 'content': 0.1750887632369995, 'timestamp': '2025-09-10 02:32:55.237691', 'step': 5846, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 80], 'flops': 2373281365952}, 'timestamp': '2025-09-10 02:32:55.268732', 'step': 5846, 'epoch': 1} {'type': 'loss', 'content': 0.23728711903095245, 'timestamp': '2025-09-10 02:32:55.272291', 'step': 5847, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 2847885091968}, 'timestamp': '2025-09-10 02:32:55.307188', 'step': 5847, 'epoch': 1} {'type': 'loss', 'content': 0.1907857209444046, 'timestamp': '2025-09-10 02:32:55.332863', 'step': 5848, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 2847885091968}, 'timestamp': '2025-09-10 02:32:55.362385', 'step': 5848, 'epoch': 1} {'type': 'loss', 'content': 0.21195414662361145, 'timestamp': '2025-09-10 02:32:55.366559', 'step': 5849, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 3322488817984}, 'timestamp': '2025-09-10 02:32:55.397556', 'step': 5849, 'epoch': 1} {'type': 'loss', 'content': 0.2104077935218811, 'timestamp': '2025-09-10 02:32:55.400754', 'step': 5850, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 144], 'flops': 4271696270016}, 'timestamp': '2025-09-10 02:32:55.432914', 'step': 5850, 'epoch': 1} {'type': 'loss', 'content': 0.20169851183891296, 'timestamp': '2025-09-10 02:32:55.435566', 'step': 5851, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 2847885091968}, 'timestamp': '2025-09-10 02:32:55.468698', 'step': 5851, 'epoch': 1} {'type': 'loss', 'content': 0.10695669800043106, 'timestamp': '2025-09-10 02:32:55.492957', 'step': 5852, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 128], 'flops': 3797092544000}, 'timestamp': '2025-09-10 02:32:55.523365', 'step': 5852, 'epoch': 1} {'type': 'loss', 'content': 0.15428940951824188, 'timestamp': '2025-09-10 02:32:55.526005', 'step': 5853, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 2847885091968}, 'timestamp': '2025-09-10 02:32:55.559604', 'step': 5853, 'epoch': 1} {'type': 'loss', 'content': 0.1249641627073288, 'timestamp': '2025-09-10 02:32:55.562020', 'step': 5854, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 3322488817984}, 'timestamp': '2025-09-10 02:32:55.595815', 'step': 5854, 'epoch': 1} {'type': 'loss', 'content': 0.22022537887096405, 'timestamp': '2025-09-10 02:32:55.598152', 'step': 5855, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 144], 'flops': 4271696270016}, 'timestamp': '2025-09-10 02:32:55.632773', 'step': 5855, 'epoch': 1} {'type': 'loss', 'content': 0.15293574333190918, 'timestamp': '2025-09-10 02:32:55.658045', 'step': 5856, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 128], 'flops': 3797092544000}, 'timestamp': '2025-09-10 02:32:55.690569', 'step': 5856, 'epoch': 1} {'type': 'loss', 'content': 0.2599971890449524, 'timestamp': '2025-09-10 02:32:55.693090', 'step': 5857, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 3322488817984}, 'timestamp': '2025-09-10 02:32:55.725633', 'step': 5857, 'epoch': 1} {'type': 'loss', 'content': 0.1460486352443695, 'timestamp': '2025-09-10 02:32:55.728564', 'step': 5858, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 3322488817984}, 'timestamp': '2025-09-10 02:32:55.759561', 'step': 5858, 'epoch': 1} {'type': 'loss', 'content': 0.18714597821235657, 'timestamp': '2025-09-10 02:32:55.763299', 'step': 5859, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 144], 'flops': 4271696270016}, 'timestamp': '2025-09-10 02:32:55.800199', 'step': 5859, 'epoch': 1} {'type': 'loss', 'content': 0.21839530766010284, 'timestamp': '2025-09-10 02:32:55.824165', 'step': 5860, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 3322488817984}, 'timestamp': '2025-09-10 02:32:55.857345', 'step': 5860, 'epoch': 1} {'type': 'loss', 'content': 0.14389681816101074, 'timestamp': '2025-09-10 02:32:55.860187', 'step': 5861, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 2847885091968}, 'timestamp': '2025-09-10 02:32:55.890236', 'step': 5861, 'epoch': 1} {'type': 'loss', 'content': 0.17394791543483734, 'timestamp': '2025-09-10 02:32:55.892815', 'step': 5862, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 144], 'flops': 4271696270016}, 'timestamp': '2025-09-10 02:32:55.923721', 'step': 5862, 'epoch': 1} {'type': 'loss', 'content': 0.15348729491233826, 'timestamp': '2025-09-10 02:32:55.928613', 'step': 5863, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 128], 'flops': 3797092544000}, 'timestamp': '2025-09-10 02:32:55.963892', 'step': 5863, 'epoch': 1} {'type': 'loss', 'content': 0.08453972637653351, 'timestamp': '2025-09-10 02:32:55.987750', 'step': 5864, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 3322488817984}, 'timestamp': '2025-09-10 02:32:56.020849', 'step': 5864, 'epoch': 1} {'type': 'loss', 'content': 0.13448157906532288, 'timestamp': '2025-09-10 02:32:56.027510', 'step': 5865, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 2847885091968}, 'timestamp': '2025-09-10 02:32:56.060452', 'step': 5865, 'epoch': 1} {'type': 'loss', 'content': 0.1696837842464447, 'timestamp': '2025-09-10 02:32:56.065455', 'step': 5866, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 2847885091968}, 'timestamp': '2025-09-10 02:32:56.095466', 'step': 5866, 'epoch': 1} {'type': 'loss', 'content': 0.16720633208751678, 'timestamp': '2025-09-10 02:32:56.102442', 'step': 5867, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 2847885091968}, 'timestamp': '2025-09-10 02:32:56.139971', 'step': 5867, 'epoch': 1} {'type': 'loss', 'content': 0.15325061976909637, 'timestamp': '2025-09-10 02:32:56.164461', 'step': 5868, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 128], 'flops': 3797092544000}, 'timestamp': '2025-09-10 02:32:56.198656', 'step': 5868, 'epoch': 1} {'type': 'loss', 'content': 0.15280084311962128, 'timestamp': '2025-09-10 02:32:56.201115', 'step': 5869, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 144], 'flops': 4271696270016}, 'timestamp': '2025-09-10 02:32:56.231935', 'step': 5869, 'epoch': 1} {'type': 'loss', 'content': 0.19626645743846893, 'timestamp': '2025-09-10 02:32:56.233954', 'step': 5870, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 3322488817984}, 'timestamp': '2025-09-10 02:32:56.263701', 'step': 5870, 'epoch': 1} {'type': 'loss', 'content': 0.1290809065103531, 'timestamp': '2025-09-10 02:32:56.265755', 'step': 5871, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 128], 'flops': 3797092544000}, 'timestamp': '2025-09-10 02:32:56.300931', 'step': 5871, 'epoch': 1} {'type': 'loss', 'content': 0.09190259128808975, 'timestamp': '2025-09-10 02:32:56.324414', 'step': 5872, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 3322488817984}, 'timestamp': '2025-09-10 02:32:56.355700', 'step': 5872, 'epoch': 1} {'type': 'loss', 'content': 0.1465306282043457, 'timestamp': '2025-09-10 02:32:56.359081', 'step': 5873, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 2847885091968}, 'timestamp': '2025-09-10 02:32:56.391420', 'step': 5873, 'epoch': 1} {'type': 'loss', 'content': 0.10177476704120636, 'timestamp': '2025-09-10 02:32:56.393657', 'step': 5874, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 128], 'flops': 3797092544000}, 'timestamp': '2025-09-10 02:32:56.424295', 'step': 5874, 'epoch': 1} {'type': 'loss', 'content': 0.16234183311462402, 'timestamp': '2025-09-10 02:32:56.426556', 'step': 5875, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 3322488817984}, 'timestamp': '2025-09-10 02:32:56.458057', 'step': 5875, 'epoch': 1} {'type': 'loss', 'content': 0.15733665227890015, 'timestamp': '2025-09-10 02:32:56.481482', 'step': 5876, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 3322488817984}, 'timestamp': '2025-09-10 02:32:56.511872', 'step': 5876, 'epoch': 1} {'type': 'loss', 'content': 0.10479560494422913, 'timestamp': '2025-09-10 02:32:56.514767', 'step': 5877, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 2847885091968}, 'timestamp': '2025-09-10 02:32:56.545281', 'step': 5877, 'epoch': 1} {'type': 'loss', 'content': 0.08330951631069183, 'timestamp': '2025-09-10 02:32:56.548164', 'step': 5878, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 2847885091968}, 'timestamp': '2025-09-10 02:32:56.578255', 'step': 5878, 'epoch': 1} {'type': 'loss', 'content': 0.16998934745788574, 'timestamp': '2025-09-10 02:32:56.580471', 'step': 5879, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 144], 'flops': 4271696270016}, 'timestamp': '2025-09-10 02:32:56.610447', 'step': 5879, 'epoch': 1} {'type': 'loss', 'content': 0.1480606645345688, 'timestamp': '2025-09-10 02:32:56.634085', 'step': 5880, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 128], 'flops': 3797092544000}, 'timestamp': '2025-09-10 02:32:56.664251', 'step': 5880, 'epoch': 1} {'type': 'loss', 'content': 0.24720756709575653, 'timestamp': '2025-09-10 02:32:56.666439', 'step': 5881, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 2847885091968}, 'timestamp': '2025-09-10 02:32:56.696186', 'step': 5881, 'epoch': 1} {'type': 'loss', 'content': 0.1955413967370987, 'timestamp': '2025-09-10 02:32:56.698087', 'step': 5882, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 80], 'flops': 2373281365952}, 'timestamp': '2025-09-10 02:32:56.728253', 'step': 5882, 'epoch': 1} {'type': 'loss', 'content': 0.1566883623600006, 'timestamp': '2025-09-10 02:32:56.730647', 'step': 5883, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 128], 'flops': 3797092544000}, 'timestamp': '2025-09-10 02:32:56.760767', 'step': 5883, 'epoch': 1} {'type': 'loss', 'content': 0.1797574907541275, 'timestamp': '2025-09-10 02:32:56.784667', 'step': 5884, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 128], 'flops': 3797092544000}, 'timestamp': '2025-09-10 02:32:56.817408', 'step': 5884, 'epoch': 1} {'type': 'loss', 'content': 0.05921102687716484, 'timestamp': '2025-09-10 02:32:56.819724', 'step': 5885, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 3322488817984}, 'timestamp': '2025-09-10 02:32:56.849468', 'step': 5885, 'epoch': 1} {'type': 'loss', 'content': 0.1843581348657608, 'timestamp': '2025-09-10 02:32:56.852453', 'step': 5886, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 3322488817984}, 'timestamp': '2025-09-10 02:32:56.881938', 'step': 5886, 'epoch': 1} {'type': 'loss', 'content': 0.11124137043952942, 'timestamp': '2025-09-10 02:32:56.883809', 'step': 5887, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 3322488817984}, 'timestamp': '2025-09-10 02:32:56.912975', 'step': 5887, 'epoch': 1} {'type': 'loss', 'content': 0.16121186316013336, 'timestamp': '2025-09-10 02:32:56.936428', 'step': 5888, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 128], 'flops': 3797092544000}, 'timestamp': '2025-09-10 02:32:56.967657', 'step': 5888, 'epoch': 1} {'type': 'loss', 'content': 0.29928770661354065, 'timestamp': '2025-09-10 02:32:56.969696', 'step': 5889, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 2847885091968}, 'timestamp': '2025-09-10 02:32:56.999141', 'step': 5889, 'epoch': 1} {'type': 'loss', 'content': 0.12126314640045166, 'timestamp': '2025-09-10 02:32:57.001074', 'step': 5890, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 144], 'flops': 4271696270016}, 'timestamp': '2025-09-10 02:32:57.031211', 'step': 5890, 'epoch': 1} {'type': 'loss', 'content': 0.16062283515930176, 'timestamp': '2025-09-10 02:32:57.033555', 'step': 5891, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 2847885091968}, 'timestamp': '2025-09-10 02:32:57.063630', 'step': 5891, 'epoch': 1} {'type': 'loss', 'content': 0.10567092150449753, 'timestamp': '2025-09-10 02:32:57.086932', 'step': 5892, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 2847885091968}, 'timestamp': '2025-09-10 02:32:57.117150', 'step': 5892, 'epoch': 1} {'type': 'loss', 'content': 0.10192952305078506, 'timestamp': '2025-09-10 02:32:57.119355', 'step': 5893, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 2847885091968}, 'timestamp': '2025-09-10 02:32:57.148921', 'step': 5893, 'epoch': 1} {'type': 'loss', 'content': 0.13004246354103088, 'timestamp': '2025-09-10 02:32:57.152275', 'step': 5894, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 2847885091968}, 'timestamp': '2025-09-10 02:32:57.182939', 'step': 5894, 'epoch': 1} {'type': 'loss', 'content': 0.2223026305437088, 'timestamp': '2025-09-10 02:32:57.188509', 'step': 5895, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 80], 'flops': 2373281365952}, 'timestamp': '2025-09-10 02:32:57.218321', 'step': 5895, 'epoch': 1} {'type': 'loss', 'content': 0.1680542528629303, 'timestamp': '2025-09-10 02:32:57.241700', 'step': 5896, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 2847885091968}, 'timestamp': '2025-09-10 02:32:57.271296', 'step': 5896, 'epoch': 1} {'type': 'loss', 'content': 0.20496122539043427, 'timestamp': '2025-09-10 02:32:57.273567', 'step': 5897, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 128], 'flops': 3797092544000}, 'timestamp': '2025-09-10 02:32:57.304240', 'step': 5897, 'epoch': 1} {'type': 'loss', 'content': 0.12479197233915329, 'timestamp': '2025-09-10 02:32:57.306772', 'step': 5898, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 80], 'flops': 2373281365952}, 'timestamp': '2025-09-10 02:32:57.336634', 'step': 5898, 'epoch': 1} {'type': 'loss', 'content': 0.07808762043714523, 'timestamp': '2025-09-10 02:32:57.338988', 'step': 5899, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 3322488817984}, 'timestamp': '2025-09-10 02:32:57.372452', 'step': 5899, 'epoch': 1} {'type': 'loss', 'content': 0.2077944129705429, 'timestamp': '2025-09-10 02:32:57.399230', 'step': 5900, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 128], 'flops': 3797092544000}, 'timestamp': '2025-09-10 02:32:57.436557', 'step': 5900, 'epoch': 1} {'type': 'loss', 'content': 0.11006780713796616, 'timestamp': '2025-09-10 02:32:57.439164', 'step': 5901, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 128], 'flops': 3797092544000}, 'timestamp': '2025-09-10 02:32:57.469491', 'step': 5901, 'epoch': 1} {'type': 'loss', 'content': 0.10144265741109848, 'timestamp': '2025-09-10 02:32:57.472758', 'step': 5902, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 128], 'flops': 3797092544000}, 'timestamp': '2025-09-10 02:32:57.509595', 'step': 5902, 'epoch': 1} {'type': 'loss', 'content': 0.09987247735261917, 'timestamp': '2025-09-10 02:32:57.511858', 'step': 5903, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 160], 'flops': 4746299996032}, 'timestamp': '2025-09-10 02:32:57.545915', 'step': 5903, 'epoch': 1} {'type': 'loss', 'content': 0.26864585280418396, 'timestamp': '2025-09-10 02:32:57.569696', 'step': 5904, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 2847885091968}, 'timestamp': '2025-09-10 02:32:57.600353', 'step': 5904, 'epoch': 1} {'type': 'loss', 'content': 0.16697755455970764, 'timestamp': '2025-09-10 02:32:57.608917', 'step': 5905, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 128], 'flops': 3797092544000}, 'timestamp': '2025-09-10 02:32:57.651483', 'step': 5905, 'epoch': 1} {'type': 'loss', 'content': 0.20985879004001617, 'timestamp': '2025-09-10 02:32:57.653834', 'step': 5906, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 128], 'flops': 3797092544000}, 'timestamp': '2025-09-10 02:32:57.691614', 'step': 5906, 'epoch': 1} {'type': 'loss', 'content': 0.15185679495334625, 'timestamp': '2025-09-10 02:32:57.694629', 'step': 5907, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 3322488817984}, 'timestamp': '2025-09-10 02:32:57.725743', 'step': 5907, 'epoch': 1} {'type': 'loss', 'content': 0.17418213188648224, 'timestamp': '2025-09-10 02:32:57.749720', 'step': 5908, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 3322488817984}, 'timestamp': '2025-09-10 02:32:57.786617', 'step': 5908, 'epoch': 1} {'type': 'loss', 'content': 0.14247895777225494, 'timestamp': '2025-09-10 02:32:57.789251', 'step': 5909, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 2847885091968}, 'timestamp': '2025-09-10 02:32:57.824490', 'step': 5909, 'epoch': 1} {'type': 'loss', 'content': 0.14014612138271332, 'timestamp': '2025-09-10 02:32:57.828852', 'step': 5910, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 3322488817984}, 'timestamp': '2025-09-10 02:32:57.862331', 'step': 5910, 'epoch': 1} {'type': 'loss', 'content': 0.16711245477199554, 'timestamp': '2025-09-10 02:32:57.864835', 'step': 5911, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 128], 'flops': 3797092544000}, 'timestamp': '2025-09-10 02:32:57.900391', 'step': 5911, 'epoch': 1} {'type': 'loss', 'content': 0.12707632780075073, 'timestamp': '2025-09-10 02:32:57.923892', 'step': 5912, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 2847885091968}, 'timestamp': '2025-09-10 02:32:57.964215', 'step': 5912, 'epoch': 1} {'type': 'loss', 'content': 0.13266411423683167, 'timestamp': '2025-09-10 02:32:57.967102', 'step': 5913, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 3322488817984}, 'timestamp': '2025-09-10 02:32:57.998184', 'step': 5913, 'epoch': 1} {'type': 'loss', 'content': 0.300317645072937, 'timestamp': '2025-09-10 02:32:58.002556', 'step': 5914, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 144], 'flops': 4271696270016}, 'timestamp': '2025-09-10 02:32:58.038124', 'step': 5914, 'epoch': 1} {'type': 'loss', 'content': 0.2079625278711319, 'timestamp': '2025-09-10 02:32:58.040556', 'step': 5915, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 128], 'flops': 3797092544000}, 'timestamp': '2025-09-10 02:32:58.074865', 'step': 5915, 'epoch': 1} {'type': 'loss', 'content': 0.11267630755901337, 'timestamp': '2025-09-10 02:32:58.098469', 'step': 5916, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 128], 'flops': 3797092544000}, 'timestamp': '2025-09-10 02:32:58.128655', 'step': 5916, 'epoch': 1} {'type': 'loss', 'content': 0.18109822273254395, 'timestamp': '2025-09-10 02:32:58.131152', 'step': 5917, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 2847885091968}, 'timestamp': '2025-09-10 02:32:58.161030', 'step': 5917, 'epoch': 1} {'type': 'loss', 'content': 0.13304241001605988, 'timestamp': '2025-09-10 02:32:58.162999', 'step': 5918, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 3322488817984}, 'timestamp': '2025-09-10 02:32:58.193769', 'step': 5918, 'epoch': 1} {'type': 'loss', 'content': 0.19727259874343872, 'timestamp': '2025-09-10 02:32:58.196015', 'step': 5919, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 3322488817984}, 'timestamp': '2025-09-10 02:32:58.225280', 'step': 5919, 'epoch': 1} {'type': 'loss', 'content': 0.17902855575084686, 'timestamp': '2025-09-10 02:32:58.248862', 'step': 5920, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 3322488817984}, 'timestamp': '2025-09-10 02:32:58.279065', 'step': 5920, 'epoch': 1} {'type': 'loss', 'content': 0.1556006222963333, 'timestamp': '2025-09-10 02:32:58.285093', 'step': 5921, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 80], 'flops': 2373281365952}, 'timestamp': '2025-09-10 02:32:58.317023', 'step': 5921, 'epoch': 1} {'type': 'loss', 'content': 0.20259647071361542, 'timestamp': '2025-09-10 02:32:58.319088', 'step': 5922, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 3322488817984}, 'timestamp': '2025-09-10 02:32:58.349209', 'step': 5922, 'epoch': 1} {'type': 'loss', 'content': 0.23260733485221863, 'timestamp': '2025-09-10 02:32:58.351442', 'step': 5923, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 2847885091968}, 'timestamp': '2025-09-10 02:32:58.381579', 'step': 5923, 'epoch': 1} {'type': 'loss', 'content': 0.13870581984519958, 'timestamp': '2025-09-10 02:32:58.405917', 'step': 5924, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 2847885091968}, 'timestamp': '2025-09-10 02:32:58.435927', 'step': 5924, 'epoch': 1} {'type': 'loss', 'content': 0.09466473758220673, 'timestamp': '2025-09-10 02:32:58.437974', 'step': 5925, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 2847885091968}, 'timestamp': '2025-09-10 02:32:58.467852', 'step': 5925, 'epoch': 1} {'type': 'loss', 'content': 0.13342171907424927, 'timestamp': '2025-09-10 02:32:58.472047', 'step': 5926, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 3322488817984}, 'timestamp': '2025-09-10 02:32:58.502437', 'step': 5926, 'epoch': 1} {'type': 'loss', 'content': 0.08159847557544708, 'timestamp': '2025-09-10 02:32:58.504907', 'step': 5927, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 3322488817984}, 'timestamp': '2025-09-10 02:32:58.534487', 'step': 5927, 'epoch': 1} {'type': 'loss', 'content': 0.0730443075299263, 'timestamp': '2025-09-10 02:32:58.558294', 'step': 5928, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 3322488817984}, 'timestamp': '2025-09-10 02:32:58.597492', 'step': 5928, 'epoch': 1} {'type': 'loss', 'content': 0.09994754940271378, 'timestamp': '2025-09-10 02:32:58.602192', 'step': 5929, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 3322488817984}, 'timestamp': '2025-09-10 02:32:58.642105', 'step': 5929, 'epoch': 1} {'type': 'loss', 'content': 0.05546613782644272, 'timestamp': '2025-09-10 02:32:58.644865', 'step': 5930, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 3322488817984}, 'timestamp': '2025-09-10 02:32:58.691550', 'step': 5930, 'epoch': 1} {'type': 'loss', 'content': 0.14852410554885864, 'timestamp': '2025-09-10 02:32:58.696549', 'step': 5931, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 3322488817984}, 'timestamp': '2025-09-10 02:32:58.730455', 'step': 5931, 'epoch': 1} {'type': 'loss', 'content': 0.16732549667358398, 'timestamp': '2025-09-10 02:32:58.755934', 'step': 5932, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 128], 'flops': 3797092544000}, 'timestamp': '2025-09-10 02:32:58.786648', 'step': 5932, 'epoch': 1} {'type': 'loss', 'content': 0.24434883892536163, 'timestamp': '2025-09-10 02:32:58.789179', 'step': 5933, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 3322488817984}, 'timestamp': '2025-09-10 02:32:58.819935', 'step': 5933, 'epoch': 1} {'type': 'loss', 'content': 0.12418968230485916, 'timestamp': '2025-09-10 02:32:58.823975', 'step': 5934, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 128], 'flops': 3797092544000}, 'timestamp': '2025-09-10 02:32:58.854837', 'step': 5934, 'epoch': 1} {'type': 'loss', 'content': 0.1695210486650467, 'timestamp': '2025-09-10 02:32:58.857102', 'step': 5935, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 2847885091968}, 'timestamp': '2025-09-10 02:32:58.886898', 'step': 5935, 'epoch': 1} {'type': 'loss', 'content': 0.14230437576770782, 'timestamp': '2025-09-10 02:32:58.910177', 'step': 5936, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 160], 'flops': 4746299996032}, 'timestamp': '2025-09-10 02:32:58.940182', 'step': 5936, 'epoch': 1} {'type': 'loss', 'content': 0.10041681677103043, 'timestamp': '2025-09-10 02:32:58.942934', 'step': 5937, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 2847885091968}, 'timestamp': '2025-09-10 02:32:58.974027', 'step': 5937, 'epoch': 1} {'type': 'loss', 'content': 0.16880667209625244, 'timestamp': '2025-09-10 02:32:58.977899', 'step': 5938, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 2847885091968}, 'timestamp': '2025-09-10 02:32:59.008019', 'step': 5938, 'epoch': 1} {'type': 'loss', 'content': 0.16148723661899567, 'timestamp': '2025-09-10 02:32:59.010596', 'step': 5939, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 3322488817984}, 'timestamp': '2025-09-10 02:32:59.041738', 'step': 5939, 'epoch': 1} {'type': 'loss', 'content': 0.3303591310977936, 'timestamp': '2025-09-10 02:32:59.069559', 'step': 5940, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 3322488817984}, 'timestamp': '2025-09-10 02:32:59.111980', 'step': 5940, 'epoch': 1} {'type': 'loss', 'content': 0.19431525468826294, 'timestamp': '2025-09-10 02:32:59.114477', 'step': 5941, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 128], 'flops': 3797092544000}, 'timestamp': '2025-09-10 02:32:59.145770', 'step': 5941, 'epoch': 1} {'type': 'loss', 'content': 0.15634071826934814, 'timestamp': '2025-09-10 02:32:59.148180', 'step': 5942, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 160], 'flops': 4746299996032}, 'timestamp': '2025-09-10 02:32:59.178256', 'step': 5942, 'epoch': 1} {'type': 'loss', 'content': 0.0804453194141388, 'timestamp': '2025-09-10 02:32:59.182164', 'step': 5943, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 128], 'flops': 3797092544000}, 'timestamp': '2025-09-10 02:32:59.211757', 'step': 5943, 'epoch': 1} {'type': 'loss', 'content': 0.17093680799007416, 'timestamp': '2025-09-10 02:32:59.235464', 'step': 5944, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 144], 'flops': 4271696270016}, 'timestamp': '2025-09-10 02:32:59.266995', 'step': 5944, 'epoch': 1} {'type': 'loss', 'content': 0.25138500332832336, 'timestamp': '2025-09-10 02:32:59.269382', 'step': 5945, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 144], 'flops': 4271696270016}, 'timestamp': '2025-09-10 02:32:59.299747', 'step': 5945, 'epoch': 1} {'type': 'loss', 'content': 0.12099751085042953, 'timestamp': '2025-09-10 02:32:59.302155', 'step': 5946, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 144], 'flops': 4271696270016}, 'timestamp': '2025-09-10 02:32:59.332005', 'step': 5946, 'epoch': 1} {'type': 'loss', 'content': 0.14179429411888123, 'timestamp': '2025-09-10 02:32:59.334474', 'step': 5947, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 3322488817984}, 'timestamp': '2025-09-10 02:32:59.364901', 'step': 5947, 'epoch': 1} {'type': 'loss', 'content': 0.18915443122386932, 'timestamp': '2025-09-10 02:32:59.388284', 'step': 5948, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 3322488817984}, 'timestamp': '2025-09-10 02:32:59.418561', 'step': 5948, 'epoch': 1} {'type': 'loss', 'content': 0.1918407529592514, 'timestamp': '2025-09-10 02:32:59.420694', 'step': 5949, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 2847885091968}, 'timestamp': '2025-09-10 02:32:59.450791', 'step': 5949, 'epoch': 1} {'type': 'loss', 'content': 0.175100177526474, 'timestamp': '2025-09-10 02:32:59.453245', 'step': 5950, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 80], 'flops': 2373281365952}, 'timestamp': '2025-09-10 02:32:59.483596', 'step': 5950, 'epoch': 1} {'type': 'loss', 'content': 0.16729004681110382, 'timestamp': '2025-09-10 02:32:59.486193', 'step': 5951, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 128], 'flops': 3797092544000}, 'timestamp': '2025-09-10 02:32:59.518595', 'step': 5951, 'epoch': 1} {'type': 'loss', 'content': 0.14759983122348785, 'timestamp': '2025-09-10 02:32:59.542239', 'step': 5952, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 128], 'flops': 3797092544000}, 'timestamp': '2025-09-10 02:32:59.571940', 'step': 5952, 'epoch': 1} {'type': 'loss', 'content': 0.16091322898864746, 'timestamp': '2025-09-10 02:32:59.574183', 'step': 5953, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 2847885091968}, 'timestamp': '2025-09-10 02:32:59.604277', 'step': 5953, 'epoch': 1} {'type': 'loss', 'content': 0.10861672461032867, 'timestamp': '2025-09-10 02:32:59.606503', 'step': 5954, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 128], 'flops': 3797092544000}, 'timestamp': '2025-09-10 02:32:59.636307', 'step': 5954, 'epoch': 1} {'type': 'loss', 'content': 0.11324387043714523, 'timestamp': '2025-09-10 02:32:59.638644', 'step': 5955, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 3322488817984}, 'timestamp': '2025-09-10 02:32:59.672095', 'step': 5955, 'epoch': 1} {'type': 'loss', 'content': 0.1228678748011589, 'timestamp': '2025-09-10 02:32:59.695736', 'step': 5956, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 3322488817984}, 'timestamp': '2025-09-10 02:32:59.725648', 'step': 5956, 'epoch': 1} {'type': 'loss', 'content': 0.13114525377750397, 'timestamp': '2025-09-10 02:32:59.728134', 'step': 5957, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 3322488817984}, 'timestamp': '2025-09-10 02:32:59.758795', 'step': 5957, 'epoch': 1} {'type': 'loss', 'content': 0.1732792854309082, 'timestamp': '2025-09-10 02:32:59.761105', 'step': 5958, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 3322488817984}, 'timestamp': '2025-09-10 02:32:59.790677', 'step': 5958, 'epoch': 1} {'type': 'loss', 'content': 0.2139972448348999, 'timestamp': '2025-09-10 02:32:59.792867', 'step': 5959, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 2847885091968}, 'timestamp': '2025-09-10 02:32:59.823334', 'step': 5959, 'epoch': 1} {'type': 'loss', 'content': 0.07886848598718643, 'timestamp': '2025-09-10 02:32:59.850956', 'step': 5960, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 2847885091968}, 'timestamp': '2025-09-10 02:32:59.889445', 'step': 5960, 'epoch': 1} {'type': 'loss', 'content': 0.19801826775074005, 'timestamp': '2025-09-10 02:32:59.891786', 'step': 5961, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 128], 'flops': 3797092544000}, 'timestamp': '2025-09-10 02:32:59.921928', 'step': 5961, 'epoch': 1} {'type': 'loss', 'content': 0.14148566126823425, 'timestamp': '2025-09-10 02:32:59.924074', 'step': 5962, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 128], 'flops': 3797092544000}, 'timestamp': '2025-09-10 02:32:59.954556', 'step': 5962, 'epoch': 1} {'type': 'loss', 'content': 0.08821012824773788, 'timestamp': '2025-09-10 02:32:59.956747', 'step': 5963, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 2847885091968}, 'timestamp': '2025-09-10 02:32:59.986838', 'step': 5963, 'epoch': 1} {'type': 'loss', 'content': 0.08110429346561432, 'timestamp': '2025-09-10 02:33:00.011776', 'step': 5964, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 2847885091968}, 'timestamp': '2025-09-10 02:33:00.042387', 'step': 5964, 'epoch': 1} {'type': 'loss', 'content': 0.11315733194351196, 'timestamp': '2025-09-10 02:33:00.044628', 'step': 5965, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 3322488817984}, 'timestamp': '2025-09-10 02:33:00.076196', 'step': 5965, 'epoch': 1} {'type': 'loss', 'content': 0.1212037056684494, 'timestamp': '2025-09-10 02:33:00.078354', 'step': 5966, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 128], 'flops': 3797092544000}, 'timestamp': '2025-09-10 02:33:00.108396', 'step': 5966, 'epoch': 1} {'type': 'loss', 'content': 0.10617504268884659, 'timestamp': '2025-09-10 02:33:00.110750', 'step': 5967, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 160], 'flops': 4746299996032}, 'timestamp': '2025-09-10 02:33:00.141352', 'step': 5967, 'epoch': 1} {'type': 'loss', 'content': 0.13849975168704987, 'timestamp': '2025-09-10 02:33:00.164863', 'step': 5968, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 144], 'flops': 4271696270016}, 'timestamp': '2025-09-10 02:33:00.194915', 'step': 5968, 'epoch': 1} {'type': 'loss', 'content': 0.09536350518465042, 'timestamp': '2025-09-10 02:33:00.197084', 'step': 5969, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 2847885091968}, 'timestamp': '2025-09-10 02:33:00.227439', 'step': 5969, 'epoch': 1} {'type': 'loss', 'content': 0.1371040791273117, 'timestamp': '2025-09-10 02:33:00.229850', 'step': 5970, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 160], 'flops': 4746299996032}, 'timestamp': '2025-09-10 02:33:00.260063', 'step': 5970, 'epoch': 1} {'type': 'loss', 'content': 0.1129697635769844, 'timestamp': '2025-09-10 02:33:00.262546', 'step': 5971, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 208], 'flops': 6170111174080}, 'timestamp': '2025-09-10 02:33:00.292490', 'step': 5971, 'epoch': 1} {'type': 'loss', 'content': 0.11257941275835037, 'timestamp': '2025-09-10 02:33:00.320322', 'step': 5972, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 2847885091968}, 'timestamp': '2025-09-10 02:33:00.353982', 'step': 5972, 'epoch': 1} {'type': 'loss', 'content': 0.1632014364004135, 'timestamp': '2025-09-10 02:33:00.355998', 'step': 5973, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 3322488817984}, 'timestamp': '2025-09-10 02:33:00.385326', 'step': 5973, 'epoch': 1} {'type': 'loss', 'content': 0.11332967132329941, 'timestamp': '2025-09-10 02:33:00.387527', 'step': 5974, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 3322488817984}, 'timestamp': '2025-09-10 02:33:00.417280', 'step': 5974, 'epoch': 1} {'type': 'loss', 'content': 0.22463706135749817, 'timestamp': '2025-09-10 02:33:00.419688', 'step': 5975, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 3322488817984}, 'timestamp': '2025-09-10 02:33:00.450776', 'step': 5975, 'epoch': 1} {'type': 'loss', 'content': 0.1403355747461319, 'timestamp': '2025-09-10 02:33:00.474211', 'step': 5976, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 2847885091968}, 'timestamp': '2025-09-10 02:33:00.505156', 'step': 5976, 'epoch': 1} {'type': 'loss', 'content': 0.1282680630683899, 'timestamp': '2025-09-10 02:33:00.507516', 'step': 5977, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 3322488817984}, 'timestamp': '2025-09-10 02:33:00.537491', 'step': 5977, 'epoch': 1} {'type': 'loss', 'content': 0.12268783152103424, 'timestamp': '2025-09-10 02:33:00.539566', 'step': 5978, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 80], 'flops': 2373281365952}, 'timestamp': '2025-09-10 02:33:00.570266', 'step': 5978, 'epoch': 1} {'type': 'loss', 'content': 0.2134040743112564, 'timestamp': '2025-09-10 02:33:00.572433', 'step': 5979, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 192], 'flops': 5695507448064}, 'timestamp': '2025-09-10 02:33:00.602459', 'step': 5979, 'epoch': 1} {'type': 'loss', 'content': 0.1475532501935959, 'timestamp': '2025-09-10 02:33:00.627809', 'step': 5980, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 128], 'flops': 3797092544000}, 'timestamp': '2025-09-10 02:33:00.660250', 'step': 5980, 'epoch': 1} {'type': 'loss', 'content': 0.17654037475585938, 'timestamp': '2025-09-10 02:33:00.662705', 'step': 5981, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 2847885091968}, 'timestamp': '2025-09-10 02:33:00.693178', 'step': 5981, 'epoch': 1} {'type': 'loss', 'content': 0.15774136781692505, 'timestamp': '2025-09-10 02:33:00.695492', 'step': 5982, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 3322488817984}, 'timestamp': '2025-09-10 02:33:00.726534', 'step': 5982, 'epoch': 1} {'type': 'loss', 'content': 0.18718360364437103, 'timestamp': '2025-09-10 02:33:00.728767', 'step': 5983, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 144], 'flops': 4271696270016}, 'timestamp': '2025-09-10 02:33:00.759303', 'step': 5983, 'epoch': 1} {'type': 'loss', 'content': 0.25163954496383667, 'timestamp': '2025-09-10 02:33:00.782697', 'step': 5984, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 2847885091968}, 'timestamp': '2025-09-10 02:33:00.814335', 'step': 5984, 'epoch': 1} {'type': 'loss', 'content': 0.14941589534282684, 'timestamp': '2025-09-10 02:33:00.816492', 'step': 5985, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 3322488817984}, 'timestamp': '2025-09-10 02:33:00.846179', 'step': 5985, 'epoch': 1} {'type': 'loss', 'content': 0.0900573581457138, 'timestamp': '2025-09-10 02:33:00.848543', 'step': 5986, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 2847885091968}, 'timestamp': '2025-09-10 02:33:00.879561', 'step': 5986, 'epoch': 1} {'type': 'loss', 'content': 0.13128793239593506, 'timestamp': '2025-09-10 02:33:00.883091', 'step': 5987, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 144], 'flops': 4271696270016}, 'timestamp': '2025-09-10 02:33:00.915320', 'step': 5987, 'epoch': 1} {'type': 'loss', 'content': 0.15857884287834167, 'timestamp': '2025-09-10 02:33:00.938485', 'step': 5988, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 128], 'flops': 3797092544000}, 'timestamp': '2025-09-10 02:33:00.968395', 'step': 5988, 'epoch': 1} {'type': 'loss', 'content': 0.10914944857358932, 'timestamp': '2025-09-10 02:33:00.970392', 'step': 5989, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 2847885091968}, 'timestamp': '2025-09-10 02:33:00.999897', 'step': 5989, 'epoch': 1} {'type': 'loss', 'content': 0.18787960708141327, 'timestamp': '2025-09-10 02:33:01.003483', 'step': 5990, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 2847885091968}, 'timestamp': '2025-09-10 02:33:01.033031', 'step': 5990, 'epoch': 1} {'type': 'loss', 'content': 0.1187308207154274, 'timestamp': '2025-09-10 02:33:01.037331', 'step': 5991, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 2847885091968}, 'timestamp': '2025-09-10 02:33:01.067181', 'step': 5991, 'epoch': 1} {'type': 'loss', 'content': 0.08376254886388779, 'timestamp': '2025-09-10 02:33:01.090556', 'step': 5992, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 3322488817984}, 'timestamp': '2025-09-10 02:33:01.121466', 'step': 5992, 'epoch': 1} {'type': 'loss', 'content': 0.20191499590873718, 'timestamp': '2025-09-10 02:33:01.123689', 'step': 5993, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 2847885091968}, 'timestamp': '2025-09-10 02:33:01.154269', 'step': 5993, 'epoch': 1} {'type': 'loss', 'content': 0.19064262509346008, 'timestamp': '2025-09-10 02:33:01.156254', 'step': 5994, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 128], 'flops': 3797092544000}, 'timestamp': '2025-09-10 02:33:01.186238', 'step': 5994, 'epoch': 1} {'type': 'loss', 'content': 0.2492774873971939, 'timestamp': '2025-09-10 02:33:01.188319', 'step': 5995, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 128], 'flops': 3797092544000}, 'timestamp': '2025-09-10 02:33:01.217828', 'step': 5995, 'epoch': 1} {'type': 'loss', 'content': 0.14165934920310974, 'timestamp': '2025-09-10 02:33:01.241159', 'step': 5996, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 2847885091968}, 'timestamp': '2025-09-10 02:33:01.271542', 'step': 5996, 'epoch': 1} {'type': 'loss', 'content': 0.1432640701532364, 'timestamp': '2025-09-10 02:33:01.273775', 'step': 5997, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 3322488817984}, 'timestamp': '2025-09-10 02:33:01.303255', 'step': 5997, 'epoch': 1} {'type': 'loss', 'content': 0.152284175157547, 'timestamp': '2025-09-10 02:33:01.305386', 'step': 5998, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 128], 'flops': 3797092544000}, 'timestamp': '2025-09-10 02:33:01.336552', 'step': 5998, 'epoch': 1} {'type': 'loss', 'content': 0.21177758276462555, 'timestamp': '2025-09-10 02:33:01.340586', 'step': 5999, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 128], 'flops': 3797092544000}, 'timestamp': '2025-09-10 02:33:01.371642', 'step': 5999, 'epoch': 1} {'type': 'loss', 'content': 0.23372624814510345, 'timestamp': '2025-09-10 02:33:01.395058', 'step': 6000, 'epoch': 1} {'type': 'info', 'content': 'Checkpoint saved at step 6000', 'timestamp': '2025-09-10 02:33:06.029647', 'step': 6000, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 2847885091968}, 'timestamp': '2025-09-10 02:33:06.062381', 'step': 6000, 'epoch': 1} {'type': 'loss', 'content': 0.15037111937999725, 'timestamp': '2025-09-10 02:33:06.063937', 'step': 6001, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 3322488817984}, 'timestamp': '2025-09-10 02:33:06.095133', 'step': 6001, 'epoch': 1} {'type': 'loss', 'content': 0.20431393384933472, 'timestamp': '2025-09-10 02:33:06.097003', 'step': 6002, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 2847885091968}, 'timestamp': '2025-09-10 02:33:06.127166', 'step': 6002, 'epoch': 1} {'type': 'loss', 'content': 0.16478592157363892, 'timestamp': '2025-09-10 02:33:06.128981', 'step': 6003, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 2847885091968}, 'timestamp': '2025-09-10 02:33:06.160006', 'step': 6003, 'epoch': 1} {'type': 'loss', 'content': 0.11659488826990128, 'timestamp': '2025-09-10 02:33:06.183682', 'step': 6004, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 2847885091968}, 'timestamp': '2025-09-10 02:33:06.214728', 'step': 6004, 'epoch': 1} {'type': 'loss', 'content': 0.14774847030639648, 'timestamp': '2025-09-10 02:33:06.216840', 'step': 6005, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 2847885091968}, 'timestamp': '2025-09-10 02:33:06.246790', 'step': 6005, 'epoch': 1} {'type': 'loss', 'content': 0.0825599730014801, 'timestamp': '2025-09-10 02:33:06.249282', 'step': 6006, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 3322488817984}, 'timestamp': '2025-09-10 02:33:06.279579', 'step': 6006, 'epoch': 1} {'type': 'loss', 'content': 0.23053626716136932, 'timestamp': '2025-09-10 02:33:06.281183', 'step': 6007, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 144], 'flops': 4271696270016}, 'timestamp': '2025-09-10 02:33:06.311747', 'step': 6007, 'epoch': 1} {'type': 'loss', 'content': 0.08968335390090942, 'timestamp': '2025-09-10 02:33:06.335315', 'step': 6008, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 3322488817984}, 'timestamp': '2025-09-10 02:33:06.366021', 'step': 6008, 'epoch': 1} {'type': 'loss', 'content': 0.1221783459186554, 'timestamp': '2025-09-10 02:33:06.369950', 'step': 6009, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 3322488817984}, 'timestamp': '2025-09-10 02:33:06.403103', 'step': 6009, 'epoch': 1} {'type': 'loss', 'content': 0.1354662925004959, 'timestamp': '2025-09-10 02:33:06.408438', 'step': 6010, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 3322488817984}, 'timestamp': '2025-09-10 02:33:06.444497', 'step': 6010, 'epoch': 1} {'type': 'loss', 'content': 0.20908282697200775, 'timestamp': '2025-09-10 02:33:06.446454', 'step': 6011, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 128], 'flops': 3797092544000}, 'timestamp': '2025-09-10 02:33:06.476027', 'step': 6011, 'epoch': 1} {'type': 'loss', 'content': 0.11240492016077042, 'timestamp': '2025-09-10 02:33:06.499567', 'step': 6012, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 144], 'flops': 4271696270016}, 'timestamp': '2025-09-10 02:33:06.530444', 'step': 6012, 'epoch': 1} {'type': 'loss', 'content': 0.19187110662460327, 'timestamp': '2025-09-10 02:33:06.532705', 'step': 6013, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 2847885091968}, 'timestamp': '2025-09-10 02:33:06.562841', 'step': 6013, 'epoch': 1} {'type': 'loss', 'content': 0.17269417643547058, 'timestamp': '2025-09-10 02:33:06.567870', 'step': 6014, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 176], 'flops': 5220903722048}, 'timestamp': '2025-09-10 02:33:06.599106', 'step': 6014, 'epoch': 1} {'type': 'loss', 'content': 0.10216320306062698, 'timestamp': '2025-09-10 02:33:06.603479', 'step': 6015, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 3322488817984}, 'timestamp': '2025-09-10 02:33:06.636768', 'step': 6015, 'epoch': 1} {'type': 'loss', 'content': 0.18558387458324432, 'timestamp': '2025-09-10 02:33:06.660379', 'step': 6016, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 3322488817984}, 'timestamp': '2025-09-10 02:33:06.690779', 'step': 6016, 'epoch': 1} {'type': 'loss', 'content': 0.14240819215774536, 'timestamp': '2025-09-10 02:33:06.692742', 'step': 6017, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 3322488817984}, 'timestamp': '2025-09-10 02:33:06.725984', 'step': 6017, 'epoch': 1} {'type': 'loss', 'content': 0.09401200711727142, 'timestamp': '2025-09-10 02:33:06.727861', 'step': 6018, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 2847885091968}, 'timestamp': '2025-09-10 02:33:06.757792', 'step': 6018, 'epoch': 1} {'type': 'loss', 'content': 0.1216983050107956, 'timestamp': '2025-09-10 02:33:06.759535', 'step': 6019, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 3322488817984}, 'timestamp': '2025-09-10 02:33:06.789850', 'step': 6019, 'epoch': 1} {'type': 'loss', 'content': 0.2344825118780136, 'timestamp': '2025-09-10 02:33:06.814112', 'step': 6020, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 2847885091968}, 'timestamp': '2025-09-10 02:33:06.843919', 'step': 6020, 'epoch': 1} {'type': 'loss', 'content': 0.19816836714744568, 'timestamp': '2025-09-10 02:33:06.845576', 'step': 6021, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 2847885091968}, 'timestamp': '2025-09-10 02:33:06.875081', 'step': 6021, 'epoch': 1} {'type': 'loss', 'content': 0.0952080637216568, 'timestamp': '2025-09-10 02:33:06.878584', 'step': 6022, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 2847885091968}, 'timestamp': '2025-09-10 02:33:06.908081', 'step': 6022, 'epoch': 1} {'type': 'loss', 'content': 0.11891587823629379, 'timestamp': '2025-09-10 02:33:06.910175', 'step': 6023, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 2847885091968}, 'timestamp': '2025-09-10 02:33:06.944067', 'step': 6023, 'epoch': 1} {'type': 'loss', 'content': 0.13810621201992035, 'timestamp': '2025-09-10 02:33:06.967943', 'step': 6024, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 2847885091968}, 'timestamp': '2025-09-10 02:33:06.997244', 'step': 6024, 'epoch': 1} {'type': 'loss', 'content': 0.11324827373027802, 'timestamp': '2025-09-10 02:33:06.999100', 'step': 6025, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 2847885091968}, 'timestamp': '2025-09-10 02:33:07.029493', 'step': 6025, 'epoch': 1} {'type': 'loss', 'content': 0.129605233669281, 'timestamp': '2025-09-10 02:33:07.031645', 'step': 6026, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 3322488817984}, 'timestamp': '2025-09-10 02:33:07.061882', 'step': 6026, 'epoch': 1} {'type': 'loss', 'content': 0.09131086617708206, 'timestamp': '2025-09-10 02:33:07.063647', 'step': 6027, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 3322488817984}, 'timestamp': '2025-09-10 02:33:07.092408', 'step': 6027, 'epoch': 1} {'type': 'loss', 'content': 0.15546520054340363, 'timestamp': '2025-09-10 02:33:07.115989', 'step': 6028, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 3322488817984}, 'timestamp': '2025-09-10 02:33:07.146585', 'step': 6028, 'epoch': 1} {'type': 'loss', 'content': 0.1715960055589676, 'timestamp': '2025-09-10 02:33:07.148935', 'step': 6029, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 2847885091968}, 'timestamp': '2025-09-10 02:33:07.179088', 'step': 6029, 'epoch': 1} {'type': 'loss', 'content': 0.16188175976276398, 'timestamp': '2025-09-10 02:33:07.182900', 'step': 6030, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 3322488817984}, 'timestamp': '2025-09-10 02:33:07.212673', 'step': 6030, 'epoch': 1} {'type': 'loss', 'content': 0.08460765331983566, 'timestamp': '2025-09-10 02:33:07.214854', 'step': 6031, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 128], 'flops': 3797092544000}, 'timestamp': '2025-09-10 02:33:07.244432', 'step': 6031, 'epoch': 1} {'type': 'loss', 'content': 0.15322844684123993, 'timestamp': '2025-09-10 02:33:07.267857', 'step': 6032, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 3322488817984}, 'timestamp': '2025-09-10 02:33:07.297824', 'step': 6032, 'epoch': 1} {'type': 'loss', 'content': 0.16723304986953735, 'timestamp': '2025-09-10 02:33:07.300431', 'step': 6033, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 3322488817984}, 'timestamp': '2025-09-10 02:33:07.330223', 'step': 6033, 'epoch': 1} {'type': 'loss', 'content': 0.17114751040935516, 'timestamp': '2025-09-10 02:33:07.332207', 'step': 6034, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 144], 'flops': 4271696270016}, 'timestamp': '2025-09-10 02:33:07.361683', 'step': 6034, 'epoch': 1} {'type': 'loss', 'content': 0.22542499005794525, 'timestamp': '2025-09-10 02:33:07.363975', 'step': 6035, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 3322488817984}, 'timestamp': '2025-09-10 02:33:07.393850', 'step': 6035, 'epoch': 1} {'type': 'loss', 'content': 0.2568076550960541, 'timestamp': '2025-09-10 02:33:07.417126', 'step': 6036, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 2847885091968}, 'timestamp': '2025-09-10 02:33:07.455996', 'step': 6036, 'epoch': 1} {'type': 'loss', 'content': 0.16745784878730774, 'timestamp': '2025-09-10 02:33:07.458309', 'step': 6037, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 2847885091968}, 'timestamp': '2025-09-10 02:33:07.489297', 'step': 6037, 'epoch': 1} {'type': 'loss', 'content': 0.14470098912715912, 'timestamp': '2025-09-10 02:33:07.491767', 'step': 6038, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 3322488817984}, 'timestamp': '2025-09-10 02:33:07.522204', 'step': 6038, 'epoch': 1} {'type': 'loss', 'content': 0.16980919241905212, 'timestamp': '2025-09-10 02:33:07.524439', 'step': 6039, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 3322488817984}, 'timestamp': '2025-09-10 02:33:07.555550', 'step': 6039, 'epoch': 1} {'type': 'loss', 'content': 0.09351973235607147, 'timestamp': '2025-09-10 02:33:07.580702', 'step': 6040, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 128], 'flops': 3797092544000}, 'timestamp': '2025-09-10 02:33:07.616743', 'step': 6040, 'epoch': 1} {'type': 'loss', 'content': 0.15868297219276428, 'timestamp': '2025-09-10 02:33:07.620590', 'step': 6041, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 3322488817984}, 'timestamp': '2025-09-10 02:33:07.657957', 'step': 6041, 'epoch': 1} {'type': 'loss', 'content': 0.17480172216892242, 'timestamp': '2025-09-10 02:33:07.662647', 'step': 6042, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 2847885091968}, 'timestamp': '2025-09-10 02:33:07.695389', 'step': 6042, 'epoch': 1} {'type': 'loss', 'content': 0.20123310387134552, 'timestamp': '2025-09-10 02:33:07.699612', 'step': 6043, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 3322488817984}, 'timestamp': '2025-09-10 02:33:07.735367', 'step': 6043, 'epoch': 1} {'type': 'loss', 'content': 0.1015835851430893, 'timestamp': '2025-09-10 02:33:07.761754', 'step': 6044, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 2847885091968}, 'timestamp': '2025-09-10 02:33:07.796268', 'step': 6044, 'epoch': 1} {'type': 'loss', 'content': 0.1312694102525711, 'timestamp': '2025-09-10 02:33:07.801184', 'step': 6045, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 128], 'flops': 3797092544000}, 'timestamp': '2025-09-10 02:33:07.832930', 'step': 6045, 'epoch': 1} {'type': 'loss', 'content': 0.1417502611875534, 'timestamp': '2025-09-10 02:33:07.835177', 'step': 6046, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 3322488817984}, 'timestamp': '2025-09-10 02:33:07.865097', 'step': 6046, 'epoch': 1} {'type': 'loss', 'content': 0.0813603550195694, 'timestamp': '2025-09-10 02:33:07.866846', 'step': 6047, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 3322488817984}, 'timestamp': '2025-09-10 02:33:07.895953', 'step': 6047, 'epoch': 1} {'type': 'loss', 'content': 0.12219560146331787, 'timestamp': '2025-09-10 02:33:07.920300', 'step': 6048, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 3322488817984}, 'timestamp': '2025-09-10 02:33:07.950201', 'step': 6048, 'epoch': 1} {'type': 'loss', 'content': 0.07562311738729477, 'timestamp': '2025-09-10 02:33:07.952318', 'step': 6049, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 128], 'flops': 3797092544000}, 'timestamp': '2025-09-10 02:33:07.982263', 'step': 6049, 'epoch': 1} {'type': 'loss', 'content': 0.1556185632944107, 'timestamp': '2025-09-10 02:33:07.984654', 'step': 6050, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 160], 'flops': 4746299996032}, 'timestamp': '2025-09-10 02:33:08.014579', 'step': 6050, 'epoch': 1} {'type': 'loss', 'content': 0.23225410282611847, 'timestamp': '2025-09-10 02:33:08.017315', 'step': 6051, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 3322488817984}, 'timestamp': '2025-09-10 02:33:08.046694', 'step': 6051, 'epoch': 1} {'type': 'loss', 'content': 0.11818954348564148, 'timestamp': '2025-09-10 02:33:08.069890', 'step': 6052, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 160], 'flops': 4746299996032}, 'timestamp': '2025-09-10 02:33:08.100044', 'step': 6052, 'epoch': 1} {'type': 'loss', 'content': 0.09328390657901764, 'timestamp': '2025-09-10 02:33:08.102553', 'step': 6053, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 2847885091968}, 'timestamp': '2025-09-10 02:33:08.133811', 'step': 6053, 'epoch': 1} {'type': 'loss', 'content': 0.09364686161279678, 'timestamp': '2025-09-10 02:33:08.136427', 'step': 6054, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 128], 'flops': 3797092544000}, 'timestamp': '2025-09-10 02:33:08.166303', 'step': 6054, 'epoch': 1} {'type': 'loss', 'content': 0.15809057652950287, 'timestamp': '2025-09-10 02:33:08.168379', 'step': 6055, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 2847885091968}, 'timestamp': '2025-09-10 02:33:08.196821', 'step': 6055, 'epoch': 1} {'type': 'loss', 'content': 0.19030912220478058, 'timestamp': '2025-09-10 02:33:08.220512', 'step': 6056, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 2847885091968}, 'timestamp': '2025-09-10 02:33:08.251315', 'step': 6056, 'epoch': 1} {'type': 'loss', 'content': 0.09847689419984818, 'timestamp': '2025-09-10 02:33:08.253629', 'step': 6057, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 3322488817984}, 'timestamp': '2025-09-10 02:33:08.285314', 'step': 6057, 'epoch': 1} {'type': 'loss', 'content': 0.1668180674314499, 'timestamp': '2025-09-10 02:33:08.287696', 'step': 6058, 'epoch': 1} {'type': 'flops', 'content': [{'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1582003754624}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1898404492032}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1898404492032}, {'type': 'perplexity', 'in_batch_dim': [8, 144], 'batch_size': 8, 'flops': 2847606704256}, {'type': 'perplexity', 'in_batch_dim': [8, 64], 'batch_size': 8, 'flops': 1265603017216}, {'type': 'perplexity', 'in_batch_dim': [8, 112], 'batch_size': 8, 'flops': 2214805229440}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1898404492032}, {'type': 'perplexity', 'in_batch_dim': [8, 112], 'batch_size': 8, 'flops': 2214805229440}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1898404492032}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1898404492032}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1898404492032}, {'type': 'perplexity', 'in_batch_dim': [8, 112], 'batch_size': 8, 'flops': 2214805229440}, {'type': 'perplexity', 'in_batch_dim': [8, 112], 'batch_size': 8, 'flops': 2214805229440}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1582003754624}, {'type': 'perplexity', 'in_batch_dim': [8, 144], 'batch_size': 8, 'flops': 2847606704256}, {'type': 'perplexity', 'in_batch_dim': [8, 112], 'batch_size': 8, 'flops': 2214805229440}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1582003754624}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1898404492032}, {'type': 'perplexity', 'in_batch_dim': [8, 64], 'batch_size': 8, 'flops': 1265603017216}, {'type': 'perplexity', 'in_batch_dim': [8, 64], 'batch_size': 8, 'flops': 1265603017216}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1898404492032}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1582003754624}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1582003754624}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1582003754624}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1582003754624}, {'type': 'perplexity', 'in_batch_dim': [8, 64], 'batch_size': 8, 'flops': 1265603017216}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1898404492032}, {'type': 'perplexity', 'in_batch_dim': [8, 64], 'batch_size': 8, 'flops': 1265603017216}, {'type': 'perplexity', 'in_batch_dim': [8, 64], 'batch_size': 8, 'flops': 1265603017216}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1582003754624}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1582003754624}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1582003754624}, {'type': 'perplexity', 'in_batch_dim': [8, 112], 'batch_size': 8, 'flops': 2214805229440}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1582003754624}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1582003754624}, {'type': 'perplexity', 'in_batch_dim': [8, 160], 'batch_size': 8, 'flops': 3164007441664}, {'type': 'perplexity', 'in_batch_dim': [8, 64], 'batch_size': 8, 'flops': 1265603017216}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1898404492032}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1898404492032}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1898404492032}, {'type': 'perplexity', 'in_batch_dim': [8, 64], 'batch_size': 8, 'flops': 1265603017216}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1582003754624}, {'type': 'perplexity', 'in_batch_dim': [8, 64], 'batch_size': 8, 'flops': 1265603017216}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1582003754624}, {'type': 'perplexity', 'in_batch_dim': [8, 64], 'batch_size': 8, 'flops': 1265603017216}, {'type': 'perplexity', 'in_batch_dim': [8, 112], 'batch_size': 8, 'flops': 2214805229440}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1582003754624}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1898404492032}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1898404492032}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1898404492032}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1898404492032}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1898404492032}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1898404492032}, {'type': 'perplexity', 'in_batch_dim': [8, 64], 'batch_size': 8, 'flops': 1265603017216}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1898404492032}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1582003754624}, {'type': 'perplexity', 'in_batch_dim': [8, 64], 'batch_size': 8, 'flops': 1265603017216}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1898404492032}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1898404492032}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1898404492032}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1898404492032}, {'type': 'perplexity', 'in_batch_dim': [8, 128], 'batch_size': 8, 'flops': 2531205966848}, {'type': 'perplexity', 'in_batch_dim': [8, 112], 'batch_size': 8, 'flops': 2214805229440}, {'type': 'perplexity', 'in_batch_dim': [8, 64], 'batch_size': 8, 'flops': 1265603017216}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1582003754624}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1898404492032}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1582003754624}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1898404492032}, {'type': 'perplexity', 'in_batch_dim': [8, 64], 'batch_size': 8, 'flops': 1265603017216}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1898404492032}, {'type': 'perplexity', 'in_batch_dim': [8, 112], 'batch_size': 8, 'flops': 2214805229440}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1898404492032}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1582003754624}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1582003754624}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1582003754624}, {'type': 'perplexity', 'in_batch_dim': [8, 64], 'batch_size': 8, 'flops': 1265603017216}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1582003754624}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1898404492032}, {'type': 'perplexity', 'in_batch_dim': [8, 64], 'batch_size': 8, 'flops': 1265603017216}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1582003754624}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1898404492032}, {'type': 'perplexity', 'in_batch_dim': [8, 64], 'batch_size': 8, 'flops': 1265603017216}, {'type': 'perplexity', 'in_batch_dim': [8, 112], 'batch_size': 8, 'flops': 2214805229440}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1582003754624}, {'type': 'perplexity', 'in_batch_dim': [8, 144], 'batch_size': 8, 'flops': 2847606704256}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1582003754624}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1582003754624}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1582003754624}, {'type': 'perplexity', 'in_batch_dim': [8, 64], 'batch_size': 8, 'flops': 1265603017216}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1898404492032}, {'type': 'perplexity', 'in_batch_dim': [8, 112], 'batch_size': 8, 'flops': 2214805229440}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1898404492032}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1582003754624}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1582003754624}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1582003754624}, {'type': 'perplexity', 'in_batch_dim': [8, 112], 'batch_size': 8, 'flops': 2214805229440}, {'type': 'perplexity', 'in_batch_dim': [8, 64], 'batch_size': 8, 'flops': 1265603017216}, {'type': 'perplexity', 'in_batch_dim': [8, 112], 'batch_size': 8, 'flops': 2214805229440}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1582003754624}, {'type': 'perplexity', 'in_batch_dim': [8, 64], 'batch_size': 8, 'flops': 1265603017216}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1582003754624}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1582003754624}, {'type': 'perplexity', 'in_batch_dim': [8, 64], 'batch_size': 8, 'flops': 1265603017216}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1582003754624}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1582003754624}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1582003754624}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1898404492032}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1582003754624}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1582003754624}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1898404492032}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1898404492032}, {'type': 'perplexity', 'in_batch_dim': [8, 112], 'batch_size': 8, 'flops': 2214805229440}, {'type': 'perplexity', 'in_batch_dim': [8, 64], 'batch_size': 8, 'flops': 1265603017216}, {'type': 'perplexity', 'in_batch_dim': [8, 112], 'batch_size': 8, 'flops': 2214805229440}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1898404492032}, {'type': 'perplexity', 'in_batch_dim': [8, 112], 'batch_size': 8, 'flops': 2214805229440}, {'type': 'perplexity', 'in_batch_dim': [8, 128], 'batch_size': 8, 'flops': 2531205966848}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1582003754624}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1898404492032}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1898404492032}, {'type': 'perplexity', 'in_batch_dim': [8, 192], 'batch_size': 8, 'flops': 3796808916480}, {'type': 'perplexity', 'in_batch_dim': [8, 64], 'batch_size': 8, 'flops': 1265603017216}, {'type': 'perplexity', 'in_batch_dim': [8, 144], 'batch_size': 8, 'flops': 2847606704256}, {'type': 'perplexity', 'in_batch_dim': [8, 64], 'batch_size': 8, 'flops': 1265603017216}, {'type': 'perplexity', 'in_batch_dim': [8, 144], 'batch_size': 8, 'flops': 2847606704256}, {'type': 'perplexity', 'in_batch_dim': [8, 64], 'batch_size': 8, 'flops': 1265603017216}, {'type': 'perplexity', 'in_batch_dim': [8, 64], 'batch_size': 8, 'flops': 1265603017216}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1898404492032}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1898404492032}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1582003754624}, {'type': 'perplexity', 'in_batch_dim': [8, 128], 'batch_size': 8, 'flops': 2531205966848}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1898404492032}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1898404492032}, {'type': 'perplexity', 'in_batch_dim': [8, 112], 'batch_size': 8, 'flops': 2214805229440}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1582003754624}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1582003754624}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1898404492032}, {'type': 'perplexity', 'in_batch_dim': [8, 64], 'batch_size': 8, 'flops': 1265603017216}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1898404492032}, {'type': 'perplexity', 'in_batch_dim': [8, 112], 'batch_size': 8, 'flops': 2214805229440}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1898404492032}, {'type': 'perplexity', 'in_batch_dim': [8, 64], 'batch_size': 8, 'flops': 1265603017216}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1582003754624}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1898404492032}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1898404492032}, {'type': 'perplexity', 'in_batch_dim': [8, 64], 'batch_size': 8, 'flops': 1265603017216}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1898404492032}, {'type': 'perplexity', 'in_batch_dim': [8, 64], 'batch_size': 8, 'flops': 1265603017216}, {'type': 'perplexity', 'in_batch_dim': [8, 112], 'batch_size': 8, 'flops': 2214805229440}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1582003754624}, {'type': 'perplexity', 'in_batch_dim': [8, 128], 'batch_size': 8, 'flops': 2531205966848}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1582003754624}, {'type': 'perplexity', 'in_batch_dim': [8, 112], 'batch_size': 8, 'flops': 2214805229440}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1898404492032}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1582003754624}, {'type': 'perplexity', 'in_batch_dim': [8, 112], 'batch_size': 8, 'flops': 2214805229440}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1582003754624}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1898404492032}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1898404492032}, {'type': 'perplexity', 'in_batch_dim': [8, 128], 'batch_size': 8, 'flops': 2531205966848}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1898404492032}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1898404492032}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1582003754624}, {'type': 'perplexity', 'in_batch_dim': [8, 112], 'batch_size': 8, 'flops': 2214805229440}, {'type': 'perplexity', 'in_batch_dim': [8, 128], 'batch_size': 8, 'flops': 2531205966848}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1898404492032}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1582003754624}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1582003754624}, {'type': 'perplexity', 'in_batch_dim': [8, 128], 'batch_size': 8, 'flops': 2531205966848}, {'type': 'perplexity', 'in_batch_dim': [8, 112], 'batch_size': 8, 'flops': 2214805229440}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1582003754624}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1582003754624}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1898404492032}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1582003754624}, {'type': 'perplexity', 'in_batch_dim': [8, 64], 'batch_size': 8, 'flops': 1265603017216}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1582003754624}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1582003754624}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1582003754624}, {'type': 'perplexity', 'in_batch_dim': [8, 112], 'batch_size': 8, 'flops': 2214805229440}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1582003754624}, {'type': 'perplexity', 'in_batch_dim': [8, 64], 'batch_size': 8, 'flops': 1265603017216}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1582003754624}, {'type': 'perplexity', 'in_batch_dim': [8, 128], 'batch_size': 8, 'flops': 2531205966848}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1898404492032}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1898404492032}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1582003754624}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1898404492032}, {'type': 'perplexity', 'in_batch_dim': [8, 64], 'batch_size': 8, 'flops': 1265603017216}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1898404492032}, {'type': 'perplexity', 'in_batch_dim': [8, 112], 'batch_size': 8, 'flops': 2214805229440}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1898404492032}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1582003754624}, {'type': 'perplexity', 'in_batch_dim': [8, 112], 'batch_size': 8, 'flops': 2214805229440}, {'type': 'perplexity', 'in_batch_dim': [8, 112], 'batch_size': 8, 'flops': 2214805229440}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1898404492032}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1898404492032}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1898404492032}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1898404492032}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1582003754624}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1582003754624}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1582003754624}, {'type': 'perplexity', 'in_batch_dim': [8, 112], 'batch_size': 8, 'flops': 2214805229440}, {'type': 'perplexity', 'in_batch_dim': [8, 64], 'batch_size': 8, 'flops': 1265603017216}, {'type': 'perplexity', 'in_batch_dim': [8, 144], 'batch_size': 8, 'flops': 2847606704256}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1898404492032}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1898404492032}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1898404492032}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1898404492032}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1898404492032}, {'type': 'perplexity', 'in_batch_dim': [8, 112], 'batch_size': 8, 'flops': 2214805229440}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1582003754624}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1582003754624}, {'type': 'perplexity', 'in_batch_dim': [8, 64], 'batch_size': 8, 'flops': 1265603017216}, {'type': 'perplexity', 'in_batch_dim': [8, 112], 'batch_size': 8, 'flops': 2214805229440}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1582003754624}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1898404492032}, {'type': 'perplexity', 'in_batch_dim': [8, 64], 'batch_size': 8, 'flops': 1265603017216}, {'type': 'perplexity', 'in_batch_dim': [8, 112], 'batch_size': 8, 'flops': 2214805229440}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1898404492032}, {'type': 'perplexity', 'in_batch_dim': [8, 64], 'batch_size': 8, 'flops': 1265603017216}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1582003754624}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1582003754624}, {'type': 'perplexity', 'in_batch_dim': [8, 64], 'batch_size': 8, 'flops': 1265603017216}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1898404492032}, {'type': 'perplexity', 'in_batch_dim': [8, 64], 'batch_size': 8, 'flops': 1265603017216}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1582003754624}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1898404492032}, {'type': 'perplexity', 'in_batch_dim': [8, 112], 'batch_size': 8, 'flops': 2214805229440}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1582003754624}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1898404492032}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1582003754624}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1898404492032}, {'type': 'perplexity', 'in_batch_dim': [8, 112], 'batch_size': 8, 'flops': 2214805229440}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1898404492032}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1898404492032}, {'type': 'perplexity', 'in_batch_dim': [8, 64], 'batch_size': 8, 'flops': 1265603017216}, {'type': 'perplexity', 'in_batch_dim': [8, 64], 'batch_size': 8, 'flops': 1265603017216}, {'type': 'perplexity', 'in_batch_dim': [8, 128], 'batch_size': 8, 'flops': 2531205966848}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1582003754624}, {'type': 'perplexity', 'in_batch_dim': [8, 64], 'batch_size': 8, 'flops': 1265603017216}, {'type': 'perplexity', 'in_batch_dim': [8, 112], 'batch_size': 8, 'flops': 2214805229440}, {'type': 'perplexity', 'in_batch_dim': [8, 112], 'batch_size': 8, 'flops': 2214805229440}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1898404492032}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1582003754624}, {'type': 'perplexity', 'in_batch_dim': [8, 128], 'batch_size': 8, 'flops': 2531205966848}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1898404492032}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1582003754624}, {'type': 'perplexity', 'in_batch_dim': [8, 112], 'batch_size': 8, 'flops': 2214805229440}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1898404492032}, {'type': 'perplexity', 'in_batch_dim': [8, 64], 'batch_size': 8, 'flops': 1265603017216}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1582003754624}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1898404492032}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1898404492032}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1582003754624}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1582003754624}, {'type': 'perplexity', 'in_batch_dim': [8, 64], 'batch_size': 8, 'flops': 1265603017216}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1898404492032}, {'type': 'perplexity', 'in_batch_dim': [8, 112], 'batch_size': 8, 'flops': 2214805229440}, {'type': 'perplexity', 'in_batch_dim': [8, 64], 'batch_size': 8, 'flops': 1265603017216}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1898404492032}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1582003754624}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1582003754624}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1898404492032}, {'type': 'perplexity', 'in_batch_dim': [8, 112], 'batch_size': 8, 'flops': 2214805229440}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1898404492032}, {'type': 'perplexity', 'in_batch_dim': [8, 112], 'batch_size': 8, 'flops': 2214805229440}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1582003754624}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1582003754624}, {'type': 'perplexity', 'in_batch_dim': [8, 64], 'batch_size': 8, 'flops': 1265603017216}, {'type': 'perplexity', 'in_batch_dim': [8, 112], 'batch_size': 8, 'flops': 2214805229440}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1582003754624}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1898404492032}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1582003754624}, {'type': 'perplexity', 'in_batch_dim': [8, 112], 'batch_size': 8, 'flops': 2214805229440}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1898404492032}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1898404492032}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1582003754624}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1898404492032}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1582003754624}, {'type': 'perplexity', 'in_batch_dim': [8, 112], 'batch_size': 8, 'flops': 2214805229440}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1582003754624}, {'type': 'perplexity', 'in_batch_dim': [8, 64], 'batch_size': 8, 'flops': 1265603017216}, {'type': 'perplexity', 'in_batch_dim': [8, 64], 'batch_size': 8, 'flops': 1265603017216}, {'type': 'perplexity', 'in_batch_dim': [8, 48], 'batch_size': 8, 'flops': 949202279808}, {'type': 'perplexity', 'in_batch_dim': [8, 112], 'batch_size': 8, 'flops': 2214805229440}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1582003754624}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1898404492032}, {'type': 'perplexity', 'in_batch_dim': [8, 128], 'batch_size': 8, 'flops': 2531205966848}, {'type': 'perplexity', 'in_batch_dim': [8, 112], 'batch_size': 8, 'flops': 2214805229440}, {'type': 'perplexity', 'in_batch_dim': [8, 128], 'batch_size': 8, 'flops': 2531205966848}, {'type': 'perplexity', 'in_batch_dim': [8, 112], 'batch_size': 8, 'flops': 2214805229440}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1582003754624}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1898404492032}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1582003754624}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1582003754624}, {'type': 'perplexity', 'in_batch_dim': [8, 112], 'batch_size': 8, 'flops': 2214805229440}, {'type': 'perplexity', 'in_batch_dim': [8, 112], 'batch_size': 8, 'flops': 2214805229440}, {'type': 'perplexity', 'in_batch_dim': [8, 64], 'batch_size': 8, 'flops': 1265603017216}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1898404492032}, {'type': 'perplexity', 'in_batch_dim': [8, 112], 'batch_size': 8, 'flops': 2214805229440}, {'type': 'perplexity', 'in_batch_dim': [8, 64], 'batch_size': 8, 'flops': 1265603017216}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1898404492032}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1582003754624}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1898404492032}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1898404492032}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1898404492032}, {'type': 'perplexity', 'in_batch_dim': [8, 48], 'batch_size': 8, 'flops': 949202279808}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1898404492032}, {'type': 'perplexity', 'in_batch_dim': [8, 112], 'batch_size': 8, 'flops': 2214805229440}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1582003754624}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1898404492032}, {'type': 'perplexity', 'in_batch_dim': [8, 64], 'batch_size': 8, 'flops': 1265603017216}, {'type': 'perplexity', 'in_batch_dim': [8, 112], 'batch_size': 8, 'flops': 2214805229440}, {'type': 'perplexity', 'in_batch_dim': [8, 64], 'batch_size': 8, 'flops': 1265603017216}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1582003754624}, {'type': 'perplexity', 'in_batch_dim': [8, 64], 'batch_size': 8, 'flops': 1265603017216}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1582003754624}, {'type': 'perplexity', 'in_batch_dim': [8, 112], 'batch_size': 8, 'flops': 2214805229440}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1582003754624}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1582003754624}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1582003754624}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1582003754624}, {'type': 'perplexity', 'in_batch_dim': [8, 112], 'batch_size': 8, 'flops': 2214805229440}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1582003754624}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1898404492032}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1898404492032}, {'type': 'perplexity', 'in_batch_dim': [8, 112], 'batch_size': 8, 'flops': 2214805229440}, {'type': 'perplexity', 'in_batch_dim': [8, 144], 'batch_size': 8, 'flops': 2847606704256}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1582003754624}, {'type': 'perplexity', 'in_batch_dim': [8, 64], 'batch_size': 8, 'flops': 1265603017216}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1898404492032}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1898404492032}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1582003754624}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1898404492032}, {'type': 'perplexity', 'in_batch_dim': [8, 128], 'batch_size': 8, 'flops': 2531205966848}, {'type': 'perplexity', 'in_batch_dim': [8, 112], 'batch_size': 8, 'flops': 2214805229440}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1898404492032}, {'type': 'perplexity', 'in_batch_dim': [8, 64], 'batch_size': 8, 'flops': 1265603017216}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1898404492032}, {'type': 'perplexity', 'in_batch_dim': [8, 160], 'batch_size': 8, 'flops': 3164007441664}, {'type': 'perplexity', 'in_batch_dim': [8, 64], 'batch_size': 8, 'flops': 1265603017216}, {'type': 'perplexity', 'in_batch_dim': [8, 112], 'batch_size': 8, 'flops': 2214805229440}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1898404492032}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1898404492032}, {'type': 'perplexity', 'in_batch_dim': [8, 144], 'batch_size': 8, 'flops': 2847606704256}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1582003754624}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1898404492032}, {'type': 'perplexity', 'in_batch_dim': [8, 64], 'batch_size': 8, 'flops': 1265603017216}, {'type': 'perplexity', 'in_batch_dim': [8, 64], 'batch_size': 8, 'flops': 1265603017216}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1898404492032}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1898404492032}, {'type': 'perplexity', 'in_batch_dim': [8, 112], 'batch_size': 8, 'flops': 2214805229440}, {'type': 'perplexity', 'in_batch_dim': [8, 112], 'batch_size': 8, 'flops': 2214805229440}, {'type': 'perplexity', 'in_batch_dim': [8, 128], 'batch_size': 8, 'flops': 2531205966848}, {'type': 'perplexity', 'in_batch_dim': [8, 112], 'batch_size': 8, 'flops': 2214805229440}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1582003754624}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1582003754624}, {'type': 'perplexity', 'in_batch_dim': [8, 112], 'batch_size': 8, 'flops': 2214805229440}, {'type': 'perplexity', 'in_batch_dim': [8, 128], 'batch_size': 8, 'flops': 2531205966848}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1582003754624}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1582003754624}, {'type': 'perplexity', 'in_batch_dim': [8, 64], 'batch_size': 8, 'flops': 1265603017216}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1898404492032}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1582003754624}, {'type': 'perplexity', 'in_batch_dim': [8, 112], 'batch_size': 8, 'flops': 2214805229440}, {'type': 'perplexity', 'in_batch_dim': [8, 64], 'batch_size': 8, 'flops': 1265603017216}, {'type': 'perplexity', 'in_batch_dim': [8, 64], 'batch_size': 8, 'flops': 1265603017216}, {'type': 'perplexity', 'in_batch_dim': [8, 128], 'batch_size': 8, 'flops': 2531205966848}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1898404492032}, {'type': 'perplexity', 'in_batch_dim': [8, 112], 'batch_size': 8, 'flops': 2214805229440}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1582003754624}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1582003754624}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1582003754624}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1582003754624}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1582003754624}, {'type': 'perplexity', 'in_batch_dim': [8, 112], 'batch_size': 8, 'flops': 2214805229440}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1898404492032}, {'type': 'perplexity', 'in_batch_dim': [8, 112], 'batch_size': 8, 'flops': 2214805229440}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1898404492032}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1582003754624}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1582003754624}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1898404492032}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1898404492032}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1582003754624}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1582003754624}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1582003754624}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1582003754624}, {'type': 'perplexity', 'in_batch_dim': [8, 48], 'batch_size': 8, 'flops': 949202279808}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1582003754624}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1898404492032}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1582003754624}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1898404492032}, {'type': 'perplexity', 'in_batch_dim': [8, 112], 'batch_size': 8, 'flops': 2214805229440}, {'type': 'perplexity', 'in_batch_dim': [8, 128], 'batch_size': 8, 'flops': 2531205966848}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1898404492032}, {'type': 'perplexity', 'in_batch_dim': [8, 64], 'batch_size': 8, 'flops': 1265603017216}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1898404492032}, {'type': 'perplexity', 'in_batch_dim': [8, 64], 'batch_size': 8, 'flops': 1265603017216}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1582003754624}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1582003754624}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1582003754624}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1582003754624}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1898404492032}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1582003754624}, {'type': 'perplexity', 'in_batch_dim': [8, 112], 'batch_size': 8, 'flops': 2214805229440}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1898404492032}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1582003754624}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1898404492032}, {'type': 'perplexity', 'in_batch_dim': [8, 64], 'batch_size': 8, 'flops': 1265603017216}, {'type': 'perplexity', 'in_batch_dim': [8, 64], 'batch_size': 8, 'flops': 1265603017216}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1898404492032}, {'type': 'perplexity', 'in_batch_dim': [8, 64], 'batch_size': 8, 'flops': 1265603017216}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1582003754624}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1582003754624}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1582003754624}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1582003754624}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1582003754624}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1582003754624}, {'type': 'perplexity', 'in_batch_dim': [8, 64], 'batch_size': 8, 'flops': 1265603017216}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1582003754624}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1582003754624}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1898404492032}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1898404492032}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1898404492032}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1898404492032}, {'type': 'perplexity', 'in_batch_dim': [8, 128], 'batch_size': 8, 'flops': 2531205966848}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1582003754624}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1898404492032}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1898404492032}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1898404492032}, {'type': 'perplexity', 'in_batch_dim': [8, 64], 'batch_size': 8, 'flops': 1265603017216}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1898404492032}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1898404492032}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1582003754624}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1898404492032}, {'type': 'perplexity', 'in_batch_dim': [8, 112], 'batch_size': 8, 'flops': 2214805229440}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1898404492032}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1582003754624}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1898404492032}, {'type': 'perplexity', 'in_batch_dim': [8, 64], 'batch_size': 8, 'flops': 1265603017216}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1898404492032}, {'type': 'perplexity', 'in_batch_dim': [8, 112], 'batch_size': 8, 'flops': 2214805229440}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1898404492032}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1898404492032}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1582003754624}, {'type': 'perplexity', 'in_batch_dim': [8, 64], 'batch_size': 8, 'flops': 1265603017216}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1582003754624}, {'type': 'perplexity', 'in_batch_dim': [8, 128], 'batch_size': 8, 'flops': 2531205966848}, {'type': 'perplexity', 'in_batch_dim': [8, 64], 'batch_size': 8, 'flops': 1265603017216}, {'type': 'perplexity', 'in_batch_dim': [8, 128], 'batch_size': 8, 'flops': 2531205966848}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1898404492032}, {'type': 'perplexity', 'in_batch_dim': [8, 112], 'batch_size': 8, 'flops': 2214805229440}, {'type': 'perplexity', 'in_batch_dim': [8, 112], 'batch_size': 8, 'flops': 2214805229440}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1898404492032}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1898404492032}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1582003754624}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1582003754624}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1582003754624}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1898404492032}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1582003754624}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1582003754624}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1898404492032}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1582003754624}, {'type': 'perplexity', 'in_batch_dim': [8, 64], 'batch_size': 8, 'flops': 1265603017216}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1898404492032}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1898404492032}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1898404492032}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1898404492032}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1582003754624}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1898404492032}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1582003754624}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1582003754624}, {'type': 'perplexity', 'in_batch_dim': [8, 112], 'batch_size': 8, 'flops': 2214805229440}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1582003754624}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1898404492032}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1582003754624}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1582003754624}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1582003754624}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1898404492032}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1582003754624}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1898404492032}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1582003754624}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1582003754624}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1898404492032}, {'type': 'perplexity', 'in_batch_dim': [8, 112], 'batch_size': 8, 'flops': 2214805229440}, {'type': 'perplexity', 'in_batch_dim': [8, 128], 'batch_size': 8, 'flops': 2531205966848}, {'type': 'perplexity', 'in_batch_dim': [8, 112], 'batch_size': 8, 'flops': 2214805229440}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1582003754624}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1898404492032}, {'type': 'perplexity', 'in_batch_dim': [8, 64], 'batch_size': 8, 'flops': 1265603017216}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1898404492032}, {'type': 'perplexity', 'in_batch_dim': [8, 64], 'batch_size': 8, 'flops': 1265603017216}, {'type': 'perplexity', 'in_batch_dim': [8, 144], 'batch_size': 8, 'flops': 2847606704256}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1582003754624}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1898404492032}, {'type': 'perplexity', 'in_batch_dim': [8, 112], 'batch_size': 8, 'flops': 2214805229440}, {'type': 'perplexity', 'in_batch_dim': [8, 144], 'batch_size': 8, 'flops': 2847606704256}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1582003754624}, {'type': 'perplexity', 'in_batch_dim': [8, 112], 'batch_size': 8, 'flops': 2214805229440}, {'type': 'perplexity', 'in_batch_dim': [8, 64], 'batch_size': 8, 'flops': 1265603017216}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1582003754624}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1898404492032}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1898404492032}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1582003754624}, {'type': 'perplexity', 'in_batch_dim': [8, 112], 'batch_size': 8, 'flops': 2214805229440}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1582003754624}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1582003754624}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1582003754624}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1582003754624}, {'type': 'perplexity', 'in_batch_dim': [8, 112], 'batch_size': 8, 'flops': 2214805229440}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1898404492032}, {'type': 'perplexity', 'in_batch_dim': [8, 128], 'batch_size': 8, 'flops': 2531205966848}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1582003754624}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1582003754624}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1898404492032}, {'type': 'perplexity', 'in_batch_dim': [8, 64], 'batch_size': 8, 'flops': 1265603017216}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1898404492032}, {'type': 'perplexity', 'in_batch_dim': [8, 64], 'batch_size': 8, 'flops': 1265603017216}, {'type': 'perplexity', 'in_batch_dim': [8, 112], 'batch_size': 8, 'flops': 2214805229440}, {'type': 'perplexity', 'in_batch_dim': [8, 112], 'batch_size': 8, 'flops': 2214805229440}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1582003754624}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1898404492032}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1582003754624}, {'type': 'perplexity', 'in_batch_dim': [8, 64], 'batch_size': 8, 'flops': 1265603017216}, {'type': 'perplexity', 'in_batch_dim': [8, 64], 'batch_size': 8, 'flops': 1265603017216}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1898404492032}, {'type': 'perplexity', 'in_batch_dim': [8, 64], 'batch_size': 8, 'flops': 1265603017216}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1898404492032}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1898404492032}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1582003754624}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1898404492032}, {'type': 'perplexity', 'in_batch_dim': [8, 144], 'batch_size': 8, 'flops': 2847606704256}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1898404492032}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1582003754624}, {'type': 'perplexity', 'in_batch_dim': [8, 128], 'batch_size': 8, 'flops': 2531205966848}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1898404492032}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1898404492032}, {'type': 'perplexity', 'in_batch_dim': [8, 112], 'batch_size': 8, 'flops': 2214805229440}, {'type': 'perplexity', 'in_batch_dim': [8, 112], 'batch_size': 8, 'flops': 2214805229440}, {'type': 'perplexity', 'in_batch_dim': [8, 64], 'batch_size': 8, 'flops': 1265603017216}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1582003754624}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1898404492032}, {'type': 'perplexity', 'in_batch_dim': [8, 128], 'batch_size': 8, 'flops': 2531205966848}, {'type': 'perplexity', 'in_batch_dim': [8, 144], 'batch_size': 8, 'flops': 2847606704256}, {'type': 'perplexity', 'in_batch_dim': [8, 112], 'batch_size': 8, 'flops': 2214805229440}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1582003754624}, {'type': 'perplexity', 'in_batch_dim': [8, 64], 'batch_size': 8, 'flops': 1265603017216}, {'type': 'perplexity', 'in_batch_dim': [8, 64], 'batch_size': 8, 'flops': 1265603017216}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1898404492032}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1582003754624}, {'type': 'perplexity', 'in_batch_dim': [8, 64], 'batch_size': 8, 'flops': 1265603017216}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1898404492032}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1582003754624}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1898404492032}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1898404492032}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1582003754624}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1582003754624}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1582003754624}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1582003754624}, {'type': 'perplexity', 'in_batch_dim': [3, 48], 'batch_size': 8, 'flops': 949202279808}], 'timestamp': '2025-09-10 02:33:15.894024', 'step': 6058, 'epoch': 1} {'type': 'pplx', 'content': 8133.421288452804, 'timestamp': '2025-09-10 02:33:15.896548', 'step': 6058, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 128], 'flops': 3797092544000}, 'timestamp': '2025-09-10 02:33:15.925540', 'step': 6058, 'epoch': 1} {'type': 'loss', 'content': 0.3149931728839874, 'timestamp': '2025-09-10 02:33:15.928085', 'step': 6059, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 2847885091968}, 'timestamp': '2025-09-10 02:33:15.958647', 'step': 6059, 'epoch': 1} {'type': 'loss', 'content': 0.15636003017425537, 'timestamp': '2025-09-10 02:33:15.982219', 'step': 6060, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 128], 'flops': 3797092544000}, 'timestamp': '2025-09-10 02:33:16.011667', 'step': 6060, 'epoch': 1} {'type': 'loss', 'content': 0.16551706194877625, 'timestamp': '2025-09-10 02:33:16.014102', 'step': 6061, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 144], 'flops': 4271696270016}, 'timestamp': '2025-09-10 02:33:16.043890', 'step': 6061, 'epoch': 1} {'type': 'loss', 'content': 0.19689850509166718, 'timestamp': '2025-09-10 02:33:16.046276', 'step': 6062, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 128], 'flops': 3797092544000}, 'timestamp': '2025-09-10 02:33:16.075693', 'step': 6062, 'epoch': 1} {'type': 'loss', 'content': 0.062430500984191895, 'timestamp': '2025-09-10 02:33:16.078087', 'step': 6063, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 2847885091968}, 'timestamp': '2025-09-10 02:33:16.112554', 'step': 6063, 'epoch': 1} {'type': 'loss', 'content': 0.09172266721725464, 'timestamp': '2025-09-10 02:33:16.136032', 'step': 6064, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 128], 'flops': 3797092544000}, 'timestamp': '2025-09-10 02:33:16.166526', 'step': 6064, 'epoch': 1} {'type': 'loss', 'content': 0.19875967502593994, 'timestamp': '2025-09-10 02:33:16.168495', 'step': 6065, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 2847885091968}, 'timestamp': '2025-09-10 02:33:16.198108', 'step': 6065, 'epoch': 1} {'type': 'loss', 'content': 0.08501255512237549, 'timestamp': '2025-09-10 02:33:16.200446', 'step': 6066, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 2847885091968}, 'timestamp': '2025-09-10 02:33:16.230027', 'step': 6066, 'epoch': 1} {'type': 'loss', 'content': 0.1774623692035675, 'timestamp': '2025-09-10 02:33:16.232230', 'step': 6067, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 128], 'flops': 3797092544000}, 'timestamp': '2025-09-10 02:33:16.261918', 'step': 6067, 'epoch': 1} {'type': 'loss', 'content': 0.11522048711776733, 'timestamp': '2025-09-10 02:33:16.285377', 'step': 6068, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 160], 'flops': 4746299996032}, 'timestamp': '2025-09-10 02:33:16.315396', 'step': 6068, 'epoch': 1} {'type': 'loss', 'content': 0.20216883718967438, 'timestamp': '2025-09-10 02:33:16.317652', 'step': 6069, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 128], 'flops': 3797092544000}, 'timestamp': '2025-09-10 02:33:16.347408', 'step': 6069, 'epoch': 1} {'type': 'loss', 'content': 0.12207534909248352, 'timestamp': '2025-09-10 02:33:16.353809', 'step': 6070, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 128], 'flops': 3797092544000}, 'timestamp': '2025-09-10 02:33:16.387143', 'step': 6070, 'epoch': 1} {'type': 'loss', 'content': 0.21962285041809082, 'timestamp': '2025-09-10 02:33:16.391597', 'step': 6071, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 3322488817984}, 'timestamp': '2025-09-10 02:33:16.433544', 'step': 6071, 'epoch': 1} {'type': 'loss', 'content': 0.11787936091423035, 'timestamp': '2025-09-10 02:33:16.456806', 'step': 6072, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 2847885091968}, 'timestamp': '2025-09-10 02:33:16.491638', 'step': 6072, 'epoch': 1} {'type': 'loss', 'content': 0.12526874244213104, 'timestamp': '2025-09-10 02:33:16.493631', 'step': 6073, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 3322488817984}, 'timestamp': '2025-09-10 02:33:16.522773', 'step': 6073, 'epoch': 1} {'type': 'loss', 'content': 0.1320149153470993, 'timestamp': '2025-09-10 02:33:16.526143', 'step': 6074, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 3322488817984}, 'timestamp': '2025-09-10 02:33:16.565019', 'step': 6074, 'epoch': 1} {'type': 'loss', 'content': 0.1494905948638916, 'timestamp': '2025-09-10 02:33:16.567344', 'step': 6075, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 160], 'flops': 4746299996032}, 'timestamp': '2025-09-10 02:33:16.598081', 'step': 6075, 'epoch': 1} {'type': 'loss', 'content': 0.21086865663528442, 'timestamp': '2025-09-10 02:33:16.621342', 'step': 6076, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 2847885091968}, 'timestamp': '2025-09-10 02:33:16.653540', 'step': 6076, 'epoch': 1} {'type': 'loss', 'content': 0.097932830452919, 'timestamp': '2025-09-10 02:33:16.655626', 'step': 6077, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 3322488817984}, 'timestamp': '2025-09-10 02:33:16.685023', 'step': 6077, 'epoch': 1} {'type': 'loss', 'content': 0.10474196821451187, 'timestamp': '2025-09-10 02:33:16.687182', 'step': 6078, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 3322488817984}, 'timestamp': '2025-09-10 02:33:16.717396', 'step': 6078, 'epoch': 1} {'type': 'loss', 'content': 0.13984528183937073, 'timestamp': '2025-09-10 02:33:16.719433', 'step': 6079, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 3322488817984}, 'timestamp': '2025-09-10 02:33:16.754116', 'step': 6079, 'epoch': 1} {'type': 'loss', 'content': 0.18473444879055023, 'timestamp': '2025-09-10 02:33:16.777727', 'step': 6080, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 144], 'flops': 4271696270016}, 'timestamp': '2025-09-10 02:33:16.812196', 'step': 6080, 'epoch': 1} {'type': 'loss', 'content': 0.21150627732276917, 'timestamp': '2025-09-10 02:33:16.814879', 'step': 6081, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 128], 'flops': 3797092544000}, 'timestamp': '2025-09-10 02:33:16.849669', 'step': 6081, 'epoch': 1} {'type': 'loss', 'content': 0.09020786732435226, 'timestamp': '2025-09-10 02:33:16.852184', 'step': 6082, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 128], 'flops': 3797092544000}, 'timestamp': '2025-09-10 02:33:16.883047', 'step': 6082, 'epoch': 1} {'type': 'loss', 'content': 0.15135659277439117, 'timestamp': '2025-09-10 02:33:16.885531', 'step': 6083, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 3322488817984}, 'timestamp': '2025-09-10 02:33:16.917202', 'step': 6083, 'epoch': 1} {'type': 'loss', 'content': 0.12663593888282776, 'timestamp': '2025-09-10 02:33:16.940540', 'step': 6084, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 128], 'flops': 3797092544000}, 'timestamp': '2025-09-10 02:33:16.981298', 'step': 6084, 'epoch': 1} {'type': 'loss', 'content': 0.1995379626750946, 'timestamp': '2025-09-10 02:33:16.983573', 'step': 6085, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 3322488817984}, 'timestamp': '2025-09-10 02:33:17.014968', 'step': 6085, 'epoch': 1} {'type': 'loss', 'content': 0.19074328243732452, 'timestamp': '2025-09-10 02:33:17.021140', 'step': 6086, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 128], 'flops': 3797092544000}, 'timestamp': '2025-09-10 02:33:17.059446', 'step': 6086, 'epoch': 1} {'type': 'loss', 'content': 0.29826897382736206, 'timestamp': '2025-09-10 02:33:17.062723', 'step': 6087, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 2847885091968}, 'timestamp': '2025-09-10 02:33:17.092826', 'step': 6087, 'epoch': 1} {'type': 'loss', 'content': 0.21180574595928192, 'timestamp': '2025-09-10 02:33:17.120154', 'step': 6088, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 3322488817984}, 'timestamp': '2025-09-10 02:33:17.152697', 'step': 6088, 'epoch': 1} {'type': 'loss', 'content': 0.0755065307021141, 'timestamp': '2025-09-10 02:33:17.154901', 'step': 6089, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 3322488817984}, 'timestamp': '2025-09-10 02:33:17.184663', 'step': 6089, 'epoch': 1} {'type': 'loss', 'content': 0.1270950883626938, 'timestamp': '2025-09-10 02:33:17.188305', 'step': 6090, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 2847885091968}, 'timestamp': '2025-09-10 02:33:17.217914', 'step': 6090, 'epoch': 1} {'type': 'loss', 'content': 0.12571491301059723, 'timestamp': '2025-09-10 02:33:17.220085', 'step': 6091, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 128], 'flops': 3797092544000}, 'timestamp': '2025-09-10 02:33:17.252272', 'step': 6091, 'epoch': 1} {'type': 'loss', 'content': 0.2116534560918808, 'timestamp': '2025-09-10 02:33:17.275606', 'step': 6092, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 3322488817984}, 'timestamp': '2025-09-10 02:33:17.306072', 'step': 6092, 'epoch': 1} {'type': 'loss', 'content': 0.11310073733329773, 'timestamp': '2025-09-10 02:33:17.308410', 'step': 6093, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 3322488817984}, 'timestamp': '2025-09-10 02:33:17.338783', 'step': 6093, 'epoch': 1} {'type': 'loss', 'content': 0.11079177260398865, 'timestamp': '2025-09-10 02:33:17.341103', 'step': 6094, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 3322488817984}, 'timestamp': '2025-09-10 02:33:17.371138', 'step': 6094, 'epoch': 1} {'type': 'loss', 'content': 0.25804632902145386, 'timestamp': '2025-09-10 02:33:17.374504', 'step': 6095, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 3322488817984}, 'timestamp': '2025-09-10 02:33:17.404764', 'step': 6095, 'epoch': 1} {'type': 'loss', 'content': 0.22382235527038574, 'timestamp': '2025-09-10 02:33:17.428250', 'step': 6096, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 2847885091968}, 'timestamp': '2025-09-10 02:33:17.461395', 'step': 6096, 'epoch': 1} {'type': 'loss', 'content': 0.1323813945055008, 'timestamp': '2025-09-10 02:33:17.465920', 'step': 6097, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 3322488817984}, 'timestamp': '2025-09-10 02:33:17.501167', 'step': 6097, 'epoch': 1} {'type': 'loss', 'content': 0.11620942503213882, 'timestamp': '2025-09-10 02:33:17.505838', 'step': 6098, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 2847885091968}, 'timestamp': '2025-09-10 02:33:17.535531', 'step': 6098, 'epoch': 1} {'type': 'loss', 'content': 0.13299067318439484, 'timestamp': '2025-09-10 02:33:17.537939', 'step': 6099, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 144], 'flops': 4271696270016}, 'timestamp': '2025-09-10 02:33:17.568498', 'step': 6099, 'epoch': 1} {'type': 'loss', 'content': 0.11712914705276489, 'timestamp': '2025-09-10 02:33:17.592021', 'step': 6100, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 2847885091968}, 'timestamp': '2025-09-10 02:33:17.622213', 'step': 6100, 'epoch': 1} {'type': 'loss', 'content': 0.11481355130672455, 'timestamp': '2025-09-10 02:33:17.624695', 'step': 6101, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 3322488817984}, 'timestamp': '2025-09-10 02:33:17.654549', 'step': 6101, 'epoch': 1} {'type': 'loss', 'content': 0.11515549570322037, 'timestamp': '2025-09-10 02:33:17.656841', 'step': 6102, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 3322488817984}, 'timestamp': '2025-09-10 02:33:17.686866', 'step': 6102, 'epoch': 1} {'type': 'loss', 'content': 0.13598991930484772, 'timestamp': '2025-09-10 02:33:17.690277', 'step': 6103, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 80], 'flops': 2373281365952}, 'timestamp': '2025-09-10 02:33:17.722176', 'step': 6103, 'epoch': 1} {'type': 'loss', 'content': 0.1938723623752594, 'timestamp': '2025-09-10 02:33:17.745646', 'step': 6104, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 128], 'flops': 3797092544000}, 'timestamp': '2025-09-10 02:33:17.775582', 'step': 6104, 'epoch': 1} {'type': 'loss', 'content': 0.15088868141174316, 'timestamp': '2025-09-10 02:33:17.777670', 'step': 6105, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 128], 'flops': 3797092544000}, 'timestamp': '2025-09-10 02:33:17.807905', 'step': 6105, 'epoch': 1} {'type': 'loss', 'content': 0.1428033709526062, 'timestamp': '2025-09-10 02:33:17.809895', 'step': 6106, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 128], 'flops': 3797092544000}, 'timestamp': '2025-09-10 02:33:17.839643', 'step': 6106, 'epoch': 1} {'type': 'loss', 'content': 0.21233707666397095, 'timestamp': '2025-09-10 02:33:17.842441', 'step': 6107, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 2847885091968}, 'timestamp': '2025-09-10 02:33:17.871909', 'step': 6107, 'epoch': 1} {'type': 'loss', 'content': 0.18589356541633606, 'timestamp': '2025-09-10 02:33:17.895354', 'step': 6108, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 128], 'flops': 3797092544000}, 'timestamp': '2025-09-10 02:33:17.925207', 'step': 6108, 'epoch': 1} {'type': 'loss', 'content': 0.18999046087265015, 'timestamp': '2025-09-10 02:33:17.927549', 'step': 6109, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 3322488817984}, 'timestamp': '2025-09-10 02:33:17.957121', 'step': 6109, 'epoch': 1} {'type': 'loss', 'content': 0.13071195781230927, 'timestamp': '2025-09-10 02:33:17.959757', 'step': 6110, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 3322488817984}, 'timestamp': '2025-09-10 02:33:17.990250', 'step': 6110, 'epoch': 1} {'type': 'loss', 'content': 0.1591653972864151, 'timestamp': '2025-09-10 02:33:17.992634', 'step': 6111, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 2847885091968}, 'timestamp': '2025-09-10 02:33:18.022797', 'step': 6111, 'epoch': 1} {'type': 'loss', 'content': 0.1549849957227707, 'timestamp': '2025-09-10 02:33:18.046241', 'step': 6112, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 3322488817984}, 'timestamp': '2025-09-10 02:33:18.075792', 'step': 6112, 'epoch': 1} {'type': 'loss', 'content': 0.14480124413967133, 'timestamp': '2025-09-10 02:33:18.077868', 'step': 6113, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 3322488817984}, 'timestamp': '2025-09-10 02:33:18.107550', 'step': 6113, 'epoch': 1} {'type': 'loss', 'content': 0.1340639293193817, 'timestamp': '2025-09-10 02:33:18.109610', 'step': 6114, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 128], 'flops': 3797092544000}, 'timestamp': '2025-09-10 02:33:18.138681', 'step': 6114, 'epoch': 1} {'type': 'loss', 'content': 0.10900627821683884, 'timestamp': '2025-09-10 02:33:18.141456', 'step': 6115, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 3322488817984}, 'timestamp': '2025-09-10 02:33:18.171706', 'step': 6115, 'epoch': 1} {'type': 'loss', 'content': 0.17088735103607178, 'timestamp': '2025-09-10 02:33:18.195103', 'step': 6116, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 128], 'flops': 3797092544000}, 'timestamp': '2025-09-10 02:33:18.224880', 'step': 6116, 'epoch': 1} {'type': 'loss', 'content': 0.162700816988945, 'timestamp': '2025-09-10 02:33:18.226837', 'step': 6117, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 80], 'flops': 2373281365952}, 'timestamp': '2025-09-10 02:33:18.256310', 'step': 6117, 'epoch': 1} {'type': 'loss', 'content': 0.12784834206104279, 'timestamp': '2025-09-10 02:33:18.258465', 'step': 6118, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 144], 'flops': 4271696270016}, 'timestamp': '2025-09-10 02:33:18.288705', 'step': 6118, 'epoch': 1} {'type': 'loss', 'content': 0.17516377568244934, 'timestamp': '2025-09-10 02:33:18.290891', 'step': 6119, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 128], 'flops': 3797092544000}, 'timestamp': '2025-09-10 02:33:18.320870', 'step': 6119, 'epoch': 1} {'type': 'loss', 'content': 0.19570717215538025, 'timestamp': '2025-09-10 02:33:18.345325', 'step': 6120, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 128], 'flops': 3797092544000}, 'timestamp': '2025-09-10 02:33:18.375634', 'step': 6120, 'epoch': 1} {'type': 'loss', 'content': 0.09602995961904526, 'timestamp': '2025-09-10 02:33:18.377789', 'step': 6121, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 2847885091968}, 'timestamp': '2025-09-10 02:33:18.408433', 'step': 6121, 'epoch': 1} {'type': 'loss', 'content': 0.19393602013587952, 'timestamp': '2025-09-10 02:33:18.410508', 'step': 6122, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 3322488817984}, 'timestamp': '2025-09-10 02:33:18.441125', 'step': 6122, 'epoch': 1} {'type': 'loss', 'content': 0.21310347318649292, 'timestamp': '2025-09-10 02:33:18.443370', 'step': 6123, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 3322488817984}, 'timestamp': '2025-09-10 02:33:18.473208', 'step': 6123, 'epoch': 1} {'type': 'loss', 'content': 0.09410810470581055, 'timestamp': '2025-09-10 02:33:18.496655', 'step': 6124, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 2847885091968}, 'timestamp': '2025-09-10 02:33:18.527434', 'step': 6124, 'epoch': 1} {'type': 'loss', 'content': 0.2376909852027893, 'timestamp': '2025-09-10 02:33:18.529994', 'step': 6125, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 3322488817984}, 'timestamp': '2025-09-10 02:33:18.560125', 'step': 6125, 'epoch': 1} {'type': 'loss', 'content': 0.18780668079853058, 'timestamp': '2025-09-10 02:33:18.562421', 'step': 6126, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 144], 'flops': 4271696270016}, 'timestamp': '2025-09-10 02:33:18.594354', 'step': 6126, 'epoch': 1} {'type': 'loss', 'content': 0.0954422578215599, 'timestamp': '2025-09-10 02:33:18.596794', 'step': 6127, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 160], 'flops': 4746299996032}, 'timestamp': '2025-09-10 02:33:18.627053', 'step': 6127, 'epoch': 1} {'type': 'loss', 'content': 0.2029300034046173, 'timestamp': '2025-09-10 02:33:18.651616', 'step': 6128, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 3322488817984}, 'timestamp': '2025-09-10 02:33:18.682360', 'step': 6128, 'epoch': 1} {'type': 'loss', 'content': 0.10191385447978973, 'timestamp': '2025-09-10 02:33:18.684889', 'step': 6129, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 3322488817984}, 'timestamp': '2025-09-10 02:33:18.715406', 'step': 6129, 'epoch': 1} {'type': 'loss', 'content': 0.1648533195257187, 'timestamp': '2025-09-10 02:33:18.717574', 'step': 6130, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 3322488817984}, 'timestamp': '2025-09-10 02:33:18.747494', 'step': 6130, 'epoch': 1} {'type': 'loss', 'content': 0.10759308934211731, 'timestamp': '2025-09-10 02:33:18.749839', 'step': 6131, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 2847885091968}, 'timestamp': '2025-09-10 02:33:18.780879', 'step': 6131, 'epoch': 1} {'type': 'loss', 'content': 0.2843247950077057, 'timestamp': '2025-09-10 02:33:18.804320', 'step': 6132, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 128], 'flops': 3797092544000}, 'timestamp': '2025-09-10 02:33:18.835429', 'step': 6132, 'epoch': 1} {'type': 'loss', 'content': 0.13122698664665222, 'timestamp': '2025-09-10 02:33:18.838095', 'step': 6133, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 144], 'flops': 4271696270016}, 'timestamp': '2025-09-10 02:33:18.868149', 'step': 6133, 'epoch': 1} {'type': 'loss', 'content': 0.11187062412500381, 'timestamp': '2025-09-10 02:33:18.870550', 'step': 6134, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 2847885091968}, 'timestamp': '2025-09-10 02:33:18.901210', 'step': 6134, 'epoch': 1} {'type': 'loss', 'content': 0.18764103949069977, 'timestamp': '2025-09-10 02:33:18.903717', 'step': 6135, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 128], 'flops': 3797092544000}, 'timestamp': '2025-09-10 02:33:18.934427', 'step': 6135, 'epoch': 1} {'type': 'loss', 'content': 0.12220790237188339, 'timestamp': '2025-09-10 02:33:18.958193', 'step': 6136, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 128], 'flops': 3797092544000}, 'timestamp': '2025-09-10 02:33:18.989314', 'step': 6136, 'epoch': 1} {'type': 'loss', 'content': 0.14809449017047882, 'timestamp': '2025-09-10 02:33:18.991504', 'step': 6137, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 2847885091968}, 'timestamp': '2025-09-10 02:33:19.021441', 'step': 6137, 'epoch': 1} {'type': 'loss', 'content': 0.22765879333019257, 'timestamp': '2025-09-10 02:33:19.023715', 'step': 6138, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 3322488817984}, 'timestamp': '2025-09-10 02:33:19.054800', 'step': 6138, 'epoch': 1} {'type': 'loss', 'content': 0.24470455944538116, 'timestamp': '2025-09-10 02:33:19.057298', 'step': 6139, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 2847885091968}, 'timestamp': '2025-09-10 02:33:19.087565', 'step': 6139, 'epoch': 1} {'type': 'loss', 'content': 0.09383851289749146, 'timestamp': '2025-09-10 02:33:19.111196', 'step': 6140, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 3322488817984}, 'timestamp': '2025-09-10 02:33:19.141569', 'step': 6140, 'epoch': 1} {'type': 'loss', 'content': 0.15626178681850433, 'timestamp': '2025-09-10 02:33:19.143951', 'step': 6141, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 3322488817984}, 'timestamp': '2025-09-10 02:33:19.174230', 'step': 6141, 'epoch': 1} {'type': 'loss', 'content': 0.2518221437931061, 'timestamp': '2025-09-10 02:33:19.176570', 'step': 6142, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 2847885091968}, 'timestamp': '2025-09-10 02:33:19.206870', 'step': 6142, 'epoch': 1} {'type': 'loss', 'content': 0.17160245776176453, 'timestamp': '2025-09-10 02:33:19.209356', 'step': 6143, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 3322488817984}, 'timestamp': '2025-09-10 02:33:19.238756', 'step': 6143, 'epoch': 1} {'type': 'loss', 'content': 0.09287802129983902, 'timestamp': '2025-09-10 02:33:19.262071', 'step': 6144, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 3322488817984}, 'timestamp': '2025-09-10 02:33:19.292571', 'step': 6144, 'epoch': 1} {'type': 'loss', 'content': 0.22717179358005524, 'timestamp': '2025-09-10 02:33:19.295241', 'step': 6145, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 2847885091968}, 'timestamp': '2025-09-10 02:33:19.325540', 'step': 6145, 'epoch': 1} {'type': 'loss', 'content': 0.13063274323940277, 'timestamp': '2025-09-10 02:33:19.329635', 'step': 6146, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 2847885091968}, 'timestamp': '2025-09-10 02:33:19.363568', 'step': 6146, 'epoch': 1} {'type': 'loss', 'content': 0.17497757077217102, 'timestamp': '2025-09-10 02:33:19.366086', 'step': 6147, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 3322488817984}, 'timestamp': '2025-09-10 02:33:19.397288', 'step': 6147, 'epoch': 1} {'type': 'loss', 'content': 0.15873154997825623, 'timestamp': '2025-09-10 02:33:19.420743', 'step': 6148, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 2847885091968}, 'timestamp': '2025-09-10 02:33:19.451075', 'step': 6148, 'epoch': 1} {'type': 'loss', 'content': 0.13547508418560028, 'timestamp': '2025-09-10 02:33:19.453184', 'step': 6149, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 128], 'flops': 3797092544000}, 'timestamp': '2025-09-10 02:33:19.483422', 'step': 6149, 'epoch': 1} {'type': 'loss', 'content': 0.1561773717403412, 'timestamp': '2025-09-10 02:33:19.485690', 'step': 6150, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 3322488817984}, 'timestamp': '2025-09-10 02:33:19.516198', 'step': 6150, 'epoch': 1} {'type': 'loss', 'content': 0.2026248425245285, 'timestamp': '2025-09-10 02:33:19.521385', 'step': 6151, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 128], 'flops': 3797092544000}, 'timestamp': '2025-09-10 02:33:19.555246', 'step': 6151, 'epoch': 1} {'type': 'loss', 'content': 0.16368944942951202, 'timestamp': '2025-09-10 02:33:19.580020', 'step': 6152, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 2847885091968}, 'timestamp': '2025-09-10 02:33:19.611855', 'step': 6152, 'epoch': 1} {'type': 'loss', 'content': 0.25164127349853516, 'timestamp': '2025-09-10 02:33:19.614255', 'step': 6153, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 128], 'flops': 3797092544000}, 'timestamp': '2025-09-10 02:33:19.656030', 'step': 6153, 'epoch': 1} {'type': 'loss', 'content': 0.13700343668460846, 'timestamp': '2025-09-10 02:33:19.658727', 'step': 6154, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 2847885091968}, 'timestamp': '2025-09-10 02:33:19.689810', 'step': 6154, 'epoch': 1} {'type': 'loss', 'content': 0.18517224490642548, 'timestamp': '2025-09-10 02:33:19.692049', 'step': 6155, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 3322488817984}, 'timestamp': '2025-09-10 02:33:19.723866', 'step': 6155, 'epoch': 1} {'type': 'loss', 'content': 0.21426494419574738, 'timestamp': '2025-09-10 02:33:19.747286', 'step': 6156, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 80], 'flops': 2373281365952}, 'timestamp': '2025-09-10 02:33:19.778071', 'step': 6156, 'epoch': 1} {'type': 'loss', 'content': 0.14813758432865143, 'timestamp': '2025-09-10 02:33:19.780373', 'step': 6157, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 128], 'flops': 3797092544000}, 'timestamp': '2025-09-10 02:33:19.811163', 'step': 6157, 'epoch': 1} {'type': 'loss', 'content': 0.29982075095176697, 'timestamp': '2025-09-10 02:33:19.813510', 'step': 6158, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 2847885091968}, 'timestamp': '2025-09-10 02:33:19.843142', 'step': 6158, 'epoch': 1} {'type': 'loss', 'content': 0.18125097453594208, 'timestamp': '2025-09-10 02:33:19.846640', 'step': 6159, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 3322488817984}, 'timestamp': '2025-09-10 02:33:19.878045', 'step': 6159, 'epoch': 1} {'type': 'loss', 'content': 0.10286352783441544, 'timestamp': '2025-09-10 02:33:19.901871', 'step': 6160, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 3322488817984}, 'timestamp': '2025-09-10 02:33:19.932057', 'step': 6160, 'epoch': 1} {'type': 'loss', 'content': 0.09504921734333038, 'timestamp': '2025-09-10 02:33:19.934329', 'step': 6161, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 3322488817984}, 'timestamp': '2025-09-10 02:33:19.963973', 'step': 6161, 'epoch': 1} {'type': 'loss', 'content': 0.15406543016433716, 'timestamp': '2025-09-10 02:33:19.966519', 'step': 6162, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 2847885091968}, 'timestamp': '2025-09-10 02:33:19.997424', 'step': 6162, 'epoch': 1} {'type': 'loss', 'content': 0.14794805645942688, 'timestamp': '2025-09-10 02:33:19.999191', 'step': 6163, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 3322488817984}, 'timestamp': '2025-09-10 02:33:20.029659', 'step': 6163, 'epoch': 1} {'type': 'loss', 'content': 0.1583787202835083, 'timestamp': '2025-09-10 02:33:20.053402', 'step': 6164, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 3322488817984}, 'timestamp': '2025-09-10 02:33:20.084099', 'step': 6164, 'epoch': 1} {'type': 'loss', 'content': 0.16453702747821808, 'timestamp': '2025-09-10 02:33:20.086219', 'step': 6165, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 2847885091968}, 'timestamp': '2025-09-10 02:33:20.116301', 'step': 6165, 'epoch': 1} {'type': 'loss', 'content': 0.07741780579090118, 'timestamp': '2025-09-10 02:33:20.118343', 'step': 6166, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 2847885091968}, 'timestamp': '2025-09-10 02:33:20.147807', 'step': 6166, 'epoch': 1} {'type': 'loss', 'content': 0.16153860092163086, 'timestamp': '2025-09-10 02:33:20.150444', 'step': 6167, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 128], 'flops': 3797092544000}, 'timestamp': '2025-09-10 02:33:20.180819', 'step': 6167, 'epoch': 1} {'type': 'loss', 'content': 0.1302965134382248, 'timestamp': '2025-09-10 02:33:20.204423', 'step': 6168, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 3322488817984}, 'timestamp': '2025-09-10 02:33:20.235984', 'step': 6168, 'epoch': 1} {'type': 'loss', 'content': 0.17250889539718628, 'timestamp': '2025-09-10 02:33:20.238321', 'step': 6169, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 3322488817984}, 'timestamp': '2025-09-10 02:33:20.268147', 'step': 6169, 'epoch': 1} {'type': 'loss', 'content': 0.11056850850582123, 'timestamp': '2025-09-10 02:33:20.270492', 'step': 6170, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 128], 'flops': 3797092544000}, 'timestamp': '2025-09-10 02:33:20.300369', 'step': 6170, 'epoch': 1} {'type': 'loss', 'content': 0.15935611724853516, 'timestamp': '2025-09-10 02:33:20.302706', 'step': 6171, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 3322488817984}, 'timestamp': '2025-09-10 02:33:20.334045', 'step': 6171, 'epoch': 1} {'type': 'loss', 'content': 0.1192365288734436, 'timestamp': '2025-09-10 02:33:20.357572', 'step': 6172, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 3322488817984}, 'timestamp': '2025-09-10 02:33:20.388036', 'step': 6172, 'epoch': 1} {'type': 'loss', 'content': 0.10483266413211823, 'timestamp': '2025-09-10 02:33:20.390421', 'step': 6173, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 2847885091968}, 'timestamp': '2025-09-10 02:33:20.423187', 'step': 6173, 'epoch': 1} {'type': 'loss', 'content': 0.21537554264068604, 'timestamp': '2025-09-10 02:33:20.427490', 'step': 6174, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 2847885091968}, 'timestamp': '2025-09-10 02:33:20.461547', 'step': 6174, 'epoch': 1} {'type': 'loss', 'content': 0.15970858931541443, 'timestamp': '2025-09-10 02:33:20.464094', 'step': 6175, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 3322488817984}, 'timestamp': '2025-09-10 02:33:20.494206', 'step': 6175, 'epoch': 1} {'type': 'loss', 'content': 0.1335771083831787, 'timestamp': '2025-09-10 02:33:20.518191', 'step': 6176, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 2847885091968}, 'timestamp': '2025-09-10 02:33:20.549221', 'step': 6176, 'epoch': 1} {'type': 'loss', 'content': 0.13526929914951324, 'timestamp': '2025-09-10 02:33:20.551783', 'step': 6177, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 3322488817984}, 'timestamp': '2025-09-10 02:33:20.585027', 'step': 6177, 'epoch': 1} {'type': 'loss', 'content': 0.21314871311187744, 'timestamp': '2025-09-10 02:33:20.587665', 'step': 6178, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 2847885091968}, 'timestamp': '2025-09-10 02:33:20.620124', 'step': 6178, 'epoch': 1} {'type': 'loss', 'content': 0.11939126998186111, 'timestamp': '2025-09-10 02:33:20.623548', 'step': 6179, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 2847885091968}, 'timestamp': '2025-09-10 02:33:20.655723', 'step': 6179, 'epoch': 1} {'type': 'loss', 'content': 0.22570040822029114, 'timestamp': '2025-09-10 02:33:20.679205', 'step': 6180, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 128], 'flops': 3797092544000}, 'timestamp': '2025-09-10 02:33:20.710018', 'step': 6180, 'epoch': 1} {'type': 'loss', 'content': 0.11996205151081085, 'timestamp': '2025-09-10 02:33:20.712260', 'step': 6181, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 2847885091968}, 'timestamp': '2025-09-10 02:33:20.744144', 'step': 6181, 'epoch': 1} {'type': 'loss', 'content': 0.12335449457168579, 'timestamp': '2025-09-10 02:33:20.746981', 'step': 6182, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 3322488817984}, 'timestamp': '2025-09-10 02:33:20.777867', 'step': 6182, 'epoch': 1} {'type': 'loss', 'content': 0.15506991744041443, 'timestamp': '2025-09-10 02:33:20.780169', 'step': 6183, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 3322488817984}, 'timestamp': '2025-09-10 02:33:20.810248', 'step': 6183, 'epoch': 1} {'type': 'loss', 'content': 0.17593351006507874, 'timestamp': '2025-09-10 02:33:20.834417', 'step': 6184, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 128], 'flops': 3797092544000}, 'timestamp': '2025-09-10 02:33:20.865078', 'step': 6184, 'epoch': 1} {'type': 'loss', 'content': 0.1616920530796051, 'timestamp': '2025-09-10 02:33:20.867351', 'step': 6185, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 160], 'flops': 4746299996032}, 'timestamp': '2025-09-10 02:33:20.898356', 'step': 6185, 'epoch': 1} {'type': 'loss', 'content': 0.10822292417287827, 'timestamp': '2025-09-10 02:33:20.901123', 'step': 6186, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 3322488817984}, 'timestamp': '2025-09-10 02:33:20.930912', 'step': 6186, 'epoch': 1} {'type': 'loss', 'content': 0.2015461027622223, 'timestamp': '2025-09-10 02:33:20.933544', 'step': 6187, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 3322488817984}, 'timestamp': '2025-09-10 02:33:20.963209', 'step': 6187, 'epoch': 1} {'type': 'loss', 'content': 0.1200726330280304, 'timestamp': '2025-09-10 02:33:20.986723', 'step': 6188, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 128], 'flops': 3797092544000}, 'timestamp': '2025-09-10 02:33:21.017634', 'step': 6188, 'epoch': 1} {'type': 'loss', 'content': 0.060558270663022995, 'timestamp': '2025-09-10 02:33:21.019992', 'step': 6189, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 2847885091968}, 'timestamp': '2025-09-10 02:33:21.049555', 'step': 6189, 'epoch': 1} {'type': 'loss', 'content': 0.21098637580871582, 'timestamp': '2025-09-10 02:33:21.051974', 'step': 6190, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 2847885091968}, 'timestamp': '2025-09-10 02:33:21.082485', 'step': 6190, 'epoch': 1} {'type': 'loss', 'content': 0.09667360037565231, 'timestamp': '2025-09-10 02:33:21.084780', 'step': 6191, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 144], 'flops': 4271696270016}, 'timestamp': '2025-09-10 02:33:21.115148', 'step': 6191, 'epoch': 1} {'type': 'loss', 'content': 0.2316874861717224, 'timestamp': '2025-09-10 02:33:21.138632', 'step': 6192, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 144], 'flops': 4271696270016}, 'timestamp': '2025-09-10 02:33:21.168464', 'step': 6192, 'epoch': 1} {'type': 'loss', 'content': 0.22521436214447021, 'timestamp': '2025-09-10 02:33:21.170957', 'step': 6193, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 3322488817984}, 'timestamp': '2025-09-10 02:33:21.202549', 'step': 6193, 'epoch': 1} {'type': 'loss', 'content': 0.19802142679691315, 'timestamp': '2025-09-10 02:33:21.204619', 'step': 6194, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 80], 'flops': 2373281365952}, 'timestamp': '2025-09-10 02:33:21.234750', 'step': 6194, 'epoch': 1} {'type': 'loss', 'content': 0.1654195338487625, 'timestamp': '2025-09-10 02:33:21.236881', 'step': 6195, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 3322488817984}, 'timestamp': '2025-09-10 02:33:21.266553', 'step': 6195, 'epoch': 1} {'type': 'loss', 'content': 0.09332630783319473, 'timestamp': '2025-09-10 02:33:21.290664', 'step': 6196, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 3322488817984}, 'timestamp': '2025-09-10 02:33:21.322389', 'step': 6196, 'epoch': 1} {'type': 'loss', 'content': 0.1744016408920288, 'timestamp': '2025-09-10 02:33:21.325015', 'step': 6197, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 80], 'flops': 2373281365952}, 'timestamp': '2025-09-10 02:33:21.355352', 'step': 6197, 'epoch': 1} {'type': 'loss', 'content': 0.0997164323925972, 'timestamp': '2025-09-10 02:33:21.358097', 'step': 6198, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 3322488817984}, 'timestamp': '2025-09-10 02:33:21.388981', 'step': 6198, 'epoch': 1} {'type': 'loss', 'content': 0.10744365304708481, 'timestamp': '2025-09-10 02:33:21.391364', 'step': 6199, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 128], 'flops': 3797092544000}, 'timestamp': '2025-09-10 02:33:21.421760', 'step': 6199, 'epoch': 1} {'type': 'loss', 'content': 0.12276700139045715, 'timestamp': '2025-09-10 02:33:21.447504', 'step': 6200, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 3322488817984}, 'timestamp': '2025-09-10 02:33:21.477955', 'step': 6200, 'epoch': 1} {'type': 'loss', 'content': 0.22080865502357483, 'timestamp': '2025-09-10 02:33:21.480197', 'step': 6201, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 128], 'flops': 3797092544000}, 'timestamp': '2025-09-10 02:33:21.510663', 'step': 6201, 'epoch': 1} {'type': 'loss', 'content': 0.14037755131721497, 'timestamp': '2025-09-10 02:33:21.513330', 'step': 6202, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 3322488817984}, 'timestamp': '2025-09-10 02:33:21.543820', 'step': 6202, 'epoch': 1} {'type': 'loss', 'content': 0.1563444584608078, 'timestamp': '2025-09-10 02:33:21.545890', 'step': 6203, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 2847885091968}, 'timestamp': '2025-09-10 02:33:21.575206', 'step': 6203, 'epoch': 1} {'type': 'loss', 'content': 0.17444685101509094, 'timestamp': '2025-09-10 02:33:21.600035', 'step': 6204, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 2847885091968}, 'timestamp': '2025-09-10 02:33:21.630319', 'step': 6204, 'epoch': 1} {'type': 'loss', 'content': 0.1323404759168625, 'timestamp': '2025-09-10 02:33:21.632625', 'step': 6205, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 128], 'flops': 3797092544000}, 'timestamp': '2025-09-10 02:33:21.663192', 'step': 6205, 'epoch': 1} {'type': 'loss', 'content': 0.25384652614593506, 'timestamp': '2025-09-10 02:33:21.665484', 'step': 6206, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 3322488817984}, 'timestamp': '2025-09-10 02:33:21.696740', 'step': 6206, 'epoch': 1} {'type': 'loss', 'content': 0.13257460296154022, 'timestamp': '2025-09-10 02:33:21.699032', 'step': 6207, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 80], 'flops': 2373281365952}, 'timestamp': '2025-09-10 02:33:21.729025', 'step': 6207, 'epoch': 1} {'type': 'loss', 'content': 0.1376643031835556, 'timestamp': '2025-09-10 02:33:21.753249', 'step': 6208, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 128], 'flops': 3797092544000}, 'timestamp': '2025-09-10 02:33:21.784468', 'step': 6208, 'epoch': 1} {'type': 'loss', 'content': 0.19395793974399567, 'timestamp': '2025-09-10 02:33:21.787022', 'step': 6209, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 144], 'flops': 4271696270016}, 'timestamp': '2025-09-10 02:33:21.817419', 'step': 6209, 'epoch': 1} {'type': 'loss', 'content': 0.18409931659698486, 'timestamp': '2025-09-10 02:33:21.819626', 'step': 6210, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 3322488817984}, 'timestamp': '2025-09-10 02:33:21.854814', 'step': 6210, 'epoch': 1} {'type': 'loss', 'content': 0.19548220932483673, 'timestamp': '2025-09-10 02:33:21.859423', 'step': 6211, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 128], 'flops': 3797092544000}, 'timestamp': '2025-09-10 02:33:21.896980', 'step': 6211, 'epoch': 1} {'type': 'loss', 'content': 0.1611323207616806, 'timestamp': '2025-09-10 02:33:21.922070', 'step': 6212, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 3322488817984}, 'timestamp': '2025-09-10 02:33:21.955548', 'step': 6212, 'epoch': 1} {'type': 'loss', 'content': 0.17079800367355347, 'timestamp': '2025-09-10 02:33:21.958632', 'step': 6213, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 144], 'flops': 4271696270016}, 'timestamp': '2025-09-10 02:33:21.989622', 'step': 6213, 'epoch': 1} {'type': 'loss', 'content': 0.1374315321445465, 'timestamp': '2025-09-10 02:33:21.992075', 'step': 6214, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 3322488817984}, 'timestamp': '2025-09-10 02:33:22.024028', 'step': 6214, 'epoch': 1} {'type': 'loss', 'content': 0.09780075401067734, 'timestamp': '2025-09-10 02:33:22.026340', 'step': 6215, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 2847885091968}, 'timestamp': '2025-09-10 02:33:22.056226', 'step': 6215, 'epoch': 1} {'type': 'loss', 'content': 0.1649329513311386, 'timestamp': '2025-09-10 02:33:22.079449', 'step': 6216, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 3322488817984}, 'timestamp': '2025-09-10 02:33:22.110648', 'step': 6216, 'epoch': 1} {'type': 'loss', 'content': 0.1645956039428711, 'timestamp': '2025-09-10 02:33:22.112856', 'step': 6217, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 128], 'flops': 3797092544000}, 'timestamp': '2025-09-10 02:33:22.143095', 'step': 6217, 'epoch': 1} {'type': 'loss', 'content': 0.25413817167282104, 'timestamp': '2025-09-10 02:33:22.145446', 'step': 6218, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 160], 'flops': 4746299996032}, 'timestamp': '2025-09-10 02:33:22.175388', 'step': 6218, 'epoch': 1} {'type': 'loss', 'content': 0.3323650360107422, 'timestamp': '2025-09-10 02:33:22.178164', 'step': 6219, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 160], 'flops': 4746299996032}, 'timestamp': '2025-09-10 02:33:22.208381', 'step': 6219, 'epoch': 1} {'type': 'loss', 'content': 0.11828938126564026, 'timestamp': '2025-09-10 02:33:22.231898', 'step': 6220, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 3322488817984}, 'timestamp': '2025-09-10 02:33:22.262229', 'step': 6220, 'epoch': 1} {'type': 'loss', 'content': 0.14211924374103546, 'timestamp': '2025-09-10 02:33:22.264517', 'step': 6221, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 80], 'flops': 2373281365952}, 'timestamp': '2025-09-10 02:33:22.295025', 'step': 6221, 'epoch': 1} {'type': 'loss', 'content': 0.13910990953445435, 'timestamp': '2025-09-10 02:33:22.297414', 'step': 6222, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 3322488817984}, 'timestamp': '2025-09-10 02:33:22.327585', 'step': 6222, 'epoch': 1} {'type': 'loss', 'content': 0.10862507671117783, 'timestamp': '2025-09-10 02:33:22.329975', 'step': 6223, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 3322488817984}, 'timestamp': '2025-09-10 02:33:22.360376', 'step': 6223, 'epoch': 1} {'type': 'loss', 'content': 0.11014144867658615, 'timestamp': '2025-09-10 02:33:22.384379', 'step': 6224, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 2847885091968}, 'timestamp': '2025-09-10 02:33:22.417083', 'step': 6224, 'epoch': 1} {'type': 'loss', 'content': 0.10878297686576843, 'timestamp': '2025-09-10 02:33:22.420806', 'step': 6225, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 2847885091968}, 'timestamp': '2025-09-10 02:33:22.450932', 'step': 6225, 'epoch': 1} {'type': 'loss', 'content': 0.1421109288930893, 'timestamp': '2025-09-10 02:33:22.453252', 'step': 6226, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 3322488817984}, 'timestamp': '2025-09-10 02:33:22.483188', 'step': 6226, 'epoch': 1} {'type': 'loss', 'content': 0.20565086603164673, 'timestamp': '2025-09-10 02:33:22.485570', 'step': 6227, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 2847885091968}, 'timestamp': '2025-09-10 02:33:22.516223', 'step': 6227, 'epoch': 1} {'type': 'loss', 'content': 0.2870606482028961, 'timestamp': '2025-09-10 02:33:22.539818', 'step': 6228, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 3322488817984}, 'timestamp': '2025-09-10 02:33:22.570150', 'step': 6228, 'epoch': 1} {'type': 'loss', 'content': 0.11790303885936737, 'timestamp': '2025-09-10 02:33:22.572306', 'step': 6229, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 3322488817984}, 'timestamp': '2025-09-10 02:33:22.603450', 'step': 6229, 'epoch': 1} {'type': 'loss', 'content': 0.14575490355491638, 'timestamp': '2025-09-10 02:33:22.606444', 'step': 6230, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 128], 'flops': 3797092544000}, 'timestamp': '2025-09-10 02:33:22.637738', 'step': 6230, 'epoch': 1} {'type': 'loss', 'content': 0.11525605618953705, 'timestamp': '2025-09-10 02:33:22.639935', 'step': 6231, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 2847885091968}, 'timestamp': '2025-09-10 02:33:22.669959', 'step': 6231, 'epoch': 1} {'type': 'loss', 'content': 0.2553168535232544, 'timestamp': '2025-09-10 02:33:22.693629', 'step': 6232, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 3322488817984}, 'timestamp': '2025-09-10 02:33:22.728139', 'step': 6232, 'epoch': 1} {'type': 'loss', 'content': 0.10510776937007904, 'timestamp': '2025-09-10 02:33:22.730407', 'step': 6233, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 3322488817984}, 'timestamp': '2025-09-10 02:33:22.761089', 'step': 6233, 'epoch': 1} {'type': 'loss', 'content': 0.1493488997220993, 'timestamp': '2025-09-10 02:33:22.763569', 'step': 6234, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 144], 'flops': 4271696270016}, 'timestamp': '2025-09-10 02:33:22.794931', 'step': 6234, 'epoch': 1} {'type': 'loss', 'content': 0.1751447170972824, 'timestamp': '2025-09-10 02:33:22.797317', 'step': 6235, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 3322488817984}, 'timestamp': '2025-09-10 02:33:22.835844', 'step': 6235, 'epoch': 1} {'type': 'loss', 'content': 0.1344456672668457, 'timestamp': '2025-09-10 02:33:22.861612', 'step': 6236, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 2847885091968}, 'timestamp': '2025-09-10 02:33:22.893096', 'step': 6236, 'epoch': 1} {'type': 'loss', 'content': 0.13870543241500854, 'timestamp': '2025-09-10 02:33:22.895408', 'step': 6237, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 144], 'flops': 4271696270016}, 'timestamp': '2025-09-10 02:33:22.926753', 'step': 6237, 'epoch': 1} {'type': 'loss', 'content': 0.09162019938230515, 'timestamp': '2025-09-10 02:33:22.929170', 'step': 6238, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 3322488817984}, 'timestamp': '2025-09-10 02:33:22.960811', 'step': 6238, 'epoch': 1} {'type': 'loss', 'content': 0.25156110525131226, 'timestamp': '2025-09-10 02:33:22.963129', 'step': 6239, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 2847885091968}, 'timestamp': '2025-09-10 02:33:22.993517', 'step': 6239, 'epoch': 1} {'type': 'loss', 'content': 0.18250411748886108, 'timestamp': '2025-09-10 02:33:23.016991', 'step': 6240, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 144], 'flops': 4271696270016}, 'timestamp': '2025-09-10 02:33:23.049619', 'step': 6240, 'epoch': 1} {'type': 'loss', 'content': 0.1646886020898819, 'timestamp': '2025-09-10 02:33:23.052008', 'step': 6241, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 2847885091968}, 'timestamp': '2025-09-10 02:33:23.082455', 'step': 6241, 'epoch': 1} {'type': 'loss', 'content': 0.2477530837059021, 'timestamp': '2025-09-10 02:33:23.085804', 'step': 6242, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 128], 'flops': 3797092544000}, 'timestamp': '2025-09-10 02:33:23.115792', 'step': 6242, 'epoch': 1} {'type': 'loss', 'content': 0.13103367388248444, 'timestamp': '2025-09-10 02:33:23.117984', 'step': 6243, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 144], 'flops': 4271696270016}, 'timestamp': '2025-09-10 02:33:23.147728', 'step': 6243, 'epoch': 1} {'type': 'loss', 'content': 0.18947161734104156, 'timestamp': '2025-09-10 02:33:23.171536', 'step': 6244, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 2847885091968}, 'timestamp': '2025-09-10 02:33:23.202195', 'step': 6244, 'epoch': 1} {'type': 'loss', 'content': 0.0501888170838356, 'timestamp': '2025-09-10 02:33:23.204245', 'step': 6245, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 144], 'flops': 4271696270016}, 'timestamp': '2025-09-10 02:33:23.235559', 'step': 6245, 'epoch': 1} {'type': 'loss', 'content': 0.10688871890306473, 'timestamp': '2025-09-10 02:33:23.238094', 'step': 6246, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 144], 'flops': 4271696270016}, 'timestamp': '2025-09-10 02:33:23.268446', 'step': 6246, 'epoch': 1} {'type': 'loss', 'content': 0.22742536664009094, 'timestamp': '2025-09-10 02:33:23.270814', 'step': 6247, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 3322488817984}, 'timestamp': '2025-09-10 02:33:23.300963', 'step': 6247, 'epoch': 1} {'type': 'loss', 'content': 0.12401916086673737, 'timestamp': '2025-09-10 02:33:23.324888', 'step': 6248, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 128], 'flops': 3797092544000}, 'timestamp': '2025-09-10 02:33:23.354905', 'step': 6248, 'epoch': 1} {'type': 'loss', 'content': 0.15784350037574768, 'timestamp': '2025-09-10 02:33:23.357092', 'step': 6249, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 3322488817984}, 'timestamp': '2025-09-10 02:33:23.387675', 'step': 6249, 'epoch': 1} {'type': 'loss', 'content': 0.18478107452392578, 'timestamp': '2025-09-10 02:33:23.391088', 'step': 6250, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 3322488817984}, 'timestamp': '2025-09-10 02:33:23.421074', 'step': 6250, 'epoch': 1} {'type': 'loss', 'content': 0.12845155596733093, 'timestamp': '2025-09-10 02:33:23.423274', 'step': 6251, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 128], 'flops': 3797092544000}, 'timestamp': '2025-09-10 02:33:23.453520', 'step': 6251, 'epoch': 1} {'type': 'loss', 'content': 0.1171618327498436, 'timestamp': '2025-09-10 02:33:23.476960', 'step': 6252, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 2847885091968}, 'timestamp': '2025-09-10 02:33:23.507027', 'step': 6252, 'epoch': 1} {'type': 'loss', 'content': 0.08999467641115189, 'timestamp': '2025-09-10 02:33:23.509310', 'step': 6253, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 3322488817984}, 'timestamp': '2025-09-10 02:33:23.539122', 'step': 6253, 'epoch': 1} {'type': 'loss', 'content': 0.22507227957248688, 'timestamp': '2025-09-10 02:33:23.541357', 'step': 6254, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 2847885091968}, 'timestamp': '2025-09-10 02:33:23.571176', 'step': 6254, 'epoch': 1} {'type': 'loss', 'content': 0.30789071321487427, 'timestamp': '2025-09-10 02:33:23.573692', 'step': 6255, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 176], 'flops': 5220903722048}, 'timestamp': '2025-09-10 02:33:23.605071', 'step': 6255, 'epoch': 1} {'type': 'loss', 'content': 0.12550416588783264, 'timestamp': '2025-09-10 02:33:23.630072', 'step': 6256, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 128], 'flops': 3797092544000}, 'timestamp': '2025-09-10 02:33:23.660359', 'step': 6256, 'epoch': 1} {'type': 'loss', 'content': 0.1150038093328476, 'timestamp': '2025-09-10 02:33:23.662670', 'step': 6257, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 128], 'flops': 3797092544000}, 'timestamp': '2025-09-10 02:33:23.692554', 'step': 6257, 'epoch': 1} {'type': 'loss', 'content': 0.13956023752689362, 'timestamp': '2025-09-10 02:33:23.695765', 'step': 6258, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 176], 'flops': 5220903722048}, 'timestamp': '2025-09-10 02:33:23.726930', 'step': 6258, 'epoch': 1} {'type': 'loss', 'content': 0.18672959506511688, 'timestamp': '2025-09-10 02:33:23.730679', 'step': 6259, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 3322488817984}, 'timestamp': '2025-09-10 02:33:23.760857', 'step': 6259, 'epoch': 1} {'type': 'loss', 'content': 0.16754566133022308, 'timestamp': '2025-09-10 02:33:23.784224', 'step': 6260, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 3322488817984}, 'timestamp': '2025-09-10 02:33:23.815032', 'step': 6260, 'epoch': 1} {'type': 'loss', 'content': 0.0750623568892479, 'timestamp': '2025-09-10 02:33:23.817472', 'step': 6261, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 3322488817984}, 'timestamp': '2025-09-10 02:33:23.848513', 'step': 6261, 'epoch': 1} {'type': 'loss', 'content': 0.11838475614786148, 'timestamp': '2025-09-10 02:33:23.850576', 'step': 6262, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 2847885091968}, 'timestamp': '2025-09-10 02:33:23.881295', 'step': 6262, 'epoch': 1} {'type': 'loss', 'content': 0.15508830547332764, 'timestamp': '2025-09-10 02:33:23.883493', 'step': 6263, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 128], 'flops': 3797092544000}, 'timestamp': '2025-09-10 02:33:23.913770', 'step': 6263, 'epoch': 1} {'type': 'loss', 'content': 0.1535453051328659, 'timestamp': '2025-09-10 02:33:23.937372', 'step': 6264, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 3322488817984}, 'timestamp': '2025-09-10 02:33:23.968701', 'step': 6264, 'epoch': 1} {'type': 'loss', 'content': 0.13821275532245636, 'timestamp': '2025-09-10 02:33:23.970704', 'step': 6265, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 2847885091968}, 'timestamp': '2025-09-10 02:33:24.000372', 'step': 6265, 'epoch': 1} {'type': 'loss', 'content': 0.2061542570590973, 'timestamp': '2025-09-10 02:33:24.002513', 'step': 6266, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 3322488817984}, 'timestamp': '2025-09-10 02:33:24.032623', 'step': 6266, 'epoch': 1} {'type': 'loss', 'content': 0.14287663996219635, 'timestamp': '2025-09-10 02:33:24.034909', 'step': 6267, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 3322488817984}, 'timestamp': '2025-09-10 02:33:24.064887', 'step': 6267, 'epoch': 1} {'type': 'loss', 'content': 0.14857719838619232, 'timestamp': '2025-09-10 02:33:24.088977', 'step': 6268, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 3322488817984}, 'timestamp': '2025-09-10 02:33:24.119646', 'step': 6268, 'epoch': 1} {'type': 'loss', 'content': 0.16218170523643494, 'timestamp': '2025-09-10 02:33:24.122026', 'step': 6269, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 144], 'flops': 4271696270016}, 'timestamp': '2025-09-10 02:33:24.153366', 'step': 6269, 'epoch': 1} {'type': 'loss', 'content': 0.1626780778169632, 'timestamp': '2025-09-10 02:33:24.155715', 'step': 6270, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 144], 'flops': 4271696270016}, 'timestamp': '2025-09-10 02:33:24.186355', 'step': 6270, 'epoch': 1} {'type': 'loss', 'content': 0.19880807399749756, 'timestamp': '2025-09-10 02:33:24.188708', 'step': 6271, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 3322488817984}, 'timestamp': '2025-09-10 02:33:24.219789', 'step': 6271, 'epoch': 1} {'type': 'loss', 'content': 0.1445290446281433, 'timestamp': '2025-09-10 02:33:24.243386', 'step': 6272, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 2847885091968}, 'timestamp': '2025-09-10 02:33:24.274361', 'step': 6272, 'epoch': 1} {'type': 'loss', 'content': 0.11293486505746841, 'timestamp': '2025-09-10 02:33:24.276728', 'step': 6273, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 144], 'flops': 4271696270016}, 'timestamp': '2025-09-10 02:33:24.306585', 'step': 6273, 'epoch': 1} {'type': 'loss', 'content': 0.16476714611053467, 'timestamp': '2025-09-10 02:33:24.309141', 'step': 6274, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 128], 'flops': 3797092544000}, 'timestamp': '2025-09-10 02:33:24.338854', 'step': 6274, 'epoch': 1} {'type': 'loss', 'content': 0.1293657124042511, 'timestamp': '2025-09-10 02:33:24.341866', 'step': 6275, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 3322488817984}, 'timestamp': '2025-09-10 02:33:24.372639', 'step': 6275, 'epoch': 1} {'type': 'loss', 'content': 0.23279644548892975, 'timestamp': '2025-09-10 02:33:24.396029', 'step': 6276, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 2847885091968}, 'timestamp': '2025-09-10 02:33:24.427481', 'step': 6276, 'epoch': 1} {'type': 'loss', 'content': 0.136946901679039, 'timestamp': '2025-09-10 02:33:24.431994', 'step': 6277, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 3322488817984}, 'timestamp': '2025-09-10 02:33:24.465434', 'step': 6277, 'epoch': 1} {'type': 'loss', 'content': 0.20097319781780243, 'timestamp': '2025-09-10 02:33:24.467728', 'step': 6278, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 2847885091968}, 'timestamp': '2025-09-10 02:33:24.497438', 'step': 6278, 'epoch': 1} {'type': 'loss', 'content': 0.18876171112060547, 'timestamp': '2025-09-10 02:33:24.499284', 'step': 6279, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 160], 'flops': 4746299996032}, 'timestamp': '2025-09-10 02:33:24.531190', 'step': 6279, 'epoch': 1} {'type': 'loss', 'content': 0.13210828602313995, 'timestamp': '2025-09-10 02:33:24.554609', 'step': 6280, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 2847885091968}, 'timestamp': '2025-09-10 02:33:24.586030', 'step': 6280, 'epoch': 1} {'type': 'loss', 'content': 0.15747778117656708, 'timestamp': '2025-09-10 02:33:24.589185', 'step': 6281, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 3322488817984}, 'timestamp': '2025-09-10 02:33:24.619270', 'step': 6281, 'epoch': 1} {'type': 'loss', 'content': 0.17147812247276306, 'timestamp': '2025-09-10 02:33:24.621221', 'step': 6282, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 2847885091968}, 'timestamp': '2025-09-10 02:33:24.650732', 'step': 6282, 'epoch': 1} {'type': 'loss', 'content': 0.30196577310562134, 'timestamp': '2025-09-10 02:33:24.653069', 'step': 6283, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 128], 'flops': 3797092544000}, 'timestamp': '2025-09-10 02:33:24.683037', 'step': 6283, 'epoch': 1} {'type': 'loss', 'content': 0.1340702772140503, 'timestamp': '2025-09-10 02:33:24.707163', 'step': 6284, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 2847885091968}, 'timestamp': '2025-09-10 02:33:24.738290', 'step': 6284, 'epoch': 1} {'type': 'loss', 'content': 0.15527553856372833, 'timestamp': '2025-09-10 02:33:24.741096', 'step': 6285, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 2847885091968}, 'timestamp': '2025-09-10 02:33:24.772045', 'step': 6285, 'epoch': 1} {'type': 'loss', 'content': 0.16397914290428162, 'timestamp': '2025-09-10 02:33:24.774247', 'step': 6286, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 3322488817984}, 'timestamp': '2025-09-10 02:33:24.810858', 'step': 6286, 'epoch': 1} {'type': 'loss', 'content': 0.16132842004299164, 'timestamp': '2025-09-10 02:33:24.812972', 'step': 6287, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 144], 'flops': 4271696270016}, 'timestamp': '2025-09-10 02:33:24.842798', 'step': 6287, 'epoch': 1} {'type': 'loss', 'content': 0.22413459420204163, 'timestamp': '2025-09-10 02:33:24.866113', 'step': 6288, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 3322488817984}, 'timestamp': '2025-09-10 02:33:24.897402', 'step': 6288, 'epoch': 1} {'type': 'loss', 'content': 0.18081331253051758, 'timestamp': '2025-09-10 02:33:24.899677', 'step': 6289, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 3322488817984}, 'timestamp': '2025-09-10 02:33:24.930249', 'step': 6289, 'epoch': 1} {'type': 'loss', 'content': 0.11255233734846115, 'timestamp': '2025-09-10 02:33:24.932369', 'step': 6290, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 144], 'flops': 4271696270016}, 'timestamp': '2025-09-10 02:33:24.962266', 'step': 6290, 'epoch': 1} {'type': 'loss', 'content': 0.24533866345882416, 'timestamp': '2025-09-10 02:33:24.964797', 'step': 6291, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 3322488817984}, 'timestamp': '2025-09-10 02:33:24.994327', 'step': 6291, 'epoch': 1} {'type': 'loss', 'content': 0.23644520342350006, 'timestamp': '2025-09-10 02:33:25.018116', 'step': 6292, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 80], 'flops': 2373281365952}, 'timestamp': '2025-09-10 02:33:25.049101', 'step': 6292, 'epoch': 1} {'type': 'loss', 'content': 0.10096986591815948, 'timestamp': '2025-09-10 02:33:25.051394', 'step': 6293, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 3322488817984}, 'timestamp': '2025-09-10 02:33:25.082146', 'step': 6293, 'epoch': 1} {'type': 'loss', 'content': 0.31973719596862793, 'timestamp': '2025-09-10 02:33:25.084392', 'step': 6294, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 144], 'flops': 4271696270016}, 'timestamp': '2025-09-10 02:33:25.114562', 'step': 6294, 'epoch': 1} {'type': 'loss', 'content': 0.2134213000535965, 'timestamp': '2025-09-10 02:33:25.118134', 'step': 6295, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 128], 'flops': 3797092544000}, 'timestamp': '2025-09-10 02:33:25.148405', 'step': 6295, 'epoch': 1} {'type': 'loss', 'content': 0.10008862614631653, 'timestamp': '2025-09-10 02:33:25.172076', 'step': 6296, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 144], 'flops': 4271696270016}, 'timestamp': '2025-09-10 02:33:25.203211', 'step': 6296, 'epoch': 1} {'type': 'loss', 'content': 0.12319539487361908, 'timestamp': '2025-09-10 02:33:25.205232', 'step': 6297, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 128], 'flops': 3797092544000}, 'timestamp': '2025-09-10 02:33:25.237670', 'step': 6297, 'epoch': 1} {'type': 'loss', 'content': 0.18175165355205536, 'timestamp': '2025-09-10 02:33:25.240118', 'step': 6298, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 144], 'flops': 4271696270016}, 'timestamp': '2025-09-10 02:33:25.270942', 'step': 6298, 'epoch': 1} {'type': 'loss', 'content': 0.1441674530506134, 'timestamp': '2025-09-10 02:33:25.273489', 'step': 6299, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 2847885091968}, 'timestamp': '2025-09-10 02:33:25.303015', 'step': 6299, 'epoch': 1} {'type': 'loss', 'content': 0.20380206406116486, 'timestamp': '2025-09-10 02:33:25.326423', 'step': 6300, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 2847885091968}, 'timestamp': '2025-09-10 02:33:25.356594', 'step': 6300, 'epoch': 1} {'type': 'loss', 'content': 0.1977388709783554, 'timestamp': '2025-09-10 02:33:25.358637', 'step': 6301, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 2847885091968}, 'timestamp': '2025-09-10 02:33:25.388700', 'step': 6301, 'epoch': 1} {'type': 'loss', 'content': 0.11341416090726852, 'timestamp': '2025-09-10 02:33:25.390586', 'step': 6302, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 144], 'flops': 4271696270016}, 'timestamp': '2025-09-10 02:33:25.420714', 'step': 6302, 'epoch': 1} {'type': 'loss', 'content': 0.10973788052797318, 'timestamp': '2025-09-10 02:33:25.422992', 'step': 6303, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 128], 'flops': 3797092544000}, 'timestamp': '2025-09-10 02:33:25.453754', 'step': 6303, 'epoch': 1} {'type': 'loss', 'content': 0.150999054312706, 'timestamp': '2025-09-10 02:33:25.478071', 'step': 6304, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 160], 'flops': 4746299996032}, 'timestamp': '2025-09-10 02:33:25.508593', 'step': 6304, 'epoch': 1} {'type': 'loss', 'content': 0.1137416735291481, 'timestamp': '2025-09-10 02:33:25.510978', 'step': 6305, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 2847885091968}, 'timestamp': '2025-09-10 02:33:25.541696', 'step': 6305, 'epoch': 1} {'type': 'loss', 'content': 0.15601184964179993, 'timestamp': '2025-09-10 02:33:25.544384', 'step': 6306, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 2847885091968}, 'timestamp': '2025-09-10 02:33:25.574607', 'step': 6306, 'epoch': 1} {'type': 'loss', 'content': 0.1826048344373703, 'timestamp': '2025-09-10 02:33:25.576851', 'step': 6307, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 192], 'flops': 5695507448064}, 'timestamp': '2025-09-10 02:33:25.606554', 'step': 6307, 'epoch': 1} {'type': 'loss', 'content': 0.06739901751279831, 'timestamp': '2025-09-10 02:33:25.632070', 'step': 6308, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 2847885091968}, 'timestamp': '2025-09-10 02:33:25.661861', 'step': 6308, 'epoch': 1} {'type': 'loss', 'content': 0.10641761869192123, 'timestamp': '2025-09-10 02:33:25.664082', 'step': 6309, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 128], 'flops': 3797092544000}, 'timestamp': '2025-09-10 02:33:25.694060', 'step': 6309, 'epoch': 1} {'type': 'loss', 'content': 0.14602810144424438, 'timestamp': '2025-09-10 02:33:25.696357', 'step': 6310, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 3322488817984}, 'timestamp': '2025-09-10 02:33:25.726837', 'step': 6310, 'epoch': 1} {'type': 'loss', 'content': 0.13105140626430511, 'timestamp': '2025-09-10 02:33:25.728984', 'step': 6311, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 3322488817984}, 'timestamp': '2025-09-10 02:33:25.758358', 'step': 6311, 'epoch': 1} {'type': 'loss', 'content': 0.07814653217792511, 'timestamp': '2025-09-10 02:33:25.781987', 'step': 6312, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 128], 'flops': 3797092544000}, 'timestamp': '2025-09-10 02:33:25.812299', 'step': 6312, 'epoch': 1} {'type': 'loss', 'content': 0.14449238777160645, 'timestamp': '2025-09-10 02:33:25.814469', 'step': 6313, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 128], 'flops': 3797092544000}, 'timestamp': '2025-09-10 02:33:25.845350', 'step': 6313, 'epoch': 1} {'type': 'loss', 'content': 0.1287773847579956, 'timestamp': '2025-09-10 02:33:25.847631', 'step': 6314, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 3322488817984}, 'timestamp': '2025-09-10 02:33:25.878037', 'step': 6314, 'epoch': 1} {'type': 'loss', 'content': 0.21196292340755463, 'timestamp': '2025-09-10 02:33:25.880469', 'step': 6315, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 2847885091968}, 'timestamp': '2025-09-10 02:33:25.910809', 'step': 6315, 'epoch': 1} {'type': 'loss', 'content': 0.1911567747592926, 'timestamp': '2025-09-10 02:33:25.934355', 'step': 6316, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 3322488817984}, 'timestamp': '2025-09-10 02:33:25.965963', 'step': 6316, 'epoch': 1} {'type': 'loss', 'content': 0.20711159706115723, 'timestamp': '2025-09-10 02:33:25.968308', 'step': 6317, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 2847885091968}, 'timestamp': '2025-09-10 02:33:25.998190', 'step': 6317, 'epoch': 1} {'type': 'loss', 'content': 0.28957197070121765, 'timestamp': '2025-09-10 02:33:26.000602', 'step': 6318, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 3322488817984}, 'timestamp': '2025-09-10 02:33:26.030822', 'step': 6318, 'epoch': 1} {'type': 'loss', 'content': 0.2615310251712799, 'timestamp': '2025-09-10 02:33:26.033131', 'step': 6319, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 144], 'flops': 4271696270016}, 'timestamp': '2025-09-10 02:33:26.063352', 'step': 6319, 'epoch': 1} {'type': 'loss', 'content': 0.13043928146362305, 'timestamp': '2025-09-10 02:33:26.087034', 'step': 6320, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 3322488817984}, 'timestamp': '2025-09-10 02:33:26.117732', 'step': 6320, 'epoch': 1} {'type': 'loss', 'content': 0.1591884046792984, 'timestamp': '2025-09-10 02:33:26.120001', 'step': 6321, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 128], 'flops': 3797092544000}, 'timestamp': '2025-09-10 02:33:26.152761', 'step': 6321, 'epoch': 1} {'type': 'loss', 'content': 0.24059395492076874, 'timestamp': '2025-09-10 02:33:26.155151', 'step': 6322, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 144], 'flops': 4271696270016}, 'timestamp': '2025-09-10 02:33:26.185434', 'step': 6322, 'epoch': 1} {'type': 'loss', 'content': 0.12876807153224945, 'timestamp': '2025-09-10 02:33:26.187791', 'step': 6323, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 3322488817984}, 'timestamp': '2025-09-10 02:33:26.219081', 'step': 6323, 'epoch': 1} {'type': 'loss', 'content': 0.10946253687143326, 'timestamp': '2025-09-10 02:33:26.242726', 'step': 6324, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 2847885091968}, 'timestamp': '2025-09-10 02:33:26.274167', 'step': 6324, 'epoch': 1} {'type': 'loss', 'content': 0.1717958301305771, 'timestamp': '2025-09-10 02:33:26.276453', 'step': 6325, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 144], 'flops': 4271696270016}, 'timestamp': '2025-09-10 02:33:26.306211', 'step': 6325, 'epoch': 1} {'type': 'loss', 'content': 0.13554078340530396, 'timestamp': '2025-09-10 02:33:26.308572', 'step': 6326, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 128], 'flops': 3797092544000}, 'timestamp': '2025-09-10 02:33:26.338844', 'step': 6326, 'epoch': 1} {'type': 'loss', 'content': 0.10267948359251022, 'timestamp': '2025-09-10 02:33:26.341282', 'step': 6327, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 3322488817984}, 'timestamp': '2025-09-10 02:33:26.371824', 'step': 6327, 'epoch': 1} {'type': 'loss', 'content': 0.1298380196094513, 'timestamp': '2025-09-10 02:33:26.395298', 'step': 6328, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 128], 'flops': 3797092544000}, 'timestamp': '2025-09-10 02:33:26.426401', 'step': 6328, 'epoch': 1} {'type': 'loss', 'content': 0.1269867867231369, 'timestamp': '2025-09-10 02:33:26.429816', 'step': 6329, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 3322488817984}, 'timestamp': '2025-09-10 02:33:26.462665', 'step': 6329, 'epoch': 1} {'type': 'loss', 'content': 0.12801964581012726, 'timestamp': '2025-09-10 02:33:26.465127', 'step': 6330, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 3322488817984}, 'timestamp': '2025-09-10 02:33:26.496610', 'step': 6330, 'epoch': 1} {'type': 'loss', 'content': 0.165410578250885, 'timestamp': '2025-09-10 02:33:26.498878', 'step': 6331, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 3322488817984}, 'timestamp': '2025-09-10 02:33:26.528864', 'step': 6331, 'epoch': 1} {'type': 'loss', 'content': 0.1370910108089447, 'timestamp': '2025-09-10 02:33:26.552747', 'step': 6332, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 160], 'flops': 4746299996032}, 'timestamp': '2025-09-10 02:33:26.583450', 'step': 6332, 'epoch': 1} {'type': 'loss', 'content': 0.09809651225805283, 'timestamp': '2025-09-10 02:33:26.585733', 'step': 6333, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 2847885091968}, 'timestamp': '2025-09-10 02:33:26.616380', 'step': 6333, 'epoch': 1} {'type': 'loss', 'content': 0.14964993298053741, 'timestamp': '2025-09-10 02:33:26.618738', 'step': 6334, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 3322488817984}, 'timestamp': '2025-09-10 02:33:26.648865', 'step': 6334, 'epoch': 1} {'type': 'loss', 'content': 0.1219855472445488, 'timestamp': '2025-09-10 02:33:26.650911', 'step': 6335, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 3322488817984}, 'timestamp': '2025-09-10 02:33:26.680567', 'step': 6335, 'epoch': 1} {'type': 'loss', 'content': 0.1940879374742508, 'timestamp': '2025-09-10 02:33:26.703826', 'step': 6336, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 144], 'flops': 4271696270016}, 'timestamp': '2025-09-10 02:33:26.734192', 'step': 6336, 'epoch': 1} {'type': 'loss', 'content': 0.11105953902006149, 'timestamp': '2025-09-10 02:33:26.736317', 'step': 6337, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 144], 'flops': 4271696270016}, 'timestamp': '2025-09-10 02:33:26.765909', 'step': 6337, 'epoch': 1} {'type': 'loss', 'content': 0.15879759192466736, 'timestamp': '2025-09-10 02:33:26.768414', 'step': 6338, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 144], 'flops': 4271696270016}, 'timestamp': '2025-09-10 02:33:26.798031', 'step': 6338, 'epoch': 1} {'type': 'loss', 'content': 0.18622750043869019, 'timestamp': '2025-09-10 02:33:26.800337', 'step': 6339, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 3322488817984}, 'timestamp': '2025-09-10 02:33:26.830372', 'step': 6339, 'epoch': 1} {'type': 'loss', 'content': 0.13101151585578918, 'timestamp': '2025-09-10 02:33:26.855038', 'step': 6340, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 3322488817984}, 'timestamp': '2025-09-10 02:33:26.885362', 'step': 6340, 'epoch': 1} {'type': 'loss', 'content': 0.1560051590204239, 'timestamp': '2025-09-10 02:33:26.887817', 'step': 6341, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 128], 'flops': 3797092544000}, 'timestamp': '2025-09-10 02:33:26.917667', 'step': 6341, 'epoch': 1} {'type': 'loss', 'content': 0.21449165046215057, 'timestamp': '2025-09-10 02:33:26.919976', 'step': 6342, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 2847885091968}, 'timestamp': '2025-09-10 02:33:26.949898', 'step': 6342, 'epoch': 1} {'type': 'loss', 'content': 0.22572451829910278, 'timestamp': '2025-09-10 02:33:26.952780', 'step': 6343, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 2847885091968}, 'timestamp': '2025-09-10 02:33:26.982430', 'step': 6343, 'epoch': 1} {'type': 'loss', 'content': 0.13235968351364136, 'timestamp': '2025-09-10 02:33:27.006258', 'step': 6344, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 128], 'flops': 3797092544000}, 'timestamp': '2025-09-10 02:33:27.036288', 'step': 6344, 'epoch': 1} {'type': 'loss', 'content': 0.2047753930091858, 'timestamp': '2025-09-10 02:33:27.038570', 'step': 6345, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 2847885091968}, 'timestamp': '2025-09-10 02:33:27.068524', 'step': 6345, 'epoch': 1} {'type': 'loss', 'content': 0.12616360187530518, 'timestamp': '2025-09-10 02:33:27.071062', 'step': 6346, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 2847885091968}, 'timestamp': '2025-09-10 02:33:27.101663', 'step': 6346, 'epoch': 1} {'type': 'loss', 'content': 0.1160740777850151, 'timestamp': '2025-09-10 02:33:27.103872', 'step': 6347, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 128], 'flops': 3797092544000}, 'timestamp': '2025-09-10 02:33:27.134002', 'step': 6347, 'epoch': 1} {'type': 'loss', 'content': 0.12291330844163895, 'timestamp': '2025-09-10 02:33:27.158546', 'step': 6348, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 3322488817984}, 'timestamp': '2025-09-10 02:33:27.188756', 'step': 6348, 'epoch': 1} {'type': 'loss', 'content': 0.1418859213590622, 'timestamp': '2025-09-10 02:33:27.191126', 'step': 6349, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 2847885091968}, 'timestamp': '2025-09-10 02:33:27.221273', 'step': 6349, 'epoch': 1} {'type': 'loss', 'content': 0.17507125437259674, 'timestamp': '2025-09-10 02:33:27.223539', 'step': 6350, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 128], 'flops': 3797092544000}, 'timestamp': '2025-09-10 02:33:27.252882', 'step': 6350, 'epoch': 1} {'type': 'loss', 'content': 0.1427484154701233, 'timestamp': '2025-09-10 02:33:27.255483', 'step': 6351, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 160], 'flops': 4746299996032}, 'timestamp': '2025-09-10 02:33:27.284930', 'step': 6351, 'epoch': 1} {'type': 'loss', 'content': 0.13932134211063385, 'timestamp': '2025-09-10 02:33:27.308403', 'step': 6352, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 2847885091968}, 'timestamp': '2025-09-10 02:33:27.338281', 'step': 6352, 'epoch': 1} {'type': 'loss', 'content': 0.1485213041305542, 'timestamp': '2025-09-10 02:33:27.341112', 'step': 6353, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 128], 'flops': 3797092544000}, 'timestamp': '2025-09-10 02:33:27.370884', 'step': 6353, 'epoch': 1} {'type': 'loss', 'content': 0.1457577794790268, 'timestamp': '2025-09-10 02:33:27.373243', 'step': 6354, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 128], 'flops': 3797092544000}, 'timestamp': '2025-09-10 02:33:27.402368', 'step': 6354, 'epoch': 1} {'type': 'loss', 'content': 0.1733027845621109, 'timestamp': '2025-09-10 02:33:27.404985', 'step': 6355, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 2847885091968}, 'timestamp': '2025-09-10 02:33:27.435704', 'step': 6355, 'epoch': 1} {'type': 'loss', 'content': 0.1504106968641281, 'timestamp': '2025-09-10 02:33:27.459174', 'step': 6356, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 160], 'flops': 4746299996032}, 'timestamp': '2025-09-10 02:33:27.489447', 'step': 6356, 'epoch': 1} {'type': 'loss', 'content': 0.14187294244766235, 'timestamp': '2025-09-10 02:33:27.496370', 'step': 6357, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 2847885091968}, 'timestamp': '2025-09-10 02:33:27.529536', 'step': 6357, 'epoch': 1} {'type': 'loss', 'content': 0.14532309770584106, 'timestamp': '2025-09-10 02:33:27.533263', 'step': 6358, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 144], 'flops': 4271696270016}, 'timestamp': '2025-09-10 02:33:27.575786', 'step': 6358, 'epoch': 1} {'type': 'loss', 'content': 0.12966307997703552, 'timestamp': '2025-09-10 02:33:27.578001', 'step': 6359, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 2847885091968}, 'timestamp': '2025-09-10 02:33:27.611072', 'step': 6359, 'epoch': 1} {'type': 'loss', 'content': 0.26126933097839355, 'timestamp': '2025-09-10 02:33:27.636165', 'step': 6360, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 128], 'flops': 3797092544000}, 'timestamp': '2025-09-10 02:33:27.667803', 'step': 6360, 'epoch': 1} {'type': 'loss', 'content': 0.09594468772411346, 'timestamp': '2025-09-10 02:33:27.670384', 'step': 6361, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 2847885091968}, 'timestamp': '2025-09-10 02:33:27.699885', 'step': 6361, 'epoch': 1} {'type': 'loss', 'content': 0.13111495971679688, 'timestamp': '2025-09-10 02:33:27.701609', 'step': 6362, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 2847885091968}, 'timestamp': '2025-09-10 02:33:27.731042', 'step': 6362, 'epoch': 1} {'type': 'loss', 'content': 0.12343142181634903, 'timestamp': '2025-09-10 02:33:27.733508', 'step': 6363, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 128], 'flops': 3797092544000}, 'timestamp': '2025-09-10 02:33:27.763666', 'step': 6363, 'epoch': 1} {'type': 'loss', 'content': 0.20467320084571838, 'timestamp': '2025-09-10 02:33:27.787565', 'step': 6364, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 3322488817984}, 'timestamp': '2025-09-10 02:33:27.817806', 'step': 6364, 'epoch': 1} {'type': 'loss', 'content': 0.17710363864898682, 'timestamp': '2025-09-10 02:33:27.820605', 'step': 6365, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 3322488817984}, 'timestamp': '2025-09-10 02:33:27.851960', 'step': 6365, 'epoch': 1} {'type': 'loss', 'content': 0.12867869436740875, 'timestamp': '2025-09-10 02:33:27.854319', 'step': 6366, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 2847885091968}, 'timestamp': '2025-09-10 02:33:27.884173', 'step': 6366, 'epoch': 1} {'type': 'loss', 'content': 0.11251750588417053, 'timestamp': '2025-09-10 02:33:27.886653', 'step': 6367, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 144], 'flops': 4271696270016}, 'timestamp': '2025-09-10 02:33:27.918699', 'step': 6367, 'epoch': 1} {'type': 'loss', 'content': 0.12364497035741806, 'timestamp': '2025-09-10 02:33:27.942082', 'step': 6368, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 80], 'flops': 2373281365952}, 'timestamp': '2025-09-10 02:33:27.977640', 'step': 6368, 'epoch': 1} {'type': 'loss', 'content': 0.16429036855697632, 'timestamp': '2025-09-10 02:33:27.980108', 'step': 6369, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 128], 'flops': 3797092544000}, 'timestamp': '2025-09-10 02:33:28.011410', 'step': 6369, 'epoch': 1} {'type': 'loss', 'content': 0.17426912486553192, 'timestamp': '2025-09-10 02:33:28.014296', 'step': 6370, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 3322488817984}, 'timestamp': '2025-09-10 02:33:28.045484', 'step': 6370, 'epoch': 1} {'type': 'loss', 'content': 0.15367330610752106, 'timestamp': '2025-09-10 02:33:28.052564', 'step': 6371, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 3322488817984}, 'timestamp': '2025-09-10 02:33:28.095055', 'step': 6371, 'epoch': 1} {'type': 'loss', 'content': 0.07622568309307098, 'timestamp': '2025-09-10 02:33:28.124288', 'step': 6372, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 3322488817984}, 'timestamp': '2025-09-10 02:33:28.161930', 'step': 6372, 'epoch': 1} {'type': 'loss', 'content': 0.18478940427303314, 'timestamp': '2025-09-10 02:33:28.164505', 'step': 6373, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 2847885091968}, 'timestamp': '2025-09-10 02:33:28.194999', 'step': 6373, 'epoch': 1} {'type': 'loss', 'content': 0.09558729827404022, 'timestamp': '2025-09-10 02:33:28.198447', 'step': 6374, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 2847885091968}, 'timestamp': '2025-09-10 02:33:28.232655', 'step': 6374, 'epoch': 1} {'type': 'loss', 'content': 0.14983850717544556, 'timestamp': '2025-09-10 02:33:28.235905', 'step': 6375, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 2847885091968}, 'timestamp': '2025-09-10 02:33:28.265812', 'step': 6375, 'epoch': 1} {'type': 'loss', 'content': 0.22095584869384766, 'timestamp': '2025-09-10 02:33:28.289342', 'step': 6376, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 128], 'flops': 3797092544000}, 'timestamp': '2025-09-10 02:33:28.320302', 'step': 6376, 'epoch': 1} {'type': 'loss', 'content': 0.17529737949371338, 'timestamp': '2025-09-10 02:33:28.325442', 'step': 6377, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 3322488817984}, 'timestamp': '2025-09-10 02:33:28.365365', 'step': 6377, 'epoch': 1} {'type': 'loss', 'content': 0.12216344475746155, 'timestamp': '2025-09-10 02:33:28.368850', 'step': 6378, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 80], 'flops': 2373281365952}, 'timestamp': '2025-09-10 02:33:28.404374', 'step': 6378, 'epoch': 1} {'type': 'loss', 'content': 0.16308048367500305, 'timestamp': '2025-09-10 02:33:28.409232', 'step': 6379, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 2847885091968}, 'timestamp': '2025-09-10 02:33:28.442455', 'step': 6379, 'epoch': 1} {'type': 'loss', 'content': 0.18344685435295105, 'timestamp': '2025-09-10 02:33:28.469780', 'step': 6380, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 3322488817984}, 'timestamp': '2025-09-10 02:33:28.501700', 'step': 6380, 'epoch': 1} {'type': 'loss', 'content': 0.10794713348150253, 'timestamp': '2025-09-10 02:33:28.505001', 'step': 6381, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 160], 'flops': 4746299996032}, 'timestamp': '2025-09-10 02:33:28.542891', 'step': 6381, 'epoch': 1} {'type': 'loss', 'content': 0.15307052433490753, 'timestamp': '2025-09-10 02:33:28.545066', 'step': 6382, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 2847885091968}, 'timestamp': '2025-09-10 02:33:28.586646', 'step': 6382, 'epoch': 1} {'type': 'loss', 'content': 0.12342612445354462, 'timestamp': '2025-09-10 02:33:28.589561', 'step': 6383, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 128], 'flops': 3797092544000}, 'timestamp': '2025-09-10 02:33:28.619616', 'step': 6383, 'epoch': 1} {'type': 'loss', 'content': 0.1053929477930069, 'timestamp': '2025-09-10 02:33:28.643118', 'step': 6384, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 3322488817984}, 'timestamp': '2025-09-10 02:33:28.677135', 'step': 6384, 'epoch': 1} {'type': 'loss', 'content': 0.20292915403842926, 'timestamp': '2025-09-10 02:33:28.679356', 'step': 6385, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 3322488817984}, 'timestamp': '2025-09-10 02:33:28.708777', 'step': 6385, 'epoch': 1} {'type': 'loss', 'content': 0.21842825412750244, 'timestamp': '2025-09-10 02:33:28.712040', 'step': 6386, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 144], 'flops': 4271696270016}, 'timestamp': '2025-09-10 02:33:28.743661', 'step': 6386, 'epoch': 1} {'type': 'loss', 'content': 0.21623796224594116, 'timestamp': '2025-09-10 02:33:28.746381', 'step': 6387, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 3322488817984}, 'timestamp': '2025-09-10 02:33:28.776695', 'step': 6387, 'epoch': 1} {'type': 'loss', 'content': 0.20712323486804962, 'timestamp': '2025-09-10 02:33:28.800205', 'step': 6388, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 2847885091968}, 'timestamp': '2025-09-10 02:33:28.831350', 'step': 6388, 'epoch': 1} {'type': 'loss', 'content': 0.16805966198444366, 'timestamp': '2025-09-10 02:33:28.834396', 'step': 6389, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 144], 'flops': 4271696270016}, 'timestamp': '2025-09-10 02:33:28.865135', 'step': 6389, 'epoch': 1} {'type': 'loss', 'content': 0.13402171432971954, 'timestamp': '2025-09-10 02:33:28.867924', 'step': 6390, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 2847885091968}, 'timestamp': '2025-09-10 02:33:28.904216', 'step': 6390, 'epoch': 1} {'type': 'loss', 'content': 0.1155913770198822, 'timestamp': '2025-09-10 02:33:28.906755', 'step': 6391, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 2847885091968}, 'timestamp': '2025-09-10 02:33:28.938088', 'step': 6391, 'epoch': 1} {'type': 'loss', 'content': 0.2332688868045807, 'timestamp': '2025-09-10 02:33:28.961524', 'step': 6392, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 3322488817984}, 'timestamp': '2025-09-10 02:33:28.993151', 'step': 6392, 'epoch': 1} {'type': 'loss', 'content': 0.1335381120443344, 'timestamp': '2025-09-10 02:33:28.996104', 'step': 6393, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 2847885091968}, 'timestamp': '2025-09-10 02:33:29.026853', 'step': 6393, 'epoch': 1} {'type': 'loss', 'content': 0.17021942138671875, 'timestamp': '2025-09-10 02:33:29.029474', 'step': 6394, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 2847885091968}, 'timestamp': '2025-09-10 02:33:29.059711', 'step': 6394, 'epoch': 1} {'type': 'loss', 'content': 0.14867009222507477, 'timestamp': '2025-09-10 02:33:29.063950', 'step': 6395, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 144], 'flops': 4271696270016}, 'timestamp': '2025-09-10 02:33:29.095530', 'step': 6395, 'epoch': 1} {'type': 'loss', 'content': 0.21071679890155792, 'timestamp': '2025-09-10 02:33:29.120790', 'step': 6396, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 2847885091968}, 'timestamp': '2025-09-10 02:33:29.151606', 'step': 6396, 'epoch': 1} {'type': 'loss', 'content': 0.2184070497751236, 'timestamp': '2025-09-10 02:33:29.154255', 'step': 6397, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 3322488817984}, 'timestamp': '2025-09-10 02:33:29.185092', 'step': 6397, 'epoch': 1} {'type': 'loss', 'content': 0.17918679118156433, 'timestamp': '2025-09-10 02:33:29.187818', 'step': 6398, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 2847885091968}, 'timestamp': '2025-09-10 02:33:29.218525', 'step': 6398, 'epoch': 1} {'type': 'loss', 'content': 0.20367151498794556, 'timestamp': '2025-09-10 02:33:29.220931', 'step': 6399, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 3322488817984}, 'timestamp': '2025-09-10 02:33:29.251468', 'step': 6399, 'epoch': 1} {'type': 'loss', 'content': 0.11409218609333038, 'timestamp': '2025-09-10 02:33:29.275058', 'step': 6400, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 3322488817984}, 'timestamp': '2025-09-10 02:33:29.306588', 'step': 6400, 'epoch': 1} {'type': 'loss', 'content': 0.17407581210136414, 'timestamp': '2025-09-10 02:33:29.309368', 'step': 6401, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 3322488817984}, 'timestamp': '2025-09-10 02:33:29.340000', 'step': 6401, 'epoch': 1} {'type': 'loss', 'content': 0.1798572540283203, 'timestamp': '2025-09-10 02:33:29.342991', 'step': 6402, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 3322488817984}, 'timestamp': '2025-09-10 02:33:29.374002', 'step': 6402, 'epoch': 1} {'type': 'loss', 'content': 0.16513608396053314, 'timestamp': '2025-09-10 02:33:29.376521', 'step': 6403, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 128], 'flops': 3797092544000}, 'timestamp': '2025-09-10 02:33:29.407599', 'step': 6403, 'epoch': 1} {'type': 'loss', 'content': 0.1290450543165207, 'timestamp': '2025-09-10 02:33:29.431618', 'step': 6404, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 2847885091968}, 'timestamp': '2025-09-10 02:33:29.462929', 'step': 6404, 'epoch': 1} {'type': 'loss', 'content': 0.21248994767665863, 'timestamp': '2025-09-10 02:33:29.465345', 'step': 6405, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 3322488817984}, 'timestamp': '2025-09-10 02:33:29.498428', 'step': 6405, 'epoch': 1} {'type': 'loss', 'content': 0.07011755555868149, 'timestamp': '2025-09-10 02:33:29.501213', 'step': 6406, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 80], 'flops': 2373281365952}, 'timestamp': '2025-09-10 02:33:29.533121', 'step': 6406, 'epoch': 1} {'type': 'loss', 'content': 0.17513036727905273, 'timestamp': '2025-09-10 02:33:29.536092', 'step': 6407, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 128], 'flops': 3797092544000}, 'timestamp': '2025-09-10 02:33:29.568044', 'step': 6407, 'epoch': 1} {'type': 'loss', 'content': 0.10820456594228745, 'timestamp': '2025-09-10 02:33:29.592765', 'step': 6408, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 3322488817984}, 'timestamp': '2025-09-10 02:33:29.624084', 'step': 6408, 'epoch': 1} {'type': 'loss', 'content': 0.12672477960586548, 'timestamp': '2025-09-10 02:33:29.626503', 'step': 6409, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 3322488817984}, 'timestamp': '2025-09-10 02:33:29.656530', 'step': 6409, 'epoch': 1} {'type': 'loss', 'content': 0.13933609426021576, 'timestamp': '2025-09-10 02:33:29.658984', 'step': 6410, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 3322488817984}, 'timestamp': '2025-09-10 02:33:29.690308', 'step': 6410, 'epoch': 1} {'type': 'loss', 'content': 0.08238948881626129, 'timestamp': '2025-09-10 02:33:29.692715', 'step': 6411, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 2847885091968}, 'timestamp': '2025-09-10 02:33:29.722513', 'step': 6411, 'epoch': 1} {'type': 'loss', 'content': 0.12805302441120148, 'timestamp': '2025-09-10 02:33:29.748137', 'step': 6412, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 176], 'flops': 5220903722048}, 'timestamp': '2025-09-10 02:33:29.778718', 'step': 6412, 'epoch': 1} {'type': 'loss', 'content': 0.12271632254123688, 'timestamp': '2025-09-10 02:33:29.781140', 'step': 6413, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 144], 'flops': 4271696270016}, 'timestamp': '2025-09-10 02:33:29.811296', 'step': 6413, 'epoch': 1} {'type': 'loss', 'content': 0.1848893165588379, 'timestamp': '2025-09-10 02:33:29.814054', 'step': 6414, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 128], 'flops': 3797092544000}, 'timestamp': '2025-09-10 02:33:29.844543', 'step': 6414, 'epoch': 1} {'type': 'loss', 'content': 0.12818926572799683, 'timestamp': '2025-09-10 02:33:29.846864', 'step': 6415, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 3322488817984}, 'timestamp': '2025-09-10 02:33:29.876744', 'step': 6415, 'epoch': 1} {'type': 'loss', 'content': 0.12605613470077515, 'timestamp': '2025-09-10 02:33:29.900119', 'step': 6416, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 2847885091968}, 'timestamp': '2025-09-10 02:33:29.930521', 'step': 6416, 'epoch': 1} {'type': 'loss', 'content': 0.1155620887875557, 'timestamp': '2025-09-10 02:33:29.933350', 'step': 6417, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 3322488817984}, 'timestamp': '2025-09-10 02:33:29.965497', 'step': 6417, 'epoch': 1} {'type': 'loss', 'content': 0.08064092695713043, 'timestamp': '2025-09-10 02:33:29.968138', 'step': 6418, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 3322488817984}, 'timestamp': '2025-09-10 02:33:30.000838', 'step': 6418, 'epoch': 1} {'type': 'loss', 'content': 0.1741108000278473, 'timestamp': '2025-09-10 02:33:30.003255', 'step': 6419, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 3322488817984}, 'timestamp': '2025-09-10 02:33:30.033751', 'step': 6419, 'epoch': 1} {'type': 'loss', 'content': 0.18175122141838074, 'timestamp': '2025-09-10 02:33:30.057647', 'step': 6420, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 3322488817984}, 'timestamp': '2025-09-10 02:33:30.087885', 'step': 6420, 'epoch': 1} {'type': 'loss', 'content': 0.1605233997106552, 'timestamp': '2025-09-10 02:33:30.090544', 'step': 6421, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 3322488817984}, 'timestamp': '2025-09-10 02:33:30.121182', 'step': 6421, 'epoch': 1} {'type': 'loss', 'content': 0.23289614915847778, 'timestamp': '2025-09-10 02:33:30.123901', 'step': 6422, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 2847885091968}, 'timestamp': '2025-09-10 02:33:30.154387', 'step': 6422, 'epoch': 1} {'type': 'loss', 'content': 0.12835593521595, 'timestamp': '2025-09-10 02:33:30.156942', 'step': 6423, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 144], 'flops': 4271696270016}, 'timestamp': '2025-09-10 02:33:30.187542', 'step': 6423, 'epoch': 1} {'type': 'loss', 'content': 0.09920643270015717, 'timestamp': '2025-09-10 02:33:30.211552', 'step': 6424, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 2847885091968}, 'timestamp': '2025-09-10 02:33:30.243682', 'step': 6424, 'epoch': 1} {'type': 'loss', 'content': 0.04475247114896774, 'timestamp': '2025-09-10 02:33:30.246244', 'step': 6425, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 2847885091968}, 'timestamp': '2025-09-10 02:33:30.276672', 'step': 6425, 'epoch': 1} {'type': 'loss', 'content': 0.11521212011575699, 'timestamp': '2025-09-10 02:33:30.280355', 'step': 6426, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 2847885091968}, 'timestamp': '2025-09-10 02:33:30.310349', 'step': 6426, 'epoch': 1} {'type': 'loss', 'content': 0.14262743294239044, 'timestamp': '2025-09-10 02:33:30.312983', 'step': 6427, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 160], 'flops': 4746299996032}, 'timestamp': '2025-09-10 02:33:30.343995', 'step': 6427, 'epoch': 1} {'type': 'loss', 'content': 0.2031501978635788, 'timestamp': '2025-09-10 02:33:30.367566', 'step': 6428, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 3322488817984}, 'timestamp': '2025-09-10 02:33:30.398180', 'step': 6428, 'epoch': 1} {'type': 'loss', 'content': 0.12121482938528061, 'timestamp': '2025-09-10 02:33:30.400430', 'step': 6429, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 160], 'flops': 4746299996032}, 'timestamp': '2025-09-10 02:33:30.431144', 'step': 6429, 'epoch': 1} {'type': 'loss', 'content': 0.16645188629627228, 'timestamp': '2025-09-10 02:33:30.434177', 'step': 6430, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 3322488817984}, 'timestamp': '2025-09-10 02:33:30.469017', 'step': 6430, 'epoch': 1} {'type': 'loss', 'content': 0.16814491152763367, 'timestamp': '2025-09-10 02:33:30.473536', 'step': 6431, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 128], 'flops': 3797092544000}, 'timestamp': '2025-09-10 02:33:30.508293', 'step': 6431, 'epoch': 1} {'type': 'loss', 'content': 0.1823224425315857, 'timestamp': '2025-09-10 02:33:30.531927', 'step': 6432, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 2847885091968}, 'timestamp': '2025-09-10 02:33:30.562825', 'step': 6432, 'epoch': 1} {'type': 'loss', 'content': 0.15411502122879028, 'timestamp': '2025-09-10 02:33:30.565297', 'step': 6433, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 128], 'flops': 3797092544000}, 'timestamp': '2025-09-10 02:33:30.595519', 'step': 6433, 'epoch': 1} {'type': 'loss', 'content': 0.08663134276866913, 'timestamp': '2025-09-10 02:33:30.598345', 'step': 6434, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 2847885091968}, 'timestamp': '2025-09-10 02:33:30.629718', 'step': 6434, 'epoch': 1} {'type': 'loss', 'content': 0.18530651926994324, 'timestamp': '2025-09-10 02:33:30.632120', 'step': 6435, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 3322488817984}, 'timestamp': '2025-09-10 02:33:30.661937', 'step': 6435, 'epoch': 1} {'type': 'loss', 'content': 0.16471156477928162, 'timestamp': '2025-09-10 02:33:30.685636', 'step': 6436, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 2847885091968}, 'timestamp': '2025-09-10 02:33:30.716188', 'step': 6436, 'epoch': 1} {'type': 'loss', 'content': 0.16383028030395508, 'timestamp': '2025-09-10 02:33:30.718887', 'step': 6437, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 3322488817984}, 'timestamp': '2025-09-10 02:33:30.749646', 'step': 6437, 'epoch': 1} {'type': 'loss', 'content': 0.13023638725280762, 'timestamp': '2025-09-10 02:33:30.752247', 'step': 6438, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 3322488817984}, 'timestamp': '2025-09-10 02:33:30.784494', 'step': 6438, 'epoch': 1} {'type': 'loss', 'content': 0.1349882334470749, 'timestamp': '2025-09-10 02:33:30.786763', 'step': 6439, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 3322488817984}, 'timestamp': '2025-09-10 02:33:30.816936', 'step': 6439, 'epoch': 1} {'type': 'loss', 'content': 0.1598176658153534, 'timestamp': '2025-09-10 02:33:30.840645', 'step': 6440, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 2847885091968}, 'timestamp': '2025-09-10 02:33:30.870937', 'step': 6440, 'epoch': 1} {'type': 'loss', 'content': 0.16079609096050262, 'timestamp': '2025-09-10 02:33:30.873437', 'step': 6441, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 2847885091968}, 'timestamp': '2025-09-10 02:33:30.903557', 'step': 6441, 'epoch': 1} {'type': 'loss', 'content': 0.14669238030910492, 'timestamp': '2025-09-10 02:33:30.905977', 'step': 6442, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 2847885091968}, 'timestamp': '2025-09-10 02:33:30.937363', 'step': 6442, 'epoch': 1} {'type': 'loss', 'content': 0.08165421336889267, 'timestamp': '2025-09-10 02:33:30.939891', 'step': 6443, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 3322488817984}, 'timestamp': '2025-09-10 02:33:30.969813', 'step': 6443, 'epoch': 1} {'type': 'loss', 'content': 0.16157737374305725, 'timestamp': '2025-09-10 02:33:30.993597', 'step': 6444, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 128], 'flops': 3797092544000}, 'timestamp': '2025-09-10 02:33:31.024017', 'step': 6444, 'epoch': 1} {'type': 'loss', 'content': 0.17091453075408936, 'timestamp': '2025-09-10 02:33:31.026808', 'step': 6445, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 3322488817984}, 'timestamp': '2025-09-10 02:33:31.057388', 'step': 6445, 'epoch': 1} {'type': 'loss', 'content': 0.21690207719802856, 'timestamp': '2025-09-10 02:33:31.059683', 'step': 6446, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 128], 'flops': 3797092544000}, 'timestamp': '2025-09-10 02:33:31.089975', 'step': 6446, 'epoch': 1} {'type': 'loss', 'content': 0.2298431396484375, 'timestamp': '2025-09-10 02:33:31.092505', 'step': 6447, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 2847885091968}, 'timestamp': '2025-09-10 02:33:31.122970', 'step': 6447, 'epoch': 1} {'type': 'loss', 'content': 0.24656492471694946, 'timestamp': '2025-09-10 02:33:31.146602', 'step': 6448, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 3322488817984}, 'timestamp': '2025-09-10 02:33:31.177123', 'step': 6448, 'epoch': 1} {'type': 'loss', 'content': 0.14516350626945496, 'timestamp': '2025-09-10 02:33:31.179573', 'step': 6449, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 160], 'flops': 4746299996032}, 'timestamp': '2025-09-10 02:33:31.209965', 'step': 6449, 'epoch': 1} {'type': 'loss', 'content': 0.10476112365722656, 'timestamp': '2025-09-10 02:33:31.212847', 'step': 6450, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 3322488817984}, 'timestamp': '2025-09-10 02:33:31.243145', 'step': 6450, 'epoch': 1} {'type': 'loss', 'content': 0.1202094778418541, 'timestamp': '2025-09-10 02:33:31.245750', 'step': 6451, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 2847885091968}, 'timestamp': '2025-09-10 02:33:31.276735', 'step': 6451, 'epoch': 1} {'type': 'loss', 'content': 0.14000612497329712, 'timestamp': '2025-09-10 02:33:31.300228', 'step': 6452, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 3322488817984}, 'timestamp': '2025-09-10 02:33:31.331240', 'step': 6452, 'epoch': 1} {'type': 'loss', 'content': 0.15193046629428864, 'timestamp': '2025-09-10 02:33:31.333359', 'step': 6453, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 3322488817984}, 'timestamp': '2025-09-10 02:33:31.363262', 'step': 6453, 'epoch': 1} {'type': 'loss', 'content': 0.13335835933685303, 'timestamp': '2025-09-10 02:33:31.365764', 'step': 6454, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 160], 'flops': 4746299996032}, 'timestamp': '2025-09-10 02:33:31.397152', 'step': 6454, 'epoch': 1} {'type': 'loss', 'content': 0.1363525241613388, 'timestamp': '2025-09-10 02:33:31.399856', 'step': 6455, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 80], 'flops': 2373281365952}, 'timestamp': '2025-09-10 02:33:31.429799', 'step': 6455, 'epoch': 1} {'type': 'loss', 'content': 0.14207971096038818, 'timestamp': '2025-09-10 02:33:31.453743', 'step': 6456, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 2847885091968}, 'timestamp': '2025-09-10 02:33:31.490286', 'step': 6456, 'epoch': 1} {'type': 'loss', 'content': 0.13752639293670654, 'timestamp': '2025-09-10 02:33:31.494379', 'step': 6457, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 128], 'flops': 3797092544000}, 'timestamp': '2025-09-10 02:33:31.524588', 'step': 6457, 'epoch': 1} {'type': 'loss', 'content': 0.08730274438858032, 'timestamp': '2025-09-10 02:33:31.527103', 'step': 6458, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 3322488817984}, 'timestamp': '2025-09-10 02:33:31.557024', 'step': 6458, 'epoch': 1} {'type': 'loss', 'content': 0.173014298081398, 'timestamp': '2025-09-10 02:33:31.561512', 'step': 6459, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 128], 'flops': 3797092544000}, 'timestamp': '2025-09-10 02:33:31.592788', 'step': 6459, 'epoch': 1} {'type': 'loss', 'content': 0.13561704754829407, 'timestamp': '2025-09-10 02:33:31.617459', 'step': 6460, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 3322488817984}, 'timestamp': '2025-09-10 02:33:31.648890', 'step': 6460, 'epoch': 1} {'type': 'loss', 'content': 0.15682533383369446, 'timestamp': '2025-09-10 02:33:31.651327', 'step': 6461, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 144], 'flops': 4271696270016}, 'timestamp': '2025-09-10 02:33:31.680624', 'step': 6461, 'epoch': 1} {'type': 'loss', 'content': 0.09435994178056717, 'timestamp': '2025-09-10 02:33:31.683096', 'step': 6462, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 144], 'flops': 4271696270016}, 'timestamp': '2025-09-10 02:33:31.712883', 'step': 6462, 'epoch': 1} {'type': 'loss', 'content': 0.20996403694152832, 'timestamp': '2025-09-10 02:33:31.715379', 'step': 6463, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 128], 'flops': 3797092544000}, 'timestamp': '2025-09-10 02:33:31.745438', 'step': 6463, 'epoch': 1} {'type': 'loss', 'content': 0.17877155542373657, 'timestamp': '2025-09-10 02:33:31.768784', 'step': 6464, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 3322488817984}, 'timestamp': '2025-09-10 02:33:31.799595', 'step': 6464, 'epoch': 1} {'type': 'loss', 'content': 0.18748250603675842, 'timestamp': '2025-09-10 02:33:31.801601', 'step': 6465, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 3322488817984}, 'timestamp': '2025-09-10 02:33:31.830812', 'step': 6465, 'epoch': 1} {'type': 'loss', 'content': 0.09400652348995209, 'timestamp': '2025-09-10 02:33:31.833235', 'step': 6466, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 2847885091968}, 'timestamp': '2025-09-10 02:33:31.864005', 'step': 6466, 'epoch': 1} {'type': 'loss', 'content': 0.10893864184617996, 'timestamp': '2025-09-10 02:33:31.866254', 'step': 6467, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 3322488817984}, 'timestamp': '2025-09-10 02:33:31.895948', 'step': 6467, 'epoch': 1} {'type': 'loss', 'content': 0.2510603964328766, 'timestamp': '2025-09-10 02:33:31.921290', 'step': 6468, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 3322488817984}, 'timestamp': '2025-09-10 02:33:31.952569', 'step': 6468, 'epoch': 1} {'type': 'loss', 'content': 0.15922728180885315, 'timestamp': '2025-09-10 02:33:31.955346', 'step': 6469, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 3322488817984}, 'timestamp': '2025-09-10 02:33:31.989108', 'step': 6469, 'epoch': 1} {'type': 'loss', 'content': 0.08857260644435883, 'timestamp': '2025-09-10 02:33:31.993233', 'step': 6470, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 2847885091968}, 'timestamp': '2025-09-10 02:33:32.025178', 'step': 6470, 'epoch': 1} {'type': 'loss', 'content': 0.16722756624221802, 'timestamp': '2025-09-10 02:33:32.027581', 'step': 6471, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 3322488817984}, 'timestamp': '2025-09-10 02:33:32.057851', 'step': 6471, 'epoch': 1} {'type': 'loss', 'content': 0.07512620091438293, 'timestamp': '2025-09-10 02:33:32.084092', 'step': 6472, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 3322488817984}, 'timestamp': '2025-09-10 02:33:32.113989', 'step': 6472, 'epoch': 1} {'type': 'loss', 'content': 0.11758212000131607, 'timestamp': '2025-09-10 02:33:32.116645', 'step': 6473, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 2847885091968}, 'timestamp': '2025-09-10 02:33:32.147630', 'step': 6473, 'epoch': 1} {'type': 'loss', 'content': 0.10653399676084518, 'timestamp': '2025-09-10 02:33:32.150022', 'step': 6474, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 144], 'flops': 4271696270016}, 'timestamp': '2025-09-10 02:33:32.179553', 'step': 6474, 'epoch': 1} {'type': 'loss', 'content': 0.2102806270122528, 'timestamp': '2025-09-10 02:33:32.182079', 'step': 6475, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 2847885091968}, 'timestamp': '2025-09-10 02:33:32.211644', 'step': 6475, 'epoch': 1} {'type': 'loss', 'content': 0.1135374903678894, 'timestamp': '2025-09-10 02:33:32.235276', 'step': 6476, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 3322488817984}, 'timestamp': '2025-09-10 02:33:32.270468', 'step': 6476, 'epoch': 1} {'type': 'loss', 'content': 0.13043634593486786, 'timestamp': '2025-09-10 02:33:32.273869', 'step': 6477, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 3322488817984}, 'timestamp': '2025-09-10 02:33:32.306729', 'step': 6477, 'epoch': 1} {'type': 'loss', 'content': 0.14513815939426422, 'timestamp': '2025-09-10 02:33:32.309198', 'step': 6478, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 3322488817984}, 'timestamp': '2025-09-10 02:33:32.338721', 'step': 6478, 'epoch': 1} {'type': 'loss', 'content': 0.19966937601566315, 'timestamp': '2025-09-10 02:33:32.341134', 'step': 6479, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 3322488817984}, 'timestamp': '2025-09-10 02:33:32.371313', 'step': 6479, 'epoch': 1} {'type': 'loss', 'content': 0.1386263370513916, 'timestamp': '2025-09-10 02:33:32.394817', 'step': 6480, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 3322488817984}, 'timestamp': '2025-09-10 02:33:32.424859', 'step': 6480, 'epoch': 1} {'type': 'loss', 'content': 0.18357305228710175, 'timestamp': '2025-09-10 02:33:32.427227', 'step': 6481, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 2847885091968}, 'timestamp': '2025-09-10 02:33:32.457326', 'step': 6481, 'epoch': 1} {'type': 'loss', 'content': 0.14115679264068604, 'timestamp': '2025-09-10 02:33:32.460103', 'step': 6482, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 128], 'flops': 3797092544000}, 'timestamp': '2025-09-10 02:33:32.492890', 'step': 6482, 'epoch': 1} {'type': 'loss', 'content': 0.15291398763656616, 'timestamp': '2025-09-10 02:33:32.496659', 'step': 6483, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 3322488817984}, 'timestamp': '2025-09-10 02:33:32.526162', 'step': 6483, 'epoch': 1} {'type': 'loss', 'content': 0.11822380125522614, 'timestamp': '2025-09-10 02:33:32.549775', 'step': 6484, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 2847885091968}, 'timestamp': '2025-09-10 02:33:32.579931', 'step': 6484, 'epoch': 1} {'type': 'loss', 'content': 0.18521545827388763, 'timestamp': '2025-09-10 02:33:32.582339', 'step': 6485, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 160], 'flops': 4746299996032}, 'timestamp': '2025-09-10 02:33:32.612192', 'step': 6485, 'epoch': 1} {'type': 'loss', 'content': 0.12683357298374176, 'timestamp': '2025-09-10 02:33:32.615688', 'step': 6486, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 3322488817984}, 'timestamp': '2025-09-10 02:33:32.646902', 'step': 6486, 'epoch': 1} {'type': 'loss', 'content': 0.16176362335681915, 'timestamp': '2025-09-10 02:33:32.649128', 'step': 6487, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 80], 'flops': 2373281365952}, 'timestamp': '2025-09-10 02:33:32.678303', 'step': 6487, 'epoch': 1} {'type': 'loss', 'content': 0.11862290650606155, 'timestamp': '2025-09-10 02:33:32.702222', 'step': 6488, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 128], 'flops': 3797092544000}, 'timestamp': '2025-09-10 02:33:32.733511', 'step': 6488, 'epoch': 1} {'type': 'loss', 'content': 0.1487869918346405, 'timestamp': '2025-09-10 02:33:32.735859', 'step': 6489, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 2847885091968}, 'timestamp': '2025-09-10 02:33:32.765678', 'step': 6489, 'epoch': 1} {'type': 'loss', 'content': 0.1374748796224594, 'timestamp': '2025-09-10 02:33:32.768312', 'step': 6490, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 2847885091968}, 'timestamp': '2025-09-10 02:33:32.798173', 'step': 6490, 'epoch': 1} {'type': 'loss', 'content': 0.1345912218093872, 'timestamp': '2025-09-10 02:33:32.800544', 'step': 6491, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 144], 'flops': 4271696270016}, 'timestamp': '2025-09-10 02:33:32.830202', 'step': 6491, 'epoch': 1} {'type': 'loss', 'content': 0.13448524475097656, 'timestamp': '2025-09-10 02:33:32.854002', 'step': 6492, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 3322488817984}, 'timestamp': '2025-09-10 02:33:32.883719', 'step': 6492, 'epoch': 1} {'type': 'loss', 'content': 0.18485277891159058, 'timestamp': '2025-09-10 02:33:32.886277', 'step': 6493, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 2847885091968}, 'timestamp': '2025-09-10 02:33:32.915707', 'step': 6493, 'epoch': 1} {'type': 'loss', 'content': 0.15253056585788727, 'timestamp': '2025-09-10 02:33:32.918284', 'step': 6494, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 128], 'flops': 3797092544000}, 'timestamp': '2025-09-10 02:33:32.948357', 'step': 6494, 'epoch': 1} {'type': 'loss', 'content': 0.15677136182785034, 'timestamp': '2025-09-10 02:33:32.950487', 'step': 6495, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 3322488817984}, 'timestamp': '2025-09-10 02:33:32.979786', 'step': 6495, 'epoch': 1} {'type': 'loss', 'content': 0.15187065303325653, 'timestamp': '2025-09-10 02:33:33.003559', 'step': 6496, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 2847885091968}, 'timestamp': '2025-09-10 02:33:33.033550', 'step': 6496, 'epoch': 1} {'type': 'loss', 'content': 0.16464735567569733, 'timestamp': '2025-09-10 02:33:33.035916', 'step': 6497, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 128], 'flops': 3797092544000}, 'timestamp': '2025-09-10 02:33:33.066459', 'step': 6497, 'epoch': 1} {'type': 'loss', 'content': 0.16313530504703522, 'timestamp': '2025-09-10 02:33:33.068639', 'step': 6498, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 2847885091968}, 'timestamp': '2025-09-10 02:33:33.098432', 'step': 6498, 'epoch': 1} {'type': 'loss', 'content': 0.1983707696199417, 'timestamp': '2025-09-10 02:33:33.100508', 'step': 6499, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 144], 'flops': 4271696270016}, 'timestamp': '2025-09-10 02:33:33.131011', 'step': 6499, 'epoch': 1} {'type': 'loss', 'content': 0.2614513635635376, 'timestamp': '2025-09-10 02:33:33.154681', 'step': 6500, 'epoch': 1} {'type': 'info', 'content': 'Checkpoint saved at step 6500', 'timestamp': '2025-09-10 02:33:37.871776', 'step': 6500, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 3322488817984}, 'timestamp': '2025-09-10 02:33:37.908152', 'step': 6500, 'epoch': 1} {'type': 'loss', 'content': 0.209006205201149, 'timestamp': '2025-09-10 02:33:37.911386', 'step': 6501, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 144], 'flops': 4271696270016}, 'timestamp': '2025-09-10 02:33:37.942008', 'step': 6501, 'epoch': 1} {'type': 'loss', 'content': 0.18843556940555573, 'timestamp': '2025-09-10 02:33:37.944621', 'step': 6502, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 2847885091968}, 'timestamp': '2025-09-10 02:33:37.975285', 'step': 6502, 'epoch': 1} {'type': 'loss', 'content': 0.16501596570014954, 'timestamp': '2025-09-10 02:33:37.978850', 'step': 6503, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 128], 'flops': 3797092544000}, 'timestamp': '2025-09-10 02:33:38.008632', 'step': 6503, 'epoch': 1} {'type': 'loss', 'content': 0.19679975509643555, 'timestamp': '2025-09-10 02:33:38.032639', 'step': 6504, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 2847885091968}, 'timestamp': '2025-09-10 02:33:38.065786', 'step': 6504, 'epoch': 1} {'type': 'loss', 'content': 0.13475410640239716, 'timestamp': '2025-09-10 02:33:38.068195', 'step': 6505, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 2847885091968}, 'timestamp': '2025-09-10 02:33:38.097838', 'step': 6505, 'epoch': 1} {'type': 'loss', 'content': 0.08858266472816467, 'timestamp': '2025-09-10 02:33:38.100376', 'step': 6506, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 3322488817984}, 'timestamp': '2025-09-10 02:33:38.130633', 'step': 6506, 'epoch': 1} {'type': 'loss', 'content': 0.1471785008907318, 'timestamp': '2025-09-10 02:33:38.134585', 'step': 6507, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 3322488817984}, 'timestamp': '2025-09-10 02:33:38.167188', 'step': 6507, 'epoch': 1} {'type': 'loss', 'content': 0.16368259489536285, 'timestamp': '2025-09-10 02:33:38.190923', 'step': 6508, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 3322488817984}, 'timestamp': '2025-09-10 02:33:38.220589', 'step': 6508, 'epoch': 1} {'type': 'loss', 'content': 0.1678605079650879, 'timestamp': '2025-09-10 02:33:38.222940', 'step': 6509, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 2847885091968}, 'timestamp': '2025-09-10 02:33:38.252838', 'step': 6509, 'epoch': 1} {'type': 'loss', 'content': 0.11119476705789566, 'timestamp': '2025-09-10 02:33:38.256020', 'step': 6510, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 2847885091968}, 'timestamp': '2025-09-10 02:33:38.286791', 'step': 6510, 'epoch': 1} {'type': 'loss', 'content': 0.12833577394485474, 'timestamp': '2025-09-10 02:33:38.289490', 'step': 6511, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 2847885091968}, 'timestamp': '2025-09-10 02:33:38.320293', 'step': 6511, 'epoch': 1} {'type': 'loss', 'content': 0.18942047655582428, 'timestamp': '2025-09-10 02:33:38.343910', 'step': 6512, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 128], 'flops': 3797092544000}, 'timestamp': '2025-09-10 02:33:38.375113', 'step': 6512, 'epoch': 1} {'type': 'loss', 'content': 0.1046491414308548, 'timestamp': '2025-09-10 02:33:38.377766', 'step': 6513, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 2847885091968}, 'timestamp': '2025-09-10 02:33:38.409228', 'step': 6513, 'epoch': 1} {'type': 'loss', 'content': 0.16740185022354126, 'timestamp': '2025-09-10 02:33:38.411715', 'step': 6514, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 128], 'flops': 3797092544000}, 'timestamp': '2025-09-10 02:33:38.441784', 'step': 6514, 'epoch': 1} {'type': 'loss', 'content': 0.142877459526062, 'timestamp': '2025-09-10 02:33:38.444665', 'step': 6515, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 2847885091968}, 'timestamp': '2025-09-10 02:33:38.475024', 'step': 6515, 'epoch': 1} {'type': 'loss', 'content': 0.18722575902938843, 'timestamp': '2025-09-10 02:33:38.499508', 'step': 6516, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 2847885091968}, 'timestamp': '2025-09-10 02:33:38.530207', 'step': 6516, 'epoch': 1} {'type': 'loss', 'content': 0.10969963669776917, 'timestamp': '2025-09-10 02:33:38.532663', 'step': 6517, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 3322488817984}, 'timestamp': '2025-09-10 02:33:38.564230', 'step': 6517, 'epoch': 1} {'type': 'loss', 'content': 0.14823241531848907, 'timestamp': '2025-09-10 02:33:38.566582', 'step': 6518, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 2847885091968}, 'timestamp': '2025-09-10 02:33:38.596060', 'step': 6518, 'epoch': 1} {'type': 'loss', 'content': 0.1848827749490738, 'timestamp': '2025-09-10 02:33:38.598551', 'step': 6519, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 3322488817984}, 'timestamp': '2025-09-10 02:33:38.629239', 'step': 6519, 'epoch': 1} {'type': 'loss', 'content': 0.1290847659111023, 'timestamp': '2025-09-10 02:33:38.652754', 'step': 6520, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 3322488817984}, 'timestamp': '2025-09-10 02:33:38.689673', 'step': 6520, 'epoch': 1} {'type': 'loss', 'content': 0.11190316826105118, 'timestamp': '2025-09-10 02:33:38.695073', 'step': 6521, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 128], 'flops': 3797092544000}, 'timestamp': '2025-09-10 02:33:38.734920', 'step': 6521, 'epoch': 1} {'type': 'loss', 'content': 0.16485832631587982, 'timestamp': '2025-09-10 02:33:38.739636', 'step': 6522, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 2847885091968}, 'timestamp': '2025-09-10 02:33:38.778443', 'step': 6522, 'epoch': 1} {'type': 'loss', 'content': 0.11801616847515106, 'timestamp': '2025-09-10 02:33:38.782097', 'step': 6523, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 3322488817984}, 'timestamp': '2025-09-10 02:33:38.818204', 'step': 6523, 'epoch': 1} {'type': 'loss', 'content': 0.16384325921535492, 'timestamp': '2025-09-10 02:33:38.842828', 'step': 6524, 'epoch': 1} {'type': 'flops', 'content': [{'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1582003754624}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1898404492032}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1898404492032}, {'type': 'perplexity', 'in_batch_dim': [8, 144], 'batch_size': 8, 'flops': 2847606704256}, {'type': 'perplexity', 'in_batch_dim': [8, 64], 'batch_size': 8, 'flops': 1265603017216}, {'type': 'perplexity', 'in_batch_dim': [8, 112], 'batch_size': 8, 'flops': 2214805229440}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1898404492032}, {'type': 'perplexity', 'in_batch_dim': [8, 112], 'batch_size': 8, 'flops': 2214805229440}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1898404492032}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1898404492032}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1898404492032}, {'type': 'perplexity', 'in_batch_dim': [8, 112], 'batch_size': 8, 'flops': 2214805229440}, {'type': 'perplexity', 'in_batch_dim': [8, 112], 'batch_size': 8, 'flops': 2214805229440}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1582003754624}, {'type': 'perplexity', 'in_batch_dim': [8, 144], 'batch_size': 8, 'flops': 2847606704256}, {'type': 'perplexity', 'in_batch_dim': [8, 112], 'batch_size': 8, 'flops': 2214805229440}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1582003754624}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1898404492032}, {'type': 'perplexity', 'in_batch_dim': [8, 64], 'batch_size': 8, 'flops': 1265603017216}, {'type': 'perplexity', 'in_batch_dim': [8, 64], 'batch_size': 8, 'flops': 1265603017216}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1898404492032}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1582003754624}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1582003754624}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1582003754624}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1582003754624}, {'type': 'perplexity', 'in_batch_dim': [8, 64], 'batch_size': 8, 'flops': 1265603017216}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1898404492032}, {'type': 'perplexity', 'in_batch_dim': [8, 64], 'batch_size': 8, 'flops': 1265603017216}, {'type': 'perplexity', 'in_batch_dim': [8, 64], 'batch_size': 8, 'flops': 1265603017216}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1582003754624}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1582003754624}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1582003754624}, {'type': 'perplexity', 'in_batch_dim': [8, 112], 'batch_size': 8, 'flops': 2214805229440}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1582003754624}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1582003754624}, {'type': 'perplexity', 'in_batch_dim': [8, 160], 'batch_size': 8, 'flops': 3164007441664}, {'type': 'perplexity', 'in_batch_dim': [8, 64], 'batch_size': 8, 'flops': 1265603017216}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1898404492032}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1898404492032}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1898404492032}, {'type': 'perplexity', 'in_batch_dim': [8, 64], 'batch_size': 8, 'flops': 1265603017216}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1582003754624}, {'type': 'perplexity', 'in_batch_dim': [8, 64], 'batch_size': 8, 'flops': 1265603017216}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1582003754624}, {'type': 'perplexity', 'in_batch_dim': [8, 64], 'batch_size': 8, 'flops': 1265603017216}, {'type': 'perplexity', 'in_batch_dim': [8, 112], 'batch_size': 8, 'flops': 2214805229440}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1582003754624}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1898404492032}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1898404492032}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1898404492032}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1898404492032}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1898404492032}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1898404492032}, {'type': 'perplexity', 'in_batch_dim': [8, 64], 'batch_size': 8, 'flops': 1265603017216}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1898404492032}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1582003754624}, {'type': 'perplexity', 'in_batch_dim': [8, 64], 'batch_size': 8, 'flops': 1265603017216}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1898404492032}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1898404492032}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1898404492032}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1898404492032}, {'type': 'perplexity', 'in_batch_dim': [8, 128], 'batch_size': 8, 'flops': 2531205966848}, {'type': 'perplexity', 'in_batch_dim': [8, 112], 'batch_size': 8, 'flops': 2214805229440}, {'type': 'perplexity', 'in_batch_dim': [8, 64], 'batch_size': 8, 'flops': 1265603017216}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1582003754624}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1898404492032}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1582003754624}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1898404492032}, {'type': 'perplexity', 'in_batch_dim': [8, 64], 'batch_size': 8, 'flops': 1265603017216}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1898404492032}, {'type': 'perplexity', 'in_batch_dim': [8, 112], 'batch_size': 8, 'flops': 2214805229440}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1898404492032}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1582003754624}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1582003754624}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1582003754624}, {'type': 'perplexity', 'in_batch_dim': [8, 64], 'batch_size': 8, 'flops': 1265603017216}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1582003754624}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1898404492032}, {'type': 'perplexity', 'in_batch_dim': [8, 64], 'batch_size': 8, 'flops': 1265603017216}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1582003754624}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1898404492032}, {'type': 'perplexity', 'in_batch_dim': [8, 64], 'batch_size': 8, 'flops': 1265603017216}, {'type': 'perplexity', 'in_batch_dim': [8, 112], 'batch_size': 8, 'flops': 2214805229440}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1582003754624}, {'type': 'perplexity', 'in_batch_dim': [8, 144], 'batch_size': 8, 'flops': 2847606704256}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1582003754624}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1582003754624}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1582003754624}, {'type': 'perplexity', 'in_batch_dim': [8, 64], 'batch_size': 8, 'flops': 1265603017216}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1898404492032}, {'type': 'perplexity', 'in_batch_dim': [8, 112], 'batch_size': 8, 'flops': 2214805229440}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1898404492032}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1582003754624}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1582003754624}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1582003754624}, {'type': 'perplexity', 'in_batch_dim': [8, 112], 'batch_size': 8, 'flops': 2214805229440}, {'type': 'perplexity', 'in_batch_dim': [8, 64], 'batch_size': 8, 'flops': 1265603017216}, {'type': 'perplexity', 'in_batch_dim': [8, 112], 'batch_size': 8, 'flops': 2214805229440}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1582003754624}, {'type': 'perplexity', 'in_batch_dim': [8, 64], 'batch_size': 8, 'flops': 1265603017216}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1582003754624}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1582003754624}, {'type': 'perplexity', 'in_batch_dim': [8, 64], 'batch_size': 8, 'flops': 1265603017216}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1582003754624}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1582003754624}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1582003754624}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1898404492032}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1582003754624}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1582003754624}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1898404492032}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1898404492032}, {'type': 'perplexity', 'in_batch_dim': [8, 112], 'batch_size': 8, 'flops': 2214805229440}, {'type': 'perplexity', 'in_batch_dim': [8, 64], 'batch_size': 8, 'flops': 1265603017216}, {'type': 'perplexity', 'in_batch_dim': [8, 112], 'batch_size': 8, 'flops': 2214805229440}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1898404492032}, {'type': 'perplexity', 'in_batch_dim': [8, 112], 'batch_size': 8, 'flops': 2214805229440}, {'type': 'perplexity', 'in_batch_dim': [8, 128], 'batch_size': 8, 'flops': 2531205966848}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1582003754624}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1898404492032}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1898404492032}, {'type': 'perplexity', 'in_batch_dim': [8, 192], 'batch_size': 8, 'flops': 3796808916480}, {'type': 'perplexity', 'in_batch_dim': [8, 64], 'batch_size': 8, 'flops': 1265603017216}, {'type': 'perplexity', 'in_batch_dim': [8, 144], 'batch_size': 8, 'flops': 2847606704256}, {'type': 'perplexity', 'in_batch_dim': [8, 64], 'batch_size': 8, 'flops': 1265603017216}, {'type': 'perplexity', 'in_batch_dim': [8, 144], 'batch_size': 8, 'flops': 2847606704256}, {'type': 'perplexity', 'in_batch_dim': [8, 64], 'batch_size': 8, 'flops': 1265603017216}, {'type': 'perplexity', 'in_batch_dim': [8, 64], 'batch_size': 8, 'flops': 1265603017216}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1898404492032}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1898404492032}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1582003754624}, {'type': 'perplexity', 'in_batch_dim': [8, 128], 'batch_size': 8, 'flops': 2531205966848}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1898404492032}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1898404492032}, {'type': 'perplexity', 'in_batch_dim': [8, 112], 'batch_size': 8, 'flops': 2214805229440}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1582003754624}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1582003754624}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1898404492032}, {'type': 'perplexity', 'in_batch_dim': [8, 64], 'batch_size': 8, 'flops': 1265603017216}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1898404492032}, {'type': 'perplexity', 'in_batch_dim': [8, 112], 'batch_size': 8, 'flops': 2214805229440}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1898404492032}, {'type': 'perplexity', 'in_batch_dim': [8, 64], 'batch_size': 8, 'flops': 1265603017216}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1582003754624}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1898404492032}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1898404492032}, {'type': 'perplexity', 'in_batch_dim': [8, 64], 'batch_size': 8, 'flops': 1265603017216}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1898404492032}, {'type': 'perplexity', 'in_batch_dim': [8, 64], 'batch_size': 8, 'flops': 1265603017216}, {'type': 'perplexity', 'in_batch_dim': [8, 112], 'batch_size': 8, 'flops': 2214805229440}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1582003754624}, {'type': 'perplexity', 'in_batch_dim': [8, 128], 'batch_size': 8, 'flops': 2531205966848}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1582003754624}, {'type': 'perplexity', 'in_batch_dim': [8, 112], 'batch_size': 8, 'flops': 2214805229440}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1898404492032}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1582003754624}, {'type': 'perplexity', 'in_batch_dim': [8, 112], 'batch_size': 8, 'flops': 2214805229440}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1582003754624}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1898404492032}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1898404492032}, {'type': 'perplexity', 'in_batch_dim': [8, 128], 'batch_size': 8, 'flops': 2531205966848}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1898404492032}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1898404492032}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1582003754624}, {'type': 'perplexity', 'in_batch_dim': [8, 112], 'batch_size': 8, 'flops': 2214805229440}, {'type': 'perplexity', 'in_batch_dim': [8, 128], 'batch_size': 8, 'flops': 2531205966848}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1898404492032}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1582003754624}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1582003754624}, {'type': 'perplexity', 'in_batch_dim': [8, 128], 'batch_size': 8, 'flops': 2531205966848}, {'type': 'perplexity', 'in_batch_dim': [8, 112], 'batch_size': 8, 'flops': 2214805229440}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1582003754624}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1582003754624}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1898404492032}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1582003754624}, {'type': 'perplexity', 'in_batch_dim': [8, 64], 'batch_size': 8, 'flops': 1265603017216}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1582003754624}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1582003754624}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1582003754624}, {'type': 'perplexity', 'in_batch_dim': [8, 112], 'batch_size': 8, 'flops': 2214805229440}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1582003754624}, {'type': 'perplexity', 'in_batch_dim': [8, 64], 'batch_size': 8, 'flops': 1265603017216}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1582003754624}, {'type': 'perplexity', 'in_batch_dim': [8, 128], 'batch_size': 8, 'flops': 2531205966848}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1898404492032}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1898404492032}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1582003754624}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1898404492032}, {'type': 'perplexity', 'in_batch_dim': [8, 64], 'batch_size': 8, 'flops': 1265603017216}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1898404492032}, {'type': 'perplexity', 'in_batch_dim': [8, 112], 'batch_size': 8, 'flops': 2214805229440}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1898404492032}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1582003754624}, {'type': 'perplexity', 'in_batch_dim': [8, 112], 'batch_size': 8, 'flops': 2214805229440}, {'type': 'perplexity', 'in_batch_dim': [8, 112], 'batch_size': 8, 'flops': 2214805229440}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1898404492032}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1898404492032}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1898404492032}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1898404492032}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1582003754624}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1582003754624}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1582003754624}, {'type': 'perplexity', 'in_batch_dim': [8, 112], 'batch_size': 8, 'flops': 2214805229440}, {'type': 'perplexity', 'in_batch_dim': [8, 64], 'batch_size': 8, 'flops': 1265603017216}, {'type': 'perplexity', 'in_batch_dim': [8, 144], 'batch_size': 8, 'flops': 2847606704256}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1898404492032}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1898404492032}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1898404492032}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1898404492032}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1898404492032}, {'type': 'perplexity', 'in_batch_dim': [8, 112], 'batch_size': 8, 'flops': 2214805229440}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1582003754624}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1582003754624}, {'type': 'perplexity', 'in_batch_dim': [8, 64], 'batch_size': 8, 'flops': 1265603017216}, {'type': 'perplexity', 'in_batch_dim': [8, 112], 'batch_size': 8, 'flops': 2214805229440}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1582003754624}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1898404492032}, {'type': 'perplexity', 'in_batch_dim': [8, 64], 'batch_size': 8, 'flops': 1265603017216}, {'type': 'perplexity', 'in_batch_dim': [8, 112], 'batch_size': 8, 'flops': 2214805229440}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1898404492032}, {'type': 'perplexity', 'in_batch_dim': [8, 64], 'batch_size': 8, 'flops': 1265603017216}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1582003754624}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1582003754624}, {'type': 'perplexity', 'in_batch_dim': [8, 64], 'batch_size': 8, 'flops': 1265603017216}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1898404492032}, {'type': 'perplexity', 'in_batch_dim': [8, 64], 'batch_size': 8, 'flops': 1265603017216}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1582003754624}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1898404492032}, {'type': 'perplexity', 'in_batch_dim': [8, 112], 'batch_size': 8, 'flops': 2214805229440}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1582003754624}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1898404492032}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1582003754624}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1898404492032}, {'type': 'perplexity', 'in_batch_dim': [8, 112], 'batch_size': 8, 'flops': 2214805229440}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1898404492032}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1898404492032}, {'type': 'perplexity', 'in_batch_dim': [8, 64], 'batch_size': 8, 'flops': 1265603017216}, {'type': 'perplexity', 'in_batch_dim': [8, 64], 'batch_size': 8, 'flops': 1265603017216}, {'type': 'perplexity', 'in_batch_dim': [8, 128], 'batch_size': 8, 'flops': 2531205966848}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1582003754624}, {'type': 'perplexity', 'in_batch_dim': [8, 64], 'batch_size': 8, 'flops': 1265603017216}, {'type': 'perplexity', 'in_batch_dim': [8, 112], 'batch_size': 8, 'flops': 2214805229440}, {'type': 'perplexity', 'in_batch_dim': [8, 112], 'batch_size': 8, 'flops': 2214805229440}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1898404492032}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1582003754624}, {'type': 'perplexity', 'in_batch_dim': [8, 128], 'batch_size': 8, 'flops': 2531205966848}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1898404492032}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1582003754624}, {'type': 'perplexity', 'in_batch_dim': [8, 112], 'batch_size': 8, 'flops': 2214805229440}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1898404492032}, {'type': 'perplexity', 'in_batch_dim': [8, 64], 'batch_size': 8, 'flops': 1265603017216}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1582003754624}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1898404492032}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1898404492032}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1582003754624}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1582003754624}, {'type': 'perplexity', 'in_batch_dim': [8, 64], 'batch_size': 8, 'flops': 1265603017216}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1898404492032}, {'type': 'perplexity', 'in_batch_dim': [8, 112], 'batch_size': 8, 'flops': 2214805229440}, {'type': 'perplexity', 'in_batch_dim': [8, 64], 'batch_size': 8, 'flops': 1265603017216}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1898404492032}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1582003754624}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1582003754624}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1898404492032}, {'type': 'perplexity', 'in_batch_dim': [8, 112], 'batch_size': 8, 'flops': 2214805229440}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1898404492032}, {'type': 'perplexity', 'in_batch_dim': [8, 112], 'batch_size': 8, 'flops': 2214805229440}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1582003754624}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1582003754624}, {'type': 'perplexity', 'in_batch_dim': [8, 64], 'batch_size': 8, 'flops': 1265603017216}, {'type': 'perplexity', 'in_batch_dim': [8, 112], 'batch_size': 8, 'flops': 2214805229440}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1582003754624}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1898404492032}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1582003754624}, {'type': 'perplexity', 'in_batch_dim': [8, 112], 'batch_size': 8, 'flops': 2214805229440}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1898404492032}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1898404492032}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1582003754624}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1898404492032}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1582003754624}, {'type': 'perplexity', 'in_batch_dim': [8, 112], 'batch_size': 8, 'flops': 2214805229440}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1582003754624}, {'type': 'perplexity', 'in_batch_dim': [8, 64], 'batch_size': 8, 'flops': 1265603017216}, {'type': 'perplexity', 'in_batch_dim': [8, 64], 'batch_size': 8, 'flops': 1265603017216}, {'type': 'perplexity', 'in_batch_dim': [8, 48], 'batch_size': 8, 'flops': 949202279808}, {'type': 'perplexity', 'in_batch_dim': [8, 112], 'batch_size': 8, 'flops': 2214805229440}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1582003754624}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1898404492032}, {'type': 'perplexity', 'in_batch_dim': [8, 128], 'batch_size': 8, 'flops': 2531205966848}, {'type': 'perplexity', 'in_batch_dim': [8, 112], 'batch_size': 8, 'flops': 2214805229440}, {'type': 'perplexity', 'in_batch_dim': [8, 128], 'batch_size': 8, 'flops': 2531205966848}, {'type': 'perplexity', 'in_batch_dim': [8, 112], 'batch_size': 8, 'flops': 2214805229440}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1582003754624}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1898404492032}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1582003754624}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1582003754624}, {'type': 'perplexity', 'in_batch_dim': [8, 112], 'batch_size': 8, 'flops': 2214805229440}, {'type': 'perplexity', 'in_batch_dim': [8, 112], 'batch_size': 8, 'flops': 2214805229440}, {'type': 'perplexity', 'in_batch_dim': [8, 64], 'batch_size': 8, 'flops': 1265603017216}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1898404492032}, {'type': 'perplexity', 'in_batch_dim': [8, 112], 'batch_size': 8, 'flops': 2214805229440}, {'type': 'perplexity', 'in_batch_dim': [8, 64], 'batch_size': 8, 'flops': 1265603017216}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1898404492032}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1582003754624}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1898404492032}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1898404492032}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1898404492032}, {'type': 'perplexity', 'in_batch_dim': [8, 48], 'batch_size': 8, 'flops': 949202279808}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1898404492032}, {'type': 'perplexity', 'in_batch_dim': [8, 112], 'batch_size': 8, 'flops': 2214805229440}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1582003754624}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1898404492032}, {'type': 'perplexity', 'in_batch_dim': [8, 64], 'batch_size': 8, 'flops': 1265603017216}, {'type': 'perplexity', 'in_batch_dim': [8, 112], 'batch_size': 8, 'flops': 2214805229440}, {'type': 'perplexity', 'in_batch_dim': [8, 64], 'batch_size': 8, 'flops': 1265603017216}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1582003754624}, {'type': 'perplexity', 'in_batch_dim': [8, 64], 'batch_size': 8, 'flops': 1265603017216}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1582003754624}, {'type': 'perplexity', 'in_batch_dim': [8, 112], 'batch_size': 8, 'flops': 2214805229440}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1582003754624}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1582003754624}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1582003754624}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1582003754624}, {'type': 'perplexity', 'in_batch_dim': [8, 112], 'batch_size': 8, 'flops': 2214805229440}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1582003754624}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1898404492032}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1898404492032}, {'type': 'perplexity', 'in_batch_dim': [8, 112], 'batch_size': 8, 'flops': 2214805229440}, {'type': 'perplexity', 'in_batch_dim': [8, 144], 'batch_size': 8, 'flops': 2847606704256}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1582003754624}, {'type': 'perplexity', 'in_batch_dim': [8, 64], 'batch_size': 8, 'flops': 1265603017216}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1898404492032}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1898404492032}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1582003754624}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1898404492032}, {'type': 'perplexity', 'in_batch_dim': [8, 128], 'batch_size': 8, 'flops': 2531205966848}, {'type': 'perplexity', 'in_batch_dim': [8, 112], 'batch_size': 8, 'flops': 2214805229440}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1898404492032}, {'type': 'perplexity', 'in_batch_dim': [8, 64], 'batch_size': 8, 'flops': 1265603017216}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1898404492032}, {'type': 'perplexity', 'in_batch_dim': [8, 160], 'batch_size': 8, 'flops': 3164007441664}, {'type': 'perplexity', 'in_batch_dim': [8, 64], 'batch_size': 8, 'flops': 1265603017216}, {'type': 'perplexity', 'in_batch_dim': [8, 112], 'batch_size': 8, 'flops': 2214805229440}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1898404492032}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1898404492032}, {'type': 'perplexity', 'in_batch_dim': [8, 144], 'batch_size': 8, 'flops': 2847606704256}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1582003754624}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1898404492032}, {'type': 'perplexity', 'in_batch_dim': [8, 64], 'batch_size': 8, 'flops': 1265603017216}, {'type': 'perplexity', 'in_batch_dim': [8, 64], 'batch_size': 8, 'flops': 1265603017216}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1898404492032}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1898404492032}, {'type': 'perplexity', 'in_batch_dim': [8, 112], 'batch_size': 8, 'flops': 2214805229440}, {'type': 'perplexity', 'in_batch_dim': [8, 112], 'batch_size': 8, 'flops': 2214805229440}, {'type': 'perplexity', 'in_batch_dim': [8, 128], 'batch_size': 8, 'flops': 2531205966848}, {'type': 'perplexity', 'in_batch_dim': [8, 112], 'batch_size': 8, 'flops': 2214805229440}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1582003754624}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1582003754624}, {'type': 'perplexity', 'in_batch_dim': [8, 112], 'batch_size': 8, 'flops': 2214805229440}, {'type': 'perplexity', 'in_batch_dim': [8, 128], 'batch_size': 8, 'flops': 2531205966848}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1582003754624}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1582003754624}, {'type': 'perplexity', 'in_batch_dim': [8, 64], 'batch_size': 8, 'flops': 1265603017216}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1898404492032}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1582003754624}, {'type': 'perplexity', 'in_batch_dim': [8, 112], 'batch_size': 8, 'flops': 2214805229440}, {'type': 'perplexity', 'in_batch_dim': [8, 64], 'batch_size': 8, 'flops': 1265603017216}, {'type': 'perplexity', 'in_batch_dim': [8, 64], 'batch_size': 8, 'flops': 1265603017216}, {'type': 'perplexity', 'in_batch_dim': [8, 128], 'batch_size': 8, 'flops': 2531205966848}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1898404492032}, {'type': 'perplexity', 'in_batch_dim': [8, 112], 'batch_size': 8, 'flops': 2214805229440}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1582003754624}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1582003754624}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1582003754624}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1582003754624}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1582003754624}, {'type': 'perplexity', 'in_batch_dim': [8, 112], 'batch_size': 8, 'flops': 2214805229440}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1898404492032}, {'type': 'perplexity', 'in_batch_dim': [8, 112], 'batch_size': 8, 'flops': 2214805229440}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1898404492032}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1582003754624}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1582003754624}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1898404492032}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1898404492032}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1582003754624}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1582003754624}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1582003754624}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1582003754624}, {'type': 'perplexity', 'in_batch_dim': [8, 48], 'batch_size': 8, 'flops': 949202279808}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1582003754624}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1898404492032}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1582003754624}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1898404492032}, {'type': 'perplexity', 'in_batch_dim': [8, 112], 'batch_size': 8, 'flops': 2214805229440}, {'type': 'perplexity', 'in_batch_dim': [8, 128], 'batch_size': 8, 'flops': 2531205966848}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1898404492032}, {'type': 'perplexity', 'in_batch_dim': [8, 64], 'batch_size': 8, 'flops': 1265603017216}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1898404492032}, {'type': 'perplexity', 'in_batch_dim': [8, 64], 'batch_size': 8, 'flops': 1265603017216}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1582003754624}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1582003754624}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1582003754624}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1582003754624}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1898404492032}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1582003754624}, {'type': 'perplexity', 'in_batch_dim': [8, 112], 'batch_size': 8, 'flops': 2214805229440}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1898404492032}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1582003754624}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1898404492032}, {'type': 'perplexity', 'in_batch_dim': [8, 64], 'batch_size': 8, 'flops': 1265603017216}, {'type': 'perplexity', 'in_batch_dim': [8, 64], 'batch_size': 8, 'flops': 1265603017216}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1898404492032}, {'type': 'perplexity', 'in_batch_dim': [8, 64], 'batch_size': 8, 'flops': 1265603017216}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1582003754624}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1582003754624}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1582003754624}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1582003754624}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1582003754624}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1582003754624}, {'type': 'perplexity', 'in_batch_dim': [8, 64], 'batch_size': 8, 'flops': 1265603017216}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1582003754624}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1582003754624}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1898404492032}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1898404492032}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1898404492032}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1898404492032}, {'type': 'perplexity', 'in_batch_dim': [8, 128], 'batch_size': 8, 'flops': 2531205966848}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1582003754624}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1898404492032}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1898404492032}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1898404492032}, {'type': 'perplexity', 'in_batch_dim': [8, 64], 'batch_size': 8, 'flops': 1265603017216}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1898404492032}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1898404492032}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1582003754624}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1898404492032}, {'type': 'perplexity', 'in_batch_dim': [8, 112], 'batch_size': 8, 'flops': 2214805229440}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1898404492032}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1582003754624}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1898404492032}, {'type': 'perplexity', 'in_batch_dim': [8, 64], 'batch_size': 8, 'flops': 1265603017216}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1898404492032}, {'type': 'perplexity', 'in_batch_dim': [8, 112], 'batch_size': 8, 'flops': 2214805229440}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1898404492032}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1898404492032}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1582003754624}, {'type': 'perplexity', 'in_batch_dim': [8, 64], 'batch_size': 8, 'flops': 1265603017216}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1582003754624}, {'type': 'perplexity', 'in_batch_dim': [8, 128], 'batch_size': 8, 'flops': 2531205966848}, {'type': 'perplexity', 'in_batch_dim': [8, 64], 'batch_size': 8, 'flops': 1265603017216}, {'type': 'perplexity', 'in_batch_dim': [8, 128], 'batch_size': 8, 'flops': 2531205966848}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1898404492032}, {'type': 'perplexity', 'in_batch_dim': [8, 112], 'batch_size': 8, 'flops': 2214805229440}, {'type': 'perplexity', 'in_batch_dim': [8, 112], 'batch_size': 8, 'flops': 2214805229440}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1898404492032}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1898404492032}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1582003754624}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1582003754624}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1582003754624}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1898404492032}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1582003754624}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1582003754624}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1898404492032}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1582003754624}, {'type': 'perplexity', 'in_batch_dim': [8, 64], 'batch_size': 8, 'flops': 1265603017216}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1898404492032}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1898404492032}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1898404492032}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1898404492032}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1582003754624}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1898404492032}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1582003754624}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1582003754624}, {'type': 'perplexity', 'in_batch_dim': [8, 112], 'batch_size': 8, 'flops': 2214805229440}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1582003754624}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1898404492032}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1582003754624}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1582003754624}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1582003754624}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1898404492032}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1582003754624}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1898404492032}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1582003754624}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1582003754624}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1898404492032}, {'type': 'perplexity', 'in_batch_dim': [8, 112], 'batch_size': 8, 'flops': 2214805229440}, {'type': 'perplexity', 'in_batch_dim': [8, 128], 'batch_size': 8, 'flops': 2531205966848}, {'type': 'perplexity', 'in_batch_dim': [8, 112], 'batch_size': 8, 'flops': 2214805229440}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1582003754624}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1898404492032}, {'type': 'perplexity', 'in_batch_dim': [8, 64], 'batch_size': 8, 'flops': 1265603017216}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1898404492032}, {'type': 'perplexity', 'in_batch_dim': [8, 64], 'batch_size': 8, 'flops': 1265603017216}, {'type': 'perplexity', 'in_batch_dim': [8, 144], 'batch_size': 8, 'flops': 2847606704256}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1582003754624}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1898404492032}, {'type': 'perplexity', 'in_batch_dim': [8, 112], 'batch_size': 8, 'flops': 2214805229440}, {'type': 'perplexity', 'in_batch_dim': [8, 144], 'batch_size': 8, 'flops': 2847606704256}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1582003754624}, {'type': 'perplexity', 'in_batch_dim': [8, 112], 'batch_size': 8, 'flops': 2214805229440}, {'type': 'perplexity', 'in_batch_dim': [8, 64], 'batch_size': 8, 'flops': 1265603017216}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1582003754624}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1898404492032}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1898404492032}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1582003754624}, {'type': 'perplexity', 'in_batch_dim': [8, 112], 'batch_size': 8, 'flops': 2214805229440}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1582003754624}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1582003754624}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1582003754624}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1582003754624}, {'type': 'perplexity', 'in_batch_dim': [8, 112], 'batch_size': 8, 'flops': 2214805229440}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1898404492032}, {'type': 'perplexity', 'in_batch_dim': [8, 128], 'batch_size': 8, 'flops': 2531205966848}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1582003754624}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1582003754624}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1898404492032}, {'type': 'perplexity', 'in_batch_dim': [8, 64], 'batch_size': 8, 'flops': 1265603017216}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1898404492032}, {'type': 'perplexity', 'in_batch_dim': [8, 64], 'batch_size': 8, 'flops': 1265603017216}, {'type': 'perplexity', 'in_batch_dim': [8, 112], 'batch_size': 8, 'flops': 2214805229440}, {'type': 'perplexity', 'in_batch_dim': [8, 112], 'batch_size': 8, 'flops': 2214805229440}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1582003754624}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1898404492032}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1582003754624}, {'type': 'perplexity', 'in_batch_dim': [8, 64], 'batch_size': 8, 'flops': 1265603017216}, {'type': 'perplexity', 'in_batch_dim': [8, 64], 'batch_size': 8, 'flops': 1265603017216}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1898404492032}, {'type': 'perplexity', 'in_batch_dim': [8, 64], 'batch_size': 8, 'flops': 1265603017216}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1898404492032}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1898404492032}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1582003754624}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1898404492032}, {'type': 'perplexity', 'in_batch_dim': [8, 144], 'batch_size': 8, 'flops': 2847606704256}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1898404492032}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1582003754624}, {'type': 'perplexity', 'in_batch_dim': [8, 128], 'batch_size': 8, 'flops': 2531205966848}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1898404492032}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1898404492032}, {'type': 'perplexity', 'in_batch_dim': [8, 112], 'batch_size': 8, 'flops': 2214805229440}, {'type': 'perplexity', 'in_batch_dim': [8, 112], 'batch_size': 8, 'flops': 2214805229440}, {'type': 'perplexity', 'in_batch_dim': [8, 64], 'batch_size': 8, 'flops': 1265603017216}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1582003754624}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1898404492032}, {'type': 'perplexity', 'in_batch_dim': [8, 128], 'batch_size': 8, 'flops': 2531205966848}, {'type': 'perplexity', 'in_batch_dim': [8, 144], 'batch_size': 8, 'flops': 2847606704256}, {'type': 'perplexity', 'in_batch_dim': [8, 112], 'batch_size': 8, 'flops': 2214805229440}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1582003754624}, {'type': 'perplexity', 'in_batch_dim': [8, 64], 'batch_size': 8, 'flops': 1265603017216}, {'type': 'perplexity', 'in_batch_dim': [8, 64], 'batch_size': 8, 'flops': 1265603017216}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1898404492032}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1582003754624}, {'type': 'perplexity', 'in_batch_dim': [8, 64], 'batch_size': 8, 'flops': 1265603017216}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1898404492032}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1582003754624}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1898404492032}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1898404492032}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1582003754624}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1582003754624}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1582003754624}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1582003754624}, {'type': 'perplexity', 'in_batch_dim': [3, 48], 'batch_size': 8, 'flops': 949202279808}], 'timestamp': '2025-09-10 02:33:46.699118', 'step': 6524, 'epoch': 1} {'type': 'pplx', 'content': 8876.919805392994, 'timestamp': '2025-09-10 02:33:46.702804', 'step': 6524, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 3322488817984}, 'timestamp': '2025-09-10 02:33:46.732919', 'step': 6524, 'epoch': 1} {'type': 'loss', 'content': 0.10785510390996933, 'timestamp': '2025-09-10 02:33:46.735201', 'step': 6525, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 3322488817984}, 'timestamp': '2025-09-10 02:33:46.767239', 'step': 6525, 'epoch': 1} {'type': 'loss', 'content': 0.08356201648712158, 'timestamp': '2025-09-10 02:33:46.769531', 'step': 6526, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 128], 'flops': 3797092544000}, 'timestamp': '2025-09-10 02:33:46.801002', 'step': 6526, 'epoch': 1} {'type': 'loss', 'content': 0.1943540722131729, 'timestamp': '2025-09-10 02:33:46.803637', 'step': 6527, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 3322488817984}, 'timestamp': '2025-09-10 02:33:46.833968', 'step': 6527, 'epoch': 1} {'type': 'loss', 'content': 0.10324598848819733, 'timestamp': '2025-09-10 02:33:46.857492', 'step': 6528, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 2847885091968}, 'timestamp': '2025-09-10 02:33:46.888566', 'step': 6528, 'epoch': 1} {'type': 'loss', 'content': 0.14638197422027588, 'timestamp': '2025-09-10 02:33:46.892817', 'step': 6529, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 3322488817984}, 'timestamp': '2025-09-10 02:33:46.926540', 'step': 6529, 'epoch': 1} {'type': 'loss', 'content': 0.25755777955055237, 'timestamp': '2025-09-10 02:33:46.929437', 'step': 6530, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 128], 'flops': 3797092544000}, 'timestamp': '2025-09-10 02:33:46.961526', 'step': 6530, 'epoch': 1} {'type': 'loss', 'content': 0.17103491723537445, 'timestamp': '2025-09-10 02:33:46.964705', 'step': 6531, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 2847885091968}, 'timestamp': '2025-09-10 02:33:46.995519', 'step': 6531, 'epoch': 1} {'type': 'loss', 'content': 0.12231738120317459, 'timestamp': '2025-09-10 02:33:47.019743', 'step': 6532, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 144], 'flops': 4271696270016}, 'timestamp': '2025-09-10 02:33:47.051167', 'step': 6532, 'epoch': 1} {'type': 'loss', 'content': 0.09902329742908478, 'timestamp': '2025-09-10 02:33:47.053500', 'step': 6533, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 3322488817984}, 'timestamp': '2025-09-10 02:33:47.085422', 'step': 6533, 'epoch': 1} {'type': 'loss', 'content': 0.1445639729499817, 'timestamp': '2025-09-10 02:33:47.087719', 'step': 6534, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 2847885091968}, 'timestamp': '2025-09-10 02:33:47.121173', 'step': 6534, 'epoch': 1} {'type': 'loss', 'content': 0.08599872142076492, 'timestamp': '2025-09-10 02:33:47.123596', 'step': 6535, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 3322488817984}, 'timestamp': '2025-09-10 02:33:47.154043', 'step': 6535, 'epoch': 1} {'type': 'loss', 'content': 0.13244949281215668, 'timestamp': '2025-09-10 02:33:47.177917', 'step': 6536, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 2847885091968}, 'timestamp': '2025-09-10 02:33:47.210346', 'step': 6536, 'epoch': 1} {'type': 'loss', 'content': 0.06332661956548691, 'timestamp': '2025-09-10 02:33:47.212923', 'step': 6537, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 3322488817984}, 'timestamp': '2025-09-10 02:33:47.244138', 'step': 6537, 'epoch': 1} {'type': 'loss', 'content': 0.16628597676753998, 'timestamp': '2025-09-10 02:33:47.246750', 'step': 6538, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 3322488817984}, 'timestamp': '2025-09-10 02:33:47.278030', 'step': 6538, 'epoch': 1} {'type': 'loss', 'content': 0.1731543242931366, 'timestamp': '2025-09-10 02:33:47.280414', 'step': 6539, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 3322488817984}, 'timestamp': '2025-09-10 02:33:47.311131', 'step': 6539, 'epoch': 1} {'type': 'loss', 'content': 0.15432141721248627, 'timestamp': '2025-09-10 02:33:47.335375', 'step': 6540, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 3322488817984}, 'timestamp': '2025-09-10 02:33:47.365660', 'step': 6540, 'epoch': 1} {'type': 'loss', 'content': 0.1489066779613495, 'timestamp': '2025-09-10 02:33:47.368783', 'step': 6541, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 128], 'flops': 3797092544000}, 'timestamp': '2025-09-10 02:33:47.399039', 'step': 6541, 'epoch': 1} {'type': 'loss', 'content': 0.12734723091125488, 'timestamp': '2025-09-10 02:33:47.401672', 'step': 6542, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 80], 'flops': 2373281365952}, 'timestamp': '2025-09-10 02:33:47.432965', 'step': 6542, 'epoch': 1} {'type': 'loss', 'content': 0.18722978234291077, 'timestamp': '2025-09-10 02:33:47.435736', 'step': 6543, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 2847885091968}, 'timestamp': '2025-09-10 02:33:47.466564', 'step': 6543, 'epoch': 1} {'type': 'loss', 'content': 0.1662549078464508, 'timestamp': '2025-09-10 02:33:47.490675', 'step': 6544, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 2847885091968}, 'timestamp': '2025-09-10 02:33:47.521325', 'step': 6544, 'epoch': 1} {'type': 'loss', 'content': 0.0867108479142189, 'timestamp': '2025-09-10 02:33:47.523883', 'step': 6545, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 144], 'flops': 4271696270016}, 'timestamp': '2025-09-10 02:33:47.555080', 'step': 6545, 'epoch': 1} {'type': 'loss', 'content': 0.1219283863902092, 'timestamp': '2025-09-10 02:33:47.558157', 'step': 6546, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 2847885091968}, 'timestamp': '2025-09-10 02:33:47.589753', 'step': 6546, 'epoch': 1} {'type': 'loss', 'content': 0.059673603624105453, 'timestamp': '2025-09-10 02:33:47.592488', 'step': 6547, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 3322488817984}, 'timestamp': '2025-09-10 02:33:47.625474', 'step': 6547, 'epoch': 1} {'type': 'loss', 'content': 0.11161380261182785, 'timestamp': '2025-09-10 02:33:47.649066', 'step': 6548, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 3322488817984}, 'timestamp': '2025-09-10 02:33:47.679463', 'step': 6548, 'epoch': 1} {'type': 'loss', 'content': 0.18980272114276886, 'timestamp': '2025-09-10 02:33:47.681938', 'step': 6549, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 2847885091968}, 'timestamp': '2025-09-10 02:33:47.712680', 'step': 6549, 'epoch': 1} {'type': 'loss', 'content': 0.20150552690029144, 'timestamp': '2025-09-10 02:33:47.715037', 'step': 6550, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 128], 'flops': 3797092544000}, 'timestamp': '2025-09-10 02:33:47.746620', 'step': 6550, 'epoch': 1} {'type': 'loss', 'content': 0.19162943959236145, 'timestamp': '2025-09-10 02:33:47.749900', 'step': 6551, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 128], 'flops': 3797092544000}, 'timestamp': '2025-09-10 02:33:47.781015', 'step': 6551, 'epoch': 1} {'type': 'loss', 'content': 0.16057784855365753, 'timestamp': '2025-09-10 02:33:47.804656', 'step': 6552, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 128], 'flops': 3797092544000}, 'timestamp': '2025-09-10 02:33:47.835016', 'step': 6552, 'epoch': 1} {'type': 'loss', 'content': 0.18567757308483124, 'timestamp': '2025-09-10 02:33:47.837652', 'step': 6553, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 128], 'flops': 3797092544000}, 'timestamp': '2025-09-10 02:33:47.867842', 'step': 6553, 'epoch': 1} {'type': 'loss', 'content': 0.0427151657640934, 'timestamp': '2025-09-10 02:33:47.870031', 'step': 6554, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 3322488817984}, 'timestamp': '2025-09-10 02:33:47.920417', 'step': 6554, 'epoch': 1} {'type': 'loss', 'content': 0.1575419008731842, 'timestamp': '2025-09-10 02:33:47.923157', 'step': 6555, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 144], 'flops': 4271696270016}, 'timestamp': '2025-09-10 02:33:47.954015', 'step': 6555, 'epoch': 1} {'type': 'loss', 'content': 0.20121164619922638, 'timestamp': '2025-09-10 02:33:47.977817', 'step': 6556, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 144], 'flops': 4271696270016}, 'timestamp': '2025-09-10 02:33:48.007781', 'step': 6556, 'epoch': 1} {'type': 'loss', 'content': 0.12772192060947418, 'timestamp': '2025-09-10 02:33:48.009993', 'step': 6557, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 176], 'flops': 5220903722048}, 'timestamp': '2025-09-10 02:33:48.040782', 'step': 6557, 'epoch': 1} {'type': 'loss', 'content': 0.132744699716568, 'timestamp': '2025-09-10 02:33:48.044990', 'step': 6558, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 128], 'flops': 3797092544000}, 'timestamp': '2025-09-10 02:33:48.075533', 'step': 6558, 'epoch': 1} {'type': 'loss', 'content': 0.21842065453529358, 'timestamp': '2025-09-10 02:33:48.077925', 'step': 6559, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 3322488817984}, 'timestamp': '2025-09-10 02:33:48.108023', 'step': 6559, 'epoch': 1} {'type': 'loss', 'content': 0.1633339375257492, 'timestamp': '2025-09-10 02:33:48.131271', 'step': 6560, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 3322488817984}, 'timestamp': '2025-09-10 02:33:48.161786', 'step': 6560, 'epoch': 1} {'type': 'loss', 'content': 0.13776150345802307, 'timestamp': '2025-09-10 02:33:48.164319', 'step': 6561, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 3322488817984}, 'timestamp': '2025-09-10 02:33:48.194380', 'step': 6561, 'epoch': 1} {'type': 'loss', 'content': 0.1802656203508377, 'timestamp': '2025-09-10 02:33:48.200759', 'step': 6562, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 3322488817984}, 'timestamp': '2025-09-10 02:33:48.230857', 'step': 6562, 'epoch': 1} {'type': 'loss', 'content': 0.20562905073165894, 'timestamp': '2025-09-10 02:33:48.233268', 'step': 6563, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 128], 'flops': 3797092544000}, 'timestamp': '2025-09-10 02:33:48.263577', 'step': 6563, 'epoch': 1} {'type': 'loss', 'content': 0.2450021505355835, 'timestamp': '2025-09-10 02:33:48.287257', 'step': 6564, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 80], 'flops': 2373281365952}, 'timestamp': '2025-09-10 02:33:48.318211', 'step': 6564, 'epoch': 1} {'type': 'loss', 'content': 0.11962909996509552, 'timestamp': '2025-09-10 02:33:48.322076', 'step': 6565, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 3322488817984}, 'timestamp': '2025-09-10 02:33:48.353931', 'step': 6565, 'epoch': 1} {'type': 'loss', 'content': 0.13668197393417358, 'timestamp': '2025-09-10 02:33:48.356438', 'step': 6566, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 3322488817984}, 'timestamp': '2025-09-10 02:33:48.386839', 'step': 6566, 'epoch': 1} {'type': 'loss', 'content': 0.13542784750461578, 'timestamp': '2025-09-10 02:33:48.389111', 'step': 6567, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 3322488817984}, 'timestamp': '2025-09-10 02:33:48.420384', 'step': 6567, 'epoch': 1} {'type': 'loss', 'content': 0.14674316346645355, 'timestamp': '2025-09-10 02:33:48.444269', 'step': 6568, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 3322488817984}, 'timestamp': '2025-09-10 02:33:48.474845', 'step': 6568, 'epoch': 1} {'type': 'loss', 'content': 0.1378846913576126, 'timestamp': '2025-09-10 02:33:48.477563', 'step': 6569, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 160], 'flops': 4746299996032}, 'timestamp': '2025-09-10 02:33:48.508178', 'step': 6569, 'epoch': 1} {'type': 'loss', 'content': 0.2633186876773834, 'timestamp': '2025-09-10 02:33:48.510853', 'step': 6570, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 144], 'flops': 4271696270016}, 'timestamp': '2025-09-10 02:33:48.542558', 'step': 6570, 'epoch': 1} {'type': 'loss', 'content': 0.1502186506986618, 'timestamp': '2025-09-10 02:33:48.545013', 'step': 6571, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 144], 'flops': 4271696270016}, 'timestamp': '2025-09-10 02:33:48.574990', 'step': 6571, 'epoch': 1} {'type': 'loss', 'content': 0.14356087148189545, 'timestamp': '2025-09-10 02:33:48.600119', 'step': 6572, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 3322488817984}, 'timestamp': '2025-09-10 02:33:48.630950', 'step': 6572, 'epoch': 1} {'type': 'loss', 'content': 0.10520784556865692, 'timestamp': '2025-09-10 02:33:48.633408', 'step': 6573, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 128], 'flops': 3797092544000}, 'timestamp': '2025-09-10 02:33:48.663118', 'step': 6573, 'epoch': 1} {'type': 'loss', 'content': 0.12568888068199158, 'timestamp': '2025-09-10 02:33:48.665453', 'step': 6574, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 3322488817984}, 'timestamp': '2025-09-10 02:33:48.695481', 'step': 6574, 'epoch': 1} {'type': 'loss', 'content': 0.19312088191509247, 'timestamp': '2025-09-10 02:33:48.698268', 'step': 6575, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 144], 'flops': 4271696270016}, 'timestamp': '2025-09-10 02:33:48.728730', 'step': 6575, 'epoch': 1} {'type': 'loss', 'content': 0.10905135422945023, 'timestamp': '2025-09-10 02:33:48.752425', 'step': 6576, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 80], 'flops': 2373281365952}, 'timestamp': '2025-09-10 02:33:48.783544', 'step': 6576, 'epoch': 1} {'type': 'loss', 'content': 0.10959514230489731, 'timestamp': '2025-09-10 02:33:48.786196', 'step': 6577, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 3322488817984}, 'timestamp': '2025-09-10 02:33:48.817307', 'step': 6577, 'epoch': 1} {'type': 'loss', 'content': 0.14469265937805176, 'timestamp': '2025-09-10 02:33:48.819704', 'step': 6578, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 2847885091968}, 'timestamp': '2025-09-10 02:33:48.850120', 'step': 6578, 'epoch': 1} {'type': 'loss', 'content': 0.12891414761543274, 'timestamp': '2025-09-10 02:33:48.852338', 'step': 6579, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 2847885091968}, 'timestamp': '2025-09-10 02:33:48.882894', 'step': 6579, 'epoch': 1} {'type': 'loss', 'content': 0.19405034184455872, 'timestamp': '2025-09-10 02:33:48.906487', 'step': 6580, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 3322488817984}, 'timestamp': '2025-09-10 02:33:48.937852', 'step': 6580, 'epoch': 1} {'type': 'loss', 'content': 0.16643491387367249, 'timestamp': '2025-09-10 02:33:48.940906', 'step': 6581, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 128], 'flops': 3797092544000}, 'timestamp': '2025-09-10 02:33:48.972522', 'step': 6581, 'epoch': 1} {'type': 'loss', 'content': 0.12027249485254288, 'timestamp': '2025-09-10 02:33:48.974900', 'step': 6582, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 2847885091968}, 'timestamp': '2025-09-10 02:33:49.005090', 'step': 6582, 'epoch': 1} {'type': 'loss', 'content': 0.11782515794038773, 'timestamp': '2025-09-10 02:33:49.007828', 'step': 6583, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 128], 'flops': 3797092544000}, 'timestamp': '2025-09-10 02:33:49.038464', 'step': 6583, 'epoch': 1} {'type': 'loss', 'content': 0.1906222552061081, 'timestamp': '2025-09-10 02:33:49.062683', 'step': 6584, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 144], 'flops': 4271696270016}, 'timestamp': '2025-09-10 02:33:49.093317', 'step': 6584, 'epoch': 1} {'type': 'loss', 'content': 0.23070959746837616, 'timestamp': '2025-09-10 02:33:49.095699', 'step': 6585, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 3322488817984}, 'timestamp': '2025-09-10 02:33:49.126191', 'step': 6585, 'epoch': 1} {'type': 'loss', 'content': 0.09511010348796844, 'timestamp': '2025-09-10 02:33:49.128592', 'step': 6586, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 80], 'flops': 2373281365952}, 'timestamp': '2025-09-10 02:33:49.159026', 'step': 6586, 'epoch': 1} {'type': 'loss', 'content': 0.13195255398750305, 'timestamp': '2025-09-10 02:33:49.161552', 'step': 6587, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 3322488817984}, 'timestamp': '2025-09-10 02:33:49.191730', 'step': 6587, 'epoch': 1} {'type': 'loss', 'content': 0.17101898789405823, 'timestamp': '2025-09-10 02:33:49.215397', 'step': 6588, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 2847885091968}, 'timestamp': '2025-09-10 02:33:49.246983', 'step': 6588, 'epoch': 1} {'type': 'loss', 'content': 0.15297360718250275, 'timestamp': '2025-09-10 02:33:49.249422', 'step': 6589, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 144], 'flops': 4271696270016}, 'timestamp': '2025-09-10 02:33:49.279917', 'step': 6589, 'epoch': 1} {'type': 'loss', 'content': 0.1329411268234253, 'timestamp': '2025-09-10 02:33:49.282410', 'step': 6590, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 80], 'flops': 2373281365952}, 'timestamp': '2025-09-10 02:33:49.312518', 'step': 6590, 'epoch': 1} {'type': 'loss', 'content': 0.09476909041404724, 'timestamp': '2025-09-10 02:33:49.314752', 'step': 6591, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 144], 'flops': 4271696270016}, 'timestamp': '2025-09-10 02:33:49.344857', 'step': 6591, 'epoch': 1} {'type': 'loss', 'content': 0.22626835107803345, 'timestamp': '2025-09-10 02:33:49.368463', 'step': 6592, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 3322488817984}, 'timestamp': '2025-09-10 02:33:49.398723', 'step': 6592, 'epoch': 1} {'type': 'loss', 'content': 0.10353641957044601, 'timestamp': '2025-09-10 02:33:49.401229', 'step': 6593, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 3322488817984}, 'timestamp': '2025-09-10 02:33:49.430840', 'step': 6593, 'epoch': 1} {'type': 'loss', 'content': 0.18064363300800323, 'timestamp': '2025-09-10 02:33:49.432919', 'step': 6594, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 128], 'flops': 3797092544000}, 'timestamp': '2025-09-10 02:33:49.462965', 'step': 6594, 'epoch': 1} {'type': 'loss', 'content': 0.11602523177862167, 'timestamp': '2025-09-10 02:33:49.465548', 'step': 6595, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 144], 'flops': 4271696270016}, 'timestamp': '2025-09-10 02:33:49.495365', 'step': 6595, 'epoch': 1} {'type': 'loss', 'content': 0.12927217781543732, 'timestamp': '2025-09-10 02:33:49.519268', 'step': 6596, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 2847885091968}, 'timestamp': '2025-09-10 02:33:49.550501', 'step': 6596, 'epoch': 1} {'type': 'loss', 'content': 0.18947482109069824, 'timestamp': '2025-09-10 02:33:49.552992', 'step': 6597, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 128], 'flops': 3797092544000}, 'timestamp': '2025-09-10 02:33:49.583151', 'step': 6597, 'epoch': 1} {'type': 'loss', 'content': 0.06930172443389893, 'timestamp': '2025-09-10 02:33:49.585581', 'step': 6598, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 80], 'flops': 2373281365952}, 'timestamp': '2025-09-10 02:33:49.615955', 'step': 6598, 'epoch': 1} {'type': 'loss', 'content': 0.08623924106359482, 'timestamp': '2025-09-10 02:33:49.618491', 'step': 6599, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 2847885091968}, 'timestamp': '2025-09-10 02:33:49.651418', 'step': 6599, 'epoch': 1} {'type': 'loss', 'content': 0.09274542331695557, 'timestamp': '2025-09-10 02:33:49.676266', 'step': 6600, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 128], 'flops': 3797092544000}, 'timestamp': '2025-09-10 02:33:49.707024', 'step': 6600, 'epoch': 1} {'type': 'loss', 'content': 0.10653894394636154, 'timestamp': '2025-09-10 02:33:49.709316', 'step': 6601, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 3322488817984}, 'timestamp': '2025-09-10 02:33:49.739270', 'step': 6601, 'epoch': 1} {'type': 'loss', 'content': 0.09701526165008545, 'timestamp': '2025-09-10 02:33:49.741867', 'step': 6602, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 144], 'flops': 4271696270016}, 'timestamp': '2025-09-10 02:33:49.772448', 'step': 6602, 'epoch': 1} {'type': 'loss', 'content': 0.17269858717918396, 'timestamp': '2025-09-10 02:33:49.775122', 'step': 6603, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 3322488817984}, 'timestamp': '2025-09-10 02:33:49.805792', 'step': 6603, 'epoch': 1} {'type': 'loss', 'content': 0.27603501081466675, 'timestamp': '2025-09-10 02:33:49.829402', 'step': 6604, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 128], 'flops': 3797092544000}, 'timestamp': '2025-09-10 02:33:49.860017', 'step': 6604, 'epoch': 1} {'type': 'loss', 'content': 0.08148829638957977, 'timestamp': '2025-09-10 02:33:49.862218', 'step': 6605, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 3322488817984}, 'timestamp': '2025-09-10 02:33:49.892181', 'step': 6605, 'epoch': 1} {'type': 'loss', 'content': 0.12221366912126541, 'timestamp': '2025-09-10 02:33:49.894920', 'step': 6606, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 160], 'flops': 4746299996032}, 'timestamp': '2025-09-10 02:33:49.925871', 'step': 6606, 'epoch': 1} {'type': 'loss', 'content': 0.10752280801534653, 'timestamp': '2025-09-10 02:33:49.928426', 'step': 6607, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 3322488817984}, 'timestamp': '2025-09-10 02:33:49.961366', 'step': 6607, 'epoch': 1} {'type': 'loss', 'content': 0.13338764011859894, 'timestamp': '2025-09-10 02:33:49.985218', 'step': 6608, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 2847885091968}, 'timestamp': '2025-09-10 02:33:50.016248', 'step': 6608, 'epoch': 1} {'type': 'loss', 'content': 0.17301559448242188, 'timestamp': '2025-09-10 02:33:50.018516', 'step': 6609, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 2847885091968}, 'timestamp': '2025-09-10 02:33:50.048684', 'step': 6609, 'epoch': 1} {'type': 'loss', 'content': 0.22813084721565247, 'timestamp': '2025-09-10 02:33:50.052809', 'step': 6610, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 2847885091968}, 'timestamp': '2025-09-10 02:33:50.083152', 'step': 6610, 'epoch': 1} {'type': 'loss', 'content': 0.16683505475521088, 'timestamp': '2025-09-10 02:33:50.085427', 'step': 6611, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 3322488817984}, 'timestamp': '2025-09-10 02:33:50.116052', 'step': 6611, 'epoch': 1} {'type': 'loss', 'content': 0.13541145622730255, 'timestamp': '2025-09-10 02:33:50.139711', 'step': 6612, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 3322488817984}, 'timestamp': '2025-09-10 02:33:50.171604', 'step': 6612, 'epoch': 1} {'type': 'loss', 'content': 0.16332700848579407, 'timestamp': '2025-09-10 02:33:50.173899', 'step': 6613, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 3322488817984}, 'timestamp': '2025-09-10 02:33:50.203957', 'step': 6613, 'epoch': 1} {'type': 'loss', 'content': 0.16475214064121246, 'timestamp': '2025-09-10 02:33:50.207311', 'step': 6614, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 3322488817984}, 'timestamp': '2025-09-10 02:33:50.238265', 'step': 6614, 'epoch': 1} {'type': 'loss', 'content': 0.08159789443016052, 'timestamp': '2025-09-10 02:33:50.240582', 'step': 6615, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 3322488817984}, 'timestamp': '2025-09-10 02:33:50.271360', 'step': 6615, 'epoch': 1} {'type': 'loss', 'content': 0.18095408380031586, 'timestamp': '2025-09-10 02:33:50.295072', 'step': 6616, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 128], 'flops': 3797092544000}, 'timestamp': '2025-09-10 02:33:50.326452', 'step': 6616, 'epoch': 1} {'type': 'loss', 'content': 0.1235654279589653, 'timestamp': '2025-09-10 02:33:50.329387', 'step': 6617, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 128], 'flops': 3797092544000}, 'timestamp': '2025-09-10 02:33:50.361276', 'step': 6617, 'epoch': 1} {'type': 'loss', 'content': 0.09724371880292892, 'timestamp': '2025-09-10 02:33:50.363694', 'step': 6618, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 3322488817984}, 'timestamp': '2025-09-10 02:33:50.393774', 'step': 6618, 'epoch': 1} {'type': 'loss', 'content': 0.10262337327003479, 'timestamp': '2025-09-10 02:33:50.396277', 'step': 6619, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 3322488817984}, 'timestamp': '2025-09-10 02:33:50.426190', 'step': 6619, 'epoch': 1} {'type': 'loss', 'content': 0.10412611812353134, 'timestamp': '2025-09-10 02:33:50.449914', 'step': 6620, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 2847885091968}, 'timestamp': '2025-09-10 02:33:50.480315', 'step': 6620, 'epoch': 1} {'type': 'loss', 'content': 0.14880698919296265, 'timestamp': '2025-09-10 02:33:50.482789', 'step': 6621, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 128], 'flops': 3797092544000}, 'timestamp': '2025-09-10 02:33:50.512865', 'step': 6621, 'epoch': 1} {'type': 'loss', 'content': 0.12246977537870407, 'timestamp': '2025-09-10 02:33:50.515212', 'step': 6622, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 3322488817984}, 'timestamp': '2025-09-10 02:33:50.545404', 'step': 6622, 'epoch': 1} {'type': 'loss', 'content': 0.16088752448558807, 'timestamp': '2025-09-10 02:33:50.553711', 'step': 6623, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 3322488817984}, 'timestamp': '2025-09-10 02:33:50.592229', 'step': 6623, 'epoch': 1} {'type': 'loss', 'content': 0.10997120290994644, 'timestamp': '2025-09-10 02:33:50.616427', 'step': 6624, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 128], 'flops': 3797092544000}, 'timestamp': '2025-09-10 02:33:50.647000', 'step': 6624, 'epoch': 1} {'type': 'loss', 'content': 0.17495593428611755, 'timestamp': '2025-09-10 02:33:50.649422', 'step': 6625, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 3322488817984}, 'timestamp': '2025-09-10 02:33:50.679988', 'step': 6625, 'epoch': 1} {'type': 'loss', 'content': 0.13828471302986145, 'timestamp': '2025-09-10 02:33:50.682366', 'step': 6626, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 2847885091968}, 'timestamp': '2025-09-10 02:33:50.712487', 'step': 6626, 'epoch': 1} {'type': 'loss', 'content': 0.0829312652349472, 'timestamp': '2025-09-10 02:33:50.715152', 'step': 6627, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 2847885091968}, 'timestamp': '2025-09-10 02:33:50.745447', 'step': 6627, 'epoch': 1} {'type': 'loss', 'content': 0.17851626873016357, 'timestamp': '2025-09-10 02:33:50.769588', 'step': 6628, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 3322488817984}, 'timestamp': '2025-09-10 02:33:50.801101', 'step': 6628, 'epoch': 1} {'type': 'loss', 'content': 0.2038237601518631, 'timestamp': '2025-09-10 02:33:50.803936', 'step': 6629, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 3322488817984}, 'timestamp': '2025-09-10 02:33:50.833558', 'step': 6629, 'epoch': 1} {'type': 'loss', 'content': 0.11477532982826233, 'timestamp': '2025-09-10 02:33:50.835984', 'step': 6630, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 2847885091968}, 'timestamp': '2025-09-10 02:33:50.872534', 'step': 6630, 'epoch': 1} {'type': 'loss', 'content': 0.21280664205551147, 'timestamp': '2025-09-10 02:33:50.875099', 'step': 6631, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 3322488817984}, 'timestamp': '2025-09-10 02:33:50.906191', 'step': 6631, 'epoch': 1} {'type': 'loss', 'content': 0.11093676835298538, 'timestamp': '2025-09-10 02:33:50.929742', 'step': 6632, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 2847885091968}, 'timestamp': '2025-09-10 02:33:50.960257', 'step': 6632, 'epoch': 1} {'type': 'loss', 'content': 0.14829544723033905, 'timestamp': '2025-09-10 02:33:50.962735', 'step': 6633, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 128], 'flops': 3797092544000}, 'timestamp': '2025-09-10 02:33:50.993777', 'step': 6633, 'epoch': 1} {'type': 'loss', 'content': 0.1457941234111786, 'timestamp': '2025-09-10 02:33:50.996351', 'step': 6634, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 3322488817984}, 'timestamp': '2025-09-10 02:33:51.026595', 'step': 6634, 'epoch': 1} {'type': 'loss', 'content': 0.21735671162605286, 'timestamp': '2025-09-10 02:33:51.028914', 'step': 6635, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 3322488817984}, 'timestamp': '2025-09-10 02:33:51.058671', 'step': 6635, 'epoch': 1} {'type': 'loss', 'content': 0.15168868005275726, 'timestamp': '2025-09-10 02:33:51.082946', 'step': 6636, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 2847885091968}, 'timestamp': '2025-09-10 02:33:51.113298', 'step': 6636, 'epoch': 1} {'type': 'loss', 'content': 0.045801401138305664, 'timestamp': '2025-09-10 02:33:51.115555', 'step': 6637, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 128], 'flops': 3797092544000}, 'timestamp': '2025-09-10 02:33:51.146186', 'step': 6637, 'epoch': 1} {'type': 'loss', 'content': 0.11545945703983307, 'timestamp': '2025-09-10 02:33:51.148502', 'step': 6638, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 128], 'flops': 3797092544000}, 'timestamp': '2025-09-10 02:33:51.178305', 'step': 6638, 'epoch': 1} {'type': 'loss', 'content': 0.16280724108219147, 'timestamp': '2025-09-10 02:33:51.180628', 'step': 6639, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 2847885091968}, 'timestamp': '2025-09-10 02:33:51.210864', 'step': 6639, 'epoch': 1} {'type': 'loss', 'content': 0.15325339138507843, 'timestamp': '2025-09-10 02:33:51.234864', 'step': 6640, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 128], 'flops': 3797092544000}, 'timestamp': '2025-09-10 02:33:51.265163', 'step': 6640, 'epoch': 1} {'type': 'loss', 'content': 0.2658711075782776, 'timestamp': '2025-09-10 02:33:51.267692', 'step': 6641, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 3322488817984}, 'timestamp': '2025-09-10 02:33:51.299030', 'step': 6641, 'epoch': 1} {'type': 'loss', 'content': 0.11858683824539185, 'timestamp': '2025-09-10 02:33:51.301566', 'step': 6642, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 176], 'flops': 5220903722048}, 'timestamp': '2025-09-10 02:33:51.333081', 'step': 6642, 'epoch': 1} {'type': 'loss', 'content': 0.23064862191677094, 'timestamp': '2025-09-10 02:33:51.337355', 'step': 6643, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 3322488817984}, 'timestamp': '2025-09-10 02:33:51.367536', 'step': 6643, 'epoch': 1} {'type': 'loss', 'content': 0.160427063703537, 'timestamp': '2025-09-10 02:33:51.391219', 'step': 6644, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 2847885091968}, 'timestamp': '2025-09-10 02:33:51.421177', 'step': 6644, 'epoch': 1} {'type': 'loss', 'content': 0.10041961073875427, 'timestamp': '2025-09-10 02:33:51.423766', 'step': 6645, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 2847885091968}, 'timestamp': '2025-09-10 02:33:51.454287', 'step': 6645, 'epoch': 1} {'type': 'loss', 'content': 0.10552901029586792, 'timestamp': '2025-09-10 02:33:51.456802', 'step': 6646, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 80], 'flops': 2373281365952}, 'timestamp': '2025-09-10 02:33:51.486564', 'step': 6646, 'epoch': 1} {'type': 'loss', 'content': 0.11878526210784912, 'timestamp': '2025-09-10 02:33:51.488785', 'step': 6647, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 2847885091968}, 'timestamp': '2025-09-10 02:33:51.518544', 'step': 6647, 'epoch': 1} {'type': 'loss', 'content': 0.09273602813482285, 'timestamp': '2025-09-10 02:33:51.542348', 'step': 6648, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 160], 'flops': 4746299996032}, 'timestamp': '2025-09-10 02:33:51.575769', 'step': 6648, 'epoch': 1} {'type': 'loss', 'content': 0.24880710244178772, 'timestamp': '2025-09-10 02:33:51.578110', 'step': 6649, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 3322488817984}, 'timestamp': '2025-09-10 02:33:51.607931', 'step': 6649, 'epoch': 1} {'type': 'loss', 'content': 0.21119438111782074, 'timestamp': '2025-09-10 02:33:51.610189', 'step': 6650, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 2847885091968}, 'timestamp': '2025-09-10 02:33:51.640215', 'step': 6650, 'epoch': 1} {'type': 'loss', 'content': 0.09101380407810211, 'timestamp': '2025-09-10 02:33:51.643392', 'step': 6651, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 80], 'flops': 2373281365952}, 'timestamp': '2025-09-10 02:33:51.673463', 'step': 6651, 'epoch': 1} {'type': 'loss', 'content': 0.22297132015228271, 'timestamp': '2025-09-10 02:33:51.696983', 'step': 6652, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 3322488817984}, 'timestamp': '2025-09-10 02:33:51.730194', 'step': 6652, 'epoch': 1} {'type': 'loss', 'content': 0.223028302192688, 'timestamp': '2025-09-10 02:33:51.733096', 'step': 6653, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 128], 'flops': 3797092544000}, 'timestamp': '2025-09-10 02:33:51.764265', 'step': 6653, 'epoch': 1} {'type': 'loss', 'content': 0.11073105782270432, 'timestamp': '2025-09-10 02:33:51.768083', 'step': 6654, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 2847885091968}, 'timestamp': '2025-09-10 02:33:51.800623', 'step': 6654, 'epoch': 1} {'type': 'loss', 'content': 0.1762273758649826, 'timestamp': '2025-09-10 02:33:51.803341', 'step': 6655, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 3322488817984}, 'timestamp': '2025-09-10 02:33:51.836605', 'step': 6655, 'epoch': 1} {'type': 'loss', 'content': 0.17604903876781464, 'timestamp': '2025-09-10 02:33:51.860433', 'step': 6656, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 128], 'flops': 3797092544000}, 'timestamp': '2025-09-10 02:33:51.895670', 'step': 6656, 'epoch': 1} {'type': 'loss', 'content': 0.13991627097129822, 'timestamp': '2025-09-10 02:33:51.898350', 'step': 6657, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 144], 'flops': 4271696270016}, 'timestamp': '2025-09-10 02:33:51.928048', 'step': 6657, 'epoch': 1} {'type': 'loss', 'content': 0.20105378329753876, 'timestamp': '2025-09-10 02:33:51.930578', 'step': 6658, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 2847885091968}, 'timestamp': '2025-09-10 02:33:51.960377', 'step': 6658, 'epoch': 1} {'type': 'loss', 'content': 0.09508354961872101, 'timestamp': '2025-09-10 02:33:51.962851', 'step': 6659, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 3322488817984}, 'timestamp': '2025-09-10 02:33:51.995298', 'step': 6659, 'epoch': 1} {'type': 'loss', 'content': 0.18965819478034973, 'timestamp': '2025-09-10 02:33:52.019206', 'step': 6660, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 2847885091968}, 'timestamp': '2025-09-10 02:33:52.048974', 'step': 6660, 'epoch': 1} {'type': 'loss', 'content': 0.048628926277160645, 'timestamp': '2025-09-10 02:33:52.051071', 'step': 6661, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 144], 'flops': 4271696270016}, 'timestamp': '2025-09-10 02:33:52.082435', 'step': 6661, 'epoch': 1} {'type': 'loss', 'content': 0.11366002261638641, 'timestamp': '2025-09-10 02:33:52.085019', 'step': 6662, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 2847885091968}, 'timestamp': '2025-09-10 02:33:52.115570', 'step': 6662, 'epoch': 1} {'type': 'loss', 'content': 0.10900863260030746, 'timestamp': '2025-09-10 02:33:52.117844', 'step': 6663, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 3322488817984}, 'timestamp': '2025-09-10 02:33:52.148219', 'step': 6663, 'epoch': 1} {'type': 'loss', 'content': 0.16840742528438568, 'timestamp': '2025-09-10 02:33:52.171974', 'step': 6664, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 2847885091968}, 'timestamp': '2025-09-10 02:33:52.202536', 'step': 6664, 'epoch': 1} {'type': 'loss', 'content': 0.1576092541217804, 'timestamp': '2025-09-10 02:33:52.205733', 'step': 6665, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 128], 'flops': 3797092544000}, 'timestamp': '2025-09-10 02:33:52.239297', 'step': 6665, 'epoch': 1} {'type': 'loss', 'content': 0.09321045130491257, 'timestamp': '2025-09-10 02:33:52.242175', 'step': 6666, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 128], 'flops': 3797092544000}, 'timestamp': '2025-09-10 02:33:52.272361', 'step': 6666, 'epoch': 1} {'type': 'loss', 'content': 0.3340897560119629, 'timestamp': '2025-09-10 02:33:52.274970', 'step': 6667, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 3322488817984}, 'timestamp': '2025-09-10 02:33:52.305198', 'step': 6667, 'epoch': 1} {'type': 'loss', 'content': 0.16230657696723938, 'timestamp': '2025-09-10 02:33:52.329285', 'step': 6668, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 128], 'flops': 3797092544000}, 'timestamp': '2025-09-10 02:33:52.359737', 'step': 6668, 'epoch': 1} {'type': 'loss', 'content': 0.1334465593099594, 'timestamp': '2025-09-10 02:33:52.363406', 'step': 6669, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 3322488817984}, 'timestamp': '2025-09-10 02:33:52.394492', 'step': 6669, 'epoch': 1} {'type': 'loss', 'content': 0.09489750862121582, 'timestamp': '2025-09-10 02:33:52.397338', 'step': 6670, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 2847885091968}, 'timestamp': '2025-09-10 02:33:52.429302', 'step': 6670, 'epoch': 1} {'type': 'loss', 'content': 0.04319079592823982, 'timestamp': '2025-09-10 02:33:52.431824', 'step': 6671, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 3322488817984}, 'timestamp': '2025-09-10 02:33:52.462175', 'step': 6671, 'epoch': 1} {'type': 'loss', 'content': 0.11292858421802521, 'timestamp': '2025-09-10 02:33:52.486492', 'step': 6672, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 128], 'flops': 3797092544000}, 'timestamp': '2025-09-10 02:33:52.516948', 'step': 6672, 'epoch': 1} {'type': 'loss', 'content': 0.2115146517753601, 'timestamp': '2025-09-10 02:33:52.519363', 'step': 6673, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 128], 'flops': 3797092544000}, 'timestamp': '2025-09-10 02:33:52.549011', 'step': 6673, 'epoch': 1} {'type': 'loss', 'content': 0.20554491877555847, 'timestamp': '2025-09-10 02:33:52.551401', 'step': 6674, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 128], 'flops': 3797092544000}, 'timestamp': '2025-09-10 02:33:52.589025', 'step': 6674, 'epoch': 1} {'type': 'loss', 'content': 0.20334254205226898, 'timestamp': '2025-09-10 02:33:52.591350', 'step': 6675, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 128], 'flops': 3797092544000}, 'timestamp': '2025-09-10 02:33:52.621442', 'step': 6675, 'epoch': 1} {'type': 'loss', 'content': 0.081781767308712, 'timestamp': '2025-09-10 02:33:52.645036', 'step': 6676, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 80], 'flops': 2373281365952}, 'timestamp': '2025-09-10 02:33:52.676129', 'step': 6676, 'epoch': 1} {'type': 'loss', 'content': 0.10648728162050247, 'timestamp': '2025-09-10 02:33:52.678541', 'step': 6677, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 3322488817984}, 'timestamp': '2025-09-10 02:33:52.709027', 'step': 6677, 'epoch': 1} {'type': 'loss', 'content': 0.14887464046478271, 'timestamp': '2025-09-10 02:33:52.711301', 'step': 6678, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 3322488817984}, 'timestamp': '2025-09-10 02:33:52.740780', 'step': 6678, 'epoch': 1} {'type': 'loss', 'content': 0.2235025018453598, 'timestamp': '2025-09-10 02:33:52.743446', 'step': 6679, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 144], 'flops': 4271696270016}, 'timestamp': '2025-09-10 02:33:52.774168', 'step': 6679, 'epoch': 1} {'type': 'loss', 'content': 0.18692249059677124, 'timestamp': '2025-09-10 02:33:52.797711', 'step': 6680, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 128], 'flops': 3797092544000}, 'timestamp': '2025-09-10 02:33:52.828408', 'step': 6680, 'epoch': 1} {'type': 'loss', 'content': 0.07691849768161774, 'timestamp': '2025-09-10 02:33:52.830892', 'step': 6681, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 3322488817984}, 'timestamp': '2025-09-10 02:33:52.862001', 'step': 6681, 'epoch': 1} {'type': 'loss', 'content': 0.16040799021720886, 'timestamp': '2025-09-10 02:33:52.870490', 'step': 6682, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 2847885091968}, 'timestamp': '2025-09-10 02:33:52.913604', 'step': 6682, 'epoch': 1} {'type': 'loss', 'content': 0.22502215206623077, 'timestamp': '2025-09-10 02:33:52.921495', 'step': 6683, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 128], 'flops': 3797092544000}, 'timestamp': '2025-09-10 02:33:52.961046', 'step': 6683, 'epoch': 1} {'type': 'loss', 'content': 0.17777737975120544, 'timestamp': '2025-09-10 02:33:52.985562', 'step': 6684, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 80], 'flops': 2373281365952}, 'timestamp': '2025-09-10 02:33:53.016385', 'step': 6684, 'epoch': 1} {'type': 'loss', 'content': 0.18203027546405792, 'timestamp': '2025-09-10 02:33:53.018705', 'step': 6685, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 3322488817984}, 'timestamp': '2025-09-10 02:33:53.048557', 'step': 6685, 'epoch': 1} {'type': 'loss', 'content': 0.09662088006734848, 'timestamp': '2025-09-10 02:33:53.050999', 'step': 6686, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 3322488817984}, 'timestamp': '2025-09-10 02:33:53.081115', 'step': 6686, 'epoch': 1} {'type': 'loss', 'content': 0.1360631287097931, 'timestamp': '2025-09-10 02:33:53.083600', 'step': 6687, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 3322488817984}, 'timestamp': '2025-09-10 02:33:53.113823', 'step': 6687, 'epoch': 1} {'type': 'loss', 'content': 0.17443296313285828, 'timestamp': '2025-09-10 02:33:53.137463', 'step': 6688, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 3322488817984}, 'timestamp': '2025-09-10 02:33:53.167961', 'step': 6688, 'epoch': 1} {'type': 'loss', 'content': 0.23818339407444, 'timestamp': '2025-09-10 02:33:53.170423', 'step': 6689, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 144], 'flops': 4271696270016}, 'timestamp': '2025-09-10 02:33:53.202133', 'step': 6689, 'epoch': 1} {'type': 'loss', 'content': 0.20259182155132294, 'timestamp': '2025-09-10 02:33:53.204594', 'step': 6690, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 2847885091968}, 'timestamp': '2025-09-10 02:33:53.234387', 'step': 6690, 'epoch': 1} {'type': 'loss', 'content': 0.157718688249588, 'timestamp': '2025-09-10 02:33:53.238125', 'step': 6691, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 3322488817984}, 'timestamp': '2025-09-10 02:33:53.271032', 'step': 6691, 'epoch': 1} {'type': 'loss', 'content': 0.12690699100494385, 'timestamp': '2025-09-10 02:33:53.296205', 'step': 6692, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 3322488817984}, 'timestamp': '2025-09-10 02:33:53.327017', 'step': 6692, 'epoch': 1} {'type': 'loss', 'content': 0.18017302453517914, 'timestamp': '2025-09-10 02:33:53.329365', 'step': 6693, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 128], 'flops': 3797092544000}, 'timestamp': '2025-09-10 02:33:53.359089', 'step': 6693, 'epoch': 1} {'type': 'loss', 'content': 0.1118040457367897, 'timestamp': '2025-09-10 02:33:53.361311', 'step': 6694, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 2847885091968}, 'timestamp': '2025-09-10 02:33:53.390691', 'step': 6694, 'epoch': 1} {'type': 'loss', 'content': 0.13302861154079437, 'timestamp': '2025-09-10 02:33:53.393093', 'step': 6695, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 80], 'flops': 2373281365952}, 'timestamp': '2025-09-10 02:33:53.421978', 'step': 6695, 'epoch': 1} {'type': 'loss', 'content': 0.11712747067213058, 'timestamp': '2025-09-10 02:33:53.445769', 'step': 6696, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 2847885091968}, 'timestamp': '2025-09-10 02:33:53.476220', 'step': 6696, 'epoch': 1} {'type': 'loss', 'content': 0.16445322334766388, 'timestamp': '2025-09-10 02:33:53.479252', 'step': 6697, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 128], 'flops': 3797092544000}, 'timestamp': '2025-09-10 02:33:53.511934', 'step': 6697, 'epoch': 1} {'type': 'loss', 'content': 0.18505430221557617, 'timestamp': '2025-09-10 02:33:53.515246', 'step': 6698, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 144], 'flops': 4271696270016}, 'timestamp': '2025-09-10 02:33:53.546724', 'step': 6698, 'epoch': 1} {'type': 'loss', 'content': 0.17971666157245636, 'timestamp': '2025-09-10 02:33:53.549488', 'step': 6699, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 128], 'flops': 3797092544000}, 'timestamp': '2025-09-10 02:33:53.579714', 'step': 6699, 'epoch': 1} {'type': 'loss', 'content': 0.11376003175973892, 'timestamp': '2025-09-10 02:33:53.603326', 'step': 6700, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 2847885091968}, 'timestamp': '2025-09-10 02:33:53.634286', 'step': 6700, 'epoch': 1} {'type': 'loss', 'content': 0.2030334770679474, 'timestamp': '2025-09-10 02:33:53.637145', 'step': 6701, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 128], 'flops': 3797092544000}, 'timestamp': '2025-09-10 02:33:53.668173', 'step': 6701, 'epoch': 1} {'type': 'loss', 'content': 0.23975040018558502, 'timestamp': '2025-09-10 02:33:53.670555', 'step': 6702, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 144], 'flops': 4271696270016}, 'timestamp': '2025-09-10 02:33:53.703147', 'step': 6702, 'epoch': 1} {'type': 'loss', 'content': 0.13344430923461914, 'timestamp': '2025-09-10 02:33:53.706078', 'step': 6703, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 3322488817984}, 'timestamp': '2025-09-10 02:33:53.737858', 'step': 6703, 'epoch': 1} {'type': 'loss', 'content': 0.13332554697990417, 'timestamp': '2025-09-10 02:33:53.761500', 'step': 6704, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 2847885091968}, 'timestamp': '2025-09-10 02:33:53.791982', 'step': 6704, 'epoch': 1} {'type': 'loss', 'content': 0.18108361959457397, 'timestamp': '2025-09-10 02:33:53.794156', 'step': 6705, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 176], 'flops': 5220903722048}, 'timestamp': '2025-09-10 02:33:53.824871', 'step': 6705, 'epoch': 1} {'type': 'loss', 'content': 0.1696765422821045, 'timestamp': '2025-09-10 02:33:53.829397', 'step': 6706, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 2847885091968}, 'timestamp': '2025-09-10 02:33:53.859266', 'step': 6706, 'epoch': 1} {'type': 'loss', 'content': 0.1492769867181778, 'timestamp': '2025-09-10 02:33:53.861786', 'step': 6707, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 128], 'flops': 3797092544000}, 'timestamp': '2025-09-10 02:33:53.891483', 'step': 6707, 'epoch': 1} {'type': 'loss', 'content': 0.1416434943675995, 'timestamp': '2025-09-10 02:33:53.915169', 'step': 6708, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 128], 'flops': 3797092544000}, 'timestamp': '2025-09-10 02:33:53.945410', 'step': 6708, 'epoch': 1} {'type': 'loss', 'content': 0.09542803466320038, 'timestamp': '2025-09-10 02:33:53.948177', 'step': 6709, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 128], 'flops': 3797092544000}, 'timestamp': '2025-09-10 02:33:53.978241', 'step': 6709, 'epoch': 1} {'type': 'loss', 'content': 0.24885597825050354, 'timestamp': '2025-09-10 02:33:53.980883', 'step': 6710, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 80], 'flops': 2373281365952}, 'timestamp': '2025-09-10 02:33:54.011140', 'step': 6710, 'epoch': 1} {'type': 'loss', 'content': 0.23624901473522186, 'timestamp': '2025-09-10 02:33:54.013841', 'step': 6711, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 3322488817984}, 'timestamp': '2025-09-10 02:33:54.043646', 'step': 6711, 'epoch': 1} {'type': 'loss', 'content': 0.17323032021522522, 'timestamp': '2025-09-10 02:33:54.067399', 'step': 6712, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 128], 'flops': 3797092544000}, 'timestamp': '2025-09-10 02:33:54.097293', 'step': 6712, 'epoch': 1} {'type': 'loss', 'content': 0.11162865161895752, 'timestamp': '2025-09-10 02:33:54.099667', 'step': 6713, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 2847885091968}, 'timestamp': '2025-09-10 02:33:54.130092', 'step': 6713, 'epoch': 1} {'type': 'loss', 'content': 0.17210698127746582, 'timestamp': '2025-09-10 02:33:54.132576', 'step': 6714, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 144], 'flops': 4271696270016}, 'timestamp': '2025-09-10 02:33:54.164260', 'step': 6714, 'epoch': 1} {'type': 'loss', 'content': 0.10206575691699982, 'timestamp': '2025-09-10 02:33:54.166820', 'step': 6715, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 3322488817984}, 'timestamp': '2025-09-10 02:33:54.197154', 'step': 6715, 'epoch': 1} {'type': 'loss', 'content': 0.16167718172073364, 'timestamp': '2025-09-10 02:33:54.220945', 'step': 6716, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 224], 'flops': 6644714900096}, 'timestamp': '2025-09-10 02:33:54.252329', 'step': 6716, 'epoch': 1} {'type': 'loss', 'content': 0.16998016834259033, 'timestamp': '2025-09-10 02:33:54.257551', 'step': 6717, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 3322488817984}, 'timestamp': '2025-09-10 02:33:54.287813', 'step': 6717, 'epoch': 1} {'type': 'loss', 'content': 0.13624924421310425, 'timestamp': '2025-09-10 02:33:54.290416', 'step': 6718, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 144], 'flops': 4271696270016}, 'timestamp': '2025-09-10 02:33:54.321075', 'step': 6718, 'epoch': 1} {'type': 'loss', 'content': 0.15560154616832733, 'timestamp': '2025-09-10 02:33:54.323924', 'step': 6719, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 2847885091968}, 'timestamp': '2025-09-10 02:33:54.354546', 'step': 6719, 'epoch': 1} {'type': 'loss', 'content': 0.14164429903030396, 'timestamp': '2025-09-10 02:33:54.378258', 'step': 6720, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 2847885091968}, 'timestamp': '2025-09-10 02:33:54.407605', 'step': 6720, 'epoch': 1} {'type': 'loss', 'content': 0.20431718230247498, 'timestamp': '2025-09-10 02:33:54.409984', 'step': 6721, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 128], 'flops': 3797092544000}, 'timestamp': '2025-09-10 02:33:54.439374', 'step': 6721, 'epoch': 1} {'type': 'loss', 'content': 0.28392669558525085, 'timestamp': '2025-09-10 02:33:54.441528', 'step': 6722, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 3322488817984}, 'timestamp': '2025-09-10 02:33:54.471602', 'step': 6722, 'epoch': 1} {'type': 'loss', 'content': 0.19333133101463318, 'timestamp': '2025-09-10 02:33:54.473781', 'step': 6723, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 2847885091968}, 'timestamp': '2025-09-10 02:33:54.503522', 'step': 6723, 'epoch': 1} {'type': 'loss', 'content': 0.10819108784198761, 'timestamp': '2025-09-10 02:33:54.527376', 'step': 6724, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 2847885091968}, 'timestamp': '2025-09-10 02:33:54.557519', 'step': 6724, 'epoch': 1} {'type': 'loss', 'content': 0.1152234598994255, 'timestamp': '2025-09-10 02:33:54.559778', 'step': 6725, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 3322488817984}, 'timestamp': '2025-09-10 02:33:54.592050', 'step': 6725, 'epoch': 1} {'type': 'loss', 'content': 0.10075914114713669, 'timestamp': '2025-09-10 02:33:54.594643', 'step': 6726, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 128], 'flops': 3797092544000}, 'timestamp': '2025-09-10 02:33:54.624011', 'step': 6726, 'epoch': 1} {'type': 'loss', 'content': 0.10778255760669708, 'timestamp': '2025-09-10 02:33:54.626261', 'step': 6727, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 2847885091968}, 'timestamp': '2025-09-10 02:33:54.655978', 'step': 6727, 'epoch': 1} {'type': 'loss', 'content': 0.09932055324316025, 'timestamp': '2025-09-10 02:33:54.679737', 'step': 6728, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 128], 'flops': 3797092544000}, 'timestamp': '2025-09-10 02:33:54.710626', 'step': 6728, 'epoch': 1} {'type': 'loss', 'content': 0.16723358631134033, 'timestamp': '2025-09-10 02:33:54.713585', 'step': 6729, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 2847885091968}, 'timestamp': '2025-09-10 02:33:54.744537', 'step': 6729, 'epoch': 1} {'type': 'loss', 'content': 0.2182615101337433, 'timestamp': '2025-09-10 02:33:54.747186', 'step': 6730, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 128], 'flops': 3797092544000}, 'timestamp': '2025-09-10 02:33:54.778520', 'step': 6730, 'epoch': 1} {'type': 'loss', 'content': 0.21889053285121918, 'timestamp': '2025-09-10 02:33:54.781205', 'step': 6731, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 2847885091968}, 'timestamp': '2025-09-10 02:33:54.811145', 'step': 6731, 'epoch': 1} {'type': 'loss', 'content': 0.23741643130779266, 'timestamp': '2025-09-10 02:33:54.835232', 'step': 6732, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 2847885091968}, 'timestamp': '2025-09-10 02:33:54.865714', 'step': 6732, 'epoch': 1} {'type': 'loss', 'content': 0.1105312630534172, 'timestamp': '2025-09-10 02:33:54.868795', 'step': 6733, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 3322488817984}, 'timestamp': '2025-09-10 02:33:54.899097', 'step': 6733, 'epoch': 1} {'type': 'loss', 'content': 0.17440393567085266, 'timestamp': '2025-09-10 02:33:54.902004', 'step': 6734, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 3322488817984}, 'timestamp': '2025-09-10 02:33:54.933738', 'step': 6734, 'epoch': 1} {'type': 'loss', 'content': 0.06543842703104019, 'timestamp': '2025-09-10 02:33:54.936315', 'step': 6735, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 3322488817984}, 'timestamp': '2025-09-10 02:33:54.969472', 'step': 6735, 'epoch': 1} {'type': 'loss', 'content': 0.1586328148841858, 'timestamp': '2025-09-10 02:33:54.996622', 'step': 6736, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 3322488817984}, 'timestamp': '2025-09-10 02:33:55.026408', 'step': 6736, 'epoch': 1} {'type': 'loss', 'content': 0.17906400561332703, 'timestamp': '2025-09-10 02:33:55.029703', 'step': 6737, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 3322488817984}, 'timestamp': '2025-09-10 02:33:55.060045', 'step': 6737, 'epoch': 1} {'type': 'loss', 'content': 0.14666864275932312, 'timestamp': '2025-09-10 02:33:55.062269', 'step': 6738, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 2847885091968}, 'timestamp': '2025-09-10 02:33:55.091456', 'step': 6738, 'epoch': 1} {'type': 'loss', 'content': 0.1699158102273941, 'timestamp': '2025-09-10 02:33:55.093631', 'step': 6739, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 128], 'flops': 3797092544000}, 'timestamp': '2025-09-10 02:33:55.123925', 'step': 6739, 'epoch': 1} {'type': 'loss', 'content': 0.13462676107883453, 'timestamp': '2025-09-10 02:33:55.147421', 'step': 6740, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 3322488817984}, 'timestamp': '2025-09-10 02:33:55.179894', 'step': 6740, 'epoch': 1} {'type': 'loss', 'content': 0.1950012594461441, 'timestamp': '2025-09-10 02:33:55.183198', 'step': 6741, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 160], 'flops': 4746299996032}, 'timestamp': '2025-09-10 02:33:55.217928', 'step': 6741, 'epoch': 1} {'type': 'loss', 'content': 0.14480653405189514, 'timestamp': '2025-09-10 02:33:55.222684', 'step': 6742, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 128], 'flops': 3797092544000}, 'timestamp': '2025-09-10 02:33:55.252981', 'step': 6742, 'epoch': 1} {'type': 'loss', 'content': 0.14343231916427612, 'timestamp': '2025-09-10 02:33:55.256772', 'step': 6743, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 3322488817984}, 'timestamp': '2025-09-10 02:33:55.287348', 'step': 6743, 'epoch': 1} {'type': 'loss', 'content': 0.12277244031429291, 'timestamp': '2025-09-10 02:33:55.311194', 'step': 6744, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 128], 'flops': 3797092544000}, 'timestamp': '2025-09-10 02:33:55.349509', 'step': 6744, 'epoch': 1} {'type': 'loss', 'content': 0.12742957472801208, 'timestamp': '2025-09-10 02:33:55.355266', 'step': 6745, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 3322488817984}, 'timestamp': '2025-09-10 02:33:55.388331', 'step': 6745, 'epoch': 1} {'type': 'loss', 'content': 0.13057367503643036, 'timestamp': '2025-09-10 02:33:55.390596', 'step': 6746, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 128], 'flops': 3797092544000}, 'timestamp': '2025-09-10 02:33:55.421135', 'step': 6746, 'epoch': 1} {'type': 'loss', 'content': 0.1654507964849472, 'timestamp': '2025-09-10 02:33:55.423361', 'step': 6747, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 128], 'flops': 3797092544000}, 'timestamp': '2025-09-10 02:33:55.462180', 'step': 6747, 'epoch': 1} {'type': 'loss', 'content': 0.18953338265419006, 'timestamp': '2025-09-10 02:33:55.485940', 'step': 6748, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 3322488817984}, 'timestamp': '2025-09-10 02:33:55.516763', 'step': 6748, 'epoch': 1} {'type': 'loss', 'content': 0.1264081746339798, 'timestamp': '2025-09-10 02:33:55.519361', 'step': 6749, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 128], 'flops': 3797092544000}, 'timestamp': '2025-09-10 02:33:55.548798', 'step': 6749, 'epoch': 1} {'type': 'loss', 'content': 0.24365860223770142, 'timestamp': '2025-09-10 02:33:55.551984', 'step': 6750, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 3322488817984}, 'timestamp': '2025-09-10 02:33:55.585573', 'step': 6750, 'epoch': 1} {'type': 'loss', 'content': 0.09404873102903366, 'timestamp': '2025-09-10 02:33:55.588688', 'step': 6751, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 3322488817984}, 'timestamp': '2025-09-10 02:33:55.623912', 'step': 6751, 'epoch': 1} {'type': 'loss', 'content': 0.18238839507102966, 'timestamp': '2025-09-10 02:33:55.647687', 'step': 6752, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 128], 'flops': 3797092544000}, 'timestamp': '2025-09-10 02:33:55.678137', 'step': 6752, 'epoch': 1} {'type': 'loss', 'content': 0.16609445214271545, 'timestamp': '2025-09-10 02:33:55.681462', 'step': 6753, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 128], 'flops': 3797092544000}, 'timestamp': '2025-09-10 02:33:55.722569', 'step': 6753, 'epoch': 1} {'type': 'loss', 'content': 0.11334235966205597, 'timestamp': '2025-09-10 02:33:55.726194', 'step': 6754, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 3322488817984}, 'timestamp': '2025-09-10 02:33:55.758665', 'step': 6754, 'epoch': 1} {'type': 'loss', 'content': 0.15861588716506958, 'timestamp': '2025-09-10 02:33:55.762378', 'step': 6755, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 80], 'flops': 2373281365952}, 'timestamp': '2025-09-10 02:33:55.795320', 'step': 6755, 'epoch': 1} {'type': 'loss', 'content': 0.14680485427379608, 'timestamp': '2025-09-10 02:33:55.819072', 'step': 6756, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 2847885091968}, 'timestamp': '2025-09-10 02:33:55.857642', 'step': 6756, 'epoch': 1} {'type': 'loss', 'content': 0.11758710443973541, 'timestamp': '2025-09-10 02:33:55.860240', 'step': 6757, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 2847885091968}, 'timestamp': '2025-09-10 02:33:55.889992', 'step': 6757, 'epoch': 1} {'type': 'loss', 'content': 0.045472558587789536, 'timestamp': '2025-09-10 02:33:55.895538', 'step': 6758, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 2847885091968}, 'timestamp': '2025-09-10 02:33:55.926076', 'step': 6758, 'epoch': 1} {'type': 'loss', 'content': 0.13178545236587524, 'timestamp': '2025-09-10 02:33:55.929123', 'step': 6759, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 2847885091968}, 'timestamp': '2025-09-10 02:33:55.962576', 'step': 6759, 'epoch': 1} {'type': 'loss', 'content': 0.10707703977823257, 'timestamp': '2025-09-10 02:33:55.987717', 'step': 6760, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 3322488817984}, 'timestamp': '2025-09-10 02:33:56.019855', 'step': 6760, 'epoch': 1} {'type': 'loss', 'content': 0.14082956314086914, 'timestamp': '2025-09-10 02:33:56.023114', 'step': 6761, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 2847885091968}, 'timestamp': '2025-09-10 02:33:56.053853', 'step': 6761, 'epoch': 1} {'type': 'loss', 'content': 0.16825172305107117, 'timestamp': '2025-09-10 02:33:56.057275', 'step': 6762, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 144], 'flops': 4271696270016}, 'timestamp': '2025-09-10 02:33:56.087715', 'step': 6762, 'epoch': 1} {'type': 'loss', 'content': 0.13086658716201782, 'timestamp': '2025-09-10 02:33:56.090681', 'step': 6763, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 3322488817984}, 'timestamp': '2025-09-10 02:33:56.122923', 'step': 6763, 'epoch': 1} {'type': 'loss', 'content': 0.0944906696677208, 'timestamp': '2025-09-10 02:33:56.147786', 'step': 6764, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 3322488817984}, 'timestamp': '2025-09-10 02:33:56.178545', 'step': 6764, 'epoch': 1} {'type': 'loss', 'content': 0.13469311594963074, 'timestamp': '2025-09-10 02:33:56.181178', 'step': 6765, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 128], 'flops': 3797092544000}, 'timestamp': '2025-09-10 02:33:56.211402', 'step': 6765, 'epoch': 1} {'type': 'loss', 'content': 0.19695214927196503, 'timestamp': '2025-09-10 02:33:56.214098', 'step': 6766, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 3322488817984}, 'timestamp': '2025-09-10 02:33:56.243816', 'step': 6766, 'epoch': 1} {'type': 'loss', 'content': 0.16049449145793915, 'timestamp': '2025-09-10 02:33:56.246767', 'step': 6767, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 3322488817984}, 'timestamp': '2025-09-10 02:33:56.276923', 'step': 6767, 'epoch': 1} {'type': 'loss', 'content': 0.1943357139825821, 'timestamp': '2025-09-10 02:33:56.300527', 'step': 6768, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 3322488817984}, 'timestamp': '2025-09-10 02:33:56.330217', 'step': 6768, 'epoch': 1} {'type': 'loss', 'content': 0.07972543686628342, 'timestamp': '2025-09-10 02:33:56.332739', 'step': 6769, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 3322488817984}, 'timestamp': '2025-09-10 02:33:56.362585', 'step': 6769, 'epoch': 1} {'type': 'loss', 'content': 0.22836162149906158, 'timestamp': '2025-09-10 02:33:56.364952', 'step': 6770, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 144], 'flops': 4271696270016}, 'timestamp': '2025-09-10 02:33:56.395167', 'step': 6770, 'epoch': 1} {'type': 'loss', 'content': 0.10006449371576309, 'timestamp': '2025-09-10 02:33:56.397763', 'step': 6771, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 2847885091968}, 'timestamp': '2025-09-10 02:33:56.427262', 'step': 6771, 'epoch': 1} {'type': 'loss', 'content': 0.1759585589170456, 'timestamp': '2025-09-10 02:33:56.450894', 'step': 6772, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 160], 'flops': 4746299996032}, 'timestamp': '2025-09-10 02:33:56.481315', 'step': 6772, 'epoch': 1} {'type': 'loss', 'content': 0.10352370142936707, 'timestamp': '2025-09-10 02:33:56.483415', 'step': 6773, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 128], 'flops': 3797092544000}, 'timestamp': '2025-09-10 02:33:56.513298', 'step': 6773, 'epoch': 1} {'type': 'loss', 'content': 0.19687169790267944, 'timestamp': '2025-09-10 02:33:56.517084', 'step': 6774, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 128], 'flops': 3797092544000}, 'timestamp': '2025-09-10 02:33:56.546465', 'step': 6774, 'epoch': 1} {'type': 'loss', 'content': 0.05205599591135979, 'timestamp': '2025-09-10 02:33:56.549123', 'step': 6775, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 128], 'flops': 3797092544000}, 'timestamp': '2025-09-10 02:33:56.578856', 'step': 6775, 'epoch': 1} {'type': 'loss', 'content': 0.09268849343061447, 'timestamp': '2025-09-10 02:33:56.603874', 'step': 6776, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 3322488817984}, 'timestamp': '2025-09-10 02:33:56.634277', 'step': 6776, 'epoch': 1} {'type': 'loss', 'content': 0.10151255875825882, 'timestamp': '2025-09-10 02:33:56.636657', 'step': 6777, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 3322488817984}, 'timestamp': '2025-09-10 02:33:56.666408', 'step': 6777, 'epoch': 1} {'type': 'loss', 'content': 0.1812439113855362, 'timestamp': '2025-09-10 02:33:56.668777', 'step': 6778, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 128], 'flops': 3797092544000}, 'timestamp': '2025-09-10 02:33:56.698162', 'step': 6778, 'epoch': 1} {'type': 'loss', 'content': 0.14650215208530426, 'timestamp': '2025-09-10 02:33:56.700764', 'step': 6779, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 3322488817984}, 'timestamp': '2025-09-10 02:33:56.730946', 'step': 6779, 'epoch': 1} {'type': 'loss', 'content': 0.19142085313796997, 'timestamp': '2025-09-10 02:33:56.754675', 'step': 6780, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 2847885091968}, 'timestamp': '2025-09-10 02:33:56.784382', 'step': 6780, 'epoch': 1} {'type': 'loss', 'content': 0.07426121085882187, 'timestamp': '2025-09-10 02:33:56.787336', 'step': 6781, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 2847885091968}, 'timestamp': '2025-09-10 02:33:56.816860', 'step': 6781, 'epoch': 1} {'type': 'loss', 'content': 0.09163738042116165, 'timestamp': '2025-09-10 02:33:56.820270', 'step': 6782, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 128], 'flops': 3797092544000}, 'timestamp': '2025-09-10 02:33:56.849626', 'step': 6782, 'epoch': 1} {'type': 'loss', 'content': 0.20425833761692047, 'timestamp': '2025-09-10 02:33:56.852252', 'step': 6783, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 3322488817984}, 'timestamp': '2025-09-10 02:33:56.882280', 'step': 6783, 'epoch': 1} {'type': 'loss', 'content': 0.1315954029560089, 'timestamp': '2025-09-10 02:33:56.906055', 'step': 6784, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 2847885091968}, 'timestamp': '2025-09-10 02:33:56.935585', 'step': 6784, 'epoch': 1} {'type': 'loss', 'content': 0.15396539866924286, 'timestamp': '2025-09-10 02:33:56.937872', 'step': 6785, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 2847885091968}, 'timestamp': '2025-09-10 02:33:56.967339', 'step': 6785, 'epoch': 1} {'type': 'loss', 'content': 0.09906163811683655, 'timestamp': '2025-09-10 02:33:56.969671', 'step': 6786, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 128], 'flops': 3797092544000}, 'timestamp': '2025-09-10 02:33:56.999972', 'step': 6786, 'epoch': 1} {'type': 'loss', 'content': 0.24001885950565338, 'timestamp': '2025-09-10 02:33:57.002315', 'step': 6787, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 3322488817984}, 'timestamp': '2025-09-10 02:33:57.031631', 'step': 6787, 'epoch': 1} {'type': 'loss', 'content': 0.11463744193315506, 'timestamp': '2025-09-10 02:33:57.055217', 'step': 6788, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 2847885091968}, 'timestamp': '2025-09-10 02:33:57.085456', 'step': 6788, 'epoch': 1} {'type': 'loss', 'content': 0.12947776913642883, 'timestamp': '2025-09-10 02:33:57.087980', 'step': 6789, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 3322488817984}, 'timestamp': '2025-09-10 02:33:57.117650', 'step': 6789, 'epoch': 1} {'type': 'loss', 'content': 0.2106972485780716, 'timestamp': '2025-09-10 02:33:57.120232', 'step': 6790, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 3322488817984}, 'timestamp': '2025-09-10 02:33:57.151529', 'step': 6790, 'epoch': 1} {'type': 'loss', 'content': 0.12295067310333252, 'timestamp': '2025-09-10 02:33:57.153970', 'step': 6791, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 128], 'flops': 3797092544000}, 'timestamp': '2025-09-10 02:33:57.184187', 'step': 6791, 'epoch': 1} {'type': 'loss', 'content': 0.15411798655986786, 'timestamp': '2025-09-10 02:33:57.208246', 'step': 6792, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 144], 'flops': 4271696270016}, 'timestamp': '2025-09-10 02:33:57.238795', 'step': 6792, 'epoch': 1} {'type': 'loss', 'content': 0.09665653109550476, 'timestamp': '2025-09-10 02:33:57.240960', 'step': 6793, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 80], 'flops': 2373281365952}, 'timestamp': '2025-09-10 02:33:57.270352', 'step': 6793, 'epoch': 1} {'type': 'loss', 'content': 0.14919957518577576, 'timestamp': '2025-09-10 02:33:57.272903', 'step': 6794, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 2847885091968}, 'timestamp': '2025-09-10 02:33:57.302709', 'step': 6794, 'epoch': 1} {'type': 'loss', 'content': 0.07848803699016571, 'timestamp': '2025-09-10 02:33:57.305073', 'step': 6795, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 3322488817984}, 'timestamp': '2025-09-10 02:33:57.334477', 'step': 6795, 'epoch': 1} {'type': 'loss', 'content': 0.11524466425180435, 'timestamp': '2025-09-10 02:33:57.358348', 'step': 6796, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 2847885091968}, 'timestamp': '2025-09-10 02:33:57.388853', 'step': 6796, 'epoch': 1} {'type': 'loss', 'content': 0.2069968283176422, 'timestamp': '2025-09-10 02:33:57.391188', 'step': 6797, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 3322488817984}, 'timestamp': '2025-09-10 02:33:57.422260', 'step': 6797, 'epoch': 1} {'type': 'loss', 'content': 0.18013831973075867, 'timestamp': '2025-09-10 02:33:57.425082', 'step': 6798, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 3322488817984}, 'timestamp': '2025-09-10 02:33:57.455931', 'step': 6798, 'epoch': 1} {'type': 'loss', 'content': 0.11356331408023834, 'timestamp': '2025-09-10 02:33:57.458109', 'step': 6799, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 128], 'flops': 3797092544000}, 'timestamp': '2025-09-10 02:33:57.487605', 'step': 6799, 'epoch': 1} {'type': 'loss', 'content': 0.23078887164592743, 'timestamp': '2025-09-10 02:33:57.511184', 'step': 6800, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 3322488817984}, 'timestamp': '2025-09-10 02:33:57.541269', 'step': 6800, 'epoch': 1} {'type': 'loss', 'content': 0.1371740698814392, 'timestamp': '2025-09-10 02:33:57.543559', 'step': 6801, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 2847885091968}, 'timestamp': '2025-09-10 02:33:57.573528', 'step': 6801, 'epoch': 1} {'type': 'loss', 'content': 0.2054225653409958, 'timestamp': '2025-09-10 02:33:57.575931', 'step': 6802, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 3322488817984}, 'timestamp': '2025-09-10 02:33:57.615330', 'step': 6802, 'epoch': 1} {'type': 'loss', 'content': 0.19858814775943756, 'timestamp': '2025-09-10 02:33:57.621771', 'step': 6803, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 3322488817984}, 'timestamp': '2025-09-10 02:33:57.656327', 'step': 6803, 'epoch': 1} {'type': 'loss', 'content': 0.12025469541549683, 'timestamp': '2025-09-10 02:33:57.679887', 'step': 6804, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 2847885091968}, 'timestamp': '2025-09-10 02:33:57.711563', 'step': 6804, 'epoch': 1} {'type': 'loss', 'content': 0.19913850724697113, 'timestamp': '2025-09-10 02:33:57.714026', 'step': 6805, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 128], 'flops': 3797092544000}, 'timestamp': '2025-09-10 02:33:57.744161', 'step': 6805, 'epoch': 1} {'type': 'loss', 'content': 0.11440008878707886, 'timestamp': '2025-09-10 02:33:57.746535', 'step': 6806, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 3322488817984}, 'timestamp': '2025-09-10 02:33:57.776759', 'step': 6806, 'epoch': 1} {'type': 'loss', 'content': 0.07414209097623825, 'timestamp': '2025-09-10 02:33:57.779243', 'step': 6807, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 128], 'flops': 3797092544000}, 'timestamp': '2025-09-10 02:33:57.809619', 'step': 6807, 'epoch': 1} {'type': 'loss', 'content': 0.13703255355358124, 'timestamp': '2025-09-10 02:33:57.833799', 'step': 6808, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 128], 'flops': 3797092544000}, 'timestamp': '2025-09-10 02:33:57.863727', 'step': 6808, 'epoch': 1} {'type': 'loss', 'content': 0.12915216386318207, 'timestamp': '2025-09-10 02:33:57.866367', 'step': 6809, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 2847885091968}, 'timestamp': '2025-09-10 02:33:57.896322', 'step': 6809, 'epoch': 1} {'type': 'loss', 'content': 0.19038273394107819, 'timestamp': '2025-09-10 02:33:57.908636', 'step': 6810, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 2847885091968}, 'timestamp': '2025-09-10 02:33:57.951782', 'step': 6810, 'epoch': 1} {'type': 'loss', 'content': 0.11678171157836914, 'timestamp': '2025-09-10 02:33:57.959649', 'step': 6811, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 128], 'flops': 3797092544000}, 'timestamp': '2025-09-10 02:33:57.995828', 'step': 6811, 'epoch': 1} {'type': 'loss', 'content': 0.1731584221124649, 'timestamp': '2025-09-10 02:33:58.022712', 'step': 6812, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 3322488817984}, 'timestamp': '2025-09-10 02:33:58.064129', 'step': 6812, 'epoch': 1} {'type': 'loss', 'content': 0.09429869800806046, 'timestamp': '2025-09-10 02:33:58.068714', 'step': 6813, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 3322488817984}, 'timestamp': '2025-09-10 02:33:58.117326', 'step': 6813, 'epoch': 1} {'type': 'loss', 'content': 0.11461274325847626, 'timestamp': '2025-09-10 02:33:58.123605', 'step': 6814, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 3322488817984}, 'timestamp': '2025-09-10 02:33:58.172644', 'step': 6814, 'epoch': 1} {'type': 'loss', 'content': 0.14418061077594757, 'timestamp': '2025-09-10 02:33:58.183030', 'step': 6815, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 3322488817984}, 'timestamp': '2025-09-10 02:33:58.228956', 'step': 6815, 'epoch': 1} {'type': 'loss', 'content': 0.09757667034864426, 'timestamp': '2025-09-10 02:33:58.257389', 'step': 6816, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 3322488817984}, 'timestamp': '2025-09-10 02:33:58.297758', 'step': 6816, 'epoch': 1} {'type': 'loss', 'content': 0.14855678379535675, 'timestamp': '2025-09-10 02:33:58.301996', 'step': 6817, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 3322488817984}, 'timestamp': '2025-09-10 02:33:58.346297', 'step': 6817, 'epoch': 1} {'type': 'loss', 'content': 0.18624834716320038, 'timestamp': '2025-09-10 02:33:58.351736', 'step': 6818, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 2847885091968}, 'timestamp': '2025-09-10 02:33:58.398769', 'step': 6818, 'epoch': 1} {'type': 'loss', 'content': 0.20485670864582062, 'timestamp': '2025-09-10 02:33:58.406227', 'step': 6819, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 3322488817984}, 'timestamp': '2025-09-10 02:33:58.480142', 'step': 6819, 'epoch': 1} {'type': 'loss', 'content': 0.09518633037805557, 'timestamp': '2025-09-10 02:33:58.518200', 'step': 6820, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 3322488817984}, 'timestamp': '2025-09-10 02:33:58.579066', 'step': 6820, 'epoch': 1} {'type': 'loss', 'content': 0.16500623524188995, 'timestamp': '2025-09-10 02:33:58.588391', 'step': 6821, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 128], 'flops': 3797092544000}, 'timestamp': '2025-09-10 02:33:58.641918', 'step': 6821, 'epoch': 1} {'type': 'loss', 'content': 0.12812171876430511, 'timestamp': '2025-09-10 02:33:58.649473', 'step': 6822, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 160], 'flops': 4746299996032}, 'timestamp': '2025-09-10 02:33:58.694573', 'step': 6822, 'epoch': 1} {'type': 'loss', 'content': 0.12790237367153168, 'timestamp': '2025-09-10 02:33:58.699706', 'step': 6823, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 144], 'flops': 4271696270016}, 'timestamp': '2025-09-10 02:33:58.747251', 'step': 6823, 'epoch': 1} {'type': 'loss', 'content': 0.16203343868255615, 'timestamp': '2025-09-10 02:33:58.779770', 'step': 6824, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 2847885091968}, 'timestamp': '2025-09-10 02:33:58.829763', 'step': 6824, 'epoch': 1} {'type': 'loss', 'content': 0.1629803627729416, 'timestamp': '2025-09-10 02:33:58.849932', 'step': 6825, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 128], 'flops': 3797092544000}, 'timestamp': '2025-09-10 02:33:58.907501', 'step': 6825, 'epoch': 1} {'type': 'loss', 'content': 0.20006296038627625, 'timestamp': '2025-09-10 02:33:58.921752', 'step': 6826, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 3322488817984}, 'timestamp': '2025-09-10 02:33:58.961940', 'step': 6826, 'epoch': 1} {'type': 'loss', 'content': 0.1955275982618332, 'timestamp': '2025-09-10 02:33:58.966409', 'step': 6827, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 144], 'flops': 4271696270016}, 'timestamp': '2025-09-10 02:33:59.002207', 'step': 6827, 'epoch': 1} {'type': 'loss', 'content': 0.09437139332294464, 'timestamp': '2025-09-10 02:33:59.029253', 'step': 6828, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 3322488817984}, 'timestamp': '2025-09-10 02:33:59.074382', 'step': 6828, 'epoch': 1} {'type': 'loss', 'content': 0.11293365806341171, 'timestamp': '2025-09-10 02:33:59.076611', 'step': 6829, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 160], 'flops': 4746299996032}, 'timestamp': '2025-09-10 02:33:59.107550', 'step': 6829, 'epoch': 1} {'type': 'loss', 'content': 0.10133273899555206, 'timestamp': '2025-09-10 02:33:59.114966', 'step': 6830, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 2847885091968}, 'timestamp': '2025-09-10 02:33:59.144784', 'step': 6830, 'epoch': 1} {'type': 'loss', 'content': 0.2479800134897232, 'timestamp': '2025-09-10 02:33:59.147666', 'step': 6831, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 3322488817984}, 'timestamp': '2025-09-10 02:33:59.182629', 'step': 6831, 'epoch': 1} {'type': 'loss', 'content': 0.0949322059750557, 'timestamp': '2025-09-10 02:33:59.210193', 'step': 6832, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 128], 'flops': 3797092544000}, 'timestamp': '2025-09-10 02:33:59.243544', 'step': 6832, 'epoch': 1} {'type': 'loss', 'content': 0.18855588138103485, 'timestamp': '2025-09-10 02:33:59.248233', 'step': 6833, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 144], 'flops': 4271696270016}, 'timestamp': '2025-09-10 02:33:59.288936', 'step': 6833, 'epoch': 1} {'type': 'loss', 'content': 0.16078674793243408, 'timestamp': '2025-09-10 02:33:59.296637', 'step': 6834, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 128], 'flops': 3797092544000}, 'timestamp': '2025-09-10 02:33:59.333490', 'step': 6834, 'epoch': 1} {'type': 'loss', 'content': 0.10500791668891907, 'timestamp': '2025-09-10 02:33:59.346782', 'step': 6835, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 2847885091968}, 'timestamp': '2025-09-10 02:33:59.397792', 'step': 6835, 'epoch': 1} {'type': 'loss', 'content': 0.14441052079200745, 'timestamp': '2025-09-10 02:33:59.424093', 'step': 6836, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 160], 'flops': 4746299996032}, 'timestamp': '2025-09-10 02:33:59.454044', 'step': 6836, 'epoch': 1} {'type': 'loss', 'content': 0.19630882143974304, 'timestamp': '2025-09-10 02:33:59.456308', 'step': 6837, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 128], 'flops': 3797092544000}, 'timestamp': '2025-09-10 02:33:59.487643', 'step': 6837, 'epoch': 1} {'type': 'loss', 'content': 0.11499626189470291, 'timestamp': '2025-09-10 02:33:59.490240', 'step': 6838, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 144], 'flops': 4271696270016}, 'timestamp': '2025-09-10 02:33:59.520362', 'step': 6838, 'epoch': 1} {'type': 'loss', 'content': 0.1542889028787613, 'timestamp': '2025-09-10 02:33:59.522973', 'step': 6839, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 128], 'flops': 3797092544000}, 'timestamp': '2025-09-10 02:33:59.553075', 'step': 6839, 'epoch': 1} {'type': 'loss', 'content': 0.13497328758239746, 'timestamp': '2025-09-10 02:33:59.577583', 'step': 6840, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 144], 'flops': 4271696270016}, 'timestamp': '2025-09-10 02:33:59.607919', 'step': 6840, 'epoch': 1} {'type': 'loss', 'content': 0.14575818181037903, 'timestamp': '2025-09-10 02:33:59.610313', 'step': 6841, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 3322488817984}, 'timestamp': '2025-09-10 02:33:59.641550', 'step': 6841, 'epoch': 1} {'type': 'loss', 'content': 0.12597452104091644, 'timestamp': '2025-09-10 02:33:59.644093', 'step': 6842, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 128], 'flops': 3797092544000}, 'timestamp': '2025-09-10 02:33:59.673669', 'step': 6842, 'epoch': 1} {'type': 'loss', 'content': 0.14015823602676392, 'timestamp': '2025-09-10 02:33:59.676451', 'step': 6843, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 3322488817984}, 'timestamp': '2025-09-10 02:33:59.706090', 'step': 6843, 'epoch': 1} {'type': 'loss', 'content': 0.131053164601326, 'timestamp': '2025-09-10 02:33:59.730369', 'step': 6844, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 2847885091968}, 'timestamp': '2025-09-10 02:33:59.760325', 'step': 6844, 'epoch': 1} {'type': 'loss', 'content': 0.14849276840686798, 'timestamp': '2025-09-10 02:33:59.762633', 'step': 6845, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 128], 'flops': 3797092544000}, 'timestamp': '2025-09-10 02:33:59.793575', 'step': 6845, 'epoch': 1} {'type': 'loss', 'content': 0.097601018846035, 'timestamp': '2025-09-10 02:33:59.796739', 'step': 6846, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 2847885091968}, 'timestamp': '2025-09-10 02:33:59.826810', 'step': 6846, 'epoch': 1} {'type': 'loss', 'content': 0.11086229979991913, 'timestamp': '2025-09-10 02:33:59.829646', 'step': 6847, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 3322488817984}, 'timestamp': '2025-09-10 02:33:59.861243', 'step': 6847, 'epoch': 1} {'type': 'loss', 'content': 0.12631437182426453, 'timestamp': '2025-09-10 02:33:59.885094', 'step': 6848, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 3322488817984}, 'timestamp': '2025-09-10 02:33:59.915072', 'step': 6848, 'epoch': 1} {'type': 'loss', 'content': 0.12179607897996902, 'timestamp': '2025-09-10 02:33:59.917672', 'step': 6849, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 2847885091968}, 'timestamp': '2025-09-10 02:33:59.949337', 'step': 6849, 'epoch': 1} {'type': 'loss', 'content': 0.08584132045507431, 'timestamp': '2025-09-10 02:33:59.951615', 'step': 6850, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 3322488817984}, 'timestamp': '2025-09-10 02:33:59.981597', 'step': 6850, 'epoch': 1} {'type': 'loss', 'content': 0.11789267510175705, 'timestamp': '2025-09-10 02:33:59.983891', 'step': 6851, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 3322488817984}, 'timestamp': '2025-09-10 02:34:00.014769', 'step': 6851, 'epoch': 1} {'type': 'loss', 'content': 0.11198690533638, 'timestamp': '2025-09-10 02:34:00.038262', 'step': 6852, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 3322488817984}, 'timestamp': '2025-09-10 02:34:00.069018', 'step': 6852, 'epoch': 1} {'type': 'loss', 'content': 0.12706691026687622, 'timestamp': '2025-09-10 02:34:00.071529', 'step': 6853, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 144], 'flops': 4271696270016}, 'timestamp': '2025-09-10 02:34:00.102134', 'step': 6853, 'epoch': 1} {'type': 'loss', 'content': 0.206422358751297, 'timestamp': '2025-09-10 02:34:00.104905', 'step': 6854, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 3322488817984}, 'timestamp': '2025-09-10 02:34:00.135931', 'step': 6854, 'epoch': 1} {'type': 'loss', 'content': 0.19109538197517395, 'timestamp': '2025-09-10 02:34:00.138450', 'step': 6855, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 2847885091968}, 'timestamp': '2025-09-10 02:34:00.167825', 'step': 6855, 'epoch': 1} {'type': 'loss', 'content': 0.1831086426973343, 'timestamp': '2025-09-10 02:34:00.191473', 'step': 6856, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 2847885091968}, 'timestamp': '2025-09-10 02:34:00.221441', 'step': 6856, 'epoch': 1} {'type': 'loss', 'content': 0.08529011905193329, 'timestamp': '2025-09-10 02:34:00.224221', 'step': 6857, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 128], 'flops': 3797092544000}, 'timestamp': '2025-09-10 02:34:00.254952', 'step': 6857, 'epoch': 1} {'type': 'loss', 'content': 0.10999158769845963, 'timestamp': '2025-09-10 02:34:00.257455', 'step': 6858, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 80], 'flops': 2373281365952}, 'timestamp': '2025-09-10 02:34:00.290465', 'step': 6858, 'epoch': 1} {'type': 'loss', 'content': 0.11921227723360062, 'timestamp': '2025-09-10 02:34:00.292808', 'step': 6859, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 3322488817984}, 'timestamp': '2025-09-10 02:34:00.323323', 'step': 6859, 'epoch': 1} {'type': 'loss', 'content': 0.08522065728902817, 'timestamp': '2025-09-10 02:34:00.347011', 'step': 6860, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 2847885091968}, 'timestamp': '2025-09-10 02:34:00.378020', 'step': 6860, 'epoch': 1} {'type': 'loss', 'content': 0.14920087158679962, 'timestamp': '2025-09-10 02:34:00.380633', 'step': 6861, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 2847885091968}, 'timestamp': '2025-09-10 02:34:00.410731', 'step': 6861, 'epoch': 1} {'type': 'loss', 'content': 0.14379064738750458, 'timestamp': '2025-09-10 02:34:00.413085', 'step': 6862, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 128], 'flops': 3797092544000}, 'timestamp': '2025-09-10 02:34:00.442488', 'step': 6862, 'epoch': 1} {'type': 'loss', 'content': 0.2563904821872711, 'timestamp': '2025-09-10 02:34:00.444802', 'step': 6863, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 128], 'flops': 3797092544000}, 'timestamp': '2025-09-10 02:34:00.474372', 'step': 6863, 'epoch': 1} {'type': 'loss', 'content': 0.1809024214744568, 'timestamp': '2025-09-10 02:34:00.498798', 'step': 6864, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 3322488817984}, 'timestamp': '2025-09-10 02:34:00.529389', 'step': 6864, 'epoch': 1} {'type': 'loss', 'content': 0.09668847173452377, 'timestamp': '2025-09-10 02:34:00.531798', 'step': 6865, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 3322488817984}, 'timestamp': '2025-09-10 02:34:00.561783', 'step': 6865, 'epoch': 1} {'type': 'loss', 'content': 0.10519066452980042, 'timestamp': '2025-09-10 02:34:00.564379', 'step': 6866, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 3322488817984}, 'timestamp': '2025-09-10 02:34:00.596082', 'step': 6866, 'epoch': 1} {'type': 'loss', 'content': 0.13354812562465668, 'timestamp': '2025-09-10 02:34:00.602990', 'step': 6867, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 2847885091968}, 'timestamp': '2025-09-10 02:34:00.639856', 'step': 6867, 'epoch': 1} {'type': 'loss', 'content': 0.11906253546476364, 'timestamp': '2025-09-10 02:34:00.663361', 'step': 6868, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 2847885091968}, 'timestamp': '2025-09-10 02:34:00.693925', 'step': 6868, 'epoch': 1} {'type': 'loss', 'content': 0.21394754946231842, 'timestamp': '2025-09-10 02:34:00.696454', 'step': 6869, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 2847885091968}, 'timestamp': '2025-09-10 02:34:00.727032', 'step': 6869, 'epoch': 1} {'type': 'loss', 'content': 0.123796746134758, 'timestamp': '2025-09-10 02:34:00.729097', 'step': 6870, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 128], 'flops': 3797092544000}, 'timestamp': '2025-09-10 02:34:00.758461', 'step': 6870, 'epoch': 1} {'type': 'loss', 'content': 0.18810516595840454, 'timestamp': '2025-09-10 02:34:00.760724', 'step': 6871, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 128], 'flops': 3797092544000}, 'timestamp': '2025-09-10 02:34:00.789918', 'step': 6871, 'epoch': 1} {'type': 'loss', 'content': 0.16592103242874146, 'timestamp': '2025-09-10 02:34:00.813350', 'step': 6872, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 2847885091968}, 'timestamp': '2025-09-10 02:34:00.843360', 'step': 6872, 'epoch': 1} {'type': 'loss', 'content': 0.17946156859397888, 'timestamp': '2025-09-10 02:34:00.845841', 'step': 6873, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 144], 'flops': 4271696270016}, 'timestamp': '2025-09-10 02:34:00.875396', 'step': 6873, 'epoch': 1} {'type': 'loss', 'content': 0.141701802611351, 'timestamp': '2025-09-10 02:34:00.880615', 'step': 6874, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 3322488817984}, 'timestamp': '2025-09-10 02:34:00.911718', 'step': 6874, 'epoch': 1} {'type': 'loss', 'content': 0.2432016283273697, 'timestamp': '2025-09-10 02:34:00.914413', 'step': 6875, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 144], 'flops': 4271696270016}, 'timestamp': '2025-09-10 02:34:00.944362', 'step': 6875, 'epoch': 1} {'type': 'loss', 'content': 0.12654364109039307, 'timestamp': '2025-09-10 02:34:00.968011', 'step': 6876, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 128], 'flops': 3797092544000}, 'timestamp': '2025-09-10 02:34:00.999183', 'step': 6876, 'epoch': 1} {'type': 'loss', 'content': 0.12653203308582306, 'timestamp': '2025-09-10 02:34:01.001333', 'step': 6877, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 3322488817984}, 'timestamp': '2025-09-10 02:34:01.030934', 'step': 6877, 'epoch': 1} {'type': 'loss', 'content': 0.17997236549854279, 'timestamp': '2025-09-10 02:34:01.033485', 'step': 6878, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 2847885091968}, 'timestamp': '2025-09-10 02:34:01.063459', 'step': 6878, 'epoch': 1} {'type': 'loss', 'content': 0.15817345678806305, 'timestamp': '2025-09-10 02:34:01.066095', 'step': 6879, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 2847885091968}, 'timestamp': '2025-09-10 02:34:01.096081', 'step': 6879, 'epoch': 1} {'type': 'loss', 'content': 0.11289713531732559, 'timestamp': '2025-09-10 02:34:01.119373', 'step': 6880, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 160], 'flops': 4746299996032}, 'timestamp': '2025-09-10 02:34:01.150869', 'step': 6880, 'epoch': 1} {'type': 'loss', 'content': 0.1277998685836792, 'timestamp': '2025-09-10 02:34:01.153007', 'step': 6881, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 128], 'flops': 3797092544000}, 'timestamp': '2025-09-10 02:34:01.181983', 'step': 6881, 'epoch': 1} {'type': 'loss', 'content': 0.2992531657218933, 'timestamp': '2025-09-10 02:34:01.184533', 'step': 6882, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 3322488817984}, 'timestamp': '2025-09-10 02:34:01.214057', 'step': 6882, 'epoch': 1} {'type': 'loss', 'content': 0.13031232357025146, 'timestamp': '2025-09-10 02:34:01.216114', 'step': 6883, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 160], 'flops': 4746299996032}, 'timestamp': '2025-09-10 02:34:01.245483', 'step': 6883, 'epoch': 1} {'type': 'loss', 'content': 0.15248383581638336, 'timestamp': '2025-09-10 02:34:01.269421', 'step': 6884, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 2847885091968}, 'timestamp': '2025-09-10 02:34:01.300372', 'step': 6884, 'epoch': 1} {'type': 'loss', 'content': 0.166751891374588, 'timestamp': '2025-09-10 02:34:01.302937', 'step': 6885, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 3322488817984}, 'timestamp': '2025-09-10 02:34:01.332554', 'step': 6885, 'epoch': 1} {'type': 'loss', 'content': 0.1198703944683075, 'timestamp': '2025-09-10 02:34:01.334710', 'step': 6886, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 2847885091968}, 'timestamp': '2025-09-10 02:34:01.364419', 'step': 6886, 'epoch': 1} {'type': 'loss', 'content': 0.12175918370485306, 'timestamp': '2025-09-10 02:34:01.366688', 'step': 6887, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 176], 'flops': 5220903722048}, 'timestamp': '2025-09-10 02:34:01.396816', 'step': 6887, 'epoch': 1} {'type': 'loss', 'content': 0.09331747889518738, 'timestamp': '2025-09-10 02:34:01.422141', 'step': 6888, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 128], 'flops': 3797092544000}, 'timestamp': '2025-09-10 02:34:01.451774', 'step': 6888, 'epoch': 1} {'type': 'loss', 'content': 0.10127764940261841, 'timestamp': '2025-09-10 02:34:01.454180', 'step': 6889, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 128], 'flops': 3797092544000}, 'timestamp': '2025-09-10 02:34:01.484098', 'step': 6889, 'epoch': 1} {'type': 'loss', 'content': 0.10801981389522552, 'timestamp': '2025-09-10 02:34:01.486669', 'step': 6890, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 3322488817984}, 'timestamp': '2025-09-10 02:34:01.528571', 'step': 6890, 'epoch': 1} {'type': 'loss', 'content': 0.14450006186962128, 'timestamp': '2025-09-10 02:34:01.531066', 'step': 6891, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 2847885091968}, 'timestamp': '2025-09-10 02:34:01.560559', 'step': 6891, 'epoch': 1} {'type': 'loss', 'content': 0.14145749807357788, 'timestamp': '2025-09-10 02:34:01.584171', 'step': 6892, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 128], 'flops': 3797092544000}, 'timestamp': '2025-09-10 02:34:01.614210', 'step': 6892, 'epoch': 1} {'type': 'loss', 'content': 0.22398878633975983, 'timestamp': '2025-09-10 02:34:01.616338', 'step': 6893, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 3322488817984}, 'timestamp': '2025-09-10 02:34:01.645722', 'step': 6893, 'epoch': 1} {'type': 'loss', 'content': 0.08857473731040955, 'timestamp': '2025-09-10 02:34:01.647800', 'step': 6894, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 2847885091968}, 'timestamp': '2025-09-10 02:34:01.677171', 'step': 6894, 'epoch': 1} {'type': 'loss', 'content': 0.1518641859292984, 'timestamp': '2025-09-10 02:34:01.679676', 'step': 6895, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 128], 'flops': 3797092544000}, 'timestamp': '2025-09-10 02:34:01.711612', 'step': 6895, 'epoch': 1} {'type': 'loss', 'content': 0.16195273399353027, 'timestamp': '2025-09-10 02:34:01.737212', 'step': 6896, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 3322488817984}, 'timestamp': '2025-09-10 02:34:01.766582', 'step': 6896, 'epoch': 1} {'type': 'loss', 'content': 0.12302510440349579, 'timestamp': '2025-09-10 02:34:01.768853', 'step': 6897, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 3322488817984}, 'timestamp': '2025-09-10 02:34:01.797902', 'step': 6897, 'epoch': 1} {'type': 'loss', 'content': 0.15214680135250092, 'timestamp': '2025-09-10 02:34:01.800355', 'step': 6898, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 128], 'flops': 3797092544000}, 'timestamp': '2025-09-10 02:34:01.830344', 'step': 6898, 'epoch': 1} {'type': 'loss', 'content': 0.20695677399635315, 'timestamp': '2025-09-10 02:34:01.832525', 'step': 6899, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 144], 'flops': 4271696270016}, 'timestamp': '2025-09-10 02:34:01.861839', 'step': 6899, 'epoch': 1} {'type': 'loss', 'content': 0.15481051802635193, 'timestamp': '2025-09-10 02:34:01.885189', 'step': 6900, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 80], 'flops': 2373281365952}, 'timestamp': '2025-09-10 02:34:01.915139', 'step': 6900, 'epoch': 1} {'type': 'loss', 'content': 0.1417367160320282, 'timestamp': '2025-09-10 02:34:01.917695', 'step': 6901, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 128], 'flops': 3797092544000}, 'timestamp': '2025-09-10 02:34:01.947800', 'step': 6901, 'epoch': 1} {'type': 'loss', 'content': 0.3223535120487213, 'timestamp': '2025-09-10 02:34:01.949910', 'step': 6902, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 160], 'flops': 4746299996032}, 'timestamp': '2025-09-10 02:34:01.979251', 'step': 6902, 'epoch': 1} {'type': 'loss', 'content': 0.21998144686222076, 'timestamp': '2025-09-10 02:34:01.982402', 'step': 6903, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 128], 'flops': 3797092544000}, 'timestamp': '2025-09-10 02:34:02.014574', 'step': 6903, 'epoch': 1} {'type': 'loss', 'content': 0.11714565753936768, 'timestamp': '2025-09-10 02:34:02.038500', 'step': 6904, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 2847885091968}, 'timestamp': '2025-09-10 02:34:02.068445', 'step': 6904, 'epoch': 1} {'type': 'loss', 'content': 0.2121572047472, 'timestamp': '2025-09-10 02:34:02.070994', 'step': 6905, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 128], 'flops': 3797092544000}, 'timestamp': '2025-09-10 02:34:02.100562', 'step': 6905, 'epoch': 1} {'type': 'loss', 'content': 0.20341187715530396, 'timestamp': '2025-09-10 02:34:02.102875', 'step': 6906, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 160], 'flops': 4746299996032}, 'timestamp': '2025-09-10 02:34:02.132747', 'step': 6906, 'epoch': 1} {'type': 'loss', 'content': 0.15901383757591248, 'timestamp': '2025-09-10 02:34:02.135992', 'step': 6907, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 2847885091968}, 'timestamp': '2025-09-10 02:34:02.164940', 'step': 6907, 'epoch': 1} {'type': 'loss', 'content': 0.09492994099855423, 'timestamp': '2025-09-10 02:34:02.188323', 'step': 6908, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 3322488817984}, 'timestamp': '2025-09-10 02:34:02.218533', 'step': 6908, 'epoch': 1} {'type': 'loss', 'content': 0.13564403355121613, 'timestamp': '2025-09-10 02:34:02.221182', 'step': 6909, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 128], 'flops': 3797092544000}, 'timestamp': '2025-09-10 02:34:02.253705', 'step': 6909, 'epoch': 1} {'type': 'loss', 'content': 0.1750601828098297, 'timestamp': '2025-09-10 02:34:02.255876', 'step': 6910, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 3322488817984}, 'timestamp': '2025-09-10 02:34:02.285329', 'step': 6910, 'epoch': 1} {'type': 'loss', 'content': 0.12681512534618378, 'timestamp': '2025-09-10 02:34:02.287614', 'step': 6911, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 3322488817984}, 'timestamp': '2025-09-10 02:34:02.318332', 'step': 6911, 'epoch': 1} {'type': 'loss', 'content': 0.10807760804891586, 'timestamp': '2025-09-10 02:34:02.341832', 'step': 6912, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 3322488817984}, 'timestamp': '2025-09-10 02:34:02.372003', 'step': 6912, 'epoch': 1} {'type': 'loss', 'content': 0.16123716533184052, 'timestamp': '2025-09-10 02:34:02.374259', 'step': 6913, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 128], 'flops': 3797092544000}, 'timestamp': '2025-09-10 02:34:02.403910', 'step': 6913, 'epoch': 1} {'type': 'loss', 'content': 0.14077724516391754, 'timestamp': '2025-09-10 02:34:02.406042', 'step': 6914, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 128], 'flops': 3797092544000}, 'timestamp': '2025-09-10 02:34:02.436010', 'step': 6914, 'epoch': 1} {'type': 'loss', 'content': 0.1883106678724289, 'timestamp': '2025-09-10 02:34:02.437923', 'step': 6915, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 3322488817984}, 'timestamp': '2025-09-10 02:34:02.467684', 'step': 6915, 'epoch': 1} {'type': 'loss', 'content': 0.10770663619041443, 'timestamp': '2025-09-10 02:34:02.491137', 'step': 6916, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 144], 'flops': 4271696270016}, 'timestamp': '2025-09-10 02:34:02.521458', 'step': 6916, 'epoch': 1} {'type': 'loss', 'content': 0.09650592505931854, 'timestamp': '2025-09-10 02:34:02.523687', 'step': 6917, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 2847885091968}, 'timestamp': '2025-09-10 02:34:02.553812', 'step': 6917, 'epoch': 1} {'type': 'loss', 'content': 0.07645541429519653, 'timestamp': '2025-09-10 02:34:02.556331', 'step': 6918, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 80], 'flops': 2373281365952}, 'timestamp': '2025-09-10 02:34:02.585811', 'step': 6918, 'epoch': 1} {'type': 'loss', 'content': 0.13609753549098969, 'timestamp': '2025-09-10 02:34:02.588061', 'step': 6919, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 3322488817984}, 'timestamp': '2025-09-10 02:34:02.618238', 'step': 6919, 'epoch': 1} {'type': 'loss', 'content': 0.08460981398820877, 'timestamp': '2025-09-10 02:34:02.642409', 'step': 6920, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 2847885091968}, 'timestamp': '2025-09-10 02:34:02.671365', 'step': 6920, 'epoch': 1} {'type': 'loss', 'content': 0.0911179855465889, 'timestamp': '2025-09-10 02:34:02.673506', 'step': 6921, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 3322488817984}, 'timestamp': '2025-09-10 02:34:02.703110', 'step': 6921, 'epoch': 1} {'type': 'loss', 'content': 0.12284395098686218, 'timestamp': '2025-09-10 02:34:02.705271', 'step': 6922, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 2847885091968}, 'timestamp': '2025-09-10 02:34:02.734902', 'step': 6922, 'epoch': 1} {'type': 'loss', 'content': 0.1589781492948532, 'timestamp': '2025-09-10 02:34:02.736977', 'step': 6923, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 128], 'flops': 3797092544000}, 'timestamp': '2025-09-10 02:34:02.766067', 'step': 6923, 'epoch': 1} {'type': 'loss', 'content': 0.114347442984581, 'timestamp': '2025-09-10 02:34:02.789674', 'step': 6924, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 128], 'flops': 3797092544000}, 'timestamp': '2025-09-10 02:34:02.820390', 'step': 6924, 'epoch': 1} {'type': 'loss', 'content': 0.13485178351402283, 'timestamp': '2025-09-10 02:34:02.822464', 'step': 6925, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 176], 'flops': 5220903722048}, 'timestamp': '2025-09-10 02:34:02.851925', 'step': 6925, 'epoch': 1} {'type': 'loss', 'content': 0.15933674573898315, 'timestamp': '2025-09-10 02:34:02.856427', 'step': 6926, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 2847885091968}, 'timestamp': '2025-09-10 02:34:02.885617', 'step': 6926, 'epoch': 1} {'type': 'loss', 'content': 0.1733834147453308, 'timestamp': '2025-09-10 02:34:02.887928', 'step': 6927, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 128], 'flops': 3797092544000}, 'timestamp': '2025-09-10 02:34:02.917643', 'step': 6927, 'epoch': 1} {'type': 'loss', 'content': 0.17712338268756866, 'timestamp': '2025-09-10 02:34:02.941024', 'step': 6928, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 128], 'flops': 3797092544000}, 'timestamp': '2025-09-10 02:34:02.970125', 'step': 6928, 'epoch': 1} {'type': 'loss', 'content': 0.15244810283184052, 'timestamp': '2025-09-10 02:34:02.972334', 'step': 6929, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 128], 'flops': 3797092544000}, 'timestamp': '2025-09-10 02:34:03.001603', 'step': 6929, 'epoch': 1} {'type': 'loss', 'content': 0.19539059698581696, 'timestamp': '2025-09-10 02:34:03.003898', 'step': 6930, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 128], 'flops': 3797092544000}, 'timestamp': '2025-09-10 02:34:03.033190', 'step': 6930, 'epoch': 1} {'type': 'loss', 'content': 0.10928576439619064, 'timestamp': '2025-09-10 02:34:03.035439', 'step': 6931, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 2847885091968}, 'timestamp': '2025-09-10 02:34:03.065105', 'step': 6931, 'epoch': 1} {'type': 'loss', 'content': 0.0587698295712471, 'timestamp': '2025-09-10 02:34:03.088581', 'step': 6932, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 144], 'flops': 4271696270016}, 'timestamp': '2025-09-10 02:34:03.119037', 'step': 6932, 'epoch': 1} {'type': 'loss', 'content': 0.08276036381721497, 'timestamp': '2025-09-10 02:34:03.121693', 'step': 6933, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 2847885091968}, 'timestamp': '2025-09-10 02:34:03.152030', 'step': 6933, 'epoch': 1} {'type': 'loss', 'content': 0.13637711107730865, 'timestamp': '2025-09-10 02:34:03.154362', 'step': 6934, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 128], 'flops': 3797092544000}, 'timestamp': '2025-09-10 02:34:03.183604', 'step': 6934, 'epoch': 1} {'type': 'loss', 'content': 0.10729195177555084, 'timestamp': '2025-09-10 02:34:03.185922', 'step': 6935, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 2847885091968}, 'timestamp': '2025-09-10 02:34:03.216990', 'step': 6935, 'epoch': 1} {'type': 'loss', 'content': 0.11995332688093185, 'timestamp': '2025-09-10 02:34:03.240642', 'step': 6936, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 3322488817984}, 'timestamp': '2025-09-10 02:34:03.272799', 'step': 6936, 'epoch': 1} {'type': 'loss', 'content': 0.11931348592042923, 'timestamp': '2025-09-10 02:34:03.275005', 'step': 6937, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 128], 'flops': 3797092544000}, 'timestamp': '2025-09-10 02:34:03.305156', 'step': 6937, 'epoch': 1} {'type': 'loss', 'content': 0.08825390785932541, 'timestamp': '2025-09-10 02:34:03.307431', 'step': 6938, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 192], 'flops': 5695507448064}, 'timestamp': '2025-09-10 02:34:03.337199', 'step': 6938, 'epoch': 1} {'type': 'loss', 'content': 0.14779292047023773, 'timestamp': '2025-09-10 02:34:03.342246', 'step': 6939, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 3322488817984}, 'timestamp': '2025-09-10 02:34:03.371307', 'step': 6939, 'epoch': 1} {'type': 'loss', 'content': 0.16488057374954224, 'timestamp': '2025-09-10 02:34:03.394739', 'step': 6940, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 3322488817984}, 'timestamp': '2025-09-10 02:34:03.423997', 'step': 6940, 'epoch': 1} {'type': 'loss', 'content': 0.17445696890354156, 'timestamp': '2025-09-10 02:34:03.426217', 'step': 6941, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 3322488817984}, 'timestamp': '2025-09-10 02:34:03.456940', 'step': 6941, 'epoch': 1} {'type': 'loss', 'content': 0.2253144234418869, 'timestamp': '2025-09-10 02:34:03.459073', 'step': 6942, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 3322488817984}, 'timestamp': '2025-09-10 02:34:03.488403', 'step': 6942, 'epoch': 1} {'type': 'loss', 'content': 0.1505047231912613, 'timestamp': '2025-09-10 02:34:03.490456', 'step': 6943, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 128], 'flops': 3797092544000}, 'timestamp': '2025-09-10 02:34:03.519872', 'step': 6943, 'epoch': 1} {'type': 'loss', 'content': 0.11518671363592148, 'timestamp': '2025-09-10 02:34:03.543670', 'step': 6944, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 3322488817984}, 'timestamp': '2025-09-10 02:34:03.574478', 'step': 6944, 'epoch': 1} {'type': 'loss', 'content': 0.16051241755485535, 'timestamp': '2025-09-10 02:34:03.576598', 'step': 6945, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 3322488817984}, 'timestamp': '2025-09-10 02:34:03.605964', 'step': 6945, 'epoch': 1} {'type': 'loss', 'content': 0.12411104142665863, 'timestamp': '2025-09-10 02:34:03.608285', 'step': 6946, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 3322488817984}, 'timestamp': '2025-09-10 02:34:03.637641', 'step': 6946, 'epoch': 1} {'type': 'loss', 'content': 0.17712955176830292, 'timestamp': '2025-09-10 02:34:03.639998', 'step': 6947, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 144], 'flops': 4271696270016}, 'timestamp': '2025-09-10 02:34:03.669777', 'step': 6947, 'epoch': 1} {'type': 'loss', 'content': 0.09265245497226715, 'timestamp': '2025-09-10 02:34:03.694294', 'step': 6948, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 3322488817984}, 'timestamp': '2025-09-10 02:34:03.724843', 'step': 6948, 'epoch': 1} {'type': 'loss', 'content': 0.14049582183361053, 'timestamp': '2025-09-10 02:34:03.728968', 'step': 6949, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 80], 'flops': 2373281365952}, 'timestamp': '2025-09-10 02:34:03.766781', 'step': 6949, 'epoch': 1} {'type': 'loss', 'content': 0.21350659430027008, 'timestamp': '2025-09-10 02:34:03.772501', 'step': 6950, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 128], 'flops': 3797092544000}, 'timestamp': '2025-09-10 02:34:03.806512', 'step': 6950, 'epoch': 1} {'type': 'loss', 'content': 0.146570086479187, 'timestamp': '2025-09-10 02:34:03.810100', 'step': 6951, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 2847885091968}, 'timestamp': '2025-09-10 02:34:03.839972', 'step': 6951, 'epoch': 1} {'type': 'loss', 'content': 0.12010766565799713, 'timestamp': '2025-09-10 02:34:03.863491', 'step': 6952, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 3322488817984}, 'timestamp': '2025-09-10 02:34:03.893820', 'step': 6952, 'epoch': 1} {'type': 'loss', 'content': 0.16628633439540863, 'timestamp': '2025-09-10 02:34:03.895937', 'step': 6953, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 2847885091968}, 'timestamp': '2025-09-10 02:34:03.925394', 'step': 6953, 'epoch': 1} {'type': 'loss', 'content': 0.10962900519371033, 'timestamp': '2025-09-10 02:34:03.927942', 'step': 6954, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 144], 'flops': 4271696270016}, 'timestamp': '2025-09-10 02:34:03.957755', 'step': 6954, 'epoch': 1} {'type': 'loss', 'content': 0.21911296248435974, 'timestamp': '2025-09-10 02:34:03.960504', 'step': 6955, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 2847885091968}, 'timestamp': '2025-09-10 02:34:03.989929', 'step': 6955, 'epoch': 1} {'type': 'loss', 'content': 0.10977502912282944, 'timestamp': '2025-09-10 02:34:04.013525', 'step': 6956, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 3322488817984}, 'timestamp': '2025-09-10 02:34:04.043100', 'step': 6956, 'epoch': 1} {'type': 'loss', 'content': 0.2625519335269928, 'timestamp': '2025-09-10 02:34:04.044993', 'step': 6957, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 2847885091968}, 'timestamp': '2025-09-10 02:34:04.074361', 'step': 6957, 'epoch': 1} {'type': 'loss', 'content': 0.10982518643140793, 'timestamp': '2025-09-10 02:34:04.076799', 'step': 6958, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 3322488817984}, 'timestamp': '2025-09-10 02:34:04.107453', 'step': 6958, 'epoch': 1} {'type': 'loss', 'content': 0.2051054835319519, 'timestamp': '2025-09-10 02:34:04.109747', 'step': 6959, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 144], 'flops': 4271696270016}, 'timestamp': '2025-09-10 02:34:04.140774', 'step': 6959, 'epoch': 1} {'type': 'loss', 'content': 0.17569714784622192, 'timestamp': '2025-09-10 02:34:04.164105', 'step': 6960, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 2847885091968}, 'timestamp': '2025-09-10 02:34:04.193187', 'step': 6960, 'epoch': 1} {'type': 'loss', 'content': 0.15186886489391327, 'timestamp': '2025-09-10 02:34:04.195449', 'step': 6961, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 3322488817984}, 'timestamp': '2025-09-10 02:34:04.225202', 'step': 6961, 'epoch': 1} {'type': 'loss', 'content': 0.19113415479660034, 'timestamp': '2025-09-10 02:34:04.227497', 'step': 6962, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 2847885091968}, 'timestamp': '2025-09-10 02:34:04.259296', 'step': 6962, 'epoch': 1} {'type': 'loss', 'content': 0.13260051608085632, 'timestamp': '2025-09-10 02:34:04.261544', 'step': 6963, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 2847885091968}, 'timestamp': '2025-09-10 02:34:04.290871', 'step': 6963, 'epoch': 1} {'type': 'loss', 'content': 0.1643151044845581, 'timestamp': '2025-09-10 02:34:04.314504', 'step': 6964, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 128], 'flops': 3797092544000}, 'timestamp': '2025-09-10 02:34:04.344927', 'step': 6964, 'epoch': 1} {'type': 'loss', 'content': 0.14577479660511017, 'timestamp': '2025-09-10 02:34:04.346976', 'step': 6965, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 2847885091968}, 'timestamp': '2025-09-10 02:34:04.376626', 'step': 6965, 'epoch': 1} {'type': 'loss', 'content': 0.11709325760602951, 'timestamp': '2025-09-10 02:34:04.378780', 'step': 6966, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 128], 'flops': 3797092544000}, 'timestamp': '2025-09-10 02:34:04.408090', 'step': 6966, 'epoch': 1} {'type': 'loss', 'content': 0.1243273913860321, 'timestamp': '2025-09-10 02:34:04.410245', 'step': 6967, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 3322488817984}, 'timestamp': '2025-09-10 02:34:04.441333', 'step': 6967, 'epoch': 1} {'type': 'loss', 'content': 0.18344621360301971, 'timestamp': '2025-09-10 02:34:04.464787', 'step': 6968, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 128], 'flops': 3797092544000}, 'timestamp': '2025-09-10 02:34:04.494787', 'step': 6968, 'epoch': 1} {'type': 'loss', 'content': 0.04615677148103714, 'timestamp': '2025-09-10 02:34:04.497495', 'step': 6969, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 2847885091968}, 'timestamp': '2025-09-10 02:34:04.526953', 'step': 6969, 'epoch': 1} {'type': 'loss', 'content': 0.14612144231796265, 'timestamp': '2025-09-10 02:34:04.529122', 'step': 6970, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 128], 'flops': 3797092544000}, 'timestamp': '2025-09-10 02:34:04.560118', 'step': 6970, 'epoch': 1} {'type': 'loss', 'content': 0.1290861964225769, 'timestamp': '2025-09-10 02:34:04.562402', 'step': 6971, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 2847885091968}, 'timestamp': '2025-09-10 02:34:04.591643', 'step': 6971, 'epoch': 1} {'type': 'loss', 'content': 0.13698577880859375, 'timestamp': '2025-09-10 02:34:04.615196', 'step': 6972, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 144], 'flops': 4271696270016}, 'timestamp': '2025-09-10 02:34:04.648949', 'step': 6972, 'epoch': 1} {'type': 'loss', 'content': 0.3089302182197571, 'timestamp': '2025-09-10 02:34:04.651113', 'step': 6973, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 3322488817984}, 'timestamp': '2025-09-10 02:34:04.680765', 'step': 6973, 'epoch': 1} {'type': 'loss', 'content': 0.17495045065879822, 'timestamp': '2025-09-10 02:34:04.682924', 'step': 6974, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 3322488817984}, 'timestamp': '2025-09-10 02:34:04.713326', 'step': 6974, 'epoch': 1} {'type': 'loss', 'content': 0.15825049579143524, 'timestamp': '2025-09-10 02:34:04.716981', 'step': 6975, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 2847885091968}, 'timestamp': '2025-09-10 02:34:04.749542', 'step': 6975, 'epoch': 1} {'type': 'loss', 'content': 0.12823863327503204, 'timestamp': '2025-09-10 02:34:04.773152', 'step': 6976, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 2847885091968}, 'timestamp': '2025-09-10 02:34:04.802836', 'step': 6976, 'epoch': 1} {'type': 'loss', 'content': 0.21411097049713135, 'timestamp': '2025-09-10 02:34:04.805067', 'step': 6977, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 2847885091968}, 'timestamp': '2025-09-10 02:34:04.834683', 'step': 6977, 'epoch': 1} {'type': 'loss', 'content': 0.27485254406929016, 'timestamp': '2025-09-10 02:34:04.838813', 'step': 6978, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 2847885091968}, 'timestamp': '2025-09-10 02:34:04.868542', 'step': 6978, 'epoch': 1} {'type': 'loss', 'content': 0.11495692282915115, 'timestamp': '2025-09-10 02:34:04.870914', 'step': 6979, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 128], 'flops': 3797092544000}, 'timestamp': '2025-09-10 02:34:04.900547', 'step': 6979, 'epoch': 1} {'type': 'loss', 'content': 0.15936687588691711, 'timestamp': '2025-09-10 02:34:04.924458', 'step': 6980, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 128], 'flops': 3797092544000}, 'timestamp': '2025-09-10 02:34:04.953922', 'step': 6980, 'epoch': 1} {'type': 'loss', 'content': 0.13567876815795898, 'timestamp': '2025-09-10 02:34:04.955951', 'step': 6981, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 128], 'flops': 3797092544000}, 'timestamp': '2025-09-10 02:34:04.985445', 'step': 6981, 'epoch': 1} {'type': 'loss', 'content': 0.0801263302564621, 'timestamp': '2025-09-10 02:34:04.987660', 'step': 6982, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 2847885091968}, 'timestamp': '2025-09-10 02:34:05.017109', 'step': 6982, 'epoch': 1} {'type': 'loss', 'content': 0.18849128484725952, 'timestamp': '2025-09-10 02:34:05.019340', 'step': 6983, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 128], 'flops': 3797092544000}, 'timestamp': '2025-09-10 02:34:05.049435', 'step': 6983, 'epoch': 1} {'type': 'loss', 'content': 0.10759764164686203, 'timestamp': '2025-09-10 02:34:05.073028', 'step': 6984, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 128], 'flops': 3797092544000}, 'timestamp': '2025-09-10 02:34:05.102805', 'step': 6984, 'epoch': 1} {'type': 'loss', 'content': 0.09704389423131943, 'timestamp': '2025-09-10 02:34:05.105186', 'step': 6985, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 144], 'flops': 4271696270016}, 'timestamp': '2025-09-10 02:34:05.134891', 'step': 6985, 'epoch': 1} {'type': 'loss', 'content': 0.23729324340820312, 'timestamp': '2025-09-10 02:34:05.137557', 'step': 6986, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 2847885091968}, 'timestamp': '2025-09-10 02:34:05.166685', 'step': 6986, 'epoch': 1} {'type': 'loss', 'content': 0.19911789894104004, 'timestamp': '2025-09-10 02:34:05.168870', 'step': 6987, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 128], 'flops': 3797092544000}, 'timestamp': '2025-09-10 02:34:05.198491', 'step': 6987, 'epoch': 1} {'type': 'loss', 'content': 0.1117108091711998, 'timestamp': '2025-09-10 02:34:05.221930', 'step': 6988, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 128], 'flops': 3797092544000}, 'timestamp': '2025-09-10 02:34:05.253054', 'step': 6988, 'epoch': 1} {'type': 'loss', 'content': 0.09930102527141571, 'timestamp': '2025-09-10 02:34:05.256895', 'step': 6989, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 2847885091968}, 'timestamp': '2025-09-10 02:34:05.286407', 'step': 6989, 'epoch': 1} {'type': 'loss', 'content': 0.18484434485435486, 'timestamp': '2025-09-10 02:34:05.288568', 'step': 6990, 'epoch': 1} {'type': 'flops', 'content': [{'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1582003754624}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1898404492032}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1898404492032}, {'type': 'perplexity', 'in_batch_dim': [8, 144], 'batch_size': 8, 'flops': 2847606704256}, {'type': 'perplexity', 'in_batch_dim': [8, 64], 'batch_size': 8, 'flops': 1265603017216}, {'type': 'perplexity', 'in_batch_dim': [8, 112], 'batch_size': 8, 'flops': 2214805229440}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1898404492032}, {'type': 'perplexity', 'in_batch_dim': [8, 112], 'batch_size': 8, 'flops': 2214805229440}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1898404492032}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1898404492032}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1898404492032}, {'type': 'perplexity', 'in_batch_dim': [8, 112], 'batch_size': 8, 'flops': 2214805229440}, {'type': 'perplexity', 'in_batch_dim': [8, 112], 'batch_size': 8, 'flops': 2214805229440}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1582003754624}, {'type': 'perplexity', 'in_batch_dim': [8, 144], 'batch_size': 8, 'flops': 2847606704256}, {'type': 'perplexity', 'in_batch_dim': [8, 112], 'batch_size': 8, 'flops': 2214805229440}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1582003754624}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1898404492032}, {'type': 'perplexity', 'in_batch_dim': [8, 64], 'batch_size': 8, 'flops': 1265603017216}, {'type': 'perplexity', 'in_batch_dim': [8, 64], 'batch_size': 8, 'flops': 1265603017216}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1898404492032}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1582003754624}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1582003754624}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1582003754624}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1582003754624}, {'type': 'perplexity', 'in_batch_dim': [8, 64], 'batch_size': 8, 'flops': 1265603017216}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1898404492032}, {'type': 'perplexity', 'in_batch_dim': [8, 64], 'batch_size': 8, 'flops': 1265603017216}, {'type': 'perplexity', 'in_batch_dim': [8, 64], 'batch_size': 8, 'flops': 1265603017216}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1582003754624}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1582003754624}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1582003754624}, {'type': 'perplexity', 'in_batch_dim': [8, 112], 'batch_size': 8, 'flops': 2214805229440}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1582003754624}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1582003754624}, {'type': 'perplexity', 'in_batch_dim': [8, 160], 'batch_size': 8, 'flops': 3164007441664}, {'type': 'perplexity', 'in_batch_dim': [8, 64], 'batch_size': 8, 'flops': 1265603017216}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1898404492032}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1898404492032}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1898404492032}, {'type': 'perplexity', 'in_batch_dim': [8, 64], 'batch_size': 8, 'flops': 1265603017216}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1582003754624}, {'type': 'perplexity', 'in_batch_dim': [8, 64], 'batch_size': 8, 'flops': 1265603017216}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1582003754624}, {'type': 'perplexity', 'in_batch_dim': [8, 64], 'batch_size': 8, 'flops': 1265603017216}, {'type': 'perplexity', 'in_batch_dim': [8, 112], 'batch_size': 8, 'flops': 2214805229440}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1582003754624}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1898404492032}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1898404492032}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1898404492032}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1898404492032}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1898404492032}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1898404492032}, {'type': 'perplexity', 'in_batch_dim': [8, 64], 'batch_size': 8, 'flops': 1265603017216}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1898404492032}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1582003754624}, {'type': 'perplexity', 'in_batch_dim': [8, 64], 'batch_size': 8, 'flops': 1265603017216}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1898404492032}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1898404492032}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1898404492032}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1898404492032}, {'type': 'perplexity', 'in_batch_dim': [8, 128], 'batch_size': 8, 'flops': 2531205966848}, {'type': 'perplexity', 'in_batch_dim': [8, 112], 'batch_size': 8, 'flops': 2214805229440}, {'type': 'perplexity', 'in_batch_dim': [8, 64], 'batch_size': 8, 'flops': 1265603017216}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1582003754624}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1898404492032}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1582003754624}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1898404492032}, {'type': 'perplexity', 'in_batch_dim': [8, 64], 'batch_size': 8, 'flops': 1265603017216}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1898404492032}, {'type': 'perplexity', 'in_batch_dim': [8, 112], 'batch_size': 8, 'flops': 2214805229440}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1898404492032}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1582003754624}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1582003754624}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1582003754624}, {'type': 'perplexity', 'in_batch_dim': [8, 64], 'batch_size': 8, 'flops': 1265603017216}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1582003754624}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1898404492032}, {'type': 'perplexity', 'in_batch_dim': [8, 64], 'batch_size': 8, 'flops': 1265603017216}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1582003754624}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1898404492032}, {'type': 'perplexity', 'in_batch_dim': [8, 64], 'batch_size': 8, 'flops': 1265603017216}, {'type': 'perplexity', 'in_batch_dim': [8, 112], 'batch_size': 8, 'flops': 2214805229440}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1582003754624}, {'type': 'perplexity', 'in_batch_dim': [8, 144], 'batch_size': 8, 'flops': 2847606704256}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1582003754624}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1582003754624}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1582003754624}, {'type': 'perplexity', 'in_batch_dim': [8, 64], 'batch_size': 8, 'flops': 1265603017216}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1898404492032}, {'type': 'perplexity', 'in_batch_dim': [8, 112], 'batch_size': 8, 'flops': 2214805229440}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1898404492032}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1582003754624}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1582003754624}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1582003754624}, {'type': 'perplexity', 'in_batch_dim': [8, 112], 'batch_size': 8, 'flops': 2214805229440}, {'type': 'perplexity', 'in_batch_dim': [8, 64], 'batch_size': 8, 'flops': 1265603017216}, {'type': 'perplexity', 'in_batch_dim': [8, 112], 'batch_size': 8, 'flops': 2214805229440}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1582003754624}, {'type': 'perplexity', 'in_batch_dim': [8, 64], 'batch_size': 8, 'flops': 1265603017216}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1582003754624}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1582003754624}, {'type': 'perplexity', 'in_batch_dim': [8, 64], 'batch_size': 8, 'flops': 1265603017216}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1582003754624}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1582003754624}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1582003754624}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1898404492032}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1582003754624}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1582003754624}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1898404492032}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1898404492032}, {'type': 'perplexity', 'in_batch_dim': [8, 112], 'batch_size': 8, 'flops': 2214805229440}, {'type': 'perplexity', 'in_batch_dim': [8, 64], 'batch_size': 8, 'flops': 1265603017216}, {'type': 'perplexity', 'in_batch_dim': [8, 112], 'batch_size': 8, 'flops': 2214805229440}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1898404492032}, {'type': 'perplexity', 'in_batch_dim': [8, 112], 'batch_size': 8, 'flops': 2214805229440}, {'type': 'perplexity', 'in_batch_dim': [8, 128], 'batch_size': 8, 'flops': 2531205966848}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1582003754624}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1898404492032}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1898404492032}, {'type': 'perplexity', 'in_batch_dim': [8, 192], 'batch_size': 8, 'flops': 3796808916480}, {'type': 'perplexity', 'in_batch_dim': [8, 64], 'batch_size': 8, 'flops': 1265603017216}, {'type': 'perplexity', 'in_batch_dim': [8, 144], 'batch_size': 8, 'flops': 2847606704256}, {'type': 'perplexity', 'in_batch_dim': [8, 64], 'batch_size': 8, 'flops': 1265603017216}, {'type': 'perplexity', 'in_batch_dim': [8, 144], 'batch_size': 8, 'flops': 2847606704256}, {'type': 'perplexity', 'in_batch_dim': [8, 64], 'batch_size': 8, 'flops': 1265603017216}, {'type': 'perplexity', 'in_batch_dim': [8, 64], 'batch_size': 8, 'flops': 1265603017216}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1898404492032}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1898404492032}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1582003754624}, {'type': 'perplexity', 'in_batch_dim': [8, 128], 'batch_size': 8, 'flops': 2531205966848}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1898404492032}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1898404492032}, {'type': 'perplexity', 'in_batch_dim': [8, 112], 'batch_size': 8, 'flops': 2214805229440}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1582003754624}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1582003754624}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1898404492032}, {'type': 'perplexity', 'in_batch_dim': [8, 64], 'batch_size': 8, 'flops': 1265603017216}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1898404492032}, {'type': 'perplexity', 'in_batch_dim': [8, 112], 'batch_size': 8, 'flops': 2214805229440}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1898404492032}, {'type': 'perplexity', 'in_batch_dim': [8, 64], 'batch_size': 8, 'flops': 1265603017216}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1582003754624}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1898404492032}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1898404492032}, {'type': 'perplexity', 'in_batch_dim': [8, 64], 'batch_size': 8, 'flops': 1265603017216}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1898404492032}, {'type': 'perplexity', 'in_batch_dim': [8, 64], 'batch_size': 8, 'flops': 1265603017216}, {'type': 'perplexity', 'in_batch_dim': [8, 112], 'batch_size': 8, 'flops': 2214805229440}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1582003754624}, {'type': 'perplexity', 'in_batch_dim': [8, 128], 'batch_size': 8, 'flops': 2531205966848}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1582003754624}, {'type': 'perplexity', 'in_batch_dim': [8, 112], 'batch_size': 8, 'flops': 2214805229440}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1898404492032}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1582003754624}, {'type': 'perplexity', 'in_batch_dim': [8, 112], 'batch_size': 8, 'flops': 2214805229440}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1582003754624}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1898404492032}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1898404492032}, {'type': 'perplexity', 'in_batch_dim': [8, 128], 'batch_size': 8, 'flops': 2531205966848}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1898404492032}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1898404492032}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1582003754624}, {'type': 'perplexity', 'in_batch_dim': [8, 112], 'batch_size': 8, 'flops': 2214805229440}, {'type': 'perplexity', 'in_batch_dim': [8, 128], 'batch_size': 8, 'flops': 2531205966848}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1898404492032}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1582003754624}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1582003754624}, {'type': 'perplexity', 'in_batch_dim': [8, 128], 'batch_size': 8, 'flops': 2531205966848}, {'type': 'perplexity', 'in_batch_dim': [8, 112], 'batch_size': 8, 'flops': 2214805229440}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1582003754624}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1582003754624}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1898404492032}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1582003754624}, {'type': 'perplexity', 'in_batch_dim': [8, 64], 'batch_size': 8, 'flops': 1265603017216}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1582003754624}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1582003754624}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1582003754624}, {'type': 'perplexity', 'in_batch_dim': [8, 112], 'batch_size': 8, 'flops': 2214805229440}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1582003754624}, {'type': 'perplexity', 'in_batch_dim': [8, 64], 'batch_size': 8, 'flops': 1265603017216}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1582003754624}, {'type': 'perplexity', 'in_batch_dim': [8, 128], 'batch_size': 8, 'flops': 2531205966848}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1898404492032}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1898404492032}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1582003754624}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1898404492032}, {'type': 'perplexity', 'in_batch_dim': [8, 64], 'batch_size': 8, 'flops': 1265603017216}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1898404492032}, {'type': 'perplexity', 'in_batch_dim': [8, 112], 'batch_size': 8, 'flops': 2214805229440}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1898404492032}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1582003754624}, {'type': 'perplexity', 'in_batch_dim': [8, 112], 'batch_size': 8, 'flops': 2214805229440}, {'type': 'perplexity', 'in_batch_dim': [8, 112], 'batch_size': 8, 'flops': 2214805229440}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1898404492032}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1898404492032}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1898404492032}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1898404492032}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1582003754624}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1582003754624}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1582003754624}, {'type': 'perplexity', 'in_batch_dim': [8, 112], 'batch_size': 8, 'flops': 2214805229440}, {'type': 'perplexity', 'in_batch_dim': [8, 64], 'batch_size': 8, 'flops': 1265603017216}, {'type': 'perplexity', 'in_batch_dim': [8, 144], 'batch_size': 8, 'flops': 2847606704256}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1898404492032}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1898404492032}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1898404492032}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1898404492032}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1898404492032}, {'type': 'perplexity', 'in_batch_dim': [8, 112], 'batch_size': 8, 'flops': 2214805229440}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1582003754624}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1582003754624}, {'type': 'perplexity', 'in_batch_dim': [8, 64], 'batch_size': 8, 'flops': 1265603017216}, {'type': 'perplexity', 'in_batch_dim': [8, 112], 'batch_size': 8, 'flops': 2214805229440}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1582003754624}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1898404492032}, {'type': 'perplexity', 'in_batch_dim': [8, 64], 'batch_size': 8, 'flops': 1265603017216}, {'type': 'perplexity', 'in_batch_dim': [8, 112], 'batch_size': 8, 'flops': 2214805229440}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1898404492032}, {'type': 'perplexity', 'in_batch_dim': [8, 64], 'batch_size': 8, 'flops': 1265603017216}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1582003754624}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1582003754624}, {'type': 'perplexity', 'in_batch_dim': [8, 64], 'batch_size': 8, 'flops': 1265603017216}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1898404492032}, {'type': 'perplexity', 'in_batch_dim': [8, 64], 'batch_size': 8, 'flops': 1265603017216}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1582003754624}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1898404492032}, {'type': 'perplexity', 'in_batch_dim': [8, 112], 'batch_size': 8, 'flops': 2214805229440}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1582003754624}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1898404492032}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1582003754624}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1898404492032}, {'type': 'perplexity', 'in_batch_dim': [8, 112], 'batch_size': 8, 'flops': 2214805229440}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1898404492032}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1898404492032}, {'type': 'perplexity', 'in_batch_dim': [8, 64], 'batch_size': 8, 'flops': 1265603017216}, {'type': 'perplexity', 'in_batch_dim': [8, 64], 'batch_size': 8, 'flops': 1265603017216}, {'type': 'perplexity', 'in_batch_dim': [8, 128], 'batch_size': 8, 'flops': 2531205966848}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1582003754624}, {'type': 'perplexity', 'in_batch_dim': [8, 64], 'batch_size': 8, 'flops': 1265603017216}, {'type': 'perplexity', 'in_batch_dim': [8, 112], 'batch_size': 8, 'flops': 2214805229440}, {'type': 'perplexity', 'in_batch_dim': [8, 112], 'batch_size': 8, 'flops': 2214805229440}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1898404492032}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1582003754624}, {'type': 'perplexity', 'in_batch_dim': [8, 128], 'batch_size': 8, 'flops': 2531205966848}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1898404492032}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1582003754624}, {'type': 'perplexity', 'in_batch_dim': [8, 112], 'batch_size': 8, 'flops': 2214805229440}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1898404492032}, {'type': 'perplexity', 'in_batch_dim': [8, 64], 'batch_size': 8, 'flops': 1265603017216}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1582003754624}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1898404492032}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1898404492032}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1582003754624}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1582003754624}, {'type': 'perplexity', 'in_batch_dim': [8, 64], 'batch_size': 8, 'flops': 1265603017216}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1898404492032}, {'type': 'perplexity', 'in_batch_dim': [8, 112], 'batch_size': 8, 'flops': 2214805229440}, {'type': 'perplexity', 'in_batch_dim': [8, 64], 'batch_size': 8, 'flops': 1265603017216}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1898404492032}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1582003754624}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1582003754624}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1898404492032}, {'type': 'perplexity', 'in_batch_dim': [8, 112], 'batch_size': 8, 'flops': 2214805229440}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1898404492032}, {'type': 'perplexity', 'in_batch_dim': [8, 112], 'batch_size': 8, 'flops': 2214805229440}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1582003754624}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1582003754624}, {'type': 'perplexity', 'in_batch_dim': [8, 64], 'batch_size': 8, 'flops': 1265603017216}, {'type': 'perplexity', 'in_batch_dim': [8, 112], 'batch_size': 8, 'flops': 2214805229440}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1582003754624}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1898404492032}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1582003754624}, {'type': 'perplexity', 'in_batch_dim': [8, 112], 'batch_size': 8, 'flops': 2214805229440}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1898404492032}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1898404492032}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1582003754624}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1898404492032}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1582003754624}, {'type': 'perplexity', 'in_batch_dim': [8, 112], 'batch_size': 8, 'flops': 2214805229440}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1582003754624}, {'type': 'perplexity', 'in_batch_dim': [8, 64], 'batch_size': 8, 'flops': 1265603017216}, {'type': 'perplexity', 'in_batch_dim': [8, 64], 'batch_size': 8, 'flops': 1265603017216}, {'type': 'perplexity', 'in_batch_dim': [8, 48], 'batch_size': 8, 'flops': 949202279808}, {'type': 'perplexity', 'in_batch_dim': [8, 112], 'batch_size': 8, 'flops': 2214805229440}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1582003754624}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1898404492032}, {'type': 'perplexity', 'in_batch_dim': [8, 128], 'batch_size': 8, 'flops': 2531205966848}, {'type': 'perplexity', 'in_batch_dim': [8, 112], 'batch_size': 8, 'flops': 2214805229440}, {'type': 'perplexity', 'in_batch_dim': [8, 128], 'batch_size': 8, 'flops': 2531205966848}, {'type': 'perplexity', 'in_batch_dim': [8, 112], 'batch_size': 8, 'flops': 2214805229440}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1582003754624}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1898404492032}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1582003754624}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1582003754624}, {'type': 'perplexity', 'in_batch_dim': [8, 112], 'batch_size': 8, 'flops': 2214805229440}, {'type': 'perplexity', 'in_batch_dim': [8, 112], 'batch_size': 8, 'flops': 2214805229440}, {'type': 'perplexity', 'in_batch_dim': [8, 64], 'batch_size': 8, 'flops': 1265603017216}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1898404492032}, {'type': 'perplexity', 'in_batch_dim': [8, 112], 'batch_size': 8, 'flops': 2214805229440}, {'type': 'perplexity', 'in_batch_dim': [8, 64], 'batch_size': 8, 'flops': 1265603017216}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1898404492032}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1582003754624}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1898404492032}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1898404492032}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1898404492032}, {'type': 'perplexity', 'in_batch_dim': [8, 48], 'batch_size': 8, 'flops': 949202279808}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1898404492032}, {'type': 'perplexity', 'in_batch_dim': [8, 112], 'batch_size': 8, 'flops': 2214805229440}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1582003754624}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1898404492032}, {'type': 'perplexity', 'in_batch_dim': [8, 64], 'batch_size': 8, 'flops': 1265603017216}, {'type': 'perplexity', 'in_batch_dim': [8, 112], 'batch_size': 8, 'flops': 2214805229440}, {'type': 'perplexity', 'in_batch_dim': [8, 64], 'batch_size': 8, 'flops': 1265603017216}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1582003754624}, {'type': 'perplexity', 'in_batch_dim': [8, 64], 'batch_size': 8, 'flops': 1265603017216}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1582003754624}, {'type': 'perplexity', 'in_batch_dim': [8, 112], 'batch_size': 8, 'flops': 2214805229440}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1582003754624}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1582003754624}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1582003754624}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1582003754624}, {'type': 'perplexity', 'in_batch_dim': [8, 112], 'batch_size': 8, 'flops': 2214805229440}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1582003754624}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1898404492032}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1898404492032}, {'type': 'perplexity', 'in_batch_dim': [8, 112], 'batch_size': 8, 'flops': 2214805229440}, {'type': 'perplexity', 'in_batch_dim': [8, 144], 'batch_size': 8, 'flops': 2847606704256}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1582003754624}, {'type': 'perplexity', 'in_batch_dim': [8, 64], 'batch_size': 8, 'flops': 1265603017216}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1898404492032}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1898404492032}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1582003754624}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1898404492032}, {'type': 'perplexity', 'in_batch_dim': [8, 128], 'batch_size': 8, 'flops': 2531205966848}, {'type': 'perplexity', 'in_batch_dim': [8, 112], 'batch_size': 8, 'flops': 2214805229440}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1898404492032}, {'type': 'perplexity', 'in_batch_dim': [8, 64], 'batch_size': 8, 'flops': 1265603017216}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1898404492032}, {'type': 'perplexity', 'in_batch_dim': [8, 160], 'batch_size': 8, 'flops': 3164007441664}, {'type': 'perplexity', 'in_batch_dim': [8, 64], 'batch_size': 8, 'flops': 1265603017216}, {'type': 'perplexity', 'in_batch_dim': [8, 112], 'batch_size': 8, 'flops': 2214805229440}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1898404492032}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1898404492032}, {'type': 'perplexity', 'in_batch_dim': [8, 144], 'batch_size': 8, 'flops': 2847606704256}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1582003754624}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1898404492032}, {'type': 'perplexity', 'in_batch_dim': [8, 64], 'batch_size': 8, 'flops': 1265603017216}, {'type': 'perplexity', 'in_batch_dim': [8, 64], 'batch_size': 8, 'flops': 1265603017216}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1898404492032}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1898404492032}, {'type': 'perplexity', 'in_batch_dim': [8, 112], 'batch_size': 8, 'flops': 2214805229440}, {'type': 'perplexity', 'in_batch_dim': [8, 112], 'batch_size': 8, 'flops': 2214805229440}, {'type': 'perplexity', 'in_batch_dim': [8, 128], 'batch_size': 8, 'flops': 2531205966848}, {'type': 'perplexity', 'in_batch_dim': [8, 112], 'batch_size': 8, 'flops': 2214805229440}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1582003754624}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1582003754624}, {'type': 'perplexity', 'in_batch_dim': [8, 112], 'batch_size': 8, 'flops': 2214805229440}, {'type': 'perplexity', 'in_batch_dim': [8, 128], 'batch_size': 8, 'flops': 2531205966848}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1582003754624}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1582003754624}, {'type': 'perplexity', 'in_batch_dim': [8, 64], 'batch_size': 8, 'flops': 1265603017216}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1898404492032}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1582003754624}, {'type': 'perplexity', 'in_batch_dim': [8, 112], 'batch_size': 8, 'flops': 2214805229440}, {'type': 'perplexity', 'in_batch_dim': [8, 64], 'batch_size': 8, 'flops': 1265603017216}, {'type': 'perplexity', 'in_batch_dim': [8, 64], 'batch_size': 8, 'flops': 1265603017216}, {'type': 'perplexity', 'in_batch_dim': [8, 128], 'batch_size': 8, 'flops': 2531205966848}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1898404492032}, {'type': 'perplexity', 'in_batch_dim': [8, 112], 'batch_size': 8, 'flops': 2214805229440}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1582003754624}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1582003754624}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1582003754624}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1582003754624}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1582003754624}, {'type': 'perplexity', 'in_batch_dim': [8, 112], 'batch_size': 8, 'flops': 2214805229440}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1898404492032}, {'type': 'perplexity', 'in_batch_dim': [8, 112], 'batch_size': 8, 'flops': 2214805229440}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1898404492032}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1582003754624}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1582003754624}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1898404492032}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1898404492032}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1582003754624}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1582003754624}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1582003754624}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1582003754624}, {'type': 'perplexity', 'in_batch_dim': [8, 48], 'batch_size': 8, 'flops': 949202279808}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1582003754624}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1898404492032}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1582003754624}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1898404492032}, {'type': 'perplexity', 'in_batch_dim': [8, 112], 'batch_size': 8, 'flops': 2214805229440}, {'type': 'perplexity', 'in_batch_dim': [8, 128], 'batch_size': 8, 'flops': 2531205966848}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1898404492032}, {'type': 'perplexity', 'in_batch_dim': [8, 64], 'batch_size': 8, 'flops': 1265603017216}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1898404492032}, {'type': 'perplexity', 'in_batch_dim': [8, 64], 'batch_size': 8, 'flops': 1265603017216}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1582003754624}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1582003754624}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1582003754624}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1582003754624}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1898404492032}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1582003754624}, {'type': 'perplexity', 'in_batch_dim': [8, 112], 'batch_size': 8, 'flops': 2214805229440}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1898404492032}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1582003754624}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1898404492032}, {'type': 'perplexity', 'in_batch_dim': [8, 64], 'batch_size': 8, 'flops': 1265603017216}, {'type': 'perplexity', 'in_batch_dim': [8, 64], 'batch_size': 8, 'flops': 1265603017216}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1898404492032}, {'type': 'perplexity', 'in_batch_dim': [8, 64], 'batch_size': 8, 'flops': 1265603017216}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1582003754624}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1582003754624}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1582003754624}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1582003754624}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1582003754624}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1582003754624}, {'type': 'perplexity', 'in_batch_dim': [8, 64], 'batch_size': 8, 'flops': 1265603017216}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1582003754624}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1582003754624}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1898404492032}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1898404492032}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1898404492032}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1898404492032}, {'type': 'perplexity', 'in_batch_dim': [8, 128], 'batch_size': 8, 'flops': 2531205966848}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1582003754624}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1898404492032}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1898404492032}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1898404492032}, {'type': 'perplexity', 'in_batch_dim': [8, 64], 'batch_size': 8, 'flops': 1265603017216}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1898404492032}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1898404492032}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1582003754624}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1898404492032}, {'type': 'perplexity', 'in_batch_dim': [8, 112], 'batch_size': 8, 'flops': 2214805229440}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1898404492032}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1582003754624}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1898404492032}, {'type': 'perplexity', 'in_batch_dim': [8, 64], 'batch_size': 8, 'flops': 1265603017216}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1898404492032}, {'type': 'perplexity', 'in_batch_dim': [8, 112], 'batch_size': 8, 'flops': 2214805229440}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1898404492032}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1898404492032}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1582003754624}, {'type': 'perplexity', 'in_batch_dim': [8, 64], 'batch_size': 8, 'flops': 1265603017216}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1582003754624}, {'type': 'perplexity', 'in_batch_dim': [8, 128], 'batch_size': 8, 'flops': 2531205966848}, {'type': 'perplexity', 'in_batch_dim': [8, 64], 'batch_size': 8, 'flops': 1265603017216}, {'type': 'perplexity', 'in_batch_dim': [8, 128], 'batch_size': 8, 'flops': 2531205966848}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1898404492032}, {'type': 'perplexity', 'in_batch_dim': [8, 112], 'batch_size': 8, 'flops': 2214805229440}, {'type': 'perplexity', 'in_batch_dim': [8, 112], 'batch_size': 8, 'flops': 2214805229440}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1898404492032}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1898404492032}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1582003754624}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1582003754624}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1582003754624}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1898404492032}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1582003754624}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1582003754624}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1898404492032}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1582003754624}, {'type': 'perplexity', 'in_batch_dim': [8, 64], 'batch_size': 8, 'flops': 1265603017216}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1898404492032}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1898404492032}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1898404492032}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1898404492032}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1582003754624}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1898404492032}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1582003754624}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1582003754624}, {'type': 'perplexity', 'in_batch_dim': [8, 112], 'batch_size': 8, 'flops': 2214805229440}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1582003754624}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1898404492032}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1582003754624}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1582003754624}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1582003754624}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1898404492032}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1582003754624}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1898404492032}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1582003754624}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1582003754624}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1898404492032}, {'type': 'perplexity', 'in_batch_dim': [8, 112], 'batch_size': 8, 'flops': 2214805229440}, {'type': 'perplexity', 'in_batch_dim': [8, 128], 'batch_size': 8, 'flops': 2531205966848}, {'type': 'perplexity', 'in_batch_dim': [8, 112], 'batch_size': 8, 'flops': 2214805229440}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1582003754624}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1898404492032}, {'type': 'perplexity', 'in_batch_dim': [8, 64], 'batch_size': 8, 'flops': 1265603017216}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1898404492032}, {'type': 'perplexity', 'in_batch_dim': [8, 64], 'batch_size': 8, 'flops': 1265603017216}, {'type': 'perplexity', 'in_batch_dim': [8, 144], 'batch_size': 8, 'flops': 2847606704256}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1582003754624}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1898404492032}, {'type': 'perplexity', 'in_batch_dim': [8, 112], 'batch_size': 8, 'flops': 2214805229440}, {'type': 'perplexity', 'in_batch_dim': [8, 144], 'batch_size': 8, 'flops': 2847606704256}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1582003754624}, {'type': 'perplexity', 'in_batch_dim': [8, 112], 'batch_size': 8, 'flops': 2214805229440}, {'type': 'perplexity', 'in_batch_dim': [8, 64], 'batch_size': 8, 'flops': 1265603017216}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1582003754624}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1898404492032}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1898404492032}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1582003754624}, {'type': 'perplexity', 'in_batch_dim': [8, 112], 'batch_size': 8, 'flops': 2214805229440}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1582003754624}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1582003754624}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1582003754624}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1582003754624}, {'type': 'perplexity', 'in_batch_dim': [8, 112], 'batch_size': 8, 'flops': 2214805229440}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1898404492032}, {'type': 'perplexity', 'in_batch_dim': [8, 128], 'batch_size': 8, 'flops': 2531205966848}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1582003754624}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1582003754624}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1898404492032}, {'type': 'perplexity', 'in_batch_dim': [8, 64], 'batch_size': 8, 'flops': 1265603017216}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1898404492032}, {'type': 'perplexity', 'in_batch_dim': [8, 64], 'batch_size': 8, 'flops': 1265603017216}, {'type': 'perplexity', 'in_batch_dim': [8, 112], 'batch_size': 8, 'flops': 2214805229440}, {'type': 'perplexity', 'in_batch_dim': [8, 112], 'batch_size': 8, 'flops': 2214805229440}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1582003754624}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1898404492032}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1582003754624}, {'type': 'perplexity', 'in_batch_dim': [8, 64], 'batch_size': 8, 'flops': 1265603017216}, {'type': 'perplexity', 'in_batch_dim': [8, 64], 'batch_size': 8, 'flops': 1265603017216}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1898404492032}, {'type': 'perplexity', 'in_batch_dim': [8, 64], 'batch_size': 8, 'flops': 1265603017216}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1898404492032}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1898404492032}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1582003754624}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1898404492032}, {'type': 'perplexity', 'in_batch_dim': [8, 144], 'batch_size': 8, 'flops': 2847606704256}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1898404492032}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1582003754624}, {'type': 'perplexity', 'in_batch_dim': [8, 128], 'batch_size': 8, 'flops': 2531205966848}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1898404492032}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1898404492032}, {'type': 'perplexity', 'in_batch_dim': [8, 112], 'batch_size': 8, 'flops': 2214805229440}, {'type': 'perplexity', 'in_batch_dim': [8, 112], 'batch_size': 8, 'flops': 2214805229440}, {'type': 'perplexity', 'in_batch_dim': [8, 64], 'batch_size': 8, 'flops': 1265603017216}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1582003754624}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1898404492032}, {'type': 'perplexity', 'in_batch_dim': [8, 128], 'batch_size': 8, 'flops': 2531205966848}, {'type': 'perplexity', 'in_batch_dim': [8, 144], 'batch_size': 8, 'flops': 2847606704256}, {'type': 'perplexity', 'in_batch_dim': [8, 112], 'batch_size': 8, 'flops': 2214805229440}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1582003754624}, {'type': 'perplexity', 'in_batch_dim': [8, 64], 'batch_size': 8, 'flops': 1265603017216}, {'type': 'perplexity', 'in_batch_dim': [8, 64], 'batch_size': 8, 'flops': 1265603017216}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1898404492032}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1582003754624}, {'type': 'perplexity', 'in_batch_dim': [8, 64], 'batch_size': 8, 'flops': 1265603017216}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1898404492032}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1582003754624}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1898404492032}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1898404492032}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1582003754624}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1582003754624}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1582003754624}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1582003754624}, {'type': 'perplexity', 'in_batch_dim': [3, 48], 'batch_size': 8, 'flops': 949202279808}], 'timestamp': '2025-09-10 02:34:12.801390', 'step': 6990, 'epoch': 1} {'type': 'pplx', 'content': 8745.828083605744, 'timestamp': '2025-09-10 02:34:12.804526', 'step': 6990, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 2847885091968}, 'timestamp': '2025-09-10 02:34:12.834500', 'step': 6990, 'epoch': 1} {'type': 'loss', 'content': 0.17627334594726562, 'timestamp': '2025-09-10 02:34:12.837095', 'step': 6991, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 2847885091968}, 'timestamp': '2025-09-10 02:34:12.867396', 'step': 6991, 'epoch': 1} {'type': 'loss', 'content': 0.15321345627307892, 'timestamp': '2025-09-10 02:34:12.891033', 'step': 6992, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 3322488817984}, 'timestamp': '2025-09-10 02:34:12.921190', 'step': 6992, 'epoch': 1} {'type': 'loss', 'content': 0.17184989154338837, 'timestamp': '2025-09-10 02:34:12.923359', 'step': 6993, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 144], 'flops': 4271696270016}, 'timestamp': '2025-09-10 02:34:12.955023', 'step': 6993, 'epoch': 1} {'type': 'loss', 'content': 0.09586869925260544, 'timestamp': '2025-09-10 02:34:12.957498', 'step': 6994, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 3322488817984}, 'timestamp': '2025-09-10 02:34:12.987560', 'step': 6994, 'epoch': 1} {'type': 'loss', 'content': 0.22488625347614288, 'timestamp': '2025-09-10 02:34:12.989679', 'step': 6995, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 2847885091968}, 'timestamp': '2025-09-10 02:34:13.019191', 'step': 6995, 'epoch': 1} {'type': 'loss', 'content': 0.07496020197868347, 'timestamp': '2025-09-10 02:34:13.043121', 'step': 6996, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 144], 'flops': 4271696270016}, 'timestamp': '2025-09-10 02:34:13.074231', 'step': 6996, 'epoch': 1} {'type': 'loss', 'content': 0.12033155560493469, 'timestamp': '2025-09-10 02:34:13.076485', 'step': 6997, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 128], 'flops': 3797092544000}, 'timestamp': '2025-09-10 02:34:13.106132', 'step': 6997, 'epoch': 1} {'type': 'loss', 'content': 0.2058788388967514, 'timestamp': '2025-09-10 02:34:13.108347', 'step': 6998, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 128], 'flops': 3797092544000}, 'timestamp': '2025-09-10 02:34:13.137695', 'step': 6998, 'epoch': 1} {'type': 'loss', 'content': 0.10486876964569092, 'timestamp': '2025-09-10 02:34:13.139842', 'step': 6999, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 2847885091968}, 'timestamp': '2025-09-10 02:34:13.169580', 'step': 6999, 'epoch': 1} {'type': 'loss', 'content': 0.2437586486339569, 'timestamp': '2025-09-10 02:34:13.193241', 'step': 7000, 'epoch': 1} {'type': 'info', 'content': 'Checkpoint saved at step 7000', 'timestamp': '2025-09-10 02:34:18.085322', 'step': 7000, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 2847885091968}, 'timestamp': '2025-09-10 02:34:18.134630', 'step': 7000, 'epoch': 1} {'type': 'loss', 'content': 0.06092371419072151, 'timestamp': '2025-09-10 02:34:18.137073', 'step': 7001, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 3322488817984}, 'timestamp': '2025-09-10 02:34:18.167756', 'step': 7001, 'epoch': 1} {'type': 'loss', 'content': 0.15388494729995728, 'timestamp': '2025-09-10 02:34:18.170040', 'step': 7002, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 3322488817984}, 'timestamp': '2025-09-10 02:34:18.201036', 'step': 7002, 'epoch': 1} {'type': 'loss', 'content': 0.23939380049705505, 'timestamp': '2025-09-10 02:34:18.203675', 'step': 7003, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 128], 'flops': 3797092544000}, 'timestamp': '2025-09-10 02:34:18.234588', 'step': 7003, 'epoch': 1} {'type': 'loss', 'content': 0.150702103972435, 'timestamp': '2025-09-10 02:34:18.258172', 'step': 7004, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 2847885091968}, 'timestamp': '2025-09-10 02:34:18.289683', 'step': 7004, 'epoch': 1} {'type': 'loss', 'content': 0.14889094233512878, 'timestamp': '2025-09-10 02:34:18.291738', 'step': 7005, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 2847885091968}, 'timestamp': '2025-09-10 02:34:18.325481', 'step': 7005, 'epoch': 1} {'type': 'loss', 'content': 0.13647234439849854, 'timestamp': '2025-09-10 02:34:18.327718', 'step': 7006, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 2847885091968}, 'timestamp': '2025-09-10 02:34:18.357723', 'step': 7006, 'epoch': 1} {'type': 'loss', 'content': 0.14783094823360443, 'timestamp': '2025-09-10 02:34:18.359893', 'step': 7007, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 128], 'flops': 3797092544000}, 'timestamp': '2025-09-10 02:34:18.390530', 'step': 7007, 'epoch': 1} {'type': 'loss', 'content': 0.11046730726957321, 'timestamp': '2025-09-10 02:34:18.414095', 'step': 7008, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 128], 'flops': 3797092544000}, 'timestamp': '2025-09-10 02:34:18.445731', 'step': 7008, 'epoch': 1} {'type': 'loss', 'content': 0.15611612796783447, 'timestamp': '2025-09-10 02:34:18.447820', 'step': 7009, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 128], 'flops': 3797092544000}, 'timestamp': '2025-09-10 02:34:18.477436', 'step': 7009, 'epoch': 1} {'type': 'loss', 'content': 0.10868657380342484, 'timestamp': '2025-09-10 02:34:18.479770', 'step': 7010, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 2847885091968}, 'timestamp': '2025-09-10 02:34:18.509787', 'step': 7010, 'epoch': 1} {'type': 'loss', 'content': 0.11776240170001984, 'timestamp': '2025-09-10 02:34:18.512130', 'step': 7011, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 3322488817984}, 'timestamp': '2025-09-10 02:34:18.543281', 'step': 7011, 'epoch': 1} {'type': 'loss', 'content': 0.15689030289649963, 'timestamp': '2025-09-10 02:34:18.566915', 'step': 7012, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 128], 'flops': 3797092544000}, 'timestamp': '2025-09-10 02:34:18.599682', 'step': 7012, 'epoch': 1} {'type': 'loss', 'content': 0.23811817169189453, 'timestamp': '2025-09-10 02:34:18.601994', 'step': 7013, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 2847885091968}, 'timestamp': '2025-09-10 02:34:18.631755', 'step': 7013, 'epoch': 1} {'type': 'loss', 'content': 0.2113162726163864, 'timestamp': '2025-09-10 02:34:18.634472', 'step': 7014, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 128], 'flops': 3797092544000}, 'timestamp': '2025-09-10 02:34:18.665659', 'step': 7014, 'epoch': 1} {'type': 'loss', 'content': 0.15241746604442596, 'timestamp': '2025-09-10 02:34:18.668037', 'step': 7015, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 2847885091968}, 'timestamp': '2025-09-10 02:34:18.698998', 'step': 7015, 'epoch': 1} {'type': 'loss', 'content': 0.21773283183574677, 'timestamp': '2025-09-10 02:34:18.722380', 'step': 7016, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 3322488817984}, 'timestamp': '2025-09-10 02:34:18.755892', 'step': 7016, 'epoch': 1} {'type': 'loss', 'content': 0.22169047594070435, 'timestamp': '2025-09-10 02:34:18.758099', 'step': 7017, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 3322488817984}, 'timestamp': '2025-09-10 02:34:18.788180', 'step': 7017, 'epoch': 1} {'type': 'loss', 'content': 0.2038460671901703, 'timestamp': '2025-09-10 02:34:18.790551', 'step': 7018, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 3322488817984}, 'timestamp': '2025-09-10 02:34:18.820897', 'step': 7018, 'epoch': 1} {'type': 'loss', 'content': 0.19715869426727295, 'timestamp': '2025-09-10 02:34:18.823084', 'step': 7019, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 2847885091968}, 'timestamp': '2025-09-10 02:34:18.853359', 'step': 7019, 'epoch': 1} {'type': 'loss', 'content': 0.11418675631284714, 'timestamp': '2025-09-10 02:34:18.877393', 'step': 7020, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 128], 'flops': 3797092544000}, 'timestamp': '2025-09-10 02:34:18.908328', 'step': 7020, 'epoch': 1} {'type': 'loss', 'content': 0.1704622358083725, 'timestamp': '2025-09-10 02:34:18.910726', 'step': 7021, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 128], 'flops': 3797092544000}, 'timestamp': '2025-09-10 02:34:18.940608', 'step': 7021, 'epoch': 1} {'type': 'loss', 'content': 0.13045798242092133, 'timestamp': '2025-09-10 02:34:18.942809', 'step': 7022, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 144], 'flops': 4271696270016}, 'timestamp': '2025-09-10 02:34:18.973648', 'step': 7022, 'epoch': 1} {'type': 'loss', 'content': 0.1384359896183014, 'timestamp': '2025-09-10 02:34:18.976535', 'step': 7023, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 3322488817984}, 'timestamp': '2025-09-10 02:34:19.006583', 'step': 7023, 'epoch': 1} {'type': 'loss', 'content': 0.09592706710100174, 'timestamp': '2025-09-10 02:34:19.030171', 'step': 7024, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 3322488817984}, 'timestamp': '2025-09-10 02:34:19.062207', 'step': 7024, 'epoch': 1} {'type': 'loss', 'content': 0.1921153962612152, 'timestamp': '2025-09-10 02:34:19.064630', 'step': 7025, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 2847885091968}, 'timestamp': '2025-09-10 02:34:19.103284', 'step': 7025, 'epoch': 1} {'type': 'loss', 'content': 0.2830812931060791, 'timestamp': '2025-09-10 02:34:19.105604', 'step': 7026, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 3322488817984}, 'timestamp': '2025-09-10 02:34:19.135485', 'step': 7026, 'epoch': 1} {'type': 'loss', 'content': 0.17482033371925354, 'timestamp': '2025-09-10 02:34:19.138932', 'step': 7027, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 3322488817984}, 'timestamp': '2025-09-10 02:34:19.172428', 'step': 7027, 'epoch': 1} {'type': 'loss', 'content': 0.14267189800739288, 'timestamp': '2025-09-10 02:34:19.196695', 'step': 7028, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 176], 'flops': 5220903722048}, 'timestamp': '2025-09-10 02:34:19.230227', 'step': 7028, 'epoch': 1} {'type': 'loss', 'content': 0.10785479843616486, 'timestamp': '2025-09-10 02:34:19.232428', 'step': 7029, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 2847885091968}, 'timestamp': '2025-09-10 02:34:19.270947', 'step': 7029, 'epoch': 1} {'type': 'loss', 'content': 0.15002478659152985, 'timestamp': '2025-09-10 02:34:19.274892', 'step': 7030, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 2847885091968}, 'timestamp': '2025-09-10 02:34:19.309674', 'step': 7030, 'epoch': 1} {'type': 'loss', 'content': 0.06681082397699356, 'timestamp': '2025-09-10 02:34:19.313538', 'step': 7031, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 144], 'flops': 4271696270016}, 'timestamp': '2025-09-10 02:34:19.345857', 'step': 7031, 'epoch': 1} {'type': 'loss', 'content': 0.0943887010216713, 'timestamp': '2025-09-10 02:34:19.369676', 'step': 7032, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 2847885091968}, 'timestamp': '2025-09-10 02:34:19.399664', 'step': 7032, 'epoch': 1} {'type': 'loss', 'content': 0.13300171494483948, 'timestamp': '2025-09-10 02:34:19.402940', 'step': 7033, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 3322488817984}, 'timestamp': '2025-09-10 02:34:19.434434', 'step': 7033, 'epoch': 1} {'type': 'loss', 'content': 0.1617969125509262, 'timestamp': '2025-09-10 02:34:19.438076', 'step': 7034, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 2847885091968}, 'timestamp': '2025-09-10 02:34:19.467529', 'step': 7034, 'epoch': 1} {'type': 'loss', 'content': 0.10208111256361008, 'timestamp': '2025-09-10 02:34:19.470187', 'step': 7035, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 2847885091968}, 'timestamp': '2025-09-10 02:34:19.500305', 'step': 7035, 'epoch': 1} {'type': 'loss', 'content': 0.10687102377414703, 'timestamp': '2025-09-10 02:34:19.524564', 'step': 7036, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 160], 'flops': 4746299996032}, 'timestamp': '2025-09-10 02:34:19.555022', 'step': 7036, 'epoch': 1} {'type': 'loss', 'content': 0.13883669674396515, 'timestamp': '2025-09-10 02:34:19.557655', 'step': 7037, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 3322488817984}, 'timestamp': '2025-09-10 02:34:19.587686', 'step': 7037, 'epoch': 1} {'type': 'loss', 'content': 0.11133092641830444, 'timestamp': '2025-09-10 02:34:19.590386', 'step': 7038, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 144], 'flops': 4271696270016}, 'timestamp': '2025-09-10 02:34:19.625540', 'step': 7038, 'epoch': 1} {'type': 'loss', 'content': 0.13950839638710022, 'timestamp': '2025-09-10 02:34:19.630596', 'step': 7039, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 3322488817984}, 'timestamp': '2025-09-10 02:34:19.662023', 'step': 7039, 'epoch': 1} {'type': 'loss', 'content': 0.17698410153388977, 'timestamp': '2025-09-10 02:34:19.685926', 'step': 7040, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 3322488817984}, 'timestamp': '2025-09-10 02:34:19.715818', 'step': 7040, 'epoch': 1} {'type': 'loss', 'content': 0.10423336178064346, 'timestamp': '2025-09-10 02:34:19.717717', 'step': 7041, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 3322488817984}, 'timestamp': '2025-09-10 02:34:19.752770', 'step': 7041, 'epoch': 1} {'type': 'loss', 'content': 0.21042096614837646, 'timestamp': '2025-09-10 02:34:19.755165', 'step': 7042, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 3322488817984}, 'timestamp': '2025-09-10 02:34:19.788351', 'step': 7042, 'epoch': 1} {'type': 'loss', 'content': 0.14852529764175415, 'timestamp': '2025-09-10 02:34:19.790626', 'step': 7043, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 2847885091968}, 'timestamp': '2025-09-10 02:34:19.820390', 'step': 7043, 'epoch': 1} {'type': 'loss', 'content': 0.1464313417673111, 'timestamp': '2025-09-10 02:34:19.843789', 'step': 7044, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 2847885091968}, 'timestamp': '2025-09-10 02:34:19.874676', 'step': 7044, 'epoch': 1} {'type': 'loss', 'content': 0.12913274765014648, 'timestamp': '2025-09-10 02:34:19.876940', 'step': 7045, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 3322488817984}, 'timestamp': '2025-09-10 02:34:19.906102', 'step': 7045, 'epoch': 1} {'type': 'loss', 'content': 0.10365214943885803, 'timestamp': '2025-09-10 02:34:19.908508', 'step': 7046, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 3322488817984}, 'timestamp': '2025-09-10 02:34:19.940382', 'step': 7046, 'epoch': 1} {'type': 'loss', 'content': 0.18332569301128387, 'timestamp': '2025-09-10 02:34:19.943558', 'step': 7047, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 3322488817984}, 'timestamp': '2025-09-10 02:34:19.975625', 'step': 7047, 'epoch': 1} {'type': 'loss', 'content': 0.1903056502342224, 'timestamp': '2025-09-10 02:34:19.999423', 'step': 7048, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 3322488817984}, 'timestamp': '2025-09-10 02:34:20.029519', 'step': 7048, 'epoch': 1} {'type': 'loss', 'content': 0.12719343602657318, 'timestamp': '2025-09-10 02:34:20.031865', 'step': 7049, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 3322488817984}, 'timestamp': '2025-09-10 02:34:20.061970', 'step': 7049, 'epoch': 1} {'type': 'loss', 'content': 0.11957863718271255, 'timestamp': '2025-09-10 02:34:20.064694', 'step': 7050, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 3322488817984}, 'timestamp': '2025-09-10 02:34:20.096004', 'step': 7050, 'epoch': 1} {'type': 'loss', 'content': 0.21194882690906525, 'timestamp': '2025-09-10 02:34:20.099855', 'step': 7051, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 144], 'flops': 4271696270016}, 'timestamp': '2025-09-10 02:34:20.132115', 'step': 7051, 'epoch': 1} {'type': 'loss', 'content': 0.20201122760772705, 'timestamp': '2025-09-10 02:34:20.156277', 'step': 7052, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 3322488817984}, 'timestamp': '2025-09-10 02:34:20.187636', 'step': 7052, 'epoch': 1} {'type': 'loss', 'content': 0.1570168286561966, 'timestamp': '2025-09-10 02:34:20.189972', 'step': 7053, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 144], 'flops': 4271696270016}, 'timestamp': '2025-09-10 02:34:20.219711', 'step': 7053, 'epoch': 1} {'type': 'loss', 'content': 0.1675301343202591, 'timestamp': '2025-09-10 02:34:20.222341', 'step': 7054, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 2847885091968}, 'timestamp': '2025-09-10 02:34:20.251766', 'step': 7054, 'epoch': 1} {'type': 'loss', 'content': 0.16225196421146393, 'timestamp': '2025-09-10 02:34:20.254362', 'step': 7055, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 3322488817984}, 'timestamp': '2025-09-10 02:34:20.283496', 'step': 7055, 'epoch': 1} {'type': 'loss', 'content': 0.08207428455352783, 'timestamp': '2025-09-10 02:34:20.307390', 'step': 7056, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 128], 'flops': 3797092544000}, 'timestamp': '2025-09-10 02:34:20.337287', 'step': 7056, 'epoch': 1} {'type': 'loss', 'content': 0.18358415365219116, 'timestamp': '2025-09-10 02:34:20.339704', 'step': 7057, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 144], 'flops': 4271696270016}, 'timestamp': '2025-09-10 02:34:20.369308', 'step': 7057, 'epoch': 1} {'type': 'loss', 'content': 0.19829867780208588, 'timestamp': '2025-09-10 02:34:20.372098', 'step': 7058, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 160], 'flops': 4746299996032}, 'timestamp': '2025-09-10 02:34:20.402547', 'step': 7058, 'epoch': 1} {'type': 'loss', 'content': 0.2652362287044525, 'timestamp': '2025-09-10 02:34:20.405825', 'step': 7059, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 80], 'flops': 2373281365952}, 'timestamp': '2025-09-10 02:34:20.435581', 'step': 7059, 'epoch': 1} {'type': 'loss', 'content': 0.30684781074523926, 'timestamp': '2025-09-10 02:34:20.459186', 'step': 7060, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 144], 'flops': 4271696270016}, 'timestamp': '2025-09-10 02:34:20.489014', 'step': 7060, 'epoch': 1} {'type': 'loss', 'content': 0.13420253992080688, 'timestamp': '2025-09-10 02:34:20.491488', 'step': 7061, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 128], 'flops': 3797092544000}, 'timestamp': '2025-09-10 02:34:20.520923', 'step': 7061, 'epoch': 1} {'type': 'loss', 'content': 0.2829534709453583, 'timestamp': '2025-09-10 02:34:20.523851', 'step': 7062, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 3322488817984}, 'timestamp': '2025-09-10 02:34:20.553911', 'step': 7062, 'epoch': 1} {'type': 'loss', 'content': 0.21821822226047516, 'timestamp': '2025-09-10 02:34:20.556245', 'step': 7063, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 144], 'flops': 4271696270016}, 'timestamp': '2025-09-10 02:34:20.585977', 'step': 7063, 'epoch': 1} {'type': 'loss', 'content': 0.22242288291454315, 'timestamp': '2025-09-10 02:34:20.609440', 'step': 7064, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 3322488817984}, 'timestamp': '2025-09-10 02:34:20.639413', 'step': 7064, 'epoch': 1} {'type': 'loss', 'content': 0.14968068897724152, 'timestamp': '2025-09-10 02:34:20.641914', 'step': 7065, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 3322488817984}, 'timestamp': '2025-09-10 02:34:20.671483', 'step': 7065, 'epoch': 1} {'type': 'loss', 'content': 0.20007885992527008, 'timestamp': '2025-09-10 02:34:20.674464', 'step': 7066, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 176], 'flops': 5220903722048}, 'timestamp': '2025-09-10 02:34:20.708700', 'step': 7066, 'epoch': 1} {'type': 'loss', 'content': 0.18964678049087524, 'timestamp': '2025-09-10 02:34:20.713287', 'step': 7067, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 3322488817984}, 'timestamp': '2025-09-10 02:34:20.743862', 'step': 7067, 'epoch': 1} {'type': 'loss', 'content': 0.10331440716981888, 'timestamp': '2025-09-10 02:34:20.767577', 'step': 7068, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 3322488817984}, 'timestamp': '2025-09-10 02:34:20.798081', 'step': 7068, 'epoch': 1} {'type': 'loss', 'content': 0.14334091544151306, 'timestamp': '2025-09-10 02:34:20.800502', 'step': 7069, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 2847885091968}, 'timestamp': '2025-09-10 02:34:20.830440', 'step': 7069, 'epoch': 1} {'type': 'loss', 'content': 0.1185174435377121, 'timestamp': '2025-09-10 02:34:20.832904', 'step': 7070, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 2847885091968}, 'timestamp': '2025-09-10 02:34:20.862642', 'step': 7070, 'epoch': 1} {'type': 'loss', 'content': 0.1910548210144043, 'timestamp': '2025-09-10 02:34:20.864977', 'step': 7071, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 2847885091968}, 'timestamp': '2025-09-10 02:34:20.894476', 'step': 7071, 'epoch': 1} {'type': 'loss', 'content': 0.1689339131116867, 'timestamp': '2025-09-10 02:34:20.918442', 'step': 7072, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 2847885091968}, 'timestamp': '2025-09-10 02:34:20.948330', 'step': 7072, 'epoch': 1} {'type': 'loss', 'content': 0.135110542178154, 'timestamp': '2025-09-10 02:34:20.951400', 'step': 7073, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 3322488817984}, 'timestamp': '2025-09-10 02:34:20.982181', 'step': 7073, 'epoch': 1} {'type': 'loss', 'content': 0.14512105286121368, 'timestamp': '2025-09-10 02:34:20.984813', 'step': 7074, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 2847885091968}, 'timestamp': '2025-09-10 02:34:21.014761', 'step': 7074, 'epoch': 1} {'type': 'loss', 'content': 0.08129865676164627, 'timestamp': '2025-09-10 02:34:21.018121', 'step': 7075, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 3322488817984}, 'timestamp': '2025-09-10 02:34:21.049390', 'step': 7075, 'epoch': 1} {'type': 'loss', 'content': 0.09534456580877304, 'timestamp': '2025-09-10 02:34:21.073821', 'step': 7076, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 2847885091968}, 'timestamp': '2025-09-10 02:34:21.109236', 'step': 7076, 'epoch': 1} {'type': 'loss', 'content': 0.10664426535367966, 'timestamp': '2025-09-10 02:34:21.112861', 'step': 7077, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 80], 'flops': 2373281365952}, 'timestamp': '2025-09-10 02:34:21.143685', 'step': 7077, 'epoch': 1} {'type': 'loss', 'content': 0.16138851642608643, 'timestamp': '2025-09-10 02:34:21.146404', 'step': 7078, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 3322488817984}, 'timestamp': '2025-09-10 02:34:21.179599', 'step': 7078, 'epoch': 1} {'type': 'loss', 'content': 0.17262102663516998, 'timestamp': '2025-09-10 02:34:21.182460', 'step': 7079, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 2847885091968}, 'timestamp': '2025-09-10 02:34:21.212636', 'step': 7079, 'epoch': 1} {'type': 'loss', 'content': 0.10100973397493362, 'timestamp': '2025-09-10 02:34:21.236295', 'step': 7080, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 3322488817984}, 'timestamp': '2025-09-10 02:34:21.266359', 'step': 7080, 'epoch': 1} {'type': 'loss', 'content': 0.14553049206733704, 'timestamp': '2025-09-10 02:34:21.268503', 'step': 7081, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 3322488817984}, 'timestamp': '2025-09-10 02:34:21.298991', 'step': 7081, 'epoch': 1} {'type': 'loss', 'content': 0.15883365273475647, 'timestamp': '2025-09-10 02:34:21.301200', 'step': 7082, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 2847885091968}, 'timestamp': '2025-09-10 02:34:21.330721', 'step': 7082, 'epoch': 1} {'type': 'loss', 'content': 0.11178673058748245, 'timestamp': '2025-09-10 02:34:21.333935', 'step': 7083, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 3322488817984}, 'timestamp': '2025-09-10 02:34:21.364081', 'step': 7083, 'epoch': 1} {'type': 'loss', 'content': 0.16557037830352783, 'timestamp': '2025-09-10 02:34:21.387847', 'step': 7084, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 128], 'flops': 3797092544000}, 'timestamp': '2025-09-10 02:34:21.418267', 'step': 7084, 'epoch': 1} {'type': 'loss', 'content': 0.1851428747177124, 'timestamp': '2025-09-10 02:34:21.420438', 'step': 7085, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 3322488817984}, 'timestamp': '2025-09-10 02:34:21.450239', 'step': 7085, 'epoch': 1} {'type': 'loss', 'content': 0.15123143792152405, 'timestamp': '2025-09-10 02:34:21.452486', 'step': 7086, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 3322488817984}, 'timestamp': '2025-09-10 02:34:21.483141', 'step': 7086, 'epoch': 1} {'type': 'loss', 'content': 0.18075630068778992, 'timestamp': '2025-09-10 02:34:21.485852', 'step': 7087, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 3322488817984}, 'timestamp': '2025-09-10 02:34:21.516034', 'step': 7087, 'epoch': 1} {'type': 'loss', 'content': 0.12895476818084717, 'timestamp': '2025-09-10 02:34:21.539837', 'step': 7088, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 3322488817984}, 'timestamp': '2025-09-10 02:34:21.569713', 'step': 7088, 'epoch': 1} {'type': 'loss', 'content': 0.09454178065061569, 'timestamp': '2025-09-10 02:34:21.572386', 'step': 7089, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 128], 'flops': 3797092544000}, 'timestamp': '2025-09-10 02:34:21.604416', 'step': 7089, 'epoch': 1} {'type': 'loss', 'content': 0.04630787670612335, 'timestamp': '2025-09-10 02:34:21.606834', 'step': 7090, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 3322488817984}, 'timestamp': '2025-09-10 02:34:21.637076', 'step': 7090, 'epoch': 1} {'type': 'loss', 'content': 0.15715670585632324, 'timestamp': '2025-09-10 02:34:21.639410', 'step': 7091, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 2847885091968}, 'timestamp': '2025-09-10 02:34:21.669125', 'step': 7091, 'epoch': 1} {'type': 'loss', 'content': 0.10197591781616211, 'timestamp': '2025-09-10 02:34:21.692924', 'step': 7092, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 80], 'flops': 2373281365952}, 'timestamp': '2025-09-10 02:34:21.724199', 'step': 7092, 'epoch': 1} {'type': 'loss', 'content': 0.10168301314115524, 'timestamp': '2025-09-10 02:34:21.726656', 'step': 7093, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 2847885091968}, 'timestamp': '2025-09-10 02:34:21.757398', 'step': 7093, 'epoch': 1} {'type': 'loss', 'content': 0.10739646106958389, 'timestamp': '2025-09-10 02:34:21.760160', 'step': 7094, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 3322488817984}, 'timestamp': '2025-09-10 02:34:21.790213', 'step': 7094, 'epoch': 1} {'type': 'loss', 'content': 0.21391259133815765, 'timestamp': '2025-09-10 02:34:21.792445', 'step': 7095, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 144], 'flops': 4271696270016}, 'timestamp': '2025-09-10 02:34:21.822700', 'step': 7095, 'epoch': 1} {'type': 'loss', 'content': 0.15768449008464813, 'timestamp': '2025-09-10 02:34:21.846462', 'step': 7096, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 3322488817984}, 'timestamp': '2025-09-10 02:34:21.876651', 'step': 7096, 'epoch': 1} {'type': 'loss', 'content': 0.11460427939891815, 'timestamp': '2025-09-10 02:34:21.879089', 'step': 7097, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 128], 'flops': 3797092544000}, 'timestamp': '2025-09-10 02:34:21.909316', 'step': 7097, 'epoch': 1} {'type': 'loss', 'content': 0.21143148839473724, 'timestamp': '2025-09-10 02:34:21.911668', 'step': 7098, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 2847885091968}, 'timestamp': '2025-09-10 02:34:21.942213', 'step': 7098, 'epoch': 1} {'type': 'loss', 'content': 0.10923069715499878, 'timestamp': '2025-09-10 02:34:21.944650', 'step': 7099, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 2847885091968}, 'timestamp': '2025-09-10 02:34:21.974734', 'step': 7099, 'epoch': 1} {'type': 'loss', 'content': 0.17947925627231598, 'timestamp': '2025-09-10 02:34:21.998707', 'step': 7100, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 3322488817984}, 'timestamp': '2025-09-10 02:34:22.029367', 'step': 7100, 'epoch': 1} {'type': 'loss', 'content': 0.15877217054367065, 'timestamp': '2025-09-10 02:34:22.031419', 'step': 7101, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 128], 'flops': 3797092544000}, 'timestamp': '2025-09-10 02:34:22.061327', 'step': 7101, 'epoch': 1} {'type': 'loss', 'content': 0.20881137251853943, 'timestamp': '2025-09-10 02:34:22.063839', 'step': 7102, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 3322488817984}, 'timestamp': '2025-09-10 02:34:22.094676', 'step': 7102, 'epoch': 1} {'type': 'loss', 'content': 0.1353021264076233, 'timestamp': '2025-09-10 02:34:22.097035', 'step': 7103, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 80], 'flops': 2373281365952}, 'timestamp': '2025-09-10 02:34:22.125968', 'step': 7103, 'epoch': 1} {'type': 'loss', 'content': 0.18759813904762268, 'timestamp': '2025-09-10 02:34:22.149308', 'step': 7104, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 3322488817984}, 'timestamp': '2025-09-10 02:34:22.179331', 'step': 7104, 'epoch': 1} {'type': 'loss', 'content': 0.09654368460178375, 'timestamp': '2025-09-10 02:34:22.181317', 'step': 7105, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 128], 'flops': 3797092544000}, 'timestamp': '2025-09-10 02:34:22.210438', 'step': 7105, 'epoch': 1} {'type': 'loss', 'content': 0.1842379868030548, 'timestamp': '2025-09-10 02:34:22.212317', 'step': 7106, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 2847885091968}, 'timestamp': '2025-09-10 02:34:22.242141', 'step': 7106, 'epoch': 1} {'type': 'loss', 'content': 0.11846289038658142, 'timestamp': '2025-09-10 02:34:22.246145', 'step': 7107, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 176], 'flops': 5220903722048}, 'timestamp': '2025-09-10 02:34:22.277247', 'step': 7107, 'epoch': 1} {'type': 'loss', 'content': 0.17182855308055878, 'timestamp': '2025-09-10 02:34:22.302099', 'step': 7108, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 128], 'flops': 3797092544000}, 'timestamp': '2025-09-10 02:34:22.331186', 'step': 7108, 'epoch': 1} {'type': 'loss', 'content': 0.10163672268390656, 'timestamp': '2025-09-10 02:34:22.333256', 'step': 7109, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 144], 'flops': 4271696270016}, 'timestamp': '2025-09-10 02:34:22.362637', 'step': 7109, 'epoch': 1} {'type': 'loss', 'content': 0.1244867667555809, 'timestamp': '2025-09-10 02:34:22.365161', 'step': 7110, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 144], 'flops': 4271696270016}, 'timestamp': '2025-09-10 02:34:22.396257', 'step': 7110, 'epoch': 1} {'type': 'loss', 'content': 0.13684025406837463, 'timestamp': '2025-09-10 02:34:22.398479', 'step': 7111, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 128], 'flops': 3797092544000}, 'timestamp': '2025-09-10 02:34:22.427964', 'step': 7111, 'epoch': 1} {'type': 'loss', 'content': 0.21441039443016052, 'timestamp': '2025-09-10 02:34:22.451136', 'step': 7112, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 3322488817984}, 'timestamp': '2025-09-10 02:34:22.480248', 'step': 7112, 'epoch': 1} {'type': 'loss', 'content': 0.14009417593479156, 'timestamp': '2025-09-10 02:34:22.482607', 'step': 7113, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 3322488817984}, 'timestamp': '2025-09-10 02:34:22.511745', 'step': 7113, 'epoch': 1} {'type': 'loss', 'content': 0.09043020755052567, 'timestamp': '2025-09-10 02:34:22.514143', 'step': 7114, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 3322488817984}, 'timestamp': '2025-09-10 02:34:22.543464', 'step': 7114, 'epoch': 1} {'type': 'loss', 'content': 0.17477169632911682, 'timestamp': '2025-09-10 02:34:22.545884', 'step': 7115, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 2847885091968}, 'timestamp': '2025-09-10 02:34:22.580213', 'step': 7115, 'epoch': 1} {'type': 'loss', 'content': 0.09162815660238266, 'timestamp': '2025-09-10 02:34:22.607733', 'step': 7116, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 128], 'flops': 3797092544000}, 'timestamp': '2025-09-10 02:34:22.645972', 'step': 7116, 'epoch': 1} {'type': 'loss', 'content': 0.193574458360672, 'timestamp': '2025-09-10 02:34:22.649632', 'step': 7117, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 128], 'flops': 3797092544000}, 'timestamp': '2025-09-10 02:34:22.682488', 'step': 7117, 'epoch': 1} {'type': 'loss', 'content': 0.24711553752422333, 'timestamp': '2025-09-10 02:34:22.684627', 'step': 7118, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 3322488817984}, 'timestamp': '2025-09-10 02:34:22.718522', 'step': 7118, 'epoch': 1} {'type': 'loss', 'content': 0.09537918865680695, 'timestamp': '2025-09-10 02:34:22.720753', 'step': 7119, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 128], 'flops': 3797092544000}, 'timestamp': '2025-09-10 02:34:22.750864', 'step': 7119, 'epoch': 1} {'type': 'loss', 'content': 0.10658726096153259, 'timestamp': '2025-09-10 02:34:22.774104', 'step': 7120, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 3322488817984}, 'timestamp': '2025-09-10 02:34:22.804870', 'step': 7120, 'epoch': 1} {'type': 'loss', 'content': 0.16988466680049896, 'timestamp': '2025-09-10 02:34:22.807106', 'step': 7121, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 128], 'flops': 3797092544000}, 'timestamp': '2025-09-10 02:34:22.836996', 'step': 7121, 'epoch': 1} {'type': 'loss', 'content': 0.11018642038106918, 'timestamp': '2025-09-10 02:34:22.838960', 'step': 7122, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 2847885091968}, 'timestamp': '2025-09-10 02:34:22.868549', 'step': 7122, 'epoch': 1} {'type': 'loss', 'content': 0.13955336809158325, 'timestamp': '2025-09-10 02:34:22.871742', 'step': 7123, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 2847885091968}, 'timestamp': '2025-09-10 02:34:22.901722', 'step': 7123, 'epoch': 1} {'type': 'loss', 'content': 0.12348734587430954, 'timestamp': '2025-09-10 02:34:22.925083', 'step': 7124, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 2847885091968}, 'timestamp': '2025-09-10 02:34:22.954676', 'step': 7124, 'epoch': 1} {'type': 'loss', 'content': 0.09915703535079956, 'timestamp': '2025-09-10 02:34:22.957418', 'step': 7125, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 128], 'flops': 3797092544000}, 'timestamp': '2025-09-10 02:34:22.986522', 'step': 7125, 'epoch': 1} {'type': 'loss', 'content': 0.09646973013877869, 'timestamp': '2025-09-10 02:34:22.988459', 'step': 7126, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 2847885091968}, 'timestamp': '2025-09-10 02:34:23.018191', 'step': 7126, 'epoch': 1} {'type': 'loss', 'content': 0.07939746975898743, 'timestamp': '2025-09-10 02:34:23.020773', 'step': 7127, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 128], 'flops': 3797092544000}, 'timestamp': '2025-09-10 02:34:23.051234', 'step': 7127, 'epoch': 1} {'type': 'loss', 'content': 0.22702345252037048, 'timestamp': '2025-09-10 02:34:23.076174', 'step': 7128, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 144], 'flops': 4271696270016}, 'timestamp': '2025-09-10 02:34:23.107783', 'step': 7128, 'epoch': 1} {'type': 'loss', 'content': 0.14670540392398834, 'timestamp': '2025-09-10 02:34:23.109945', 'step': 7129, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 2847885091968}, 'timestamp': '2025-09-10 02:34:23.139448', 'step': 7129, 'epoch': 1} {'type': 'loss', 'content': 0.2123578041791916, 'timestamp': '2025-09-10 02:34:23.141439', 'step': 7130, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 2847885091968}, 'timestamp': '2025-09-10 02:34:23.172343', 'step': 7130, 'epoch': 1} {'type': 'loss', 'content': 0.11568102240562439, 'timestamp': '2025-09-10 02:34:23.174729', 'step': 7131, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 2847885091968}, 'timestamp': '2025-09-10 02:34:23.204299', 'step': 7131, 'epoch': 1} {'type': 'loss', 'content': 0.18781594932079315, 'timestamp': '2025-09-10 02:34:23.227854', 'step': 7132, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 3322488817984}, 'timestamp': '2025-09-10 02:34:23.257644', 'step': 7132, 'epoch': 1} {'type': 'loss', 'content': 0.12539604306221008, 'timestamp': '2025-09-10 02:34:23.259812', 'step': 7133, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 128], 'flops': 3797092544000}, 'timestamp': '2025-09-10 02:34:23.288956', 'step': 7133, 'epoch': 1} {'type': 'loss', 'content': 0.10483936965465546, 'timestamp': '2025-09-10 02:34:23.290915', 'step': 7134, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 128], 'flops': 3797092544000}, 'timestamp': '2025-09-10 02:34:23.320668', 'step': 7134, 'epoch': 1} {'type': 'loss', 'content': 0.1581839770078659, 'timestamp': '2025-09-10 02:34:23.322929', 'step': 7135, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 3322488817984}, 'timestamp': '2025-09-10 02:34:23.352588', 'step': 7135, 'epoch': 1} {'type': 'loss', 'content': 0.15608245134353638, 'timestamp': '2025-09-10 02:34:23.376125', 'step': 7136, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 3322488817984}, 'timestamp': '2025-09-10 02:34:23.405995', 'step': 7136, 'epoch': 1} {'type': 'loss', 'content': 0.0937281996011734, 'timestamp': '2025-09-10 02:34:23.408631', 'step': 7137, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 144], 'flops': 4271696270016}, 'timestamp': '2025-09-10 02:34:23.439615', 'step': 7137, 'epoch': 1} {'type': 'loss', 'content': 0.09326320886611938, 'timestamp': '2025-09-10 02:34:23.441896', 'step': 7138, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 80], 'flops': 2373281365952}, 'timestamp': '2025-09-10 02:34:23.471201', 'step': 7138, 'epoch': 1} {'type': 'loss', 'content': 0.06125873327255249, 'timestamp': '2025-09-10 02:34:23.473477', 'step': 7139, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 3322488817984}, 'timestamp': '2025-09-10 02:34:23.503335', 'step': 7139, 'epoch': 1} {'type': 'loss', 'content': 0.12030231207609177, 'timestamp': '2025-09-10 02:34:23.526850', 'step': 7140, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 2847885091968}, 'timestamp': '2025-09-10 02:34:23.556525', 'step': 7140, 'epoch': 1} {'type': 'loss', 'content': 0.10636618733406067, 'timestamp': '2025-09-10 02:34:23.558623', 'step': 7141, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 3322488817984}, 'timestamp': '2025-09-10 02:34:23.589149', 'step': 7141, 'epoch': 1} {'type': 'loss', 'content': 0.09981928020715714, 'timestamp': '2025-09-10 02:34:23.591019', 'step': 7142, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 128], 'flops': 3797092544000}, 'timestamp': '2025-09-10 02:34:23.620302', 'step': 7142, 'epoch': 1} {'type': 'loss', 'content': 0.16299906373023987, 'timestamp': '2025-09-10 02:34:23.622638', 'step': 7143, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 2847885091968}, 'timestamp': '2025-09-10 02:34:23.652619', 'step': 7143, 'epoch': 1} {'type': 'loss', 'content': 0.15860050916671753, 'timestamp': '2025-09-10 02:34:23.676485', 'step': 7144, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 128], 'flops': 3797092544000}, 'timestamp': '2025-09-10 02:34:23.707195', 'step': 7144, 'epoch': 1} {'type': 'loss', 'content': 0.085331030189991, 'timestamp': '2025-09-10 02:34:23.709372', 'step': 7145, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 144], 'flops': 4271696270016}, 'timestamp': '2025-09-10 02:34:23.739029', 'step': 7145, 'epoch': 1} {'type': 'loss', 'content': 0.1623704880475998, 'timestamp': '2025-09-10 02:34:23.741601', 'step': 7146, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 144], 'flops': 4271696270016}, 'timestamp': '2025-09-10 02:34:23.771494', 'step': 7146, 'epoch': 1} {'type': 'loss', 'content': 0.09762518107891083, 'timestamp': '2025-09-10 02:34:23.773921', 'step': 7147, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 3322488817984}, 'timestamp': '2025-09-10 02:34:23.803843', 'step': 7147, 'epoch': 1} {'type': 'loss', 'content': 0.11109913885593414, 'timestamp': '2025-09-10 02:34:23.827037', 'step': 7148, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 144], 'flops': 4271696270016}, 'timestamp': '2025-09-10 02:34:23.856677', 'step': 7148, 'epoch': 1} {'type': 'loss', 'content': 0.16396577656269073, 'timestamp': '2025-09-10 02:34:23.858701', 'step': 7149, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 3322488817984}, 'timestamp': '2025-09-10 02:34:23.888936', 'step': 7149, 'epoch': 1} {'type': 'loss', 'content': 0.13325190544128418, 'timestamp': '2025-09-10 02:34:23.891272', 'step': 7150, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 3322488817984}, 'timestamp': '2025-09-10 02:34:23.921972', 'step': 7150, 'epoch': 1} {'type': 'loss', 'content': 0.11202618479728699, 'timestamp': '2025-09-10 02:34:23.923909', 'step': 7151, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 2847885091968}, 'timestamp': '2025-09-10 02:34:23.953547', 'step': 7151, 'epoch': 1} {'type': 'loss', 'content': 0.22768151760101318, 'timestamp': '2025-09-10 02:34:23.977108', 'step': 7152, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 3322488817984}, 'timestamp': '2025-09-10 02:34:24.008369', 'step': 7152, 'epoch': 1} {'type': 'loss', 'content': 0.1085587590932846, 'timestamp': '2025-09-10 02:34:24.010517', 'step': 7153, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 144], 'flops': 4271696270016}, 'timestamp': '2025-09-10 02:34:24.041503', 'step': 7153, 'epoch': 1} {'type': 'loss', 'content': 0.11582961678504944, 'timestamp': '2025-09-10 02:34:24.043857', 'step': 7154, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 144], 'flops': 4271696270016}, 'timestamp': '2025-09-10 02:34:24.073538', 'step': 7154, 'epoch': 1} {'type': 'loss', 'content': 0.11200067400932312, 'timestamp': '2025-09-10 02:34:24.075923', 'step': 7155, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 3322488817984}, 'timestamp': '2025-09-10 02:34:24.106010', 'step': 7155, 'epoch': 1} {'type': 'loss', 'content': 0.12416549026966095, 'timestamp': '2025-09-10 02:34:24.129466', 'step': 7156, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 128], 'flops': 3797092544000}, 'timestamp': '2025-09-10 02:34:24.159362', 'step': 7156, 'epoch': 1} {'type': 'loss', 'content': 0.1316646933555603, 'timestamp': '2025-09-10 02:34:24.161359', 'step': 7157, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 192], 'flops': 5695507448064}, 'timestamp': '2025-09-10 02:34:24.192733', 'step': 7157, 'epoch': 1} {'type': 'loss', 'content': 0.18088844418525696, 'timestamp': '2025-09-10 02:34:24.197341', 'step': 7158, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 80], 'flops': 2373281365952}, 'timestamp': '2025-09-10 02:34:24.227402', 'step': 7158, 'epoch': 1} {'type': 'loss', 'content': 0.20116488635540009, 'timestamp': '2025-09-10 02:34:24.229584', 'step': 7159, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 2847885091968}, 'timestamp': '2025-09-10 02:34:24.259929', 'step': 7159, 'epoch': 1} {'type': 'loss', 'content': 0.09743539243936539, 'timestamp': '2025-09-10 02:34:24.283697', 'step': 7160, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 2847885091968}, 'timestamp': '2025-09-10 02:34:24.314486', 'step': 7160, 'epoch': 1} {'type': 'loss', 'content': 0.254637748003006, 'timestamp': '2025-09-10 02:34:24.317017', 'step': 7161, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 128], 'flops': 3797092544000}, 'timestamp': '2025-09-10 02:34:24.348752', 'step': 7161, 'epoch': 1} {'type': 'loss', 'content': 0.16310207545757294, 'timestamp': '2025-09-10 02:34:24.351375', 'step': 7162, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 2847885091968}, 'timestamp': '2025-09-10 02:34:24.380630', 'step': 7162, 'epoch': 1} {'type': 'loss', 'content': 0.18652062118053436, 'timestamp': '2025-09-10 02:34:24.383314', 'step': 7163, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 128], 'flops': 3797092544000}, 'timestamp': '2025-09-10 02:34:24.413494', 'step': 7163, 'epoch': 1} {'type': 'loss', 'content': 0.13361027836799622, 'timestamp': '2025-09-10 02:34:24.437237', 'step': 7164, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 3322488817984}, 'timestamp': '2025-09-10 02:34:24.466933', 'step': 7164, 'epoch': 1} {'type': 'loss', 'content': 0.15580511093139648, 'timestamp': '2025-09-10 02:34:24.468845', 'step': 7165, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 3322488817984}, 'timestamp': '2025-09-10 02:34:24.499745', 'step': 7165, 'epoch': 1} {'type': 'loss', 'content': 0.10695474594831467, 'timestamp': '2025-09-10 02:34:24.502111', 'step': 7166, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 3322488817984}, 'timestamp': '2025-09-10 02:34:24.532186', 'step': 7166, 'epoch': 1} {'type': 'loss', 'content': 0.2505851686000824, 'timestamp': '2025-09-10 02:34:24.534231', 'step': 7167, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 160], 'flops': 4746299996032}, 'timestamp': '2025-09-10 02:34:24.564446', 'step': 7167, 'epoch': 1} {'type': 'loss', 'content': 0.10196499526500702, 'timestamp': '2025-09-10 02:34:24.587880', 'step': 7168, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 144], 'flops': 4271696270016}, 'timestamp': '2025-09-10 02:34:24.617341', 'step': 7168, 'epoch': 1} {'type': 'loss', 'content': 0.20137955248355865, 'timestamp': '2025-09-10 02:34:24.619515', 'step': 7169, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 80], 'flops': 2373281365952}, 'timestamp': '2025-09-10 02:34:24.649977', 'step': 7169, 'epoch': 1} {'type': 'loss', 'content': 0.11261038482189178, 'timestamp': '2025-09-10 02:34:24.652187', 'step': 7170, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 128], 'flops': 3797092544000}, 'timestamp': '2025-09-10 02:34:24.682086', 'step': 7170, 'epoch': 1} {'type': 'loss', 'content': 0.1374741643667221, 'timestamp': '2025-09-10 02:34:24.684574', 'step': 7171, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 128], 'flops': 3797092544000}, 'timestamp': '2025-09-10 02:34:24.714574', 'step': 7171, 'epoch': 1} {'type': 'loss', 'content': 0.16900570690631866, 'timestamp': '2025-09-10 02:34:24.738727', 'step': 7172, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 3322488817984}, 'timestamp': '2025-09-10 02:34:24.767974', 'step': 7172, 'epoch': 1} {'type': 'loss', 'content': 0.16568538546562195, 'timestamp': '2025-09-10 02:34:24.770251', 'step': 7173, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 2847885091968}, 'timestamp': '2025-09-10 02:34:24.808446', 'step': 7173, 'epoch': 1} {'type': 'loss', 'content': 0.1261114776134491, 'timestamp': '2025-09-10 02:34:24.810644', 'step': 7174, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 3322488817984}, 'timestamp': '2025-09-10 02:34:24.840709', 'step': 7174, 'epoch': 1} {'type': 'loss', 'content': 0.12318409234285355, 'timestamp': '2025-09-10 02:34:24.843265', 'step': 7175, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 3322488817984}, 'timestamp': '2025-09-10 02:34:24.873104', 'step': 7175, 'epoch': 1} {'type': 'loss', 'content': 0.161665141582489, 'timestamp': '2025-09-10 02:34:24.896401', 'step': 7176, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 128], 'flops': 3797092544000}, 'timestamp': '2025-09-10 02:34:24.925806', 'step': 7176, 'epoch': 1} {'type': 'loss', 'content': 0.1910383701324463, 'timestamp': '2025-09-10 02:34:24.928015', 'step': 7177, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 128], 'flops': 3797092544000}, 'timestamp': '2025-09-10 02:34:24.957971', 'step': 7177, 'epoch': 1} {'type': 'loss', 'content': 0.09843406826257706, 'timestamp': '2025-09-10 02:34:24.972771', 'step': 7178, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 3322488817984}, 'timestamp': '2025-09-10 02:34:25.020158', 'step': 7178, 'epoch': 1} {'type': 'loss', 'content': 0.07764656096696854, 'timestamp': '2025-09-10 02:34:25.035756', 'step': 7179, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 128], 'flops': 3797092544000}, 'timestamp': '2025-09-10 02:34:25.117278', 'step': 7179, 'epoch': 1} {'type': 'loss', 'content': 0.14690804481506348, 'timestamp': '2025-09-10 02:34:25.153393', 'step': 7180, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 3322488817984}, 'timestamp': '2025-09-10 02:34:25.199173', 'step': 7180, 'epoch': 1} {'type': 'loss', 'content': 0.23912402987480164, 'timestamp': '2025-09-10 02:34:25.202980', 'step': 7181, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 2847885091968}, 'timestamp': '2025-09-10 02:34:25.277191', 'step': 7181, 'epoch': 1} {'type': 'loss', 'content': 0.14019334316253662, 'timestamp': '2025-09-10 02:34:25.300780', 'step': 7182, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 3322488817984}, 'timestamp': '2025-09-10 02:34:25.368270', 'step': 7182, 'epoch': 1} {'type': 'loss', 'content': 0.14351393282413483, 'timestamp': '2025-09-10 02:34:25.371371', 'step': 7183, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 144], 'flops': 4271696270016}, 'timestamp': '2025-09-10 02:34:25.406359', 'step': 7183, 'epoch': 1} {'type': 'loss', 'content': 0.20322534441947937, 'timestamp': '2025-09-10 02:34:25.449225', 'step': 7184, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 128], 'flops': 3797092544000}, 'timestamp': '2025-09-10 02:34:25.542172', 'step': 7184, 'epoch': 1} {'type': 'loss', 'content': 0.19667595624923706, 'timestamp': '2025-09-10 02:34:25.550103', 'step': 7185, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 128], 'flops': 3797092544000}, 'timestamp': '2025-09-10 02:34:25.583980', 'step': 7185, 'epoch': 1} {'type': 'loss', 'content': 0.10241424292325974, 'timestamp': '2025-09-10 02:34:25.590327', 'step': 7186, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 128], 'flops': 3797092544000}, 'timestamp': '2025-09-10 02:34:25.628689', 'step': 7186, 'epoch': 1} {'type': 'loss', 'content': 0.17873436212539673, 'timestamp': '2025-09-10 02:34:25.631416', 'step': 7187, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 128], 'flops': 3797092544000}, 'timestamp': '2025-09-10 02:34:25.661173', 'step': 7187, 'epoch': 1} {'type': 'loss', 'content': 0.1402776539325714, 'timestamp': '2025-09-10 02:34:25.684881', 'step': 7188, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 3322488817984}, 'timestamp': '2025-09-10 02:34:25.716349', 'step': 7188, 'epoch': 1} {'type': 'loss', 'content': 0.22352387011051178, 'timestamp': '2025-09-10 02:34:25.721790', 'step': 7189, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 2847885091968}, 'timestamp': '2025-09-10 02:34:25.755919', 'step': 7189, 'epoch': 1} {'type': 'loss', 'content': 0.1060614287853241, 'timestamp': '2025-09-10 02:34:25.758666', 'step': 7190, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 128], 'flops': 3797092544000}, 'timestamp': '2025-09-10 02:34:25.789342', 'step': 7190, 'epoch': 1} {'type': 'loss', 'content': 0.16021955013275146, 'timestamp': '2025-09-10 02:34:25.791269', 'step': 7191, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 128], 'flops': 3797092544000}, 'timestamp': '2025-09-10 02:34:25.821966', 'step': 7191, 'epoch': 1} {'type': 'loss', 'content': 0.19671785831451416, 'timestamp': '2025-09-10 02:34:25.846016', 'step': 7192, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 144], 'flops': 4271696270016}, 'timestamp': '2025-09-10 02:34:25.876339', 'step': 7192, 'epoch': 1} {'type': 'loss', 'content': 0.17688602209091187, 'timestamp': '2025-09-10 02:34:25.878407', 'step': 7193, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 128], 'flops': 3797092544000}, 'timestamp': '2025-09-10 02:34:25.907676', 'step': 7193, 'epoch': 1} {'type': 'loss', 'content': 0.1565404087305069, 'timestamp': '2025-09-10 02:34:25.909632', 'step': 7194, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 3322488817984}, 'timestamp': '2025-09-10 02:34:25.939924', 'step': 7194, 'epoch': 1} {'type': 'loss', 'content': 0.18019331991672516, 'timestamp': '2025-09-10 02:34:25.941910', 'step': 7195, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 80], 'flops': 2373281365952}, 'timestamp': '2025-09-10 02:34:25.972281', 'step': 7195, 'epoch': 1} {'type': 'loss', 'content': 0.16606983542442322, 'timestamp': '2025-09-10 02:34:25.995916', 'step': 7196, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 2847885091968}, 'timestamp': '2025-09-10 02:34:26.026731', 'step': 7196, 'epoch': 1} {'type': 'loss', 'content': 0.19651569426059723, 'timestamp': '2025-09-10 02:34:26.029386', 'step': 7197, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 128], 'flops': 3797092544000}, 'timestamp': '2025-09-10 02:34:26.058712', 'step': 7197, 'epoch': 1} {'type': 'loss', 'content': 0.07495640218257904, 'timestamp': '2025-09-10 02:34:26.061109', 'step': 7198, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 144], 'flops': 4271696270016}, 'timestamp': '2025-09-10 02:34:26.092257', 'step': 7198, 'epoch': 1} {'type': 'loss', 'content': 0.13832104206085205, 'timestamp': '2025-09-10 02:34:26.094534', 'step': 7199, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 3322488817984}, 'timestamp': '2025-09-10 02:34:26.124171', 'step': 7199, 'epoch': 1} {'type': 'loss', 'content': 0.12349850684404373, 'timestamp': '2025-09-10 02:34:26.148086', 'step': 7200, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 2847885091968}, 'timestamp': '2025-09-10 02:34:26.178443', 'step': 7200, 'epoch': 1} {'type': 'loss', 'content': 0.07130397111177444, 'timestamp': '2025-09-10 02:34:26.180415', 'step': 7201, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 144], 'flops': 4271696270016}, 'timestamp': '2025-09-10 02:34:26.210324', 'step': 7201, 'epoch': 1} {'type': 'loss', 'content': 0.14133287966251373, 'timestamp': '2025-09-10 02:34:26.212729', 'step': 7202, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 3322488817984}, 'timestamp': '2025-09-10 02:34:26.242465', 'step': 7202, 'epoch': 1} {'type': 'loss', 'content': 0.23829160630702972, 'timestamp': '2025-09-10 02:34:26.245026', 'step': 7203, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 128], 'flops': 3797092544000}, 'timestamp': '2025-09-10 02:34:26.274969', 'step': 7203, 'epoch': 1} {'type': 'loss', 'content': 0.17611704766750336, 'timestamp': '2025-09-10 02:34:26.298385', 'step': 7204, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 2847885091968}, 'timestamp': '2025-09-10 02:34:26.332465', 'step': 7204, 'epoch': 1} {'type': 'loss', 'content': 0.22879716753959656, 'timestamp': '2025-09-10 02:34:26.334813', 'step': 7205, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 2847885091968}, 'timestamp': '2025-09-10 02:34:26.365153', 'step': 7205, 'epoch': 1} {'type': 'loss', 'content': 0.12420432269573212, 'timestamp': '2025-09-10 02:34:26.367241', 'step': 7206, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 3322488817984}, 'timestamp': '2025-09-10 02:34:26.396725', 'step': 7206, 'epoch': 1} {'type': 'loss', 'content': 0.21199598908424377, 'timestamp': '2025-09-10 02:34:26.398867', 'step': 7207, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 3322488817984}, 'timestamp': '2025-09-10 02:34:26.428092', 'step': 7207, 'epoch': 1} {'type': 'loss', 'content': 0.15966445207595825, 'timestamp': '2025-09-10 02:34:26.451455', 'step': 7208, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 144], 'flops': 4271696270016}, 'timestamp': '2025-09-10 02:34:26.481246', 'step': 7208, 'epoch': 1} {'type': 'loss', 'content': 0.16024072468280792, 'timestamp': '2025-09-10 02:34:26.483233', 'step': 7209, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 176], 'flops': 5220903722048}, 'timestamp': '2025-09-10 02:34:26.513495', 'step': 7209, 'epoch': 1} {'type': 'loss', 'content': 0.12301425635814667, 'timestamp': '2025-09-10 02:34:26.517703', 'step': 7210, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 3322488817984}, 'timestamp': '2025-09-10 02:34:26.547893', 'step': 7210, 'epoch': 1} {'type': 'loss', 'content': 0.08573733270168304, 'timestamp': '2025-09-10 02:34:26.549991', 'step': 7211, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 128], 'flops': 3797092544000}, 'timestamp': '2025-09-10 02:34:26.579076', 'step': 7211, 'epoch': 1} {'type': 'loss', 'content': 0.11938662827014923, 'timestamp': '2025-09-10 02:34:26.602422', 'step': 7212, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 3322488817984}, 'timestamp': '2025-09-10 02:34:26.632279', 'step': 7212, 'epoch': 1} {'type': 'loss', 'content': 0.14097802340984344, 'timestamp': '2025-09-10 02:34:26.634591', 'step': 7213, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 2847885091968}, 'timestamp': '2025-09-10 02:34:26.664185', 'step': 7213, 'epoch': 1} {'type': 'loss', 'content': 0.16868047416210175, 'timestamp': '2025-09-10 02:34:26.666092', 'step': 7214, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 3322488817984}, 'timestamp': '2025-09-10 02:34:26.695508', 'step': 7214, 'epoch': 1} {'type': 'loss', 'content': 0.11770033836364746, 'timestamp': '2025-09-10 02:34:26.697684', 'step': 7215, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 3322488817984}, 'timestamp': '2025-09-10 02:34:26.727656', 'step': 7215, 'epoch': 1} {'type': 'loss', 'content': 0.2049878090620041, 'timestamp': '2025-09-10 02:34:26.751148', 'step': 7216, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 2847885091968}, 'timestamp': '2025-09-10 02:34:26.780766', 'step': 7216, 'epoch': 1} {'type': 'loss', 'content': 0.16596144437789917, 'timestamp': '2025-09-10 02:34:26.783150', 'step': 7217, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 3322488817984}, 'timestamp': '2025-09-10 02:34:26.812623', 'step': 7217, 'epoch': 1} {'type': 'loss', 'content': 0.08560558408498764, 'timestamp': '2025-09-10 02:34:26.815452', 'step': 7218, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 2847885091968}, 'timestamp': '2025-09-10 02:34:26.845251', 'step': 7218, 'epoch': 1} {'type': 'loss', 'content': 0.08593016862869263, 'timestamp': '2025-09-10 02:34:26.847699', 'step': 7219, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 160], 'flops': 4746299996032}, 'timestamp': '2025-09-10 02:34:26.877808', 'step': 7219, 'epoch': 1} {'type': 'loss', 'content': 0.1493716686964035, 'timestamp': '2025-09-10 02:34:26.901375', 'step': 7220, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 2847885091968}, 'timestamp': '2025-09-10 02:34:26.931105', 'step': 7220, 'epoch': 1} {'type': 'loss', 'content': 0.13975034654140472, 'timestamp': '2025-09-10 02:34:26.933139', 'step': 7221, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 128], 'flops': 3797092544000}, 'timestamp': '2025-09-10 02:34:26.963095', 'step': 7221, 'epoch': 1} {'type': 'loss', 'content': 0.12493547797203064, 'timestamp': '2025-09-10 02:34:26.965184', 'step': 7222, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 3322488817984}, 'timestamp': '2025-09-10 02:34:26.995245', 'step': 7222, 'epoch': 1} {'type': 'loss', 'content': 0.15442794561386108, 'timestamp': '2025-09-10 02:34:26.997313', 'step': 7223, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 80], 'flops': 2373281365952}, 'timestamp': '2025-09-10 02:34:27.026608', 'step': 7223, 'epoch': 1} {'type': 'loss', 'content': 0.10321897268295288, 'timestamp': '2025-09-10 02:34:27.049822', 'step': 7224, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 128], 'flops': 3797092544000}, 'timestamp': '2025-09-10 02:34:27.079686', 'step': 7224, 'epoch': 1} {'type': 'loss', 'content': 0.20957039296627045, 'timestamp': '2025-09-10 02:34:27.081817', 'step': 7225, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 128], 'flops': 3797092544000}, 'timestamp': '2025-09-10 02:34:27.111778', 'step': 7225, 'epoch': 1} {'type': 'loss', 'content': 0.1400837004184723, 'timestamp': '2025-09-10 02:34:27.113917', 'step': 7226, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 128], 'flops': 3797092544000}, 'timestamp': '2025-09-10 02:34:27.144961', 'step': 7226, 'epoch': 1} {'type': 'loss', 'content': 0.13800199329853058, 'timestamp': '2025-09-10 02:34:27.147136', 'step': 7227, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 2847885091968}, 'timestamp': '2025-09-10 02:34:27.178239', 'step': 7227, 'epoch': 1} {'type': 'loss', 'content': 0.08941631764173508, 'timestamp': '2025-09-10 02:34:27.201403', 'step': 7228, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 176], 'flops': 5220903722048}, 'timestamp': '2025-09-10 02:34:27.231226', 'step': 7228, 'epoch': 1} {'type': 'loss', 'content': 0.16496054828166962, 'timestamp': '2025-09-10 02:34:27.233278', 'step': 7229, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 128], 'flops': 3797092544000}, 'timestamp': '2025-09-10 02:34:27.262706', 'step': 7229, 'epoch': 1} {'type': 'loss', 'content': 0.10767912119626999, 'timestamp': '2025-09-10 02:34:27.264917', 'step': 7230, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 2847885091968}, 'timestamp': '2025-09-10 02:34:27.295200', 'step': 7230, 'epoch': 1} {'type': 'loss', 'content': 0.17380602657794952, 'timestamp': '2025-09-10 02:34:27.297627', 'step': 7231, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 2847885091968}, 'timestamp': '2025-09-10 02:34:27.328011', 'step': 7231, 'epoch': 1} {'type': 'loss', 'content': 0.09650028496980667, 'timestamp': '2025-09-10 02:34:27.352428', 'step': 7232, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 2847885091968}, 'timestamp': '2025-09-10 02:34:27.383535', 'step': 7232, 'epoch': 1} {'type': 'loss', 'content': 0.10634846985340118, 'timestamp': '2025-09-10 02:34:27.386564', 'step': 7233, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 2847885091968}, 'timestamp': '2025-09-10 02:34:27.419196', 'step': 7233, 'epoch': 1} {'type': 'loss', 'content': 0.09470055997371674, 'timestamp': '2025-09-10 02:34:27.422264', 'step': 7234, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 2847885091968}, 'timestamp': '2025-09-10 02:34:27.451914', 'step': 7234, 'epoch': 1} {'type': 'loss', 'content': 0.061537839472293854, 'timestamp': '2025-09-10 02:34:27.453945', 'step': 7235, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 128], 'flops': 3797092544000}, 'timestamp': '2025-09-10 02:34:27.483433', 'step': 7235, 'epoch': 1} {'type': 'loss', 'content': 0.08663517981767654, 'timestamp': '2025-09-10 02:34:27.506773', 'step': 7236, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 2847885091968}, 'timestamp': '2025-09-10 02:34:27.535922', 'step': 7236, 'epoch': 1} {'type': 'loss', 'content': 0.11959625780582428, 'timestamp': '2025-09-10 02:34:27.537935', 'step': 7237, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 2847885091968}, 'timestamp': '2025-09-10 02:34:27.567195', 'step': 7237, 'epoch': 1} {'type': 'loss', 'content': 0.069017194211483, 'timestamp': '2025-09-10 02:34:27.569660', 'step': 7238, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 3322488817984}, 'timestamp': '2025-09-10 02:34:27.600196', 'step': 7238, 'epoch': 1} {'type': 'loss', 'content': 0.13190387189388275, 'timestamp': '2025-09-10 02:34:27.602804', 'step': 7239, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 144], 'flops': 4271696270016}, 'timestamp': '2025-09-10 02:34:27.633346', 'step': 7239, 'epoch': 1} {'type': 'loss', 'content': 0.22494670748710632, 'timestamp': '2025-09-10 02:34:27.656517', 'step': 7240, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 144], 'flops': 4271696270016}, 'timestamp': '2025-09-10 02:34:27.685970', 'step': 7240, 'epoch': 1} {'type': 'loss', 'content': 0.1119723990559578, 'timestamp': '2025-09-10 02:34:27.687949', 'step': 7241, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 3322488817984}, 'timestamp': '2025-09-10 02:34:27.717432', 'step': 7241, 'epoch': 1} {'type': 'loss', 'content': 0.06459981203079224, 'timestamp': '2025-09-10 02:34:27.723118', 'step': 7242, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 2847885091968}, 'timestamp': '2025-09-10 02:34:27.756769', 'step': 7242, 'epoch': 1} {'type': 'loss', 'content': 0.21481339633464813, 'timestamp': '2025-09-10 02:34:27.759026', 'step': 7243, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 3322488817984}, 'timestamp': '2025-09-10 02:34:27.788642', 'step': 7243, 'epoch': 1} {'type': 'loss', 'content': 0.11510620266199112, 'timestamp': '2025-09-10 02:34:27.812517', 'step': 7244, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 3322488817984}, 'timestamp': '2025-09-10 02:34:27.842319', 'step': 7244, 'epoch': 1} {'type': 'loss', 'content': 0.17854012548923492, 'timestamp': '2025-09-10 02:34:27.844733', 'step': 7245, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 3322488817984}, 'timestamp': '2025-09-10 02:34:27.874572', 'step': 7245, 'epoch': 1} {'type': 'loss', 'content': 0.14724698662757874, 'timestamp': '2025-09-10 02:34:27.876762', 'step': 7246, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 2847885091968}, 'timestamp': '2025-09-10 02:34:27.905986', 'step': 7246, 'epoch': 1} {'type': 'loss', 'content': 0.09028948098421097, 'timestamp': '2025-09-10 02:34:27.908020', 'step': 7247, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 3322488817984}, 'timestamp': '2025-09-10 02:34:27.937350', 'step': 7247, 'epoch': 1} {'type': 'loss', 'content': 0.18808723986148834, 'timestamp': '2025-09-10 02:34:27.960916', 'step': 7248, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 160], 'flops': 4746299996032}, 'timestamp': '2025-09-10 02:34:27.991147', 'step': 7248, 'epoch': 1} {'type': 'loss', 'content': 0.09517920762300491, 'timestamp': '2025-09-10 02:34:27.993789', 'step': 7249, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 2847885091968}, 'timestamp': '2025-09-10 02:34:28.023170', 'step': 7249, 'epoch': 1} {'type': 'loss', 'content': 0.10635419934988022, 'timestamp': '2025-09-10 02:34:28.025293', 'step': 7250, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 3322488817984}, 'timestamp': '2025-09-10 02:34:28.057148', 'step': 7250, 'epoch': 1} {'type': 'loss', 'content': 0.11589785665273666, 'timestamp': '2025-09-10 02:34:28.059092', 'step': 7251, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 3322488817984}, 'timestamp': '2025-09-10 02:34:28.089234', 'step': 7251, 'epoch': 1} {'type': 'loss', 'content': 0.0973600521683693, 'timestamp': '2025-09-10 02:34:28.113488', 'step': 7252, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 2847885091968}, 'timestamp': '2025-09-10 02:34:28.143686', 'step': 7252, 'epoch': 1} {'type': 'loss', 'content': 0.21325473487377167, 'timestamp': '2025-09-10 02:34:28.145897', 'step': 7253, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 160], 'flops': 4746299996032}, 'timestamp': '2025-09-10 02:34:28.176273', 'step': 7253, 'epoch': 1} {'type': 'loss', 'content': 0.1275913417339325, 'timestamp': '2025-09-10 02:34:28.179064', 'step': 7254, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 2847885091968}, 'timestamp': '2025-09-10 02:34:28.208654', 'step': 7254, 'epoch': 1} {'type': 'loss', 'content': 0.1005939394235611, 'timestamp': '2025-09-10 02:34:28.210939', 'step': 7255, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 3322488817984}, 'timestamp': '2025-09-10 02:34:28.240324', 'step': 7255, 'epoch': 1} {'type': 'loss', 'content': 0.14639636874198914, 'timestamp': '2025-09-10 02:34:28.263415', 'step': 7256, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 128], 'flops': 3797092544000}, 'timestamp': '2025-09-10 02:34:28.293598', 'step': 7256, 'epoch': 1} {'type': 'loss', 'content': 0.13223761320114136, 'timestamp': '2025-09-10 02:34:28.296526', 'step': 7257, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 3322488817984}, 'timestamp': '2025-09-10 02:34:28.325901', 'step': 7257, 'epoch': 1} {'type': 'loss', 'content': 0.16233032941818237, 'timestamp': '2025-09-10 02:34:28.328257', 'step': 7258, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 3322488817984}, 'timestamp': '2025-09-10 02:34:28.360582', 'step': 7258, 'epoch': 1} {'type': 'loss', 'content': 0.1729719191789627, 'timestamp': '2025-09-10 02:34:28.363225', 'step': 7259, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 2847885091968}, 'timestamp': '2025-09-10 02:34:28.394159', 'step': 7259, 'epoch': 1} {'type': 'loss', 'content': 0.22725582122802734, 'timestamp': '2025-09-10 02:34:28.417763', 'step': 7260, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 128], 'flops': 3797092544000}, 'timestamp': '2025-09-10 02:34:28.448679', 'step': 7260, 'epoch': 1} {'type': 'loss', 'content': 0.25934675335884094, 'timestamp': '2025-09-10 02:34:28.450785', 'step': 7261, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 128], 'flops': 3797092544000}, 'timestamp': '2025-09-10 02:34:28.480395', 'step': 7261, 'epoch': 1} {'type': 'loss', 'content': 0.09836530685424805, 'timestamp': '2025-09-10 02:34:28.482753', 'step': 7262, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 2847885091968}, 'timestamp': '2025-09-10 02:34:28.512882', 'step': 7262, 'epoch': 1} {'type': 'loss', 'content': 0.1341702938079834, 'timestamp': '2025-09-10 02:34:28.515154', 'step': 7263, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 2847885091968}, 'timestamp': '2025-09-10 02:34:28.544469', 'step': 7263, 'epoch': 1} {'type': 'loss', 'content': 0.11166714131832123, 'timestamp': '2025-09-10 02:34:28.568040', 'step': 7264, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 2847885091968}, 'timestamp': '2025-09-10 02:34:28.597656', 'step': 7264, 'epoch': 1} {'type': 'loss', 'content': 0.20386934280395508, 'timestamp': '2025-09-10 02:34:28.600127', 'step': 7265, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 128], 'flops': 3797092544000}, 'timestamp': '2025-09-10 02:34:28.629678', 'step': 7265, 'epoch': 1} {'type': 'loss', 'content': 0.14027369022369385, 'timestamp': '2025-09-10 02:34:28.631980', 'step': 7266, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 2847885091968}, 'timestamp': '2025-09-10 02:34:28.664185', 'step': 7266, 'epoch': 1} {'type': 'loss', 'content': 0.05642823874950409, 'timestamp': '2025-09-10 02:34:28.666347', 'step': 7267, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 2847885091968}, 'timestamp': '2025-09-10 02:34:28.695500', 'step': 7267, 'epoch': 1} {'type': 'loss', 'content': 0.1315915733575821, 'timestamp': '2025-09-10 02:34:28.719380', 'step': 7268, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 2847885091968}, 'timestamp': '2025-09-10 02:34:28.751757', 'step': 7268, 'epoch': 1} {'type': 'loss', 'content': 0.2188156545162201, 'timestamp': '2025-09-10 02:34:28.753820', 'step': 7269, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 3322488817984}, 'timestamp': '2025-09-10 02:34:28.783766', 'step': 7269, 'epoch': 1} {'type': 'loss', 'content': 0.16845808923244476, 'timestamp': '2025-09-10 02:34:28.786294', 'step': 7270, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 3322488817984}, 'timestamp': '2025-09-10 02:34:28.815940', 'step': 7270, 'epoch': 1} {'type': 'loss', 'content': 0.21017320454120636, 'timestamp': '2025-09-10 02:34:28.818246', 'step': 7271, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 144], 'flops': 4271696270016}, 'timestamp': '2025-09-10 02:34:28.847922', 'step': 7271, 'epoch': 1} {'type': 'loss', 'content': 0.15167540311813354, 'timestamp': '2025-09-10 02:34:28.879553', 'step': 7272, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 3322488817984}, 'timestamp': '2025-09-10 02:34:28.910374', 'step': 7272, 'epoch': 1} {'type': 'loss', 'content': 0.15353074669837952, 'timestamp': '2025-09-10 02:34:28.912978', 'step': 7273, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 128], 'flops': 3797092544000}, 'timestamp': '2025-09-10 02:34:28.950765', 'step': 7273, 'epoch': 1} {'type': 'loss', 'content': 0.14251255989074707, 'timestamp': '2025-09-10 02:34:28.953034', 'step': 7274, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 2847885091968}, 'timestamp': '2025-09-10 02:34:28.982763', 'step': 7274, 'epoch': 1} {'type': 'loss', 'content': 0.07171840965747833, 'timestamp': '2025-09-10 02:34:28.985234', 'step': 7275, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 128], 'flops': 3797092544000}, 'timestamp': '2025-09-10 02:34:29.014497', 'step': 7275, 'epoch': 1} {'type': 'loss', 'content': 0.18408092856407166, 'timestamp': '2025-09-10 02:34:29.038158', 'step': 7276, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 2847885091968}, 'timestamp': '2025-09-10 02:34:29.082149', 'step': 7276, 'epoch': 1} {'type': 'loss', 'content': 0.09249184280633926, 'timestamp': '2025-09-10 02:34:29.085968', 'step': 7277, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 144], 'flops': 4271696270016}, 'timestamp': '2025-09-10 02:34:29.119027', 'step': 7277, 'epoch': 1} {'type': 'loss', 'content': 0.22513803839683533, 'timestamp': '2025-09-10 02:34:29.123166', 'step': 7278, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 3322488817984}, 'timestamp': '2025-09-10 02:34:29.161476', 'step': 7278, 'epoch': 1} {'type': 'loss', 'content': 0.11260358989238739, 'timestamp': '2025-09-10 02:34:29.165404', 'step': 7279, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 3322488817984}, 'timestamp': '2025-09-10 02:34:29.196838', 'step': 7279, 'epoch': 1} {'type': 'loss', 'content': 0.19422772526741028, 'timestamp': '2025-09-10 02:34:29.220662', 'step': 7280, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 2847885091968}, 'timestamp': '2025-09-10 02:34:29.255278', 'step': 7280, 'epoch': 1} {'type': 'loss', 'content': 0.27574777603149414, 'timestamp': '2025-09-10 02:34:29.260822', 'step': 7281, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 3322488817984}, 'timestamp': '2025-09-10 02:34:29.297780', 'step': 7281, 'epoch': 1} {'type': 'loss', 'content': 0.11048389971256256, 'timestamp': '2025-09-10 02:34:29.299834', 'step': 7282, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 2847885091968}, 'timestamp': '2025-09-10 02:34:29.344166', 'step': 7282, 'epoch': 1} {'type': 'loss', 'content': 0.14789487421512604, 'timestamp': '2025-09-10 02:34:29.348585', 'step': 7283, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 2847885091968}, 'timestamp': '2025-09-10 02:34:29.380861', 'step': 7283, 'epoch': 1} {'type': 'loss', 'content': 0.041173823177814484, 'timestamp': '2025-09-10 02:34:29.404360', 'step': 7284, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 128], 'flops': 3797092544000}, 'timestamp': '2025-09-10 02:34:29.436856', 'step': 7284, 'epoch': 1} {'type': 'loss', 'content': 0.06083749979734421, 'timestamp': '2025-09-10 02:34:29.439155', 'step': 7285, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 160], 'flops': 4746299996032}, 'timestamp': '2025-09-10 02:34:29.468973', 'step': 7285, 'epoch': 1} {'type': 'loss', 'content': 0.13988254964351654, 'timestamp': '2025-09-10 02:34:29.471776', 'step': 7286, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 2847885091968}, 'timestamp': '2025-09-10 02:34:29.502074', 'step': 7286, 'epoch': 1} {'type': 'loss', 'content': 0.09536857157945633, 'timestamp': '2025-09-10 02:34:29.504325', 'step': 7287, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 3322488817984}, 'timestamp': '2025-09-10 02:34:29.533901', 'step': 7287, 'epoch': 1} {'type': 'loss', 'content': 0.13035474717617035, 'timestamp': '2025-09-10 02:34:29.558705', 'step': 7288, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 144], 'flops': 4271696270016}, 'timestamp': '2025-09-10 02:34:29.590170', 'step': 7288, 'epoch': 1} {'type': 'loss', 'content': 0.08075150102376938, 'timestamp': '2025-09-10 02:34:29.592466', 'step': 7289, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 3322488817984}, 'timestamp': '2025-09-10 02:34:29.622940', 'step': 7289, 'epoch': 1} {'type': 'loss', 'content': 0.15371160209178925, 'timestamp': '2025-09-10 02:34:29.625187', 'step': 7290, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 2847885091968}, 'timestamp': '2025-09-10 02:34:29.654828', 'step': 7290, 'epoch': 1} {'type': 'loss', 'content': 0.12360052764415741, 'timestamp': '2025-09-10 02:34:29.657176', 'step': 7291, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 3322488817984}, 'timestamp': '2025-09-10 02:34:29.686745', 'step': 7291, 'epoch': 1} {'type': 'loss', 'content': 0.1656331866979599, 'timestamp': '2025-09-10 02:34:29.710289', 'step': 7292, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 3322488817984}, 'timestamp': '2025-09-10 02:34:29.740958', 'step': 7292, 'epoch': 1} {'type': 'loss', 'content': 0.15565787255764008, 'timestamp': '2025-09-10 02:34:29.742964', 'step': 7293, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 3322488817984}, 'timestamp': '2025-09-10 02:34:29.772448', 'step': 7293, 'epoch': 1} {'type': 'loss', 'content': 0.1164039745926857, 'timestamp': '2025-09-10 02:34:29.775227', 'step': 7294, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 3322488817984}, 'timestamp': '2025-09-10 02:34:29.805882', 'step': 7294, 'epoch': 1} {'type': 'loss', 'content': 0.15387822687625885, 'timestamp': '2025-09-10 02:34:29.808268', 'step': 7295, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 3322488817984}, 'timestamp': '2025-09-10 02:34:29.840333', 'step': 7295, 'epoch': 1} {'type': 'loss', 'content': 0.0710582509636879, 'timestamp': '2025-09-10 02:34:29.863622', 'step': 7296, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 2847885091968}, 'timestamp': '2025-09-10 02:34:29.893238', 'step': 7296, 'epoch': 1} {'type': 'loss', 'content': 0.13653883337974548, 'timestamp': '2025-09-10 02:34:29.896769', 'step': 7297, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 3322488817984}, 'timestamp': '2025-09-10 02:34:29.927975', 'step': 7297, 'epoch': 1} {'type': 'loss', 'content': 0.14994890987873077, 'timestamp': '2025-09-10 02:34:29.930382', 'step': 7298, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 3322488817984}, 'timestamp': '2025-09-10 02:34:29.961202', 'step': 7298, 'epoch': 1} {'type': 'loss', 'content': 0.0887707769870758, 'timestamp': '2025-09-10 02:34:29.963767', 'step': 7299, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 2847885091968}, 'timestamp': '2025-09-10 02:34:29.993974', 'step': 7299, 'epoch': 1} {'type': 'loss', 'content': 0.12492296099662781, 'timestamp': '2025-09-10 02:34:30.017388', 'step': 7300, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 2847885091968}, 'timestamp': '2025-09-10 02:34:30.047684', 'step': 7300, 'epoch': 1} {'type': 'loss', 'content': 0.17090806365013123, 'timestamp': '2025-09-10 02:34:30.049661', 'step': 7301, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 3322488817984}, 'timestamp': '2025-09-10 02:34:30.080663', 'step': 7301, 'epoch': 1} {'type': 'loss', 'content': 0.1557619422674179, 'timestamp': '2025-09-10 02:34:30.083467', 'step': 7302, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 144], 'flops': 4271696270016}, 'timestamp': '2025-09-10 02:34:30.115025', 'step': 7302, 'epoch': 1} {'type': 'loss', 'content': 0.14519208669662476, 'timestamp': '2025-09-10 02:34:30.117910', 'step': 7303, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 2847885091968}, 'timestamp': '2025-09-10 02:34:30.148647', 'step': 7303, 'epoch': 1} {'type': 'loss', 'content': 0.089727021753788, 'timestamp': '2025-09-10 02:34:30.172163', 'step': 7304, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 144], 'flops': 4271696270016}, 'timestamp': '2025-09-10 02:34:30.202937', 'step': 7304, 'epoch': 1} {'type': 'loss', 'content': 0.1378738433122635, 'timestamp': '2025-09-10 02:34:30.205858', 'step': 7305, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 3322488817984}, 'timestamp': '2025-09-10 02:34:30.235756', 'step': 7305, 'epoch': 1} {'type': 'loss', 'content': 0.16946889460086823, 'timestamp': '2025-09-10 02:34:30.238256', 'step': 7306, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 128], 'flops': 3797092544000}, 'timestamp': '2025-09-10 02:34:30.268627', 'step': 7306, 'epoch': 1} {'type': 'loss', 'content': 0.1251574605703354, 'timestamp': '2025-09-10 02:34:30.271255', 'step': 7307, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 3322488817984}, 'timestamp': '2025-09-10 02:34:30.301227', 'step': 7307, 'epoch': 1} {'type': 'loss', 'content': 0.14436446130275726, 'timestamp': '2025-09-10 02:34:30.324847', 'step': 7308, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 2847885091968}, 'timestamp': '2025-09-10 02:34:30.354657', 'step': 7308, 'epoch': 1} {'type': 'loss', 'content': 0.1806352287530899, 'timestamp': '2025-09-10 02:34:30.357423', 'step': 7309, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 128], 'flops': 3797092544000}, 'timestamp': '2025-09-10 02:34:30.388392', 'step': 7309, 'epoch': 1} {'type': 'loss', 'content': 0.18561497330665588, 'timestamp': '2025-09-10 02:34:30.390644', 'step': 7310, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 128], 'flops': 3797092544000}, 'timestamp': '2025-09-10 02:34:30.421341', 'step': 7310, 'epoch': 1} {'type': 'loss', 'content': 0.15646466612815857, 'timestamp': '2025-09-10 02:34:30.423307', 'step': 7311, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 2847885091968}, 'timestamp': '2025-09-10 02:34:30.452729', 'step': 7311, 'epoch': 1} {'type': 'loss', 'content': 0.16370531916618347, 'timestamp': '2025-09-10 02:34:30.476333', 'step': 7312, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 160], 'flops': 4746299996032}, 'timestamp': '2025-09-10 02:34:30.506776', 'step': 7312, 'epoch': 1} {'type': 'loss', 'content': 0.15299762785434723, 'timestamp': '2025-09-10 02:34:30.509010', 'step': 7313, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 128], 'flops': 3797092544000}, 'timestamp': '2025-09-10 02:34:30.538547', 'step': 7313, 'epoch': 1} {'type': 'loss', 'content': 0.1647949516773224, 'timestamp': '2025-09-10 02:34:30.540955', 'step': 7314, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 3322488817984}, 'timestamp': '2025-09-10 02:34:30.571265', 'step': 7314, 'epoch': 1} {'type': 'loss', 'content': 0.19195425510406494, 'timestamp': '2025-09-10 02:34:30.573536', 'step': 7315, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 3322488817984}, 'timestamp': '2025-09-10 02:34:30.603183', 'step': 7315, 'epoch': 1} {'type': 'loss', 'content': 0.11222507804632187, 'timestamp': '2025-09-10 02:34:30.626940', 'step': 7316, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 144], 'flops': 4271696270016}, 'timestamp': '2025-09-10 02:34:30.657164', 'step': 7316, 'epoch': 1} {'type': 'loss', 'content': 0.197287455201149, 'timestamp': '2025-09-10 02:34:30.659485', 'step': 7317, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 144], 'flops': 4271696270016}, 'timestamp': '2025-09-10 02:34:30.692250', 'step': 7317, 'epoch': 1} {'type': 'loss', 'content': 0.18234002590179443, 'timestamp': '2025-09-10 02:34:30.694716', 'step': 7318, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 2847885091968}, 'timestamp': '2025-09-10 02:34:30.724030', 'step': 7318, 'epoch': 1} {'type': 'loss', 'content': 0.11485319584608078, 'timestamp': '2025-09-10 02:34:30.731013', 'step': 7319, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 2847885091968}, 'timestamp': '2025-09-10 02:34:30.768221', 'step': 7319, 'epoch': 1} {'type': 'loss', 'content': 0.18454371392726898, 'timestamp': '2025-09-10 02:34:30.792031', 'step': 7320, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 2847885091968}, 'timestamp': '2025-09-10 02:34:30.822224', 'step': 7320, 'epoch': 1} {'type': 'loss', 'content': 0.2500725984573364, 'timestamp': '2025-09-10 02:34:30.824547', 'step': 7321, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 3322488817984}, 'timestamp': '2025-09-10 02:34:30.854427', 'step': 7321, 'epoch': 1} {'type': 'loss', 'content': 0.16872969269752502, 'timestamp': '2025-09-10 02:34:30.856627', 'step': 7322, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 2847885091968}, 'timestamp': '2025-09-10 02:34:30.886922', 'step': 7322, 'epoch': 1} {'type': 'loss', 'content': 0.1466706395149231, 'timestamp': '2025-09-10 02:34:30.889249', 'step': 7323, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 3322488817984}, 'timestamp': '2025-09-10 02:34:30.919578', 'step': 7323, 'epoch': 1} {'type': 'loss', 'content': 0.09231249988079071, 'timestamp': '2025-09-10 02:34:30.942984', 'step': 7324, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 3322488817984}, 'timestamp': '2025-09-10 02:34:30.972740', 'step': 7324, 'epoch': 1} {'type': 'loss', 'content': 0.10072427242994308, 'timestamp': '2025-09-10 02:34:30.975084', 'step': 7325, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 128], 'flops': 3797092544000}, 'timestamp': '2025-09-10 02:34:31.004729', 'step': 7325, 'epoch': 1} {'type': 'loss', 'content': 0.14460137486457825, 'timestamp': '2025-09-10 02:34:31.013316', 'step': 7326, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 128], 'flops': 3797092544000}, 'timestamp': '2025-09-10 02:34:31.058193', 'step': 7326, 'epoch': 1} {'type': 'loss', 'content': 0.11184635013341904, 'timestamp': '2025-09-10 02:34:31.064940', 'step': 7327, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 2847885091968}, 'timestamp': '2025-09-10 02:34:31.104489', 'step': 7327, 'epoch': 1} {'type': 'loss', 'content': 0.15876634418964386, 'timestamp': '2025-09-10 02:34:31.128199', 'step': 7328, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 2847885091968}, 'timestamp': '2025-09-10 02:34:31.158974', 'step': 7328, 'epoch': 1} {'type': 'loss', 'content': 0.10058537125587463, 'timestamp': '2025-09-10 02:34:31.160969', 'step': 7329, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 3322488817984}, 'timestamp': '2025-09-10 02:34:31.190371', 'step': 7329, 'epoch': 1} {'type': 'loss', 'content': 0.20057743787765503, 'timestamp': '2025-09-10 02:34:31.192544', 'step': 7330, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 3322488817984}, 'timestamp': '2025-09-10 02:34:31.224722', 'step': 7330, 'epoch': 1} {'type': 'loss', 'content': 0.11552957445383072, 'timestamp': '2025-09-10 02:34:31.227407', 'step': 7331, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 128], 'flops': 3797092544000}, 'timestamp': '2025-09-10 02:34:31.257564', 'step': 7331, 'epoch': 1} {'type': 'loss', 'content': 0.1468365341424942, 'timestamp': '2025-09-10 02:34:31.281207', 'step': 7332, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 3322488817984}, 'timestamp': '2025-09-10 02:34:31.310723', 'step': 7332, 'epoch': 1} {'type': 'loss', 'content': 0.15314923226833344, 'timestamp': '2025-09-10 02:34:31.313319', 'step': 7333, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 128], 'flops': 3797092544000}, 'timestamp': '2025-09-10 02:34:31.343491', 'step': 7333, 'epoch': 1} {'type': 'loss', 'content': 0.09491004049777985, 'timestamp': '2025-09-10 02:34:31.347564', 'step': 7334, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 128], 'flops': 3797092544000}, 'timestamp': '2025-09-10 02:34:31.377808', 'step': 7334, 'epoch': 1} {'type': 'loss', 'content': 0.0889366865158081, 'timestamp': '2025-09-10 02:34:31.380566', 'step': 7335, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 128], 'flops': 3797092544000}, 'timestamp': '2025-09-10 02:34:31.410288', 'step': 7335, 'epoch': 1} {'type': 'loss', 'content': 0.17152170836925507, 'timestamp': '2025-09-10 02:34:31.433842', 'step': 7336, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 3322488817984}, 'timestamp': '2025-09-10 02:34:31.463580', 'step': 7336, 'epoch': 1} {'type': 'loss', 'content': 0.20974981784820557, 'timestamp': '2025-09-10 02:34:31.465699', 'step': 7337, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 3322488817984}, 'timestamp': '2025-09-10 02:34:31.495264', 'step': 7337, 'epoch': 1} {'type': 'loss', 'content': 0.11236224323511124, 'timestamp': '2025-09-10 02:34:31.497414', 'step': 7338, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 160], 'flops': 4746299996032}, 'timestamp': '2025-09-10 02:34:31.528936', 'step': 7338, 'epoch': 1} {'type': 'loss', 'content': 0.1578698605298996, 'timestamp': '2025-09-10 02:34:31.531798', 'step': 7339, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 3322488817984}, 'timestamp': '2025-09-10 02:34:31.561491', 'step': 7339, 'epoch': 1} {'type': 'loss', 'content': 0.15813884139060974, 'timestamp': '2025-09-10 02:34:31.585391', 'step': 7340, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 2847885091968}, 'timestamp': '2025-09-10 02:34:31.614736', 'step': 7340, 'epoch': 1} {'type': 'loss', 'content': 0.192922905087471, 'timestamp': '2025-09-10 02:34:31.617322', 'step': 7341, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 3322488817984}, 'timestamp': '2025-09-10 02:34:31.647396', 'step': 7341, 'epoch': 1} {'type': 'loss', 'content': 0.16927240788936615, 'timestamp': '2025-09-10 02:34:31.649493', 'step': 7342, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 3322488817984}, 'timestamp': '2025-09-10 02:34:31.679590', 'step': 7342, 'epoch': 1} {'type': 'loss', 'content': 0.11956676095724106, 'timestamp': '2025-09-10 02:34:31.682142', 'step': 7343, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 2847885091968}, 'timestamp': '2025-09-10 02:34:31.711868', 'step': 7343, 'epoch': 1} {'type': 'loss', 'content': 0.2584158778190613, 'timestamp': '2025-09-10 02:34:31.735132', 'step': 7344, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 128], 'flops': 3797092544000}, 'timestamp': '2025-09-10 02:34:31.765276', 'step': 7344, 'epoch': 1} {'type': 'loss', 'content': 0.09313223510980606, 'timestamp': '2025-09-10 02:34:31.767347', 'step': 7345, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 128], 'flops': 3797092544000}, 'timestamp': '2025-09-10 02:34:31.797005', 'step': 7345, 'epoch': 1} {'type': 'loss', 'content': 0.12569330632686615, 'timestamp': '2025-09-10 02:34:31.799345', 'step': 7346, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 128], 'flops': 3797092544000}, 'timestamp': '2025-09-10 02:34:31.829124', 'step': 7346, 'epoch': 1} {'type': 'loss', 'content': 0.127966970205307, 'timestamp': '2025-09-10 02:34:31.831313', 'step': 7347, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 128], 'flops': 3797092544000}, 'timestamp': '2025-09-10 02:34:31.861131', 'step': 7347, 'epoch': 1} {'type': 'loss', 'content': 0.12052129954099655, 'timestamp': '2025-09-10 02:34:31.884791', 'step': 7348, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 2847885091968}, 'timestamp': '2025-09-10 02:34:31.915191', 'step': 7348, 'epoch': 1} {'type': 'loss', 'content': 0.07612302899360657, 'timestamp': '2025-09-10 02:34:31.917399', 'step': 7349, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 3322488817984}, 'timestamp': '2025-09-10 02:34:31.946756', 'step': 7349, 'epoch': 1} {'type': 'loss', 'content': 0.09612862765789032, 'timestamp': '2025-09-10 02:34:31.949955', 'step': 7350, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 128], 'flops': 3797092544000}, 'timestamp': '2025-09-10 02:34:31.979777', 'step': 7350, 'epoch': 1} {'type': 'loss', 'content': 0.11024771630764008, 'timestamp': '2025-09-10 02:34:31.983419', 'step': 7351, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 2847885091968}, 'timestamp': '2025-09-10 02:34:32.013577', 'step': 7351, 'epoch': 1} {'type': 'loss', 'content': 0.16721296310424805, 'timestamp': '2025-09-10 02:34:32.037005', 'step': 7352, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 3322488817984}, 'timestamp': '2025-09-10 02:34:32.068255', 'step': 7352, 'epoch': 1} {'type': 'loss', 'content': 0.15580736100673676, 'timestamp': '2025-09-10 02:34:32.070343', 'step': 7353, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 2847885091968}, 'timestamp': '2025-09-10 02:34:32.100324', 'step': 7353, 'epoch': 1} {'type': 'loss', 'content': 0.1790759265422821, 'timestamp': '2025-09-10 02:34:32.102483', 'step': 7354, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 2847885091968}, 'timestamp': '2025-09-10 02:34:32.133437', 'step': 7354, 'epoch': 1} {'type': 'loss', 'content': 0.2187860757112503, 'timestamp': '2025-09-10 02:34:32.135341', 'step': 7355, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 80], 'flops': 2373281365952}, 'timestamp': '2025-09-10 02:34:32.164720', 'step': 7355, 'epoch': 1} {'type': 'loss', 'content': 0.10013674199581146, 'timestamp': '2025-09-10 02:34:32.188276', 'step': 7356, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 3322488817984}, 'timestamp': '2025-09-10 02:34:32.217793', 'step': 7356, 'epoch': 1} {'type': 'loss', 'content': 0.21435509622097015, 'timestamp': '2025-09-10 02:34:32.219692', 'step': 7357, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 144], 'flops': 4271696270016}, 'timestamp': '2025-09-10 02:34:32.249714', 'step': 7357, 'epoch': 1} {'type': 'loss', 'content': 0.088272325694561, 'timestamp': '2025-09-10 02:34:32.251849', 'step': 7358, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 2847885091968}, 'timestamp': '2025-09-10 02:34:32.281699', 'step': 7358, 'epoch': 1} {'type': 'loss', 'content': 0.1695771962404251, 'timestamp': '2025-09-10 02:34:32.285911', 'step': 7359, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 128], 'flops': 3797092544000}, 'timestamp': '2025-09-10 02:34:32.316477', 'step': 7359, 'epoch': 1} {'type': 'loss', 'content': 0.14183229207992554, 'timestamp': '2025-09-10 02:34:32.340009', 'step': 7360, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 128], 'flops': 3797092544000}, 'timestamp': '2025-09-10 02:34:32.369572', 'step': 7360, 'epoch': 1} {'type': 'loss', 'content': 0.18638376891613007, 'timestamp': '2025-09-10 02:34:32.371710', 'step': 7361, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 2847885091968}, 'timestamp': '2025-09-10 02:34:32.401244', 'step': 7361, 'epoch': 1} {'type': 'loss', 'content': 0.1076560840010643, 'timestamp': '2025-09-10 02:34:32.403326', 'step': 7362, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 3322488817984}, 'timestamp': '2025-09-10 02:34:32.432646', 'step': 7362, 'epoch': 1} {'type': 'loss', 'content': 0.0854087769985199, 'timestamp': '2025-09-10 02:34:32.435201', 'step': 7363, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 2847885091968}, 'timestamp': '2025-09-10 02:34:32.465258', 'step': 7363, 'epoch': 1} {'type': 'loss', 'content': 0.2168102264404297, 'timestamp': '2025-09-10 02:34:32.488815', 'step': 7364, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 2847885091968}, 'timestamp': '2025-09-10 02:34:32.518531', 'step': 7364, 'epoch': 1} {'type': 'loss', 'content': 0.2228609174489975, 'timestamp': '2025-09-10 02:34:32.520919', 'step': 7365, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 2847885091968}, 'timestamp': '2025-09-10 02:34:32.550598', 'step': 7365, 'epoch': 1} {'type': 'loss', 'content': 0.06835020333528519, 'timestamp': '2025-09-10 02:34:32.552771', 'step': 7366, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 128], 'flops': 3797092544000}, 'timestamp': '2025-09-10 02:34:32.582248', 'step': 7366, 'epoch': 1} {'type': 'loss', 'content': 0.14556172490119934, 'timestamp': '2025-09-10 02:34:32.584267', 'step': 7367, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 2847885091968}, 'timestamp': '2025-09-10 02:34:32.614275', 'step': 7367, 'epoch': 1} {'type': 'loss', 'content': 0.21206387877464294, 'timestamp': '2025-09-10 02:34:32.638467', 'step': 7368, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 3322488817984}, 'timestamp': '2025-09-10 02:34:32.668737', 'step': 7368, 'epoch': 1} {'type': 'loss', 'content': 0.1653321534395218, 'timestamp': '2025-09-10 02:34:32.671244', 'step': 7369, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 3322488817984}, 'timestamp': '2025-09-10 02:34:32.700517', 'step': 7369, 'epoch': 1} {'type': 'loss', 'content': 0.12636813521385193, 'timestamp': '2025-09-10 02:34:32.703107', 'step': 7370, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 176], 'flops': 5220903722048}, 'timestamp': '2025-09-10 02:34:32.732793', 'step': 7370, 'epoch': 1} {'type': 'loss', 'content': 0.1146048828959465, 'timestamp': '2025-09-10 02:34:32.738176', 'step': 7371, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 3322488817984}, 'timestamp': '2025-09-10 02:34:32.774163', 'step': 7371, 'epoch': 1} {'type': 'loss', 'content': 0.14197812974452972, 'timestamp': '2025-09-10 02:34:32.797489', 'step': 7372, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 3322488817984}, 'timestamp': '2025-09-10 02:34:32.827425', 'step': 7372, 'epoch': 1} {'type': 'loss', 'content': 0.1698940247297287, 'timestamp': '2025-09-10 02:34:32.829935', 'step': 7373, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 144], 'flops': 4271696270016}, 'timestamp': '2025-09-10 02:34:32.860395', 'step': 7373, 'epoch': 1} {'type': 'loss', 'content': 0.1483185887336731, 'timestamp': '2025-09-10 02:34:32.862634', 'step': 7374, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 3322488817984}, 'timestamp': '2025-09-10 02:34:32.893704', 'step': 7374, 'epoch': 1} {'type': 'loss', 'content': 0.13921301066875458, 'timestamp': '2025-09-10 02:34:32.895974', 'step': 7375, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 128], 'flops': 3797092544000}, 'timestamp': '2025-09-10 02:34:32.926921', 'step': 7375, 'epoch': 1} {'type': 'loss', 'content': 0.2143562138080597, 'timestamp': '2025-09-10 02:34:32.950052', 'step': 7376, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 128], 'flops': 3797092544000}, 'timestamp': '2025-09-10 02:34:32.980375', 'step': 7376, 'epoch': 1} {'type': 'loss', 'content': 0.19096210598945618, 'timestamp': '2025-09-10 02:34:32.982518', 'step': 7377, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 128], 'flops': 3797092544000}, 'timestamp': '2025-09-10 02:34:33.011952', 'step': 7377, 'epoch': 1} {'type': 'loss', 'content': 0.12668544054031372, 'timestamp': '2025-09-10 02:34:33.014421', 'step': 7378, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 3322488817984}, 'timestamp': '2025-09-10 02:34:33.044197', 'step': 7378, 'epoch': 1} {'type': 'loss', 'content': 0.11016002297401428, 'timestamp': '2025-09-10 02:34:33.046678', 'step': 7379, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 128], 'flops': 3797092544000}, 'timestamp': '2025-09-10 02:34:33.077333', 'step': 7379, 'epoch': 1} {'type': 'loss', 'content': 0.14730727672576904, 'timestamp': '2025-09-10 02:34:33.100575', 'step': 7380, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 3322488817984}, 'timestamp': '2025-09-10 02:34:33.130196', 'step': 7380, 'epoch': 1} {'type': 'loss', 'content': 0.20863784849643707, 'timestamp': '2025-09-10 02:34:33.132325', 'step': 7381, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 2847885091968}, 'timestamp': '2025-09-10 02:34:33.162450', 'step': 7381, 'epoch': 1} {'type': 'loss', 'content': 0.18606962263584137, 'timestamp': '2025-09-10 02:34:33.164632', 'step': 7382, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 3322488817984}, 'timestamp': '2025-09-10 02:34:33.194133', 'step': 7382, 'epoch': 1} {'type': 'loss', 'content': 0.145856574177742, 'timestamp': '2025-09-10 02:34:33.197644', 'step': 7383, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 3322488817984}, 'timestamp': '2025-09-10 02:34:33.227653', 'step': 7383, 'epoch': 1} {'type': 'loss', 'content': 0.13568979501724243, 'timestamp': '2025-09-10 02:34:33.251322', 'step': 7384, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 3322488817984}, 'timestamp': '2025-09-10 02:34:33.280973', 'step': 7384, 'epoch': 1} {'type': 'loss', 'content': 0.22249861061573029, 'timestamp': '2025-09-10 02:34:33.285177', 'step': 7385, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 128], 'flops': 3797092544000}, 'timestamp': '2025-09-10 02:34:33.321137', 'step': 7385, 'epoch': 1} {'type': 'loss', 'content': 0.15106922388076782, 'timestamp': '2025-09-10 02:34:33.323425', 'step': 7386, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 3322488817984}, 'timestamp': '2025-09-10 02:34:33.353017', 'step': 7386, 'epoch': 1} {'type': 'loss', 'content': 0.07444237172603607, 'timestamp': '2025-09-10 02:34:33.355150', 'step': 7387, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 128], 'flops': 3797092544000}, 'timestamp': '2025-09-10 02:34:33.384752', 'step': 7387, 'epoch': 1} {'type': 'loss', 'content': 0.11687370389699936, 'timestamp': '2025-09-10 02:34:33.408439', 'step': 7388, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 128], 'flops': 3797092544000}, 'timestamp': '2025-09-10 02:34:33.438639', 'step': 7388, 'epoch': 1} {'type': 'loss', 'content': 0.2102721780538559, 'timestamp': '2025-09-10 02:34:33.441544', 'step': 7389, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 128], 'flops': 3797092544000}, 'timestamp': '2025-09-10 02:34:33.471874', 'step': 7389, 'epoch': 1} {'type': 'loss', 'content': 0.14641797542572021, 'timestamp': '2025-09-10 02:34:33.474309', 'step': 7390, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 128], 'flops': 3797092544000}, 'timestamp': '2025-09-10 02:34:33.516079', 'step': 7390, 'epoch': 1} {'type': 'loss', 'content': 0.23990964889526367, 'timestamp': '2025-09-10 02:34:33.518866', 'step': 7391, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 3322488817984}, 'timestamp': '2025-09-10 02:34:33.548608', 'step': 7391, 'epoch': 1} {'type': 'loss', 'content': 0.14423491060733795, 'timestamp': '2025-09-10 02:34:33.572234', 'step': 7392, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 144], 'flops': 4271696270016}, 'timestamp': '2025-09-10 02:34:33.601949', 'step': 7392, 'epoch': 1} {'type': 'loss', 'content': 0.19538617134094238, 'timestamp': '2025-09-10 02:34:33.604325', 'step': 7393, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 128], 'flops': 3797092544000}, 'timestamp': '2025-09-10 02:34:33.639692', 'step': 7393, 'epoch': 1} {'type': 'loss', 'content': 0.14637009799480438, 'timestamp': '2025-09-10 02:34:33.643698', 'step': 7394, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 128], 'flops': 3797092544000}, 'timestamp': '2025-09-10 02:34:33.674808', 'step': 7394, 'epoch': 1} {'type': 'loss', 'content': 0.1443457305431366, 'timestamp': '2025-09-10 02:34:33.682219', 'step': 7395, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 3322488817984}, 'timestamp': '2025-09-10 02:34:33.719244', 'step': 7395, 'epoch': 1} {'type': 'loss', 'content': 0.12273064255714417, 'timestamp': '2025-09-10 02:34:33.742761', 'step': 7396, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 160], 'flops': 4746299996032}, 'timestamp': '2025-09-10 02:34:33.772673', 'step': 7396, 'epoch': 1} {'type': 'loss', 'content': 0.11285530775785446, 'timestamp': '2025-09-10 02:34:33.774933', 'step': 7397, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 2847885091968}, 'timestamp': '2025-09-10 02:34:33.804189', 'step': 7397, 'epoch': 1} {'type': 'loss', 'content': 0.10377415269613266, 'timestamp': '2025-09-10 02:34:33.807094', 'step': 7398, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 3322488817984}, 'timestamp': '2025-09-10 02:34:33.837266', 'step': 7398, 'epoch': 1} {'type': 'loss', 'content': 0.14000946283340454, 'timestamp': '2025-09-10 02:34:33.839260', 'step': 7399, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 3322488817984}, 'timestamp': '2025-09-10 02:34:33.869525', 'step': 7399, 'epoch': 1} {'type': 'loss', 'content': 0.12461506575345993, 'timestamp': '2025-09-10 02:34:33.895351', 'step': 7400, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 144], 'flops': 4271696270016}, 'timestamp': '2025-09-10 02:34:33.925448', 'step': 7400, 'epoch': 1} {'type': 'loss', 'content': 0.22367848455905914, 'timestamp': '2025-09-10 02:34:33.927713', 'step': 7401, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 3322488817984}, 'timestamp': '2025-09-10 02:34:33.957654', 'step': 7401, 'epoch': 1} {'type': 'loss', 'content': 0.14608660340309143, 'timestamp': '2025-09-10 02:34:33.960412', 'step': 7402, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 128], 'flops': 3797092544000}, 'timestamp': '2025-09-10 02:34:33.992475', 'step': 7402, 'epoch': 1} {'type': 'loss', 'content': 0.13448168337345123, 'timestamp': '2025-09-10 02:34:33.994841', 'step': 7403, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 3322488817984}, 'timestamp': '2025-09-10 02:34:34.024660', 'step': 7403, 'epoch': 1} {'type': 'loss', 'content': 0.1807740479707718, 'timestamp': '2025-09-10 02:34:34.048104', 'step': 7404, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 144], 'flops': 4271696270016}, 'timestamp': '2025-09-10 02:34:34.079294', 'step': 7404, 'epoch': 1} {'type': 'loss', 'content': 0.10201214998960495, 'timestamp': '2025-09-10 02:34:34.081720', 'step': 7405, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 2847885091968}, 'timestamp': '2025-09-10 02:34:34.113204', 'step': 7405, 'epoch': 1} {'type': 'loss', 'content': 0.13924112915992737, 'timestamp': '2025-09-10 02:34:34.119420', 'step': 7406, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 3322488817984}, 'timestamp': '2025-09-10 02:34:34.154905', 'step': 7406, 'epoch': 1} {'type': 'loss', 'content': 0.19729137420654297, 'timestamp': '2025-09-10 02:34:34.156971', 'step': 7407, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 3322488817984}, 'timestamp': '2025-09-10 02:34:34.187510', 'step': 7407, 'epoch': 1} {'type': 'loss', 'content': 0.3013283610343933, 'timestamp': '2025-09-10 02:34:34.211034', 'step': 7408, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 2847885091968}, 'timestamp': '2025-09-10 02:34:34.242623', 'step': 7408, 'epoch': 1} {'type': 'loss', 'content': 0.22923311591148376, 'timestamp': '2025-09-10 02:34:34.245398', 'step': 7409, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 3322488817984}, 'timestamp': '2025-09-10 02:34:34.275843', 'step': 7409, 'epoch': 1} {'type': 'loss', 'content': 0.10646270960569382, 'timestamp': '2025-09-10 02:34:34.278004', 'step': 7410, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 3322488817984}, 'timestamp': '2025-09-10 02:34:34.308403', 'step': 7410, 'epoch': 1} {'type': 'loss', 'content': 0.0717661902308464, 'timestamp': '2025-09-10 02:34:34.312237', 'step': 7411, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 2847885091968}, 'timestamp': '2025-09-10 02:34:34.341593', 'step': 7411, 'epoch': 1} {'type': 'loss', 'content': 0.11316026002168655, 'timestamp': '2025-09-10 02:34:34.365630', 'step': 7412, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 80], 'flops': 2373281365952}, 'timestamp': '2025-09-10 02:34:34.395218', 'step': 7412, 'epoch': 1} {'type': 'loss', 'content': 0.1753949373960495, 'timestamp': '2025-09-10 02:34:34.397464', 'step': 7413, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 2847885091968}, 'timestamp': '2025-09-10 02:34:34.427577', 'step': 7413, 'epoch': 1} {'type': 'loss', 'content': 0.05731072649359703, 'timestamp': '2025-09-10 02:34:34.429695', 'step': 7414, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 3322488817984}, 'timestamp': '2025-09-10 02:34:34.461580', 'step': 7414, 'epoch': 1} {'type': 'loss', 'content': 0.06677889823913574, 'timestamp': '2025-09-10 02:34:34.464151', 'step': 7415, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 2847885091968}, 'timestamp': '2025-09-10 02:34:34.495613', 'step': 7415, 'epoch': 1} {'type': 'loss', 'content': 0.11288772523403168, 'timestamp': '2025-09-10 02:34:34.519456', 'step': 7416, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 80], 'flops': 2373281365952}, 'timestamp': '2025-09-10 02:34:34.549473', 'step': 7416, 'epoch': 1} {'type': 'loss', 'content': 0.21435916423797607, 'timestamp': '2025-09-10 02:34:34.551623', 'step': 7417, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 2847885091968}, 'timestamp': '2025-09-10 02:34:34.581789', 'step': 7417, 'epoch': 1} {'type': 'loss', 'content': 0.07195860147476196, 'timestamp': '2025-09-10 02:34:34.584429', 'step': 7418, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 3322488817984}, 'timestamp': '2025-09-10 02:34:34.615027', 'step': 7418, 'epoch': 1} {'type': 'loss', 'content': 0.15516607463359833, 'timestamp': '2025-09-10 02:34:34.617287', 'step': 7419, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 128], 'flops': 3797092544000}, 'timestamp': '2025-09-10 02:34:34.647003', 'step': 7419, 'epoch': 1} {'type': 'loss', 'content': 0.14176088571548462, 'timestamp': '2025-09-10 02:34:34.671265', 'step': 7420, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 144], 'flops': 4271696270016}, 'timestamp': '2025-09-10 02:34:34.701970', 'step': 7420, 'epoch': 1} {'type': 'loss', 'content': 0.08030165731906891, 'timestamp': '2025-09-10 02:34:34.704281', 'step': 7421, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 3322488817984}, 'timestamp': '2025-09-10 02:34:34.735691', 'step': 7421, 'epoch': 1} {'type': 'loss', 'content': 0.17452585697174072, 'timestamp': '2025-09-10 02:34:34.740336', 'step': 7422, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 3322488817984}, 'timestamp': '2025-09-10 02:34:34.776483', 'step': 7422, 'epoch': 1} {'type': 'loss', 'content': 0.1616237312555313, 'timestamp': '2025-09-10 02:34:34.778646', 'step': 7423, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 3322488817984}, 'timestamp': '2025-09-10 02:34:34.807964', 'step': 7423, 'epoch': 1} {'type': 'loss', 'content': 0.0925506055355072, 'timestamp': '2025-09-10 02:34:34.831413', 'step': 7424, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 3322488817984}, 'timestamp': '2025-09-10 02:34:34.861315', 'step': 7424, 'epoch': 1} {'type': 'loss', 'content': 0.24144244194030762, 'timestamp': '2025-09-10 02:34:34.863814', 'step': 7425, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 3322488817984}, 'timestamp': '2025-09-10 02:34:34.893826', 'step': 7425, 'epoch': 1} {'type': 'loss', 'content': 0.11048371344804764, 'timestamp': '2025-09-10 02:34:34.896805', 'step': 7426, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 2847885091968}, 'timestamp': '2025-09-10 02:34:34.929587', 'step': 7426, 'epoch': 1} {'type': 'loss', 'content': 0.10413747280836105, 'timestamp': '2025-09-10 02:34:34.931778', 'step': 7427, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 2847885091968}, 'timestamp': '2025-09-10 02:34:34.961498', 'step': 7427, 'epoch': 1} {'type': 'loss', 'content': 0.07534793019294739, 'timestamp': '2025-09-10 02:34:34.985022', 'step': 7428, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 160], 'flops': 4746299996032}, 'timestamp': '2025-09-10 02:34:35.014359', 'step': 7428, 'epoch': 1} {'type': 'loss', 'content': 0.16562676429748535, 'timestamp': '2025-09-10 02:34:35.016444', 'step': 7429, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 3322488817984}, 'timestamp': '2025-09-10 02:34:35.047822', 'step': 7429, 'epoch': 1} {'type': 'loss', 'content': 0.11033990979194641, 'timestamp': '2025-09-10 02:34:35.049915', 'step': 7430, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 128], 'flops': 3797092544000}, 'timestamp': '2025-09-10 02:34:35.079516', 'step': 7430, 'epoch': 1} {'type': 'loss', 'content': 0.178294837474823, 'timestamp': '2025-09-10 02:34:35.083086', 'step': 7431, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 144], 'flops': 4271696270016}, 'timestamp': '2025-09-10 02:34:35.114254', 'step': 7431, 'epoch': 1} {'type': 'loss', 'content': 0.1745675653219223, 'timestamp': '2025-09-10 02:34:35.137753', 'step': 7432, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 2847885091968}, 'timestamp': '2025-09-10 02:34:35.167415', 'step': 7432, 'epoch': 1} {'type': 'loss', 'content': 0.1436711847782135, 'timestamp': '2025-09-10 02:34:35.170162', 'step': 7433, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 144], 'flops': 4271696270016}, 'timestamp': '2025-09-10 02:34:35.200397', 'step': 7433, 'epoch': 1} {'type': 'loss', 'content': 0.1665256917476654, 'timestamp': '2025-09-10 02:34:35.202822', 'step': 7434, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 3322488817984}, 'timestamp': '2025-09-10 02:34:35.233488', 'step': 7434, 'epoch': 1} {'type': 'loss', 'content': 0.141226127743721, 'timestamp': '2025-09-10 02:34:35.235494', 'step': 7435, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 3322488817984}, 'timestamp': '2025-09-10 02:34:35.265415', 'step': 7435, 'epoch': 1} {'type': 'loss', 'content': 0.12452056258916855, 'timestamp': '2025-09-10 02:34:35.289016', 'step': 7436, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 3322488817984}, 'timestamp': '2025-09-10 02:34:35.318840', 'step': 7436, 'epoch': 1} {'type': 'loss', 'content': 0.16519027948379517, 'timestamp': '2025-09-10 02:34:35.321269', 'step': 7437, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 2847885091968}, 'timestamp': '2025-09-10 02:34:35.352881', 'step': 7437, 'epoch': 1} {'type': 'loss', 'content': 0.0791533812880516, 'timestamp': '2025-09-10 02:34:35.354952', 'step': 7438, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 128], 'flops': 3797092544000}, 'timestamp': '2025-09-10 02:34:35.387051', 'step': 7438, 'epoch': 1} {'type': 'loss', 'content': 0.11761146038770676, 'timestamp': '2025-09-10 02:34:35.389015', 'step': 7439, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 128], 'flops': 3797092544000}, 'timestamp': '2025-09-10 02:34:35.418585', 'step': 7439, 'epoch': 1} {'type': 'loss', 'content': 0.2160194218158722, 'timestamp': '2025-09-10 02:34:35.442322', 'step': 7440, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 2847885091968}, 'timestamp': '2025-09-10 02:34:35.472867', 'step': 7440, 'epoch': 1} {'type': 'loss', 'content': 0.1524745672941208, 'timestamp': '2025-09-10 02:34:35.476054', 'step': 7441, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 128], 'flops': 3797092544000}, 'timestamp': '2025-09-10 02:34:35.506185', 'step': 7441, 'epoch': 1} {'type': 'loss', 'content': 0.15341633558273315, 'timestamp': '2025-09-10 02:34:35.509297', 'step': 7442, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 192], 'flops': 5695507448064}, 'timestamp': '2025-09-10 02:34:35.539625', 'step': 7442, 'epoch': 1} {'type': 'loss', 'content': 0.2630959749221802, 'timestamp': '2025-09-10 02:34:35.544428', 'step': 7443, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 2847885091968}, 'timestamp': '2025-09-10 02:34:35.574575', 'step': 7443, 'epoch': 1} {'type': 'loss', 'content': 0.05955348536372185, 'timestamp': '2025-09-10 02:34:35.598024', 'step': 7444, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 3322488817984}, 'timestamp': '2025-09-10 02:34:35.628281', 'step': 7444, 'epoch': 1} {'type': 'loss', 'content': 0.13445216417312622, 'timestamp': '2025-09-10 02:34:35.630418', 'step': 7445, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 3322488817984}, 'timestamp': '2025-09-10 02:34:35.659540', 'step': 7445, 'epoch': 1} {'type': 'loss', 'content': 0.12494824081659317, 'timestamp': '2025-09-10 02:34:35.661731', 'step': 7446, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 3322488817984}, 'timestamp': '2025-09-10 02:34:35.691969', 'step': 7446, 'epoch': 1} {'type': 'loss', 'content': 0.23554418981075287, 'timestamp': '2025-09-10 02:34:35.694190', 'step': 7447, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 3322488817984}, 'timestamp': '2025-09-10 02:34:35.723585', 'step': 7447, 'epoch': 1} {'type': 'loss', 'content': 0.15301015973091125, 'timestamp': '2025-09-10 02:34:35.747212', 'step': 7448, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 160], 'flops': 4746299996032}, 'timestamp': '2025-09-10 02:34:35.785332', 'step': 7448, 'epoch': 1} {'type': 'loss', 'content': 0.11343973129987717, 'timestamp': '2025-09-10 02:34:35.787437', 'step': 7449, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 3322488817984}, 'timestamp': '2025-09-10 02:34:35.817263', 'step': 7449, 'epoch': 1} {'type': 'loss', 'content': 0.1370459645986557, 'timestamp': '2025-09-10 02:34:35.819504', 'step': 7450, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 2847885091968}, 'timestamp': '2025-09-10 02:34:35.849485', 'step': 7450, 'epoch': 1} {'type': 'loss', 'content': 0.1542884260416031, 'timestamp': '2025-09-10 02:34:35.852091', 'step': 7451, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 3322488817984}, 'timestamp': '2025-09-10 02:34:35.882202', 'step': 7451, 'epoch': 1} {'type': 'loss', 'content': 0.1051938384771347, 'timestamp': '2025-09-10 02:34:35.906062', 'step': 7452, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 2847885091968}, 'timestamp': '2025-09-10 02:34:35.935498', 'step': 7452, 'epoch': 1} {'type': 'loss', 'content': 0.09559290111064911, 'timestamp': '2025-09-10 02:34:35.937387', 'step': 7453, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 3322488817984}, 'timestamp': '2025-09-10 02:34:35.966884', 'step': 7453, 'epoch': 1} {'type': 'loss', 'content': 0.055112168192863464, 'timestamp': '2025-09-10 02:34:35.969014', 'step': 7454, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 80], 'flops': 2373281365952}, 'timestamp': '2025-09-10 02:34:35.999075', 'step': 7454, 'epoch': 1} {'type': 'loss', 'content': 0.35604041814804077, 'timestamp': '2025-09-10 02:34:36.001259', 'step': 7455, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 128], 'flops': 3797092544000}, 'timestamp': '2025-09-10 02:34:36.030769', 'step': 7455, 'epoch': 1} {'type': 'loss', 'content': 0.14566022157669067, 'timestamp': '2025-09-10 02:34:36.055487', 'step': 7456, 'epoch': 1} {'type': 'flops', 'content': [{'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1582003754624}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1898404492032}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1898404492032}, {'type': 'perplexity', 'in_batch_dim': [8, 144], 'batch_size': 8, 'flops': 2847606704256}, {'type': 'perplexity', 'in_batch_dim': [8, 64], 'batch_size': 8, 'flops': 1265603017216}, {'type': 'perplexity', 'in_batch_dim': [8, 112], 'batch_size': 8, 'flops': 2214805229440}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1898404492032}, {'type': 'perplexity', 'in_batch_dim': [8, 112], 'batch_size': 8, 'flops': 2214805229440}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1898404492032}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1898404492032}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1898404492032}, {'type': 'perplexity', 'in_batch_dim': [8, 112], 'batch_size': 8, 'flops': 2214805229440}, {'type': 'perplexity', 'in_batch_dim': [8, 112], 'batch_size': 8, 'flops': 2214805229440}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1582003754624}, {'type': 'perplexity', 'in_batch_dim': [8, 144], 'batch_size': 8, 'flops': 2847606704256}, {'type': 'perplexity', 'in_batch_dim': [8, 112], 'batch_size': 8, 'flops': 2214805229440}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1582003754624}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1898404492032}, {'type': 'perplexity', 'in_batch_dim': [8, 64], 'batch_size': 8, 'flops': 1265603017216}, {'type': 'perplexity', 'in_batch_dim': [8, 64], 'batch_size': 8, 'flops': 1265603017216}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1898404492032}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1582003754624}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1582003754624}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1582003754624}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1582003754624}, {'type': 'perplexity', 'in_batch_dim': [8, 64], 'batch_size': 8, 'flops': 1265603017216}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1898404492032}, {'type': 'perplexity', 'in_batch_dim': [8, 64], 'batch_size': 8, 'flops': 1265603017216}, {'type': 'perplexity', 'in_batch_dim': [8, 64], 'batch_size': 8, 'flops': 1265603017216}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1582003754624}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1582003754624}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1582003754624}, {'type': 'perplexity', 'in_batch_dim': [8, 112], 'batch_size': 8, 'flops': 2214805229440}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1582003754624}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1582003754624}, {'type': 'perplexity', 'in_batch_dim': [8, 160], 'batch_size': 8, 'flops': 3164007441664}, {'type': 'perplexity', 'in_batch_dim': [8, 64], 'batch_size': 8, 'flops': 1265603017216}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1898404492032}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1898404492032}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1898404492032}, {'type': 'perplexity', 'in_batch_dim': [8, 64], 'batch_size': 8, 'flops': 1265603017216}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1582003754624}, {'type': 'perplexity', 'in_batch_dim': [8, 64], 'batch_size': 8, 'flops': 1265603017216}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1582003754624}, {'type': 'perplexity', 'in_batch_dim': [8, 64], 'batch_size': 8, 'flops': 1265603017216}, {'type': 'perplexity', 'in_batch_dim': [8, 112], 'batch_size': 8, 'flops': 2214805229440}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1582003754624}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1898404492032}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1898404492032}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1898404492032}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1898404492032}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1898404492032}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1898404492032}, {'type': 'perplexity', 'in_batch_dim': [8, 64], 'batch_size': 8, 'flops': 1265603017216}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1898404492032}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1582003754624}, {'type': 'perplexity', 'in_batch_dim': [8, 64], 'batch_size': 8, 'flops': 1265603017216}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1898404492032}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1898404492032}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1898404492032}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1898404492032}, {'type': 'perplexity', 'in_batch_dim': [8, 128], 'batch_size': 8, 'flops': 2531205966848}, {'type': 'perplexity', 'in_batch_dim': [8, 112], 'batch_size': 8, 'flops': 2214805229440}, {'type': 'perplexity', 'in_batch_dim': [8, 64], 'batch_size': 8, 'flops': 1265603017216}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1582003754624}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1898404492032}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1582003754624}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1898404492032}, {'type': 'perplexity', 'in_batch_dim': [8, 64], 'batch_size': 8, 'flops': 1265603017216}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1898404492032}, {'type': 'perplexity', 'in_batch_dim': [8, 112], 'batch_size': 8, 'flops': 2214805229440}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1898404492032}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1582003754624}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1582003754624}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1582003754624}, {'type': 'perplexity', 'in_batch_dim': [8, 64], 'batch_size': 8, 'flops': 1265603017216}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1582003754624}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1898404492032}, {'type': 'perplexity', 'in_batch_dim': [8, 64], 'batch_size': 8, 'flops': 1265603017216}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1582003754624}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1898404492032}, {'type': 'perplexity', 'in_batch_dim': [8, 64], 'batch_size': 8, 'flops': 1265603017216}, {'type': 'perplexity', 'in_batch_dim': [8, 112], 'batch_size': 8, 'flops': 2214805229440}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1582003754624}, {'type': 'perplexity', 'in_batch_dim': [8, 144], 'batch_size': 8, 'flops': 2847606704256}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1582003754624}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1582003754624}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1582003754624}, {'type': 'perplexity', 'in_batch_dim': [8, 64], 'batch_size': 8, 'flops': 1265603017216}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1898404492032}, {'type': 'perplexity', 'in_batch_dim': [8, 112], 'batch_size': 8, 'flops': 2214805229440}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1898404492032}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1582003754624}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1582003754624}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1582003754624}, {'type': 'perplexity', 'in_batch_dim': [8, 112], 'batch_size': 8, 'flops': 2214805229440}, {'type': 'perplexity', 'in_batch_dim': [8, 64], 'batch_size': 8, 'flops': 1265603017216}, {'type': 'perplexity', 'in_batch_dim': [8, 112], 'batch_size': 8, 'flops': 2214805229440}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1582003754624}, {'type': 'perplexity', 'in_batch_dim': [8, 64], 'batch_size': 8, 'flops': 1265603017216}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1582003754624}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1582003754624}, {'type': 'perplexity', 'in_batch_dim': [8, 64], 'batch_size': 8, 'flops': 1265603017216}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1582003754624}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1582003754624}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1582003754624}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1898404492032}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1582003754624}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1582003754624}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1898404492032}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1898404492032}, {'type': 'perplexity', 'in_batch_dim': [8, 112], 'batch_size': 8, 'flops': 2214805229440}, {'type': 'perplexity', 'in_batch_dim': [8, 64], 'batch_size': 8, 'flops': 1265603017216}, {'type': 'perplexity', 'in_batch_dim': [8, 112], 'batch_size': 8, 'flops': 2214805229440}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1898404492032}, {'type': 'perplexity', 'in_batch_dim': [8, 112], 'batch_size': 8, 'flops': 2214805229440}, {'type': 'perplexity', 'in_batch_dim': [8, 128], 'batch_size': 8, 'flops': 2531205966848}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1582003754624}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1898404492032}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1898404492032}, {'type': 'perplexity', 'in_batch_dim': [8, 192], 'batch_size': 8, 'flops': 3796808916480}, {'type': 'perplexity', 'in_batch_dim': [8, 64], 'batch_size': 8, 'flops': 1265603017216}, {'type': 'perplexity', 'in_batch_dim': [8, 144], 'batch_size': 8, 'flops': 2847606704256}, {'type': 'perplexity', 'in_batch_dim': [8, 64], 'batch_size': 8, 'flops': 1265603017216}, {'type': 'perplexity', 'in_batch_dim': [8, 144], 'batch_size': 8, 'flops': 2847606704256}, {'type': 'perplexity', 'in_batch_dim': [8, 64], 'batch_size': 8, 'flops': 1265603017216}, {'type': 'perplexity', 'in_batch_dim': [8, 64], 'batch_size': 8, 'flops': 1265603017216}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1898404492032}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1898404492032}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1582003754624}, {'type': 'perplexity', 'in_batch_dim': [8, 128], 'batch_size': 8, 'flops': 2531205966848}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1898404492032}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1898404492032}, {'type': 'perplexity', 'in_batch_dim': [8, 112], 'batch_size': 8, 'flops': 2214805229440}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1582003754624}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1582003754624}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1898404492032}, {'type': 'perplexity', 'in_batch_dim': [8, 64], 'batch_size': 8, 'flops': 1265603017216}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1898404492032}, {'type': 'perplexity', 'in_batch_dim': [8, 112], 'batch_size': 8, 'flops': 2214805229440}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1898404492032}, {'type': 'perplexity', 'in_batch_dim': [8, 64], 'batch_size': 8, 'flops': 1265603017216}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1582003754624}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1898404492032}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1898404492032}, {'type': 'perplexity', 'in_batch_dim': [8, 64], 'batch_size': 8, 'flops': 1265603017216}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1898404492032}, {'type': 'perplexity', 'in_batch_dim': [8, 64], 'batch_size': 8, 'flops': 1265603017216}, {'type': 'perplexity', 'in_batch_dim': [8, 112], 'batch_size': 8, 'flops': 2214805229440}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1582003754624}, {'type': 'perplexity', 'in_batch_dim': [8, 128], 'batch_size': 8, 'flops': 2531205966848}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1582003754624}, {'type': 'perplexity', 'in_batch_dim': [8, 112], 'batch_size': 8, 'flops': 2214805229440}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1898404492032}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1582003754624}, {'type': 'perplexity', 'in_batch_dim': [8, 112], 'batch_size': 8, 'flops': 2214805229440}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1582003754624}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1898404492032}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1898404492032}, {'type': 'perplexity', 'in_batch_dim': [8, 128], 'batch_size': 8, 'flops': 2531205966848}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1898404492032}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1898404492032}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1582003754624}, {'type': 'perplexity', 'in_batch_dim': [8, 112], 'batch_size': 8, 'flops': 2214805229440}, {'type': 'perplexity', 'in_batch_dim': [8, 128], 'batch_size': 8, 'flops': 2531205966848}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1898404492032}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1582003754624}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1582003754624}, {'type': 'perplexity', 'in_batch_dim': [8, 128], 'batch_size': 8, 'flops': 2531205966848}, {'type': 'perplexity', 'in_batch_dim': [8, 112], 'batch_size': 8, 'flops': 2214805229440}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1582003754624}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1582003754624}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1898404492032}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1582003754624}, {'type': 'perplexity', 'in_batch_dim': [8, 64], 'batch_size': 8, 'flops': 1265603017216}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1582003754624}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1582003754624}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1582003754624}, {'type': 'perplexity', 'in_batch_dim': [8, 112], 'batch_size': 8, 'flops': 2214805229440}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1582003754624}, {'type': 'perplexity', 'in_batch_dim': [8, 64], 'batch_size': 8, 'flops': 1265603017216}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1582003754624}, {'type': 'perplexity', 'in_batch_dim': [8, 128], 'batch_size': 8, 'flops': 2531205966848}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1898404492032}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1898404492032}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1582003754624}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1898404492032}, {'type': 'perplexity', 'in_batch_dim': [8, 64], 'batch_size': 8, 'flops': 1265603017216}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1898404492032}, {'type': 'perplexity', 'in_batch_dim': [8, 112], 'batch_size': 8, 'flops': 2214805229440}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1898404492032}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1582003754624}, {'type': 'perplexity', 'in_batch_dim': [8, 112], 'batch_size': 8, 'flops': 2214805229440}, {'type': 'perplexity', 'in_batch_dim': [8, 112], 'batch_size': 8, 'flops': 2214805229440}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1898404492032}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1898404492032}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1898404492032}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1898404492032}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1582003754624}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1582003754624}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1582003754624}, {'type': 'perplexity', 'in_batch_dim': [8, 112], 'batch_size': 8, 'flops': 2214805229440}, {'type': 'perplexity', 'in_batch_dim': [8, 64], 'batch_size': 8, 'flops': 1265603017216}, {'type': 'perplexity', 'in_batch_dim': [8, 144], 'batch_size': 8, 'flops': 2847606704256}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1898404492032}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1898404492032}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1898404492032}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1898404492032}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1898404492032}, {'type': 'perplexity', 'in_batch_dim': [8, 112], 'batch_size': 8, 'flops': 2214805229440}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1582003754624}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1582003754624}, {'type': 'perplexity', 'in_batch_dim': [8, 64], 'batch_size': 8, 'flops': 1265603017216}, {'type': 'perplexity', 'in_batch_dim': [8, 112], 'batch_size': 8, 'flops': 2214805229440}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1582003754624}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1898404492032}, {'type': 'perplexity', 'in_batch_dim': [8, 64], 'batch_size': 8, 'flops': 1265603017216}, {'type': 'perplexity', 'in_batch_dim': [8, 112], 'batch_size': 8, 'flops': 2214805229440}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1898404492032}, {'type': 'perplexity', 'in_batch_dim': [8, 64], 'batch_size': 8, 'flops': 1265603017216}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1582003754624}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1582003754624}, {'type': 'perplexity', 'in_batch_dim': [8, 64], 'batch_size': 8, 'flops': 1265603017216}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1898404492032}, {'type': 'perplexity', 'in_batch_dim': [8, 64], 'batch_size': 8, 'flops': 1265603017216}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1582003754624}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1898404492032}, {'type': 'perplexity', 'in_batch_dim': [8, 112], 'batch_size': 8, 'flops': 2214805229440}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1582003754624}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1898404492032}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1582003754624}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1898404492032}, {'type': 'perplexity', 'in_batch_dim': [8, 112], 'batch_size': 8, 'flops': 2214805229440}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1898404492032}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1898404492032}, {'type': 'perplexity', 'in_batch_dim': [8, 64], 'batch_size': 8, 'flops': 1265603017216}, {'type': 'perplexity', 'in_batch_dim': [8, 64], 'batch_size': 8, 'flops': 1265603017216}, {'type': 'perplexity', 'in_batch_dim': [8, 128], 'batch_size': 8, 'flops': 2531205966848}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1582003754624}, {'type': 'perplexity', 'in_batch_dim': [8, 64], 'batch_size': 8, 'flops': 1265603017216}, {'type': 'perplexity', 'in_batch_dim': [8, 112], 'batch_size': 8, 'flops': 2214805229440}, {'type': 'perplexity', 'in_batch_dim': [8, 112], 'batch_size': 8, 'flops': 2214805229440}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1898404492032}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1582003754624}, {'type': 'perplexity', 'in_batch_dim': [8, 128], 'batch_size': 8, 'flops': 2531205966848}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1898404492032}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1582003754624}, {'type': 'perplexity', 'in_batch_dim': [8, 112], 'batch_size': 8, 'flops': 2214805229440}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1898404492032}, {'type': 'perplexity', 'in_batch_dim': [8, 64], 'batch_size': 8, 'flops': 1265603017216}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1582003754624}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1898404492032}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1898404492032}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1582003754624}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1582003754624}, {'type': 'perplexity', 'in_batch_dim': [8, 64], 'batch_size': 8, 'flops': 1265603017216}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1898404492032}, {'type': 'perplexity', 'in_batch_dim': [8, 112], 'batch_size': 8, 'flops': 2214805229440}, {'type': 'perplexity', 'in_batch_dim': [8, 64], 'batch_size': 8, 'flops': 1265603017216}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1898404492032}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1582003754624}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1582003754624}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1898404492032}, {'type': 'perplexity', 'in_batch_dim': [8, 112], 'batch_size': 8, 'flops': 2214805229440}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1898404492032}, {'type': 'perplexity', 'in_batch_dim': [8, 112], 'batch_size': 8, 'flops': 2214805229440}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1582003754624}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1582003754624}, {'type': 'perplexity', 'in_batch_dim': [8, 64], 'batch_size': 8, 'flops': 1265603017216}, {'type': 'perplexity', 'in_batch_dim': [8, 112], 'batch_size': 8, 'flops': 2214805229440}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1582003754624}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1898404492032}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1582003754624}, {'type': 'perplexity', 'in_batch_dim': [8, 112], 'batch_size': 8, 'flops': 2214805229440}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1898404492032}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1898404492032}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1582003754624}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1898404492032}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1582003754624}, {'type': 'perplexity', 'in_batch_dim': [8, 112], 'batch_size': 8, 'flops': 2214805229440}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1582003754624}, {'type': 'perplexity', 'in_batch_dim': [8, 64], 'batch_size': 8, 'flops': 1265603017216}, {'type': 'perplexity', 'in_batch_dim': [8, 64], 'batch_size': 8, 'flops': 1265603017216}, {'type': 'perplexity', 'in_batch_dim': [8, 48], 'batch_size': 8, 'flops': 949202279808}, {'type': 'perplexity', 'in_batch_dim': [8, 112], 'batch_size': 8, 'flops': 2214805229440}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1582003754624}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1898404492032}, {'type': 'perplexity', 'in_batch_dim': [8, 128], 'batch_size': 8, 'flops': 2531205966848}, {'type': 'perplexity', 'in_batch_dim': [8, 112], 'batch_size': 8, 'flops': 2214805229440}, {'type': 'perplexity', 'in_batch_dim': [8, 128], 'batch_size': 8, 'flops': 2531205966848}, {'type': 'perplexity', 'in_batch_dim': [8, 112], 'batch_size': 8, 'flops': 2214805229440}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1582003754624}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1898404492032}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1582003754624}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1582003754624}, {'type': 'perplexity', 'in_batch_dim': [8, 112], 'batch_size': 8, 'flops': 2214805229440}, {'type': 'perplexity', 'in_batch_dim': [8, 112], 'batch_size': 8, 'flops': 2214805229440}, {'type': 'perplexity', 'in_batch_dim': [8, 64], 'batch_size': 8, 'flops': 1265603017216}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1898404492032}, {'type': 'perplexity', 'in_batch_dim': [8, 112], 'batch_size': 8, 'flops': 2214805229440}, {'type': 'perplexity', 'in_batch_dim': [8, 64], 'batch_size': 8, 'flops': 1265603017216}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1898404492032}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1582003754624}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1898404492032}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1898404492032}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1898404492032}, {'type': 'perplexity', 'in_batch_dim': [8, 48], 'batch_size': 8, 'flops': 949202279808}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1898404492032}, {'type': 'perplexity', 'in_batch_dim': [8, 112], 'batch_size': 8, 'flops': 2214805229440}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1582003754624}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1898404492032}, {'type': 'perplexity', 'in_batch_dim': [8, 64], 'batch_size': 8, 'flops': 1265603017216}, {'type': 'perplexity', 'in_batch_dim': [8, 112], 'batch_size': 8, 'flops': 2214805229440}, {'type': 'perplexity', 'in_batch_dim': [8, 64], 'batch_size': 8, 'flops': 1265603017216}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1582003754624}, {'type': 'perplexity', 'in_batch_dim': [8, 64], 'batch_size': 8, 'flops': 1265603017216}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1582003754624}, {'type': 'perplexity', 'in_batch_dim': [8, 112], 'batch_size': 8, 'flops': 2214805229440}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1582003754624}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1582003754624}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1582003754624}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1582003754624}, {'type': 'perplexity', 'in_batch_dim': [8, 112], 'batch_size': 8, 'flops': 2214805229440}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1582003754624}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1898404492032}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1898404492032}, {'type': 'perplexity', 'in_batch_dim': [8, 112], 'batch_size': 8, 'flops': 2214805229440}, {'type': 'perplexity', 'in_batch_dim': [8, 144], 'batch_size': 8, 'flops': 2847606704256}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1582003754624}, {'type': 'perplexity', 'in_batch_dim': [8, 64], 'batch_size': 8, 'flops': 1265603017216}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1898404492032}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1898404492032}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1582003754624}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1898404492032}, {'type': 'perplexity', 'in_batch_dim': [8, 128], 'batch_size': 8, 'flops': 2531205966848}, {'type': 'perplexity', 'in_batch_dim': [8, 112], 'batch_size': 8, 'flops': 2214805229440}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1898404492032}, {'type': 'perplexity', 'in_batch_dim': [8, 64], 'batch_size': 8, 'flops': 1265603017216}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1898404492032}, {'type': 'perplexity', 'in_batch_dim': [8, 160], 'batch_size': 8, 'flops': 3164007441664}, {'type': 'perplexity', 'in_batch_dim': [8, 64], 'batch_size': 8, 'flops': 1265603017216}, {'type': 'perplexity', 'in_batch_dim': [8, 112], 'batch_size': 8, 'flops': 2214805229440}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1898404492032}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1898404492032}, {'type': 'perplexity', 'in_batch_dim': [8, 144], 'batch_size': 8, 'flops': 2847606704256}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1582003754624}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1898404492032}, {'type': 'perplexity', 'in_batch_dim': [8, 64], 'batch_size': 8, 'flops': 1265603017216}, {'type': 'perplexity', 'in_batch_dim': [8, 64], 'batch_size': 8, 'flops': 1265603017216}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1898404492032}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1898404492032}, {'type': 'perplexity', 'in_batch_dim': [8, 112], 'batch_size': 8, 'flops': 2214805229440}, {'type': 'perplexity', 'in_batch_dim': [8, 112], 'batch_size': 8, 'flops': 2214805229440}, {'type': 'perplexity', 'in_batch_dim': [8, 128], 'batch_size': 8, 'flops': 2531205966848}, {'type': 'perplexity', 'in_batch_dim': [8, 112], 'batch_size': 8, 'flops': 2214805229440}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1582003754624}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1582003754624}, {'type': 'perplexity', 'in_batch_dim': [8, 112], 'batch_size': 8, 'flops': 2214805229440}, {'type': 'perplexity', 'in_batch_dim': [8, 128], 'batch_size': 8, 'flops': 2531205966848}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1582003754624}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1582003754624}, {'type': 'perplexity', 'in_batch_dim': [8, 64], 'batch_size': 8, 'flops': 1265603017216}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1898404492032}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1582003754624}, {'type': 'perplexity', 'in_batch_dim': [8, 112], 'batch_size': 8, 'flops': 2214805229440}, {'type': 'perplexity', 'in_batch_dim': [8, 64], 'batch_size': 8, 'flops': 1265603017216}, {'type': 'perplexity', 'in_batch_dim': [8, 64], 'batch_size': 8, 'flops': 1265603017216}, {'type': 'perplexity', 'in_batch_dim': [8, 128], 'batch_size': 8, 'flops': 2531205966848}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1898404492032}, {'type': 'perplexity', 'in_batch_dim': [8, 112], 'batch_size': 8, 'flops': 2214805229440}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1582003754624}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1582003754624}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1582003754624}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1582003754624}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1582003754624}, {'type': 'perplexity', 'in_batch_dim': [8, 112], 'batch_size': 8, 'flops': 2214805229440}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1898404492032}, {'type': 'perplexity', 'in_batch_dim': [8, 112], 'batch_size': 8, 'flops': 2214805229440}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1898404492032}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1582003754624}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1582003754624}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1898404492032}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1898404492032}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1582003754624}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1582003754624}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1582003754624}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1582003754624}, {'type': 'perplexity', 'in_batch_dim': [8, 48], 'batch_size': 8, 'flops': 949202279808}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1582003754624}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1898404492032}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1582003754624}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1898404492032}, {'type': 'perplexity', 'in_batch_dim': [8, 112], 'batch_size': 8, 'flops': 2214805229440}, {'type': 'perplexity', 'in_batch_dim': [8, 128], 'batch_size': 8, 'flops': 2531205966848}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1898404492032}, {'type': 'perplexity', 'in_batch_dim': [8, 64], 'batch_size': 8, 'flops': 1265603017216}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1898404492032}, {'type': 'perplexity', 'in_batch_dim': [8, 64], 'batch_size': 8, 'flops': 1265603017216}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1582003754624}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1582003754624}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1582003754624}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1582003754624}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1898404492032}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1582003754624}, {'type': 'perplexity', 'in_batch_dim': [8, 112], 'batch_size': 8, 'flops': 2214805229440}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1898404492032}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1582003754624}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1898404492032}, {'type': 'perplexity', 'in_batch_dim': [8, 64], 'batch_size': 8, 'flops': 1265603017216}, {'type': 'perplexity', 'in_batch_dim': [8, 64], 'batch_size': 8, 'flops': 1265603017216}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1898404492032}, {'type': 'perplexity', 'in_batch_dim': [8, 64], 'batch_size': 8, 'flops': 1265603017216}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1582003754624}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1582003754624}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1582003754624}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1582003754624}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1582003754624}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1582003754624}, {'type': 'perplexity', 'in_batch_dim': [8, 64], 'batch_size': 8, 'flops': 1265603017216}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1582003754624}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1582003754624}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1898404492032}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1898404492032}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1898404492032}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1898404492032}, {'type': 'perplexity', 'in_batch_dim': [8, 128], 'batch_size': 8, 'flops': 2531205966848}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1582003754624}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1898404492032}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1898404492032}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1898404492032}, {'type': 'perplexity', 'in_batch_dim': [8, 64], 'batch_size': 8, 'flops': 1265603017216}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1898404492032}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1898404492032}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1582003754624}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1898404492032}, {'type': 'perplexity', 'in_batch_dim': [8, 112], 'batch_size': 8, 'flops': 2214805229440}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1898404492032}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1582003754624}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1898404492032}, {'type': 'perplexity', 'in_batch_dim': [8, 64], 'batch_size': 8, 'flops': 1265603017216}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1898404492032}, {'type': 'perplexity', 'in_batch_dim': [8, 112], 'batch_size': 8, 'flops': 2214805229440}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1898404492032}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1898404492032}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1582003754624}, {'type': 'perplexity', 'in_batch_dim': [8, 64], 'batch_size': 8, 'flops': 1265603017216}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1582003754624}, {'type': 'perplexity', 'in_batch_dim': [8, 128], 'batch_size': 8, 'flops': 2531205966848}, {'type': 'perplexity', 'in_batch_dim': [8, 64], 'batch_size': 8, 'flops': 1265603017216}, {'type': 'perplexity', 'in_batch_dim': [8, 128], 'batch_size': 8, 'flops': 2531205966848}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1898404492032}, {'type': 'perplexity', 'in_batch_dim': [8, 112], 'batch_size': 8, 'flops': 2214805229440}, {'type': 'perplexity', 'in_batch_dim': [8, 112], 'batch_size': 8, 'flops': 2214805229440}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1898404492032}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1898404492032}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1582003754624}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1582003754624}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1582003754624}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1898404492032}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1582003754624}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1582003754624}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1898404492032}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1582003754624}, {'type': 'perplexity', 'in_batch_dim': [8, 64], 'batch_size': 8, 'flops': 1265603017216}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1898404492032}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1898404492032}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1898404492032}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1898404492032}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1582003754624}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1898404492032}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1582003754624}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1582003754624}, {'type': 'perplexity', 'in_batch_dim': [8, 112], 'batch_size': 8, 'flops': 2214805229440}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1582003754624}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1898404492032}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1582003754624}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1582003754624}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1582003754624}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1898404492032}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1582003754624}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1898404492032}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1582003754624}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1582003754624}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1898404492032}, {'type': 'perplexity', 'in_batch_dim': [8, 112], 'batch_size': 8, 'flops': 2214805229440}, {'type': 'perplexity', 'in_batch_dim': [8, 128], 'batch_size': 8, 'flops': 2531205966848}, {'type': 'perplexity', 'in_batch_dim': [8, 112], 'batch_size': 8, 'flops': 2214805229440}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1582003754624}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1898404492032}, {'type': 'perplexity', 'in_batch_dim': [8, 64], 'batch_size': 8, 'flops': 1265603017216}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1898404492032}, {'type': 'perplexity', 'in_batch_dim': [8, 64], 'batch_size': 8, 'flops': 1265603017216}, {'type': 'perplexity', 'in_batch_dim': [8, 144], 'batch_size': 8, 'flops': 2847606704256}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1582003754624}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1898404492032}, {'type': 'perplexity', 'in_batch_dim': [8, 112], 'batch_size': 8, 'flops': 2214805229440}, {'type': 'perplexity', 'in_batch_dim': [8, 144], 'batch_size': 8, 'flops': 2847606704256}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1582003754624}, {'type': 'perplexity', 'in_batch_dim': [8, 112], 'batch_size': 8, 'flops': 2214805229440}, {'type': 'perplexity', 'in_batch_dim': [8, 64], 'batch_size': 8, 'flops': 1265603017216}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1582003754624}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1898404492032}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1898404492032}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1582003754624}, {'type': 'perplexity', 'in_batch_dim': [8, 112], 'batch_size': 8, 'flops': 2214805229440}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1582003754624}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1582003754624}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1582003754624}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1582003754624}, {'type': 'perplexity', 'in_batch_dim': [8, 112], 'batch_size': 8, 'flops': 2214805229440}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1898404492032}, {'type': 'perplexity', 'in_batch_dim': [8, 128], 'batch_size': 8, 'flops': 2531205966848}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1582003754624}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1582003754624}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1898404492032}, {'type': 'perplexity', 'in_batch_dim': [8, 64], 'batch_size': 8, 'flops': 1265603017216}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1898404492032}, {'type': 'perplexity', 'in_batch_dim': [8, 64], 'batch_size': 8, 'flops': 1265603017216}, {'type': 'perplexity', 'in_batch_dim': [8, 112], 'batch_size': 8, 'flops': 2214805229440}, {'type': 'perplexity', 'in_batch_dim': [8, 112], 'batch_size': 8, 'flops': 2214805229440}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1582003754624}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1898404492032}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1582003754624}, {'type': 'perplexity', 'in_batch_dim': [8, 64], 'batch_size': 8, 'flops': 1265603017216}, {'type': 'perplexity', 'in_batch_dim': [8, 64], 'batch_size': 8, 'flops': 1265603017216}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1898404492032}, {'type': 'perplexity', 'in_batch_dim': [8, 64], 'batch_size': 8, 'flops': 1265603017216}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1898404492032}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1898404492032}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1582003754624}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1898404492032}, {'type': 'perplexity', 'in_batch_dim': [8, 144], 'batch_size': 8, 'flops': 2847606704256}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1898404492032}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1582003754624}, {'type': 'perplexity', 'in_batch_dim': [8, 128], 'batch_size': 8, 'flops': 2531205966848}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1898404492032}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1898404492032}, {'type': 'perplexity', 'in_batch_dim': [8, 112], 'batch_size': 8, 'flops': 2214805229440}, {'type': 'perplexity', 'in_batch_dim': [8, 112], 'batch_size': 8, 'flops': 2214805229440}, {'type': 'perplexity', 'in_batch_dim': [8, 64], 'batch_size': 8, 'flops': 1265603017216}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1582003754624}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1898404492032}, {'type': 'perplexity', 'in_batch_dim': [8, 128], 'batch_size': 8, 'flops': 2531205966848}, {'type': 'perplexity', 'in_batch_dim': [8, 144], 'batch_size': 8, 'flops': 2847606704256}, {'type': 'perplexity', 'in_batch_dim': [8, 112], 'batch_size': 8, 'flops': 2214805229440}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1582003754624}, {'type': 'perplexity', 'in_batch_dim': [8, 64], 'batch_size': 8, 'flops': 1265603017216}, {'type': 'perplexity', 'in_batch_dim': [8, 64], 'batch_size': 8, 'flops': 1265603017216}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1898404492032}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1582003754624}, {'type': 'perplexity', 'in_batch_dim': [8, 64], 'batch_size': 8, 'flops': 1265603017216}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1898404492032}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1582003754624}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1898404492032}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1898404492032}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1582003754624}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1582003754624}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1582003754624}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1582003754624}, {'type': 'perplexity', 'in_batch_dim': [3, 48], 'batch_size': 8, 'flops': 949202279808}], 'timestamp': '2025-09-10 02:34:43.955724', 'step': 7456, 'epoch': 1} {'type': 'pplx', 'content': 9261.75344325144, 'timestamp': '2025-09-10 02:34:43.962825', 'step': 7456, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 3322488817984}, 'timestamp': '2025-09-10 02:34:43.993531', 'step': 7456, 'epoch': 1} {'type': 'loss', 'content': 0.1521717756986618, 'timestamp': '2025-09-10 02:34:43.997746', 'step': 7457, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 3322488817984}, 'timestamp': '2025-09-10 02:34:44.028586', 'step': 7457, 'epoch': 1} {'type': 'loss', 'content': 0.13300436735153198, 'timestamp': '2025-09-10 02:34:44.031843', 'step': 7458, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 2847885091968}, 'timestamp': '2025-09-10 02:34:44.065324', 'step': 7458, 'epoch': 1} {'type': 'loss', 'content': 0.06959982961416245, 'timestamp': '2025-09-10 02:34:44.068947', 'step': 7459, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [1, 208], 'flops': 1542724875376}, 'timestamp': '2025-09-10 02:34:44.146294', 'step': 7459, 'epoch': 1} {'type': 'loss', 'content': 0.3879510164260864, 'timestamp': '2025-09-10 02:34:44.170146', 'step': 7460, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 3322488817984}, 'timestamp': '2025-09-10 02:34:44.201306', 'step': 7460, 'epoch': 2} {'type': 'loss', 'content': 0.07873374968767166, 'timestamp': '2025-09-10 02:34:44.203729', 'step': 7461, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 3322488817984}, 'timestamp': '2025-09-10 02:34:44.234366', 'step': 7461, 'epoch': 2} {'type': 'loss', 'content': 0.10577568411827087, 'timestamp': '2025-09-10 02:34:44.236790', 'step': 7462, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 2847885091968}, 'timestamp': '2025-09-10 02:34:44.269380', 'step': 7462, 'epoch': 2} {'type': 'loss', 'content': 0.13085685670375824, 'timestamp': '2025-09-10 02:34:44.271961', 'step': 7463, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 128], 'flops': 3797092544000}, 'timestamp': '2025-09-10 02:34:44.302861', 'step': 7463, 'epoch': 2} {'type': 'loss', 'content': 0.13053947687149048, 'timestamp': '2025-09-10 02:34:44.326359', 'step': 7464, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 128], 'flops': 3797092544000}, 'timestamp': '2025-09-10 02:34:44.356851', 'step': 7464, 'epoch': 2} {'type': 'loss', 'content': 0.21844075620174408, 'timestamp': '2025-09-10 02:34:44.359068', 'step': 7465, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 3322488817984}, 'timestamp': '2025-09-10 02:34:44.388995', 'step': 7465, 'epoch': 2} {'type': 'loss', 'content': 0.08673840016126633, 'timestamp': '2025-09-10 02:34:44.392253', 'step': 7466, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 3322488817984}, 'timestamp': '2025-09-10 02:34:44.422208', 'step': 7466, 'epoch': 2} {'type': 'loss', 'content': 0.19090449810028076, 'timestamp': '2025-09-10 02:34:44.424791', 'step': 7467, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 3322488817984}, 'timestamp': '2025-09-10 02:34:44.455170', 'step': 7467, 'epoch': 2} {'type': 'loss', 'content': 0.06973331421613693, 'timestamp': '2025-09-10 02:34:44.478803', 'step': 7468, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 3322488817984}, 'timestamp': '2025-09-10 02:34:44.509428', 'step': 7468, 'epoch': 2} {'type': 'loss', 'content': 0.09606929868459702, 'timestamp': '2025-09-10 02:34:44.511715', 'step': 7469, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 2847885091968}, 'timestamp': '2025-09-10 02:34:44.541742', 'step': 7469, 'epoch': 2} {'type': 'loss', 'content': 0.1331539750099182, 'timestamp': '2025-09-10 02:34:44.545982', 'step': 7470, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 2847885091968}, 'timestamp': '2025-09-10 02:34:44.575964', 'step': 7470, 'epoch': 2} {'type': 'loss', 'content': 0.08619929105043411, 'timestamp': '2025-09-10 02:34:44.578335', 'step': 7471, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 3322488817984}, 'timestamp': '2025-09-10 02:34:44.608451', 'step': 7471, 'epoch': 2} {'type': 'loss', 'content': 0.15377120673656464, 'timestamp': '2025-09-10 02:34:44.631681', 'step': 7472, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 3322488817984}, 'timestamp': '2025-09-10 02:34:44.662020', 'step': 7472, 'epoch': 2} {'type': 'loss', 'content': 0.0987619012594223, 'timestamp': '2025-09-10 02:34:44.664195', 'step': 7473, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 128], 'flops': 3797092544000}, 'timestamp': '2025-09-10 02:34:44.694305', 'step': 7473, 'epoch': 2} {'type': 'loss', 'content': 0.09283756464719772, 'timestamp': '2025-09-10 02:34:44.697449', 'step': 7474, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 3322488817984}, 'timestamp': '2025-09-10 02:34:44.729185', 'step': 7474, 'epoch': 2} {'type': 'loss', 'content': 0.11481689661741257, 'timestamp': '2025-09-10 02:34:44.731741', 'step': 7475, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 128], 'flops': 3797092544000}, 'timestamp': '2025-09-10 02:34:44.762059', 'step': 7475, 'epoch': 2} {'type': 'loss', 'content': 0.17948709428310394, 'timestamp': '2025-09-10 02:34:44.785779', 'step': 7476, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 2847885091968}, 'timestamp': '2025-09-10 02:34:44.815566', 'step': 7476, 'epoch': 2} {'type': 'loss', 'content': 0.05581623315811157, 'timestamp': '2025-09-10 02:34:44.818249', 'step': 7477, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 2847885091968}, 'timestamp': '2025-09-10 02:34:44.848467', 'step': 7477, 'epoch': 2} {'type': 'loss', 'content': 0.1523466855287552, 'timestamp': '2025-09-10 02:34:44.850792', 'step': 7478, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 144], 'flops': 4271696270016}, 'timestamp': '2025-09-10 02:34:44.881148', 'step': 7478, 'epoch': 2} {'type': 'loss', 'content': 0.20822562277317047, 'timestamp': '2025-09-10 02:34:44.883513', 'step': 7479, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 2847885091968}, 'timestamp': '2025-09-10 02:34:44.913899', 'step': 7479, 'epoch': 2} {'type': 'loss', 'content': 0.05913921818137169, 'timestamp': '2025-09-10 02:34:44.937685', 'step': 7480, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 2847885091968}, 'timestamp': '2025-09-10 02:34:44.968149', 'step': 7480, 'epoch': 2} {'type': 'loss', 'content': 0.06903538852930069, 'timestamp': '2025-09-10 02:34:44.970352', 'step': 7481, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 128], 'flops': 3797092544000}, 'timestamp': '2025-09-10 02:34:45.000291', 'step': 7481, 'epoch': 2} {'type': 'loss', 'content': 0.13949991762638092, 'timestamp': '2025-09-10 02:34:45.002461', 'step': 7482, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 128], 'flops': 3797092544000}, 'timestamp': '2025-09-10 02:34:45.034415', 'step': 7482, 'epoch': 2} {'type': 'loss', 'content': 0.06964024156332016, 'timestamp': '2025-09-10 02:34:45.036889', 'step': 7483, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 2847885091968}, 'timestamp': '2025-09-10 02:34:45.067685', 'step': 7483, 'epoch': 2} {'type': 'loss', 'content': 0.13886183500289917, 'timestamp': '2025-09-10 02:34:45.091327', 'step': 7484, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 3322488817984}, 'timestamp': '2025-09-10 02:34:45.127367', 'step': 7484, 'epoch': 2} {'type': 'loss', 'content': 0.056898847222328186, 'timestamp': '2025-09-10 02:34:45.130810', 'step': 7485, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 160], 'flops': 4746299996032}, 'timestamp': '2025-09-10 02:34:45.162910', 'step': 7485, 'epoch': 2} {'type': 'loss', 'content': 0.130764439702034, 'timestamp': '2025-09-10 02:34:45.165786', 'step': 7486, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 3322488817984}, 'timestamp': '2025-09-10 02:34:45.196019', 'step': 7486, 'epoch': 2} {'type': 'loss', 'content': 0.1967627853155136, 'timestamp': '2025-09-10 02:34:45.198326', 'step': 7487, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 128], 'flops': 3797092544000}, 'timestamp': '2025-09-10 02:34:45.228749', 'step': 7487, 'epoch': 2} {'type': 'loss', 'content': 0.0956702008843422, 'timestamp': '2025-09-10 02:34:45.255213', 'step': 7488, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 128], 'flops': 3797092544000}, 'timestamp': '2025-09-10 02:34:45.293715', 'step': 7488, 'epoch': 2} {'type': 'loss', 'content': 0.10603191703557968, 'timestamp': '2025-09-10 02:34:45.296398', 'step': 7489, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 2847885091968}, 'timestamp': '2025-09-10 02:34:45.328784', 'step': 7489, 'epoch': 2} {'type': 'loss', 'content': 0.07295876741409302, 'timestamp': '2025-09-10 02:34:45.331364', 'step': 7490, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 3322488817984}, 'timestamp': '2025-09-10 02:34:45.361618', 'step': 7490, 'epoch': 2} {'type': 'loss', 'content': 0.13520389795303345, 'timestamp': '2025-09-10 02:34:45.363957', 'step': 7491, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 128], 'flops': 3797092544000}, 'timestamp': '2025-09-10 02:34:45.394328', 'step': 7491, 'epoch': 2} {'type': 'loss', 'content': 0.082381971180439, 'timestamp': '2025-09-10 02:34:45.417847', 'step': 7492, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 3322488817984}, 'timestamp': '2025-09-10 02:34:45.448304', 'step': 7492, 'epoch': 2} {'type': 'loss', 'content': 0.19308991730213165, 'timestamp': '2025-09-10 02:34:45.450527', 'step': 7493, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 3322488817984}, 'timestamp': '2025-09-10 02:34:45.483023', 'step': 7493, 'epoch': 2} {'type': 'loss', 'content': 0.13650064170360565, 'timestamp': '2025-09-10 02:34:45.485484', 'step': 7494, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 128], 'flops': 3797092544000}, 'timestamp': '2025-09-10 02:34:45.515887', 'step': 7494, 'epoch': 2} {'type': 'loss', 'content': 0.18843476474285126, 'timestamp': '2025-09-10 02:34:45.518279', 'step': 7495, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 128], 'flops': 3797092544000}, 'timestamp': '2025-09-10 02:34:45.548279', 'step': 7495, 'epoch': 2} {'type': 'loss', 'content': 0.06091931089758873, 'timestamp': '2025-09-10 02:34:45.571891', 'step': 7496, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 3322488817984}, 'timestamp': '2025-09-10 02:34:45.602269', 'step': 7496, 'epoch': 2} {'type': 'loss', 'content': 0.12141567468643188, 'timestamp': '2025-09-10 02:34:45.604564', 'step': 7497, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 2847885091968}, 'timestamp': '2025-09-10 02:34:45.634474', 'step': 7497, 'epoch': 2} {'type': 'loss', 'content': 0.08907576650381088, 'timestamp': '2025-09-10 02:34:45.636796', 'step': 7498, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 2847885091968}, 'timestamp': '2025-09-10 02:34:45.667245', 'step': 7498, 'epoch': 2} {'type': 'loss', 'content': 0.1766258031129837, 'timestamp': '2025-09-10 02:34:45.669622', 'step': 7499, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 3322488817984}, 'timestamp': '2025-09-10 02:34:45.701047', 'step': 7499, 'epoch': 2} {'type': 'loss', 'content': 0.1435932219028473, 'timestamp': '2025-09-10 02:34:45.724722', 'step': 7500, 'epoch': 2} {'type': 'info', 'content': 'Checkpoint saved at step 7500', 'timestamp': '2025-09-10 02:34:51.937988', 'step': 7500, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 128], 'flops': 3797092544000}, 'timestamp': '2025-09-10 02:34:52.005639', 'step': 7500, 'epoch': 2} {'type': 'loss', 'content': 0.0896831750869751, 'timestamp': '2025-09-10 02:34:52.018616', 'step': 7501, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 3322488817984}, 'timestamp': '2025-09-10 02:34:52.066968', 'step': 7501, 'epoch': 2} {'type': 'loss', 'content': 0.13509424030780792, 'timestamp': '2025-09-10 02:34:52.069783', 'step': 7502, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 3322488817984}, 'timestamp': '2025-09-10 02:34:52.099919', 'step': 7502, 'epoch': 2} {'type': 'loss', 'content': 0.13347148895263672, 'timestamp': '2025-09-10 02:34:52.102553', 'step': 7503, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 3322488817984}, 'timestamp': '2025-09-10 02:34:52.133723', 'step': 7503, 'epoch': 2} {'type': 'loss', 'content': 0.1699630469083786, 'timestamp': '2025-09-10 02:34:52.157584', 'step': 7504, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 3322488817984}, 'timestamp': '2025-09-10 02:34:52.188665', 'step': 7504, 'epoch': 2} {'type': 'loss', 'content': 0.18813203275203705, 'timestamp': '2025-09-10 02:34:52.191195', 'step': 7505, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 2847885091968}, 'timestamp': '2025-09-10 02:34:52.221459', 'step': 7505, 'epoch': 2} {'type': 'loss', 'content': 0.1270269751548767, 'timestamp': '2025-09-10 02:34:52.223754', 'step': 7506, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 144], 'flops': 4271696270016}, 'timestamp': '2025-09-10 02:34:52.254186', 'step': 7506, 'epoch': 2} {'type': 'loss', 'content': 0.14170950651168823, 'timestamp': '2025-09-10 02:34:52.259013', 'step': 7507, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 3322488817984}, 'timestamp': '2025-09-10 02:34:52.297196', 'step': 7507, 'epoch': 2} {'type': 'loss', 'content': 0.18583951890468597, 'timestamp': '2025-09-10 02:34:52.320624', 'step': 7508, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 128], 'flops': 3797092544000}, 'timestamp': '2025-09-10 02:34:52.350843', 'step': 7508, 'epoch': 2} {'type': 'loss', 'content': 0.18844875693321228, 'timestamp': '2025-09-10 02:34:52.353032', 'step': 7509, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 3322488817984}, 'timestamp': '2025-09-10 02:34:52.390042', 'step': 7509, 'epoch': 2} {'type': 'loss', 'content': 0.10242427885532379, 'timestamp': '2025-09-10 02:34:52.411856', 'step': 7510, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 2847885091968}, 'timestamp': '2025-09-10 02:34:52.491730', 'step': 7510, 'epoch': 2} {'type': 'loss', 'content': 0.1528681367635727, 'timestamp': '2025-09-10 02:34:52.508172', 'step': 7511, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 3322488817984}, 'timestamp': '2025-09-10 02:34:52.581330', 'step': 7511, 'epoch': 2} {'type': 'loss', 'content': 0.20324520766735077, 'timestamp': '2025-09-10 02:34:52.605875', 'step': 7512, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 3322488817984}, 'timestamp': '2025-09-10 02:34:52.643636', 'step': 7512, 'epoch': 2} {'type': 'loss', 'content': 0.10303281247615814, 'timestamp': '2025-09-10 02:34:52.648268', 'step': 7513, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 144], 'flops': 4271696270016}, 'timestamp': '2025-09-10 02:34:52.684719', 'step': 7513, 'epoch': 2} {'type': 'loss', 'content': 0.09942246228456497, 'timestamp': '2025-09-10 02:34:52.688237', 'step': 7514, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 144], 'flops': 4271696270016}, 'timestamp': '2025-09-10 02:34:52.719939', 'step': 7514, 'epoch': 2} {'type': 'loss', 'content': 0.13725833594799042, 'timestamp': '2025-09-10 02:34:52.723515', 'step': 7515, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 80], 'flops': 2373281365952}, 'timestamp': '2025-09-10 02:34:52.754045', 'step': 7515, 'epoch': 2} {'type': 'loss', 'content': 0.09476619213819504, 'timestamp': '2025-09-10 02:34:52.778688', 'step': 7516, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 128], 'flops': 3797092544000}, 'timestamp': '2025-09-10 02:34:52.811010', 'step': 7516, 'epoch': 2} {'type': 'loss', 'content': 0.10694406181573868, 'timestamp': '2025-09-10 02:34:52.822782', 'step': 7517, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 2847885091968}, 'timestamp': '2025-09-10 02:34:52.875637', 'step': 7517, 'epoch': 2} {'type': 'loss', 'content': 0.16171640157699585, 'timestamp': '2025-09-10 02:34:52.881193', 'step': 7518, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 3322488817984}, 'timestamp': '2025-09-10 02:34:52.937372', 'step': 7518, 'epoch': 2} {'type': 'loss', 'content': 0.15390294790267944, 'timestamp': '2025-09-10 02:34:52.939936', 'step': 7519, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 160], 'flops': 4746299996032}, 'timestamp': '2025-09-10 02:34:52.970889', 'step': 7519, 'epoch': 2} {'type': 'loss', 'content': 0.16111785173416138, 'timestamp': '2025-09-10 02:34:52.996425', 'step': 7520, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 128], 'flops': 3797092544000}, 'timestamp': '2025-09-10 02:34:53.032867', 'step': 7520, 'epoch': 2} {'type': 'loss', 'content': 0.1435500830411911, 'timestamp': '2025-09-10 02:34:53.035822', 'step': 7521, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 128], 'flops': 3797092544000}, 'timestamp': '2025-09-10 02:34:53.069701', 'step': 7521, 'epoch': 2} {'type': 'loss', 'content': 0.09620339423418045, 'timestamp': '2025-09-10 02:34:53.072191', 'step': 7522, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 3322488817984}, 'timestamp': '2025-09-10 02:34:53.103749', 'step': 7522, 'epoch': 2} {'type': 'loss', 'content': 0.20494391024112701, 'timestamp': '2025-09-10 02:34:53.107784', 'step': 7523, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 2847885091968}, 'timestamp': '2025-09-10 02:34:53.138546', 'step': 7523, 'epoch': 2} {'type': 'loss', 'content': 0.10123452544212341, 'timestamp': '2025-09-10 02:34:53.162361', 'step': 7524, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 3322488817984}, 'timestamp': '2025-09-10 02:34:53.193280', 'step': 7524, 'epoch': 2} {'type': 'loss', 'content': 0.053282301872968674, 'timestamp': '2025-09-10 02:34:53.200715', 'step': 7525, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 80], 'flops': 2373281365952}, 'timestamp': '2025-09-10 02:34:53.236370', 'step': 7525, 'epoch': 2} {'type': 'loss', 'content': 0.1607605516910553, 'timestamp': '2025-09-10 02:34:53.238998', 'step': 7526, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 144], 'flops': 4271696270016}, 'timestamp': '2025-09-10 02:34:53.268925', 'step': 7526, 'epoch': 2} {'type': 'loss', 'content': 0.061869796365499496, 'timestamp': '2025-09-10 02:34:53.271628', 'step': 7527, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 2847885091968}, 'timestamp': '2025-09-10 02:34:53.302541', 'step': 7527, 'epoch': 2} {'type': 'loss', 'content': 0.13715937733650208, 'timestamp': '2025-09-10 02:34:53.326595', 'step': 7528, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 128], 'flops': 3797092544000}, 'timestamp': '2025-09-10 02:34:53.356781', 'step': 7528, 'epoch': 2} {'type': 'loss', 'content': 0.19074970483779907, 'timestamp': '2025-09-10 02:34:53.359320', 'step': 7529, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 2847885091968}, 'timestamp': '2025-09-10 02:34:53.389151', 'step': 7529, 'epoch': 2} {'type': 'loss', 'content': 0.17757582664489746, 'timestamp': '2025-09-10 02:34:53.391521', 'step': 7530, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 2847885091968}, 'timestamp': '2025-09-10 02:34:53.421038', 'step': 7530, 'epoch': 2} {'type': 'loss', 'content': 0.11853992193937302, 'timestamp': '2025-09-10 02:34:53.424610', 'step': 7531, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 2847885091968}, 'timestamp': '2025-09-10 02:34:53.454815', 'step': 7531, 'epoch': 2} {'type': 'loss', 'content': 0.12662741541862488, 'timestamp': '2025-09-10 02:34:53.478508', 'step': 7532, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 3322488817984}, 'timestamp': '2025-09-10 02:34:53.510198', 'step': 7532, 'epoch': 2} {'type': 'loss', 'content': 0.16034860908985138, 'timestamp': '2025-09-10 02:34:53.513146', 'step': 7533, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 2847885091968}, 'timestamp': '2025-09-10 02:34:53.542540', 'step': 7533, 'epoch': 2} {'type': 'loss', 'content': 0.07288715243339539, 'timestamp': '2025-09-10 02:34:53.544787', 'step': 7534, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 128], 'flops': 3797092544000}, 'timestamp': '2025-09-10 02:34:53.577112', 'step': 7534, 'epoch': 2} {'type': 'loss', 'content': 0.12070103734731674, 'timestamp': '2025-09-10 02:34:53.579502', 'step': 7535, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 3322488817984}, 'timestamp': '2025-09-10 02:34:53.611308', 'step': 7535, 'epoch': 2} {'type': 'loss', 'content': 0.2195018231868744, 'timestamp': '2025-09-10 02:34:53.636299', 'step': 7536, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 128], 'flops': 3797092544000}, 'timestamp': '2025-09-10 02:34:53.666796', 'step': 7536, 'epoch': 2} {'type': 'loss', 'content': 0.08610030263662338, 'timestamp': '2025-09-10 02:34:53.669432', 'step': 7537, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 2847885091968}, 'timestamp': '2025-09-10 02:34:53.700272', 'step': 7537, 'epoch': 2} {'type': 'loss', 'content': 0.10054648667573929, 'timestamp': '2025-09-10 02:34:53.702658', 'step': 7538, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 2847885091968}, 'timestamp': '2025-09-10 02:34:53.734015', 'step': 7538, 'epoch': 2} {'type': 'loss', 'content': 0.11058870702981949, 'timestamp': '2025-09-10 02:34:53.736414', 'step': 7539, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 2847885091968}, 'timestamp': '2025-09-10 02:34:53.766711', 'step': 7539, 'epoch': 2} {'type': 'loss', 'content': 0.12357958406209946, 'timestamp': '2025-09-10 02:34:53.790482', 'step': 7540, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 80], 'flops': 2373281365952}, 'timestamp': '2025-09-10 02:34:53.820359', 'step': 7540, 'epoch': 2} {'type': 'loss', 'content': 0.13314692676067352, 'timestamp': '2025-09-10 02:34:53.822979', 'step': 7541, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 2847885091968}, 'timestamp': '2025-09-10 02:34:53.852891', 'step': 7541, 'epoch': 2} {'type': 'loss', 'content': 0.1539379209280014, 'timestamp': '2025-09-10 02:34:53.855487', 'step': 7542, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 128], 'flops': 3797092544000}, 'timestamp': '2025-09-10 02:34:53.885599', 'step': 7542, 'epoch': 2} {'type': 'loss', 'content': 0.13175392150878906, 'timestamp': '2025-09-10 02:34:53.887993', 'step': 7543, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 3322488817984}, 'timestamp': '2025-09-10 02:34:53.917995', 'step': 7543, 'epoch': 2} {'type': 'loss', 'content': 0.12340109050273895, 'timestamp': '2025-09-10 02:34:53.943078', 'step': 7544, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 2847885091968}, 'timestamp': '2025-09-10 02:34:53.973359', 'step': 7544, 'epoch': 2} {'type': 'loss', 'content': 0.09362932294607162, 'timestamp': '2025-09-10 02:34:53.976435', 'step': 7545, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 128], 'flops': 3797092544000}, 'timestamp': '2025-09-10 02:34:54.005803', 'step': 7545, 'epoch': 2} {'type': 'loss', 'content': 0.07984548807144165, 'timestamp': '2025-09-10 02:34:54.009537', 'step': 7546, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 3322488817984}, 'timestamp': '2025-09-10 02:34:54.040340', 'step': 7546, 'epoch': 2} {'type': 'loss', 'content': 0.14706265926361084, 'timestamp': '2025-09-10 02:34:54.042749', 'step': 7547, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 128], 'flops': 3797092544000}, 'timestamp': '2025-09-10 02:34:54.072795', 'step': 7547, 'epoch': 2} {'type': 'loss', 'content': 0.21996377408504486, 'timestamp': '2025-09-10 02:34:54.096609', 'step': 7548, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 3322488817984}, 'timestamp': '2025-09-10 02:34:54.126835', 'step': 7548, 'epoch': 2} {'type': 'loss', 'content': 0.0837424099445343, 'timestamp': '2025-09-10 02:34:54.129522', 'step': 7549, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 3322488817984}, 'timestamp': '2025-09-10 02:34:54.160189', 'step': 7549, 'epoch': 2} {'type': 'loss', 'content': 0.2112722396850586, 'timestamp': '2025-09-10 02:34:54.162650', 'step': 7550, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 3322488817984}, 'timestamp': '2025-09-10 02:34:54.192855', 'step': 7550, 'epoch': 2} {'type': 'loss', 'content': 0.08075360953807831, 'timestamp': '2025-09-10 02:34:54.195360', 'step': 7551, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 3322488817984}, 'timestamp': '2025-09-10 02:34:54.225541', 'step': 7551, 'epoch': 2} {'type': 'loss', 'content': 0.10676535964012146, 'timestamp': '2025-09-10 02:34:54.249442', 'step': 7552, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 3322488817984}, 'timestamp': '2025-09-10 02:34:54.280675', 'step': 7552, 'epoch': 2} {'type': 'loss', 'content': 0.13987918198108673, 'timestamp': '2025-09-10 02:34:54.283452', 'step': 7553, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 160], 'flops': 4746299996032}, 'timestamp': '2025-09-10 02:34:54.315896', 'step': 7553, 'epoch': 2} {'type': 'loss', 'content': 0.1003153845667839, 'timestamp': '2025-09-10 02:34:54.319000', 'step': 7554, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 3322488817984}, 'timestamp': '2025-09-10 02:34:54.348219', 'step': 7554, 'epoch': 2} {'type': 'loss', 'content': 0.09026581048965454, 'timestamp': '2025-09-10 02:34:54.350830', 'step': 7555, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 128], 'flops': 3797092544000}, 'timestamp': '2025-09-10 02:34:54.380645', 'step': 7555, 'epoch': 2} {'type': 'loss', 'content': 0.17523816227912903, 'timestamp': '2025-09-10 02:34:54.404467', 'step': 7556, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 2847885091968}, 'timestamp': '2025-09-10 02:34:54.434672', 'step': 7556, 'epoch': 2} {'type': 'loss', 'content': 0.13292692601680756, 'timestamp': '2025-09-10 02:34:54.438575', 'step': 7557, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 2847885091968}, 'timestamp': '2025-09-10 02:34:54.471257', 'step': 7557, 'epoch': 2} {'type': 'loss', 'content': 0.14482350647449493, 'timestamp': '2025-09-10 02:34:54.473603', 'step': 7558, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 128], 'flops': 3797092544000}, 'timestamp': '2025-09-10 02:34:54.504517', 'step': 7558, 'epoch': 2} {'type': 'loss', 'content': 0.11101789772510529, 'timestamp': '2025-09-10 02:34:54.507044', 'step': 7559, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 2847885091968}, 'timestamp': '2025-09-10 02:34:54.536925', 'step': 7559, 'epoch': 2} {'type': 'loss', 'content': 0.13598400354385376, 'timestamp': '2025-09-10 02:34:54.560363', 'step': 7560, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 128], 'flops': 3797092544000}, 'timestamp': '2025-09-10 02:34:54.591101', 'step': 7560, 'epoch': 2} {'type': 'loss', 'content': 0.19133320450782776, 'timestamp': '2025-09-10 02:34:54.595443', 'step': 7561, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 3322488817984}, 'timestamp': '2025-09-10 02:34:54.624913', 'step': 7561, 'epoch': 2} {'type': 'loss', 'content': 0.10252269357442856, 'timestamp': '2025-09-10 02:34:54.628868', 'step': 7562, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 128], 'flops': 3797092544000}, 'timestamp': '2025-09-10 02:34:54.659734', 'step': 7562, 'epoch': 2} {'type': 'loss', 'content': 0.19863833487033844, 'timestamp': '2025-09-10 02:34:54.662256', 'step': 7563, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 2847885091968}, 'timestamp': '2025-09-10 02:34:54.691911', 'step': 7563, 'epoch': 2} {'type': 'loss', 'content': 0.1846427470445633, 'timestamp': '2025-09-10 02:34:54.715488', 'step': 7564, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 2847885091968}, 'timestamp': '2025-09-10 02:34:54.745762', 'step': 7564, 'epoch': 2} {'type': 'loss', 'content': 0.15607240796089172, 'timestamp': '2025-09-10 02:34:54.748360', 'step': 7565, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 2847885091968}, 'timestamp': '2025-09-10 02:34:54.778520', 'step': 7565, 'epoch': 2} {'type': 'loss', 'content': 0.10817749798297882, 'timestamp': '2025-09-10 02:34:54.781145', 'step': 7566, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 128], 'flops': 3797092544000}, 'timestamp': '2025-09-10 02:34:54.811810', 'step': 7566, 'epoch': 2} {'type': 'loss', 'content': 0.14886249601840973, 'timestamp': '2025-09-10 02:34:54.816778', 'step': 7567, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 3322488817984}, 'timestamp': '2025-09-10 02:34:54.847924', 'step': 7567, 'epoch': 2} {'type': 'loss', 'content': 0.1310867965221405, 'timestamp': '2025-09-10 02:34:54.871708', 'step': 7568, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 160], 'flops': 4746299996032}, 'timestamp': '2025-09-10 02:34:54.902569', 'step': 7568, 'epoch': 2} {'type': 'loss', 'content': 0.11846348643302917, 'timestamp': '2025-09-10 02:34:54.904910', 'step': 7569, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 2847885091968}, 'timestamp': '2025-09-10 02:34:54.934566', 'step': 7569, 'epoch': 2} {'type': 'loss', 'content': 0.2114129364490509, 'timestamp': '2025-09-10 02:34:54.937097', 'step': 7570, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 2847885091968}, 'timestamp': '2025-09-10 02:34:54.967156', 'step': 7570, 'epoch': 2} {'type': 'loss', 'content': 0.10129762440919876, 'timestamp': '2025-09-10 02:34:54.969539', 'step': 7571, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 3322488817984}, 'timestamp': '2025-09-10 02:34:54.999705', 'step': 7571, 'epoch': 2} {'type': 'loss', 'content': 0.08381779491901398, 'timestamp': '2025-09-10 02:34:55.023584', 'step': 7572, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 2847885091968}, 'timestamp': '2025-09-10 02:34:55.052613', 'step': 7572, 'epoch': 2} {'type': 'loss', 'content': 0.13077571988105774, 'timestamp': '2025-09-10 02:34:55.055817', 'step': 7573, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 2847885091968}, 'timestamp': '2025-09-10 02:34:55.085557', 'step': 7573, 'epoch': 2} {'type': 'loss', 'content': 0.23781642317771912, 'timestamp': '2025-09-10 02:34:55.088157', 'step': 7574, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 80], 'flops': 2373281365952}, 'timestamp': '2025-09-10 02:34:55.117279', 'step': 7574, 'epoch': 2} {'type': 'loss', 'content': 0.08225002884864807, 'timestamp': '2025-09-10 02:34:55.119681', 'step': 7575, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 3322488817984}, 'timestamp': '2025-09-10 02:34:55.149585', 'step': 7575, 'epoch': 2} {'type': 'loss', 'content': 0.214640274643898, 'timestamp': '2025-09-10 02:34:55.173118', 'step': 7576, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 3322488817984}, 'timestamp': '2025-09-10 02:34:55.205141', 'step': 7576, 'epoch': 2} {'type': 'loss', 'content': 0.14141304790973663, 'timestamp': '2025-09-10 02:34:55.207410', 'step': 7577, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 3322488817984}, 'timestamp': '2025-09-10 02:34:55.237086', 'step': 7577, 'epoch': 2} {'type': 'loss', 'content': 0.19154392182826996, 'timestamp': '2025-09-10 02:34:55.239522', 'step': 7578, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 144], 'flops': 4271696270016}, 'timestamp': '2025-09-10 02:34:55.268983', 'step': 7578, 'epoch': 2} {'type': 'loss', 'content': 0.07915593683719635, 'timestamp': '2025-09-10 02:34:55.275923', 'step': 7579, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 2847885091968}, 'timestamp': '2025-09-10 02:34:55.319168', 'step': 7579, 'epoch': 2} {'type': 'loss', 'content': 0.22060400247573853, 'timestamp': '2025-09-10 02:34:55.346051', 'step': 7580, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 3322488817984}, 'timestamp': '2025-09-10 02:34:55.394345', 'step': 7580, 'epoch': 2} {'type': 'loss', 'content': 0.15828342735767365, 'timestamp': '2025-09-10 02:34:55.396809', 'step': 7581, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 160], 'flops': 4746299996032}, 'timestamp': '2025-09-10 02:34:55.428474', 'step': 7581, 'epoch': 2} {'type': 'loss', 'content': 0.14191173017024994, 'timestamp': '2025-09-10 02:34:55.431744', 'step': 7582, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 80], 'flops': 2373281365952}, 'timestamp': '2025-09-10 02:34:55.461848', 'step': 7582, 'epoch': 2} {'type': 'loss', 'content': 0.2085629552602768, 'timestamp': '2025-09-10 02:34:55.464130', 'step': 7583, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 2847885091968}, 'timestamp': '2025-09-10 02:34:55.493183', 'step': 7583, 'epoch': 2} {'type': 'loss', 'content': 0.11952215433120728, 'timestamp': '2025-09-10 02:34:55.516789', 'step': 7584, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 128], 'flops': 3797092544000}, 'timestamp': '2025-09-10 02:34:55.546252', 'step': 7584, 'epoch': 2} {'type': 'loss', 'content': 0.07968704402446747, 'timestamp': '2025-09-10 02:34:55.548726', 'step': 7585, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 128], 'flops': 3797092544000}, 'timestamp': '2025-09-10 02:34:55.578923', 'step': 7585, 'epoch': 2} {'type': 'loss', 'content': 0.09248973429203033, 'timestamp': '2025-09-10 02:34:55.581252', 'step': 7586, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 128], 'flops': 3797092544000}, 'timestamp': '2025-09-10 02:34:55.611430', 'step': 7586, 'epoch': 2} {'type': 'loss', 'content': 0.15108245611190796, 'timestamp': '2025-09-10 02:34:55.613792', 'step': 7587, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 3322488817984}, 'timestamp': '2025-09-10 02:34:55.644376', 'step': 7587, 'epoch': 2} {'type': 'loss', 'content': 0.31107982993125916, 'timestamp': '2025-09-10 02:34:55.667894', 'step': 7588, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 2847885091968}, 'timestamp': '2025-09-10 02:34:55.698893', 'step': 7588, 'epoch': 2} {'type': 'loss', 'content': 0.18196159601211548, 'timestamp': '2025-09-10 02:34:55.701265', 'step': 7589, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 3322488817984}, 'timestamp': '2025-09-10 02:34:55.732483', 'step': 7589, 'epoch': 2} {'type': 'loss', 'content': 0.1295813024044037, 'timestamp': '2025-09-10 02:34:55.735080', 'step': 7590, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 128], 'flops': 3797092544000}, 'timestamp': '2025-09-10 02:34:55.765277', 'step': 7590, 'epoch': 2} {'type': 'loss', 'content': 0.09511139243841171, 'timestamp': '2025-09-10 02:34:55.767508', 'step': 7591, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 128], 'flops': 3797092544000}, 'timestamp': '2025-09-10 02:34:55.798288', 'step': 7591, 'epoch': 2} {'type': 'loss', 'content': 0.1374426931142807, 'timestamp': '2025-09-10 02:34:55.821953', 'step': 7592, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 128], 'flops': 3797092544000}, 'timestamp': '2025-09-10 02:34:55.858554', 'step': 7592, 'epoch': 2} {'type': 'loss', 'content': 0.19168727099895477, 'timestamp': '2025-09-10 02:34:55.861274', 'step': 7593, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 3322488817984}, 'timestamp': '2025-09-10 02:34:55.893574', 'step': 7593, 'epoch': 2} {'type': 'loss', 'content': 0.18469347059726715, 'timestamp': '2025-09-10 02:34:55.895693', 'step': 7594, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 3322488817984}, 'timestamp': '2025-09-10 02:34:55.926379', 'step': 7594, 'epoch': 2} {'type': 'loss', 'content': 0.12917031347751617, 'timestamp': '2025-09-10 02:34:55.929692', 'step': 7595, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 2847885091968}, 'timestamp': '2025-09-10 02:34:55.963039', 'step': 7595, 'epoch': 2} {'type': 'loss', 'content': 0.11963307112455368, 'timestamp': '2025-09-10 02:34:55.988053', 'step': 7596, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 128], 'flops': 3797092544000}, 'timestamp': '2025-09-10 02:34:56.019505', 'step': 7596, 'epoch': 2} {'type': 'loss', 'content': 0.14171382784843445, 'timestamp': '2025-09-10 02:34:56.023700', 'step': 7597, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 3322488817984}, 'timestamp': '2025-09-10 02:34:56.058951', 'step': 7597, 'epoch': 2} {'type': 'loss', 'content': 0.13375772535800934, 'timestamp': '2025-09-10 02:34:56.063447', 'step': 7598, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 3322488817984}, 'timestamp': '2025-09-10 02:34:56.100392', 'step': 7598, 'epoch': 2} {'type': 'loss', 'content': 0.12413100898265839, 'timestamp': '2025-09-10 02:34:56.103439', 'step': 7599, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 80], 'flops': 2373281365952}, 'timestamp': '2025-09-10 02:34:56.137439', 'step': 7599, 'epoch': 2} {'type': 'loss', 'content': 0.17646080255508423, 'timestamp': '2025-09-10 02:34:56.162862', 'step': 7600, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 3322488817984}, 'timestamp': '2025-09-10 02:34:56.193673', 'step': 7600, 'epoch': 2} {'type': 'loss', 'content': 0.16826745867729187, 'timestamp': '2025-09-10 02:34:56.196901', 'step': 7601, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 2847885091968}, 'timestamp': '2025-09-10 02:34:56.227949', 'step': 7601, 'epoch': 2} {'type': 'loss', 'content': 0.14164303243160248, 'timestamp': '2025-09-10 02:34:56.230165', 'step': 7602, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 3322488817984}, 'timestamp': '2025-09-10 02:34:56.259570', 'step': 7602, 'epoch': 2} {'type': 'loss', 'content': 0.18116575479507446, 'timestamp': '2025-09-10 02:34:56.262072', 'step': 7603, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 128], 'flops': 3797092544000}, 'timestamp': '2025-09-10 02:34:56.293320', 'step': 7603, 'epoch': 2} {'type': 'loss', 'content': 0.1564561128616333, 'timestamp': '2025-09-10 02:34:56.316876', 'step': 7604, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 3322488817984}, 'timestamp': '2025-09-10 02:34:56.348118', 'step': 7604, 'epoch': 2} {'type': 'loss', 'content': 0.15278953313827515, 'timestamp': '2025-09-10 02:34:56.350728', 'step': 7605, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 2847885091968}, 'timestamp': '2025-09-10 02:34:56.382364', 'step': 7605, 'epoch': 2} {'type': 'loss', 'content': 0.2071520835161209, 'timestamp': '2025-09-10 02:34:56.384781', 'step': 7606, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 128], 'flops': 3797092544000}, 'timestamp': '2025-09-10 02:34:56.414704', 'step': 7606, 'epoch': 2} {'type': 'loss', 'content': 0.1387873888015747, 'timestamp': '2025-09-10 02:34:56.418743', 'step': 7607, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 128], 'flops': 3797092544000}, 'timestamp': '2025-09-10 02:34:56.449119', 'step': 7607, 'epoch': 2} {'type': 'loss', 'content': 0.12624196708202362, 'timestamp': '2025-09-10 02:34:56.472878', 'step': 7608, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 160], 'flops': 4746299996032}, 'timestamp': '2025-09-10 02:34:56.503218', 'step': 7608, 'epoch': 2} {'type': 'loss', 'content': 0.0867796465754509, 'timestamp': '2025-09-10 02:34:56.505775', 'step': 7609, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 3322488817984}, 'timestamp': '2025-09-10 02:34:56.536662', 'step': 7609, 'epoch': 2} {'type': 'loss', 'content': 0.14025671780109406, 'timestamp': '2025-09-10 02:34:56.538888', 'step': 7610, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 80], 'flops': 2373281365952}, 'timestamp': '2025-09-10 02:34:56.568606', 'step': 7610, 'epoch': 2} {'type': 'loss', 'content': 0.10851854085922241, 'timestamp': '2025-09-10 02:34:56.571259', 'step': 7611, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 80], 'flops': 2373281365952}, 'timestamp': '2025-09-10 02:34:56.601703', 'step': 7611, 'epoch': 2} {'type': 'loss', 'content': 0.07100827246904373, 'timestamp': '2025-09-10 02:34:56.625494', 'step': 7612, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 2847885091968}, 'timestamp': '2025-09-10 02:34:56.655783', 'step': 7612, 'epoch': 2} {'type': 'loss', 'content': 0.12746983766555786, 'timestamp': '2025-09-10 02:34:56.658234', 'step': 7613, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 3322488817984}, 'timestamp': '2025-09-10 02:34:56.688021', 'step': 7613, 'epoch': 2} {'type': 'loss', 'content': 0.1718376874923706, 'timestamp': '2025-09-10 02:34:56.690518', 'step': 7614, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 176], 'flops': 5220903722048}, 'timestamp': '2025-09-10 02:34:56.721129', 'step': 7614, 'epoch': 2} {'type': 'loss', 'content': 0.2649810016155243, 'timestamp': '2025-09-10 02:34:56.725656', 'step': 7615, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 128], 'flops': 3797092544000}, 'timestamp': '2025-09-10 02:34:56.756291', 'step': 7615, 'epoch': 2} {'type': 'loss', 'content': 0.15298399329185486, 'timestamp': '2025-09-10 02:34:56.781462', 'step': 7616, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 2847885091968}, 'timestamp': '2025-09-10 02:34:56.811307', 'step': 7616, 'epoch': 2} {'type': 'loss', 'content': 0.07740438729524612, 'timestamp': '2025-09-10 02:34:56.814633', 'step': 7617, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 128], 'flops': 3797092544000}, 'timestamp': '2025-09-10 02:34:56.849346', 'step': 7617, 'epoch': 2} {'type': 'loss', 'content': 0.17442923784255981, 'timestamp': '2025-09-10 02:34:56.851867', 'step': 7618, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 128], 'flops': 3797092544000}, 'timestamp': '2025-09-10 02:34:56.882938', 'step': 7618, 'epoch': 2} {'type': 'loss', 'content': 0.12386715412139893, 'timestamp': '2025-09-10 02:34:56.885407', 'step': 7619, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 128], 'flops': 3797092544000}, 'timestamp': '2025-09-10 02:34:56.915207', 'step': 7619, 'epoch': 2} {'type': 'loss', 'content': 0.1251170039176941, 'timestamp': '2025-09-10 02:34:56.943949', 'step': 7620, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 80], 'flops': 2373281365952}, 'timestamp': '2025-09-10 02:34:56.979408', 'step': 7620, 'epoch': 2} {'type': 'loss', 'content': 0.1520477831363678, 'timestamp': '2025-09-10 02:34:56.981643', 'step': 7621, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 3322488817984}, 'timestamp': '2025-09-10 02:34:57.015427', 'step': 7621, 'epoch': 2} {'type': 'loss', 'content': 0.19984324276447296, 'timestamp': '2025-09-10 02:34:57.017637', 'step': 7622, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 2847885091968}, 'timestamp': '2025-09-10 02:34:57.047900', 'step': 7622, 'epoch': 2} {'type': 'loss', 'content': 0.10936253517866135, 'timestamp': '2025-09-10 02:34:57.050383', 'step': 7623, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 3322488817984}, 'timestamp': '2025-09-10 02:34:57.080063', 'step': 7623, 'epoch': 2} {'type': 'loss', 'content': 0.1098262295126915, 'timestamp': '2025-09-10 02:34:57.104755', 'step': 7624, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 3322488817984}, 'timestamp': '2025-09-10 02:34:57.134586', 'step': 7624, 'epoch': 2} {'type': 'loss', 'content': 0.14972251653671265, 'timestamp': '2025-09-10 02:34:57.137197', 'step': 7625, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 80], 'flops': 2373281365952}, 'timestamp': '2025-09-10 02:34:57.168499', 'step': 7625, 'epoch': 2} {'type': 'loss', 'content': 0.09662509709596634, 'timestamp': '2025-09-10 02:34:57.170796', 'step': 7626, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 2847885091968}, 'timestamp': '2025-09-10 02:34:57.201966', 'step': 7626, 'epoch': 2} {'type': 'loss', 'content': 0.08215955644845963, 'timestamp': '2025-09-10 02:34:57.204359', 'step': 7627, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 128], 'flops': 3797092544000}, 'timestamp': '2025-09-10 02:34:57.233922', 'step': 7627, 'epoch': 2} {'type': 'loss', 'content': 0.09509313106536865, 'timestamp': '2025-09-10 02:34:57.257347', 'step': 7628, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 128], 'flops': 3797092544000}, 'timestamp': '2025-09-10 02:34:57.288754', 'step': 7628, 'epoch': 2} {'type': 'loss', 'content': 0.07453466206789017, 'timestamp': '2025-09-10 02:34:57.290878', 'step': 7629, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 2847885091968}, 'timestamp': '2025-09-10 02:34:57.320896', 'step': 7629, 'epoch': 2} {'type': 'loss', 'content': 0.1105203628540039, 'timestamp': '2025-09-10 02:34:57.323147', 'step': 7630, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 3322488817984}, 'timestamp': '2025-09-10 02:34:57.355086', 'step': 7630, 'epoch': 2} {'type': 'loss', 'content': 0.09648182988166809, 'timestamp': '2025-09-10 02:34:57.357465', 'step': 7631, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 3322488817984}, 'timestamp': '2025-09-10 02:34:57.388971', 'step': 7631, 'epoch': 2} {'type': 'loss', 'content': 0.19148460030555725, 'timestamp': '2025-09-10 02:34:57.412564', 'step': 7632, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 3322488817984}, 'timestamp': '2025-09-10 02:34:57.443154', 'step': 7632, 'epoch': 2} {'type': 'loss', 'content': 0.22514285147190094, 'timestamp': '2025-09-10 02:34:57.447003', 'step': 7633, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 3322488817984}, 'timestamp': '2025-09-10 02:34:57.481231', 'step': 7633, 'epoch': 2} {'type': 'loss', 'content': 0.15389245748519897, 'timestamp': '2025-09-10 02:34:57.484008', 'step': 7634, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 128], 'flops': 3797092544000}, 'timestamp': '2025-09-10 02:34:57.521185', 'step': 7634, 'epoch': 2} {'type': 'loss', 'content': 0.1729283630847931, 'timestamp': '2025-09-10 02:34:57.525256', 'step': 7635, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 128], 'flops': 3797092544000}, 'timestamp': '2025-09-10 02:34:57.567355', 'step': 7635, 'epoch': 2} {'type': 'loss', 'content': 0.2241073101758957, 'timestamp': '2025-09-10 02:34:57.592568', 'step': 7636, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 2847885091968}, 'timestamp': '2025-09-10 02:34:57.623087', 'step': 7636, 'epoch': 2} {'type': 'loss', 'content': 0.17290098965168, 'timestamp': '2025-09-10 02:34:57.625457', 'step': 7637, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 3322488817984}, 'timestamp': '2025-09-10 02:34:57.655452', 'step': 7637, 'epoch': 2} {'type': 'loss', 'content': 0.0741182491183281, 'timestamp': '2025-09-10 02:34:57.657745', 'step': 7638, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 128], 'flops': 3797092544000}, 'timestamp': '2025-09-10 02:34:57.688959', 'step': 7638, 'epoch': 2} {'type': 'loss', 'content': 0.21356774866580963, 'timestamp': '2025-09-10 02:34:57.691645', 'step': 7639, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 3322488817984}, 'timestamp': '2025-09-10 02:34:57.721332', 'step': 7639, 'epoch': 2} {'type': 'loss', 'content': 0.10116743296384811, 'timestamp': '2025-09-10 02:34:57.745195', 'step': 7640, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 160], 'flops': 4746299996032}, 'timestamp': '2025-09-10 02:34:57.776480', 'step': 7640, 'epoch': 2} {'type': 'loss', 'content': 0.15918312966823578, 'timestamp': '2025-09-10 02:34:57.778559', 'step': 7641, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 128], 'flops': 3797092544000}, 'timestamp': '2025-09-10 02:34:57.809264', 'step': 7641, 'epoch': 2} {'type': 'loss', 'content': 0.09813441336154938, 'timestamp': '2025-09-10 02:34:57.811638', 'step': 7642, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 128], 'flops': 3797092544000}, 'timestamp': '2025-09-10 02:34:57.845958', 'step': 7642, 'epoch': 2} {'type': 'loss', 'content': 0.16044971346855164, 'timestamp': '2025-09-10 02:34:57.850641', 'step': 7643, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 128], 'flops': 3797092544000}, 'timestamp': '2025-09-10 02:34:57.888290', 'step': 7643, 'epoch': 2} {'type': 'loss', 'content': 0.09724827110767365, 'timestamp': '2025-09-10 02:34:57.913317', 'step': 7644, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 160], 'flops': 4746299996032}, 'timestamp': '2025-09-10 02:34:57.951872', 'step': 7644, 'epoch': 2} {'type': 'loss', 'content': 0.15002278983592987, 'timestamp': '2025-09-10 02:34:57.957182', 'step': 7645, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 128], 'flops': 3797092544000}, 'timestamp': '2025-09-10 02:34:57.999194', 'step': 7645, 'epoch': 2} {'type': 'loss', 'content': 0.08070385456085205, 'timestamp': '2025-09-10 02:34:58.003863', 'step': 7646, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 2847885091968}, 'timestamp': '2025-09-10 02:34:58.038724', 'step': 7646, 'epoch': 2} {'type': 'loss', 'content': 0.12485577911138535, 'timestamp': '2025-09-10 02:34:58.044898', 'step': 7647, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 2847885091968}, 'timestamp': '2025-09-10 02:34:58.084328', 'step': 7647, 'epoch': 2} {'type': 'loss', 'content': 0.14965187013149261, 'timestamp': '2025-09-10 02:34:58.107729', 'step': 7648, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 176], 'flops': 5220903722048}, 'timestamp': '2025-09-10 02:34:58.142321', 'step': 7648, 'epoch': 2} {'type': 'loss', 'content': 0.1720026284456253, 'timestamp': '2025-09-10 02:34:58.145095', 'step': 7649, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 144], 'flops': 4271696270016}, 'timestamp': '2025-09-10 02:34:58.177340', 'step': 7649, 'epoch': 2} {'type': 'loss', 'content': 0.1883993297815323, 'timestamp': '2025-09-10 02:34:58.179857', 'step': 7650, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 2847885091968}, 'timestamp': '2025-09-10 02:34:58.210803', 'step': 7650, 'epoch': 2} {'type': 'loss', 'content': 0.0702507272362709, 'timestamp': '2025-09-10 02:34:58.212920', 'step': 7651, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 128], 'flops': 3797092544000}, 'timestamp': '2025-09-10 02:34:58.242759', 'step': 7651, 'epoch': 2} {'type': 'loss', 'content': 0.20609864592552185, 'timestamp': '2025-09-10 02:34:58.266269', 'step': 7652, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 2847885091968}, 'timestamp': '2025-09-10 02:34:58.297453', 'step': 7652, 'epoch': 2} {'type': 'loss', 'content': 0.15039825439453125, 'timestamp': '2025-09-10 02:34:58.299503', 'step': 7653, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 3322488817984}, 'timestamp': '2025-09-10 02:34:58.329493', 'step': 7653, 'epoch': 2} {'type': 'loss', 'content': 0.1779014617204666, 'timestamp': '2025-09-10 02:34:58.331735', 'step': 7654, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 144], 'flops': 4271696270016}, 'timestamp': '2025-09-10 02:34:58.362311', 'step': 7654, 'epoch': 2} {'type': 'loss', 'content': 0.1391800493001938, 'timestamp': '2025-09-10 02:34:58.364824', 'step': 7655, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 128], 'flops': 3797092544000}, 'timestamp': '2025-09-10 02:34:58.395855', 'step': 7655, 'epoch': 2} {'type': 'loss', 'content': 0.12279921770095825, 'timestamp': '2025-09-10 02:34:58.419050', 'step': 7656, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 128], 'flops': 3797092544000}, 'timestamp': '2025-09-10 02:34:58.449509', 'step': 7656, 'epoch': 2} {'type': 'loss', 'content': 0.13358689844608307, 'timestamp': '2025-09-10 02:34:58.451594', 'step': 7657, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 2847885091968}, 'timestamp': '2025-09-10 02:34:58.482284', 'step': 7657, 'epoch': 2} {'type': 'loss', 'content': 0.16848796606063843, 'timestamp': '2025-09-10 02:34:58.484586', 'step': 7658, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 128], 'flops': 3797092544000}, 'timestamp': '2025-09-10 02:34:58.514929', 'step': 7658, 'epoch': 2} {'type': 'loss', 'content': 0.08582146465778351, 'timestamp': '2025-09-10 02:34:58.517095', 'step': 7659, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 144], 'flops': 4271696270016}, 'timestamp': '2025-09-10 02:34:58.547683', 'step': 7659, 'epoch': 2} {'type': 'loss', 'content': 0.1454809308052063, 'timestamp': '2025-09-10 02:34:58.571008', 'step': 7660, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 3322488817984}, 'timestamp': '2025-09-10 02:34:58.611336', 'step': 7660, 'epoch': 2} {'type': 'loss', 'content': 0.21978630125522614, 'timestamp': '2025-09-10 02:34:58.617098', 'step': 7661, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 3322488817984}, 'timestamp': '2025-09-10 02:34:58.664683', 'step': 7661, 'epoch': 2} {'type': 'loss', 'content': 0.09913881123065948, 'timestamp': '2025-09-10 02:34:58.672915', 'step': 7662, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 144], 'flops': 4271696270016}, 'timestamp': '2025-09-10 02:34:58.713572', 'step': 7662, 'epoch': 2} {'type': 'loss', 'content': 0.10600228607654572, 'timestamp': '2025-09-10 02:34:58.721705', 'step': 7663, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 128], 'flops': 3797092544000}, 'timestamp': '2025-09-10 02:34:58.773880', 'step': 7663, 'epoch': 2} {'type': 'loss', 'content': 0.12396945804357529, 'timestamp': '2025-09-10 02:34:58.809608', 'step': 7664, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 2847885091968}, 'timestamp': '2025-09-10 02:34:58.857136', 'step': 7664, 'epoch': 2} {'type': 'loss', 'content': 0.18293841183185577, 'timestamp': '2025-09-10 02:34:58.863531', 'step': 7665, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 2847885091968}, 'timestamp': '2025-09-10 02:34:58.901195', 'step': 7665, 'epoch': 2} {'type': 'loss', 'content': 0.10961825400590897, 'timestamp': '2025-09-10 02:34:58.909773', 'step': 7666, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 3322488817984}, 'timestamp': '2025-09-10 02:34:58.958195', 'step': 7666, 'epoch': 2} {'type': 'loss', 'content': 0.16676457226276398, 'timestamp': '2025-09-10 02:34:58.961512', 'step': 7667, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 2847885091968}, 'timestamp': '2025-09-10 02:34:59.020585', 'step': 7667, 'epoch': 2} {'type': 'loss', 'content': 0.15310777723789215, 'timestamp': '2025-09-10 02:34:59.053981', 'step': 7668, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 2847885091968}, 'timestamp': '2025-09-10 02:34:59.113457', 'step': 7668, 'epoch': 2} {'type': 'loss', 'content': 0.1454848051071167, 'timestamp': '2025-09-10 02:34:59.122501', 'step': 7669, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 2847885091968}, 'timestamp': '2025-09-10 02:34:59.178483', 'step': 7669, 'epoch': 2} {'type': 'loss', 'content': 0.2707080841064453, 'timestamp': '2025-09-10 02:34:59.184360', 'step': 7670, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 2847885091968}, 'timestamp': '2025-09-10 02:34:59.229984', 'step': 7670, 'epoch': 2} {'type': 'loss', 'content': 0.11835876852273941, 'timestamp': '2025-09-10 02:34:59.239125', 'step': 7671, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 128], 'flops': 3797092544000}, 'timestamp': '2025-09-10 02:34:59.282165', 'step': 7671, 'epoch': 2} {'type': 'loss', 'content': 0.1602020114660263, 'timestamp': '2025-09-10 02:34:59.316848', 'step': 7672, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 128], 'flops': 3797092544000}, 'timestamp': '2025-09-10 02:34:59.370191', 'step': 7672, 'epoch': 2} {'type': 'loss', 'content': 0.12547656893730164, 'timestamp': '2025-09-10 02:34:59.378186', 'step': 7673, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 128], 'flops': 3797092544000}, 'timestamp': '2025-09-10 02:34:59.433735', 'step': 7673, 'epoch': 2} {'type': 'loss', 'content': 0.22047173976898193, 'timestamp': '2025-09-10 02:34:59.437184', 'step': 7674, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 3322488817984}, 'timestamp': '2025-09-10 02:34:59.499824', 'step': 7674, 'epoch': 2} {'type': 'loss', 'content': 0.11142108589410782, 'timestamp': '2025-09-10 02:34:59.507291', 'step': 7675, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 128], 'flops': 3797092544000}, 'timestamp': '2025-09-10 02:34:59.546933', 'step': 7675, 'epoch': 2} {'type': 'loss', 'content': 0.20149004459381104, 'timestamp': '2025-09-10 02:34:59.573078', 'step': 7676, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 128], 'flops': 3797092544000}, 'timestamp': '2025-09-10 02:34:59.608590', 'step': 7676, 'epoch': 2} {'type': 'loss', 'content': 0.1633336991071701, 'timestamp': '2025-09-10 02:34:59.610625', 'step': 7677, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 128], 'flops': 3797092544000}, 'timestamp': '2025-09-10 02:34:59.640092', 'step': 7677, 'epoch': 2} {'type': 'loss', 'content': 0.13711468875408173, 'timestamp': '2025-09-10 02:34:59.642396', 'step': 7678, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 3322488817984}, 'timestamp': '2025-09-10 02:34:59.673038', 'step': 7678, 'epoch': 2} {'type': 'loss', 'content': 0.2000594139099121, 'timestamp': '2025-09-10 02:34:59.675584', 'step': 7679, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 3322488817984}, 'timestamp': '2025-09-10 02:34:59.706492', 'step': 7679, 'epoch': 2} {'type': 'loss', 'content': 0.13497541844844818, 'timestamp': '2025-09-10 02:34:59.729809', 'step': 7680, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 3322488817984}, 'timestamp': '2025-09-10 02:34:59.759816', 'step': 7680, 'epoch': 2} {'type': 'loss', 'content': 0.17823311686515808, 'timestamp': '2025-09-10 02:34:59.763271', 'step': 7681, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 128], 'flops': 3797092544000}, 'timestamp': '2025-09-10 02:34:59.794797', 'step': 7681, 'epoch': 2} {'type': 'loss', 'content': 0.07952198386192322, 'timestamp': '2025-09-10 02:34:59.797560', 'step': 7682, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 3322488817984}, 'timestamp': '2025-09-10 02:34:59.829177', 'step': 7682, 'epoch': 2} {'type': 'loss', 'content': 0.19045276939868927, 'timestamp': '2025-09-10 02:34:59.833970', 'step': 7683, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 128], 'flops': 3797092544000}, 'timestamp': '2025-09-10 02:34:59.867288', 'step': 7683, 'epoch': 2} {'type': 'loss', 'content': 0.10305505990982056, 'timestamp': '2025-09-10 02:34:59.891000', 'step': 7684, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 3322488817984}, 'timestamp': '2025-09-10 02:34:59.921350', 'step': 7684, 'epoch': 2} {'type': 'loss', 'content': 0.10082121193408966, 'timestamp': '2025-09-10 02:34:59.923650', 'step': 7685, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 3322488817984}, 'timestamp': '2025-09-10 02:34:59.953985', 'step': 7685, 'epoch': 2} {'type': 'loss', 'content': 0.15756291151046753, 'timestamp': '2025-09-10 02:34:59.956557', 'step': 7686, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 128], 'flops': 3797092544000}, 'timestamp': '2025-09-10 02:34:59.986564', 'step': 7686, 'epoch': 2} {'type': 'loss', 'content': 0.16974064707756042, 'timestamp': '2025-09-10 02:34:59.988885', 'step': 7687, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 2847885091968}, 'timestamp': '2025-09-10 02:35:00.019862', 'step': 7687, 'epoch': 2} {'type': 'loss', 'content': 0.12312834709882736, 'timestamp': '2025-09-10 02:35:00.043569', 'step': 7688, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 3322488817984}, 'timestamp': '2025-09-10 02:35:00.074447', 'step': 7688, 'epoch': 2} {'type': 'loss', 'content': 0.18615804612636566, 'timestamp': '2025-09-10 02:35:00.077076', 'step': 7689, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 2847885091968}, 'timestamp': '2025-09-10 02:35:00.107667', 'step': 7689, 'epoch': 2} {'type': 'loss', 'content': 0.08602973073720932, 'timestamp': '2025-09-10 02:35:00.110106', 'step': 7690, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 3322488817984}, 'timestamp': '2025-09-10 02:35:00.140060', 'step': 7690, 'epoch': 2} {'type': 'loss', 'content': 0.1334095448255539, 'timestamp': '2025-09-10 02:35:00.142345', 'step': 7691, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 3322488817984}, 'timestamp': '2025-09-10 02:35:00.172674', 'step': 7691, 'epoch': 2} {'type': 'loss', 'content': 0.23641523718833923, 'timestamp': '2025-09-10 02:35:00.196365', 'step': 7692, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 128], 'flops': 3797092544000}, 'timestamp': '2025-09-10 02:35:00.227664', 'step': 7692, 'epoch': 2} {'type': 'loss', 'content': 0.1270974576473236, 'timestamp': '2025-09-10 02:35:00.230125', 'step': 7693, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 2847885091968}, 'timestamp': '2025-09-10 02:35:00.261055', 'step': 7693, 'epoch': 2} {'type': 'loss', 'content': 0.12712781131267548, 'timestamp': '2025-09-10 02:35:00.264291', 'step': 7694, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 80], 'flops': 2373281365952}, 'timestamp': '2025-09-10 02:35:00.296442', 'step': 7694, 'epoch': 2} {'type': 'loss', 'content': 0.20018117129802704, 'timestamp': '2025-09-10 02:35:00.298554', 'step': 7695, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 80], 'flops': 2373281365952}, 'timestamp': '2025-09-10 02:35:00.328846', 'step': 7695, 'epoch': 2} {'type': 'loss', 'content': 0.06699426472187042, 'timestamp': '2025-09-10 02:35:00.352414', 'step': 7696, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 3322488817984}, 'timestamp': '2025-09-10 02:35:00.383667', 'step': 7696, 'epoch': 2} {'type': 'loss', 'content': 0.16860216856002808, 'timestamp': '2025-09-10 02:35:00.386706', 'step': 7697, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 3322488817984}, 'timestamp': '2025-09-10 02:35:00.418277', 'step': 7697, 'epoch': 2} {'type': 'loss', 'content': 0.2335474044084549, 'timestamp': '2025-09-10 02:35:00.420637', 'step': 7698, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 3322488817984}, 'timestamp': '2025-09-10 02:35:00.451123', 'step': 7698, 'epoch': 2} {'type': 'loss', 'content': 0.15216781198978424, 'timestamp': '2025-09-10 02:35:00.453983', 'step': 7699, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 144], 'flops': 4271696270016}, 'timestamp': '2025-09-10 02:35:00.484688', 'step': 7699, 'epoch': 2} {'type': 'loss', 'content': 0.0630895346403122, 'timestamp': '2025-09-10 02:35:00.508119', 'step': 7700, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 3322488817984}, 'timestamp': '2025-09-10 02:35:00.538546', 'step': 7700, 'epoch': 2} {'type': 'loss', 'content': 0.18931497633457184, 'timestamp': '2025-09-10 02:35:00.540903', 'step': 7701, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 2847885091968}, 'timestamp': '2025-09-10 02:35:00.571578', 'step': 7701, 'epoch': 2} {'type': 'loss', 'content': 0.06906571239233017, 'timestamp': '2025-09-10 02:35:00.574762', 'step': 7702, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 3322488817984}, 'timestamp': '2025-09-10 02:35:00.605232', 'step': 7702, 'epoch': 2} {'type': 'loss', 'content': 0.2742990553379059, 'timestamp': '2025-09-10 02:35:00.607406', 'step': 7703, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 128], 'flops': 3797092544000}, 'timestamp': '2025-09-10 02:35:00.637478', 'step': 7703, 'epoch': 2} {'type': 'loss', 'content': 0.1609874814748764, 'timestamp': '2025-09-10 02:35:00.661101', 'step': 7704, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 3322488817984}, 'timestamp': '2025-09-10 02:35:00.693427', 'step': 7704, 'epoch': 2} {'type': 'loss', 'content': 0.18368719518184662, 'timestamp': '2025-09-10 02:35:00.695742', 'step': 7705, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 2847885091968}, 'timestamp': '2025-09-10 02:35:00.727134', 'step': 7705, 'epoch': 2} {'type': 'loss', 'content': 0.22306767106056213, 'timestamp': '2025-09-10 02:35:00.729528', 'step': 7706, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 2847885091968}, 'timestamp': '2025-09-10 02:35:00.759893', 'step': 7706, 'epoch': 2} {'type': 'loss', 'content': 0.058252763003110886, 'timestamp': '2025-09-10 02:35:00.762459', 'step': 7707, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 2847885091968}, 'timestamp': '2025-09-10 02:35:00.793717', 'step': 7707, 'epoch': 2} {'type': 'loss', 'content': 0.10151132941246033, 'timestamp': '2025-09-10 02:35:00.817517', 'step': 7708, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 144], 'flops': 4271696270016}, 'timestamp': '2025-09-10 02:35:00.847676', 'step': 7708, 'epoch': 2} {'type': 'loss', 'content': 0.12972743809223175, 'timestamp': '2025-09-10 02:35:00.850795', 'step': 7709, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 128], 'flops': 3797092544000}, 'timestamp': '2025-09-10 02:35:00.880729', 'step': 7709, 'epoch': 2} {'type': 'loss', 'content': 0.13569852709770203, 'timestamp': '2025-09-10 02:35:00.883092', 'step': 7710, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 3322488817984}, 'timestamp': '2025-09-10 02:35:00.914343', 'step': 7710, 'epoch': 2} {'type': 'loss', 'content': 0.08991455286741257, 'timestamp': '2025-09-10 02:35:00.916491', 'step': 7711, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 2847885091968}, 'timestamp': '2025-09-10 02:35:00.947253', 'step': 7711, 'epoch': 2} {'type': 'loss', 'content': 0.14928896725177765, 'timestamp': '2025-09-10 02:35:00.971313', 'step': 7712, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 2847885091968}, 'timestamp': '2025-09-10 02:35:01.002825', 'step': 7712, 'epoch': 2} {'type': 'loss', 'content': 0.18609407544136047, 'timestamp': '2025-09-10 02:35:01.004971', 'step': 7713, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 3322488817984}, 'timestamp': '2025-09-10 02:35:01.035021', 'step': 7713, 'epoch': 2} {'type': 'loss', 'content': 0.11574666202068329, 'timestamp': '2025-09-10 02:35:01.037251', 'step': 7714, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 2847885091968}, 'timestamp': '2025-09-10 02:35:01.066949', 'step': 7714, 'epoch': 2} {'type': 'loss', 'content': 0.10268246382474899, 'timestamp': '2025-09-10 02:35:01.069437', 'step': 7715, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 3322488817984}, 'timestamp': '2025-09-10 02:35:01.099527', 'step': 7715, 'epoch': 2} {'type': 'loss', 'content': 0.07710673660039902, 'timestamp': '2025-09-10 02:35:01.123189', 'step': 7716, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 128], 'flops': 3797092544000}, 'timestamp': '2025-09-10 02:35:01.153374', 'step': 7716, 'epoch': 2} {'type': 'loss', 'content': 0.1476653814315796, 'timestamp': '2025-09-10 02:35:01.155605', 'step': 7717, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 3322488817984}, 'timestamp': '2025-09-10 02:35:01.185536', 'step': 7717, 'epoch': 2} {'type': 'loss', 'content': 0.10151483118534088, 'timestamp': '2025-09-10 02:35:01.188654', 'step': 7718, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 2847885091968}, 'timestamp': '2025-09-10 02:35:01.221475', 'step': 7718, 'epoch': 2} {'type': 'loss', 'content': 0.09658290445804596, 'timestamp': '2025-09-10 02:35:01.223603', 'step': 7719, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 3322488817984}, 'timestamp': '2025-09-10 02:35:01.253637', 'step': 7719, 'epoch': 2} {'type': 'loss', 'content': 0.19321665167808533, 'timestamp': '2025-09-10 02:35:01.277166', 'step': 7720, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 3322488817984}, 'timestamp': '2025-09-10 02:35:01.309251', 'step': 7720, 'epoch': 2} {'type': 'loss', 'content': 0.13217793405056, 'timestamp': '2025-09-10 02:35:01.311433', 'step': 7721, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 2847885091968}, 'timestamp': '2025-09-10 02:35:01.341705', 'step': 7721, 'epoch': 2} {'type': 'loss', 'content': 0.07732265442609787, 'timestamp': '2025-09-10 02:35:01.344123', 'step': 7722, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 3322488817984}, 'timestamp': '2025-09-10 02:35:01.374365', 'step': 7722, 'epoch': 2} {'type': 'loss', 'content': 0.1420552134513855, 'timestamp': '2025-09-10 02:35:01.376720', 'step': 7723, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 208], 'flops': 6170111174080}, 'timestamp': '2025-09-10 02:35:01.407972', 'step': 7723, 'epoch': 2} {'type': 'loss', 'content': 0.1378803849220276, 'timestamp': '2025-09-10 02:35:01.435852', 'step': 7724, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 3322488817984}, 'timestamp': '2025-09-10 02:35:01.465739', 'step': 7724, 'epoch': 2} {'type': 'loss', 'content': 0.15062205493450165, 'timestamp': '2025-09-10 02:35:01.467931', 'step': 7725, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 2847885091968}, 'timestamp': '2025-09-10 02:35:01.498026', 'step': 7725, 'epoch': 2} {'type': 'loss', 'content': 0.09582796692848206, 'timestamp': '2025-09-10 02:35:01.500256', 'step': 7726, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 3322488817984}, 'timestamp': '2025-09-10 02:35:01.530368', 'step': 7726, 'epoch': 2} {'type': 'loss', 'content': 0.09754032641649246, 'timestamp': '2025-09-10 02:35:01.532683', 'step': 7727, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 128], 'flops': 3797092544000}, 'timestamp': '2025-09-10 02:35:01.563689', 'step': 7727, 'epoch': 2} {'type': 'loss', 'content': 0.1844533532857895, 'timestamp': '2025-09-10 02:35:01.587356', 'step': 7728, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 3322488817984}, 'timestamp': '2025-09-10 02:35:01.618417', 'step': 7728, 'epoch': 2} {'type': 'loss', 'content': 0.1117311492562294, 'timestamp': '2025-09-10 02:35:01.620741', 'step': 7729, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 128], 'flops': 3797092544000}, 'timestamp': '2025-09-10 02:35:01.650724', 'step': 7729, 'epoch': 2} {'type': 'loss', 'content': 0.0864294096827507, 'timestamp': '2025-09-10 02:35:01.653267', 'step': 7730, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 144], 'flops': 4271696270016}, 'timestamp': '2025-09-10 02:35:01.683137', 'step': 7730, 'epoch': 2} {'type': 'loss', 'content': 0.209225594997406, 'timestamp': '2025-09-10 02:35:01.685473', 'step': 7731, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 2847885091968}, 'timestamp': '2025-09-10 02:35:01.717428', 'step': 7731, 'epoch': 2} {'type': 'loss', 'content': 0.1858024299144745, 'timestamp': '2025-09-10 02:35:01.740985', 'step': 7732, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 128], 'flops': 3797092544000}, 'timestamp': '2025-09-10 02:35:01.772320', 'step': 7732, 'epoch': 2} {'type': 'loss', 'content': 0.08967027813196182, 'timestamp': '2025-09-10 02:35:01.774485', 'step': 7733, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 2847885091968}, 'timestamp': '2025-09-10 02:35:01.804791', 'step': 7733, 'epoch': 2} {'type': 'loss', 'content': 0.1043093279004097, 'timestamp': '2025-09-10 02:35:01.807245', 'step': 7734, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 128], 'flops': 3797092544000}, 'timestamp': '2025-09-10 02:35:01.839172', 'step': 7734, 'epoch': 2} {'type': 'loss', 'content': 0.10769132524728775, 'timestamp': '2025-09-10 02:35:01.841422', 'step': 7735, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 3322488817984}, 'timestamp': '2025-09-10 02:35:01.871497', 'step': 7735, 'epoch': 2} {'type': 'loss', 'content': 0.08048975467681885, 'timestamp': '2025-09-10 02:35:01.895352', 'step': 7736, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 2847885091968}, 'timestamp': '2025-09-10 02:35:01.925923', 'step': 7736, 'epoch': 2} {'type': 'loss', 'content': 0.13832306861877441, 'timestamp': '2025-09-10 02:35:01.928312', 'step': 7737, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 3322488817984}, 'timestamp': '2025-09-10 02:35:01.958768', 'step': 7737, 'epoch': 2} {'type': 'loss', 'content': 0.12547633051872253, 'timestamp': '2025-09-10 02:35:01.961161', 'step': 7738, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 3322488817984}, 'timestamp': '2025-09-10 02:35:01.991323', 'step': 7738, 'epoch': 2} {'type': 'loss', 'content': 0.12461579591035843, 'timestamp': '2025-09-10 02:35:01.993483', 'step': 7739, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 2847885091968}, 'timestamp': '2025-09-10 02:35:02.023713', 'step': 7739, 'epoch': 2} {'type': 'loss', 'content': 0.15973246097564697, 'timestamp': '2025-09-10 02:35:02.047135', 'step': 7740, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 3322488817984}, 'timestamp': '2025-09-10 02:35:02.077394', 'step': 7740, 'epoch': 2} {'type': 'loss', 'content': 0.054417334496974945, 'timestamp': '2025-09-10 02:35:02.079678', 'step': 7741, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 2847885091968}, 'timestamp': '2025-09-10 02:35:02.110735', 'step': 7741, 'epoch': 2} {'type': 'loss', 'content': 0.13065442442893982, 'timestamp': '2025-09-10 02:35:02.112974', 'step': 7742, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 80], 'flops': 2373281365952}, 'timestamp': '2025-09-10 02:35:02.143233', 'step': 7742, 'epoch': 2} {'type': 'loss', 'content': 0.08864076435565948, 'timestamp': '2025-09-10 02:35:02.145372', 'step': 7743, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 80], 'flops': 2373281365952}, 'timestamp': '2025-09-10 02:35:02.175231', 'step': 7743, 'epoch': 2} {'type': 'loss', 'content': 0.06016741693019867, 'timestamp': '2025-09-10 02:35:02.198778', 'step': 7744, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 80], 'flops': 2373281365952}, 'timestamp': '2025-09-10 02:35:02.230493', 'step': 7744, 'epoch': 2} {'type': 'loss', 'content': 0.15248331427574158, 'timestamp': '2025-09-10 02:35:02.233028', 'step': 7745, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 3322488817984}, 'timestamp': '2025-09-10 02:35:02.262959', 'step': 7745, 'epoch': 2} {'type': 'loss', 'content': 0.2054053694009781, 'timestamp': '2025-09-10 02:35:02.265263', 'step': 7746, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 2847885091968}, 'timestamp': '2025-09-10 02:35:02.295030', 'step': 7746, 'epoch': 2} {'type': 'loss', 'content': 0.21737730503082275, 'timestamp': '2025-09-10 02:35:02.297516', 'step': 7747, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 2847885091968}, 'timestamp': '2025-09-10 02:35:02.328877', 'step': 7747, 'epoch': 2} {'type': 'loss', 'content': 0.255154550075531, 'timestamp': '2025-09-10 02:35:02.352515', 'step': 7748, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 128], 'flops': 3797092544000}, 'timestamp': '2025-09-10 02:35:02.382699', 'step': 7748, 'epoch': 2} {'type': 'loss', 'content': 0.19427098333835602, 'timestamp': '2025-09-10 02:35:02.385029', 'step': 7749, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 80], 'flops': 2373281365952}, 'timestamp': '2025-09-10 02:35:02.415749', 'step': 7749, 'epoch': 2} {'type': 'loss', 'content': 0.10482127964496613, 'timestamp': '2025-09-10 02:35:02.418388', 'step': 7750, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 80], 'flops': 2373281365952}, 'timestamp': '2025-09-10 02:35:02.449783', 'step': 7750, 'epoch': 2} {'type': 'loss', 'content': 0.09242694079875946, 'timestamp': '2025-09-10 02:35:02.454504', 'step': 7751, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 2847885091968}, 'timestamp': '2025-09-10 02:35:02.484880', 'step': 7751, 'epoch': 2} {'type': 'loss', 'content': 0.20669320225715637, 'timestamp': '2025-09-10 02:35:02.508343', 'step': 7752, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 160], 'flops': 4746299996032}, 'timestamp': '2025-09-10 02:35:02.538950', 'step': 7752, 'epoch': 2} {'type': 'loss', 'content': 0.1986754685640335, 'timestamp': '2025-09-10 02:35:02.541340', 'step': 7753, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 3322488817984}, 'timestamp': '2025-09-10 02:35:02.571137', 'step': 7753, 'epoch': 2} {'type': 'loss', 'content': 0.17758211493492126, 'timestamp': '2025-09-10 02:35:02.574636', 'step': 7754, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 2847885091968}, 'timestamp': '2025-09-10 02:35:02.604398', 'step': 7754, 'epoch': 2} {'type': 'loss', 'content': 0.1574569195508957, 'timestamp': '2025-09-10 02:35:02.606520', 'step': 7755, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 3322488817984}, 'timestamp': '2025-09-10 02:35:02.636023', 'step': 7755, 'epoch': 2} {'type': 'loss', 'content': 0.08904263377189636, 'timestamp': '2025-09-10 02:35:02.659569', 'step': 7756, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 144], 'flops': 4271696270016}, 'timestamp': '2025-09-10 02:35:02.689511', 'step': 7756, 'epoch': 2} {'type': 'loss', 'content': 0.13128118216991425, 'timestamp': '2025-09-10 02:35:02.691873', 'step': 7757, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 3322488817984}, 'timestamp': '2025-09-10 02:35:02.721854', 'step': 7757, 'epoch': 2} {'type': 'loss', 'content': 0.12485750019550323, 'timestamp': '2025-09-10 02:35:02.724006', 'step': 7758, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 128], 'flops': 3797092544000}, 'timestamp': '2025-09-10 02:35:02.754978', 'step': 7758, 'epoch': 2} {'type': 'loss', 'content': 0.10027030110359192, 'timestamp': '2025-09-10 02:35:02.757458', 'step': 7759, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 3322488817984}, 'timestamp': '2025-09-10 02:35:02.787732', 'step': 7759, 'epoch': 2} {'type': 'loss', 'content': 0.14679637551307678, 'timestamp': '2025-09-10 02:35:02.811499', 'step': 7760, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 3322488817984}, 'timestamp': '2025-09-10 02:35:02.842115', 'step': 7760, 'epoch': 2} {'type': 'loss', 'content': 0.1922815889120102, 'timestamp': '2025-09-10 02:35:02.848248', 'step': 7761, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 144], 'flops': 4271696270016}, 'timestamp': '2025-09-10 02:35:02.884065', 'step': 7761, 'epoch': 2} {'type': 'loss', 'content': 0.15763360261917114, 'timestamp': '2025-09-10 02:35:02.886589', 'step': 7762, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 2847885091968}, 'timestamp': '2025-09-10 02:35:02.916965', 'step': 7762, 'epoch': 2} {'type': 'loss', 'content': 0.134605273604393, 'timestamp': '2025-09-10 02:35:02.919302', 'step': 7763, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 3322488817984}, 'timestamp': '2025-09-10 02:35:02.950026', 'step': 7763, 'epoch': 2} {'type': 'loss', 'content': 0.13420158624649048, 'timestamp': '2025-09-10 02:35:02.974197', 'step': 7764, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 432], 'flops': 12814563338304}, 'timestamp': '2025-09-10 02:35:03.011695', 'step': 7764, 'epoch': 2} {'type': 'loss', 'content': 0.1091422364115715, 'timestamp': '2025-09-10 02:35:03.027605', 'step': 7765, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 2847885091968}, 'timestamp': '2025-09-10 02:35:03.059513', 'step': 7765, 'epoch': 2} {'type': 'loss', 'content': 0.18765957653522491, 'timestamp': '2025-09-10 02:35:03.062654', 'step': 7766, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 3322488817984}, 'timestamp': '2025-09-10 02:35:03.094028', 'step': 7766, 'epoch': 2} {'type': 'loss', 'content': 0.1409619301557541, 'timestamp': '2025-09-10 02:35:03.096416', 'step': 7767, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 3322488817984}, 'timestamp': '2025-09-10 02:35:03.127308', 'step': 7767, 'epoch': 2} {'type': 'loss', 'content': 0.11110083013772964, 'timestamp': '2025-09-10 02:35:03.151091', 'step': 7768, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 2847885091968}, 'timestamp': '2025-09-10 02:35:03.180935', 'step': 7768, 'epoch': 2} {'type': 'loss', 'content': 0.12983807921409607, 'timestamp': '2025-09-10 02:35:03.183352', 'step': 7769, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 80], 'flops': 2373281365952}, 'timestamp': '2025-09-10 02:35:03.212968', 'step': 7769, 'epoch': 2} {'type': 'loss', 'content': 0.1500413864850998, 'timestamp': '2025-09-10 02:35:03.215398', 'step': 7770, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 2847885091968}, 'timestamp': '2025-09-10 02:35:03.245075', 'step': 7770, 'epoch': 2} {'type': 'loss', 'content': 0.17986711859703064, 'timestamp': '2025-09-10 02:35:03.247842', 'step': 7771, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 128], 'flops': 3797092544000}, 'timestamp': '2025-09-10 02:35:03.278305', 'step': 7771, 'epoch': 2} {'type': 'loss', 'content': 0.08720463514328003, 'timestamp': '2025-09-10 02:35:03.302035', 'step': 7772, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 2847885091968}, 'timestamp': '2025-09-10 02:35:03.334325', 'step': 7772, 'epoch': 2} {'type': 'loss', 'content': 0.07135629653930664, 'timestamp': '2025-09-10 02:35:03.336740', 'step': 7773, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 2847885091968}, 'timestamp': '2025-09-10 02:35:03.367261', 'step': 7773, 'epoch': 2} {'type': 'loss', 'content': 0.12091243267059326, 'timestamp': '2025-09-10 02:35:03.369639', 'step': 7774, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 3322488817984}, 'timestamp': '2025-09-10 02:35:03.399869', 'step': 7774, 'epoch': 2} {'type': 'loss', 'content': 0.11126192659139633, 'timestamp': '2025-09-10 02:35:03.403107', 'step': 7775, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 3322488817984}, 'timestamp': '2025-09-10 02:35:03.436343', 'step': 7775, 'epoch': 2} {'type': 'loss', 'content': 0.09329207986593246, 'timestamp': '2025-09-10 02:35:03.459978', 'step': 7776, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 128], 'flops': 3797092544000}, 'timestamp': '2025-09-10 02:35:03.490641', 'step': 7776, 'epoch': 2} {'type': 'loss', 'content': 0.0949898436665535, 'timestamp': '2025-09-10 02:35:03.493189', 'step': 7777, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 128], 'flops': 3797092544000}, 'timestamp': '2025-09-10 02:35:03.524554', 'step': 7777, 'epoch': 2} {'type': 'loss', 'content': 0.09972254931926727, 'timestamp': '2025-09-10 02:35:03.526912', 'step': 7778, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 2847885091968}, 'timestamp': '2025-09-10 02:35:03.557042', 'step': 7778, 'epoch': 2} {'type': 'loss', 'content': 0.161892831325531, 'timestamp': '2025-09-10 02:35:03.559427', 'step': 7779, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 2847885091968}, 'timestamp': '2025-09-10 02:35:03.589296', 'step': 7779, 'epoch': 2} {'type': 'loss', 'content': 0.14442744851112366, 'timestamp': '2025-09-10 02:35:03.612998', 'step': 7780, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 128], 'flops': 3797092544000}, 'timestamp': '2025-09-10 02:35:03.643275', 'step': 7780, 'epoch': 2} {'type': 'loss', 'content': 0.22139930725097656, 'timestamp': '2025-09-10 02:35:03.646443', 'step': 7781, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 2847885091968}, 'timestamp': '2025-09-10 02:35:03.677023', 'step': 7781, 'epoch': 2} {'type': 'loss', 'content': 0.12304580956697464, 'timestamp': '2025-09-10 02:35:03.679335', 'step': 7782, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 3322488817984}, 'timestamp': '2025-09-10 02:35:03.709211', 'step': 7782, 'epoch': 2} {'type': 'loss', 'content': 0.10453914850950241, 'timestamp': '2025-09-10 02:35:03.713183', 'step': 7783, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 2847885091968}, 'timestamp': '2025-09-10 02:35:03.743519', 'step': 7783, 'epoch': 2} {'type': 'loss', 'content': 0.08614153414964676, 'timestamp': '2025-09-10 02:35:03.767135', 'step': 7784, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 160], 'flops': 4746299996032}, 'timestamp': '2025-09-10 02:35:03.798663', 'step': 7784, 'epoch': 2} {'type': 'loss', 'content': 0.09978561103343964, 'timestamp': '2025-09-10 02:35:03.801276', 'step': 7785, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 3322488817984}, 'timestamp': '2025-09-10 02:35:03.832825', 'step': 7785, 'epoch': 2} {'type': 'loss', 'content': 0.19886183738708496, 'timestamp': '2025-09-10 02:35:03.835419', 'step': 7786, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 3322488817984}, 'timestamp': '2025-09-10 02:35:03.866901', 'step': 7786, 'epoch': 2} {'type': 'loss', 'content': 0.10944568365812302, 'timestamp': '2025-09-10 02:35:03.869392', 'step': 7787, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 3322488817984}, 'timestamp': '2025-09-10 02:35:03.899876', 'step': 7787, 'epoch': 2} {'type': 'loss', 'content': 0.1695399433374405, 'timestamp': '2025-09-10 02:35:03.923416', 'step': 7788, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 2847885091968}, 'timestamp': '2025-09-10 02:35:03.956406', 'step': 7788, 'epoch': 2} {'type': 'loss', 'content': 0.1462208330631256, 'timestamp': '2025-09-10 02:35:03.961708', 'step': 7789, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 144], 'flops': 4271696270016}, 'timestamp': '2025-09-10 02:35:04.002128', 'step': 7789, 'epoch': 2} {'type': 'loss', 'content': 0.07799035310745239, 'timestamp': '2025-09-10 02:35:04.004996', 'step': 7790, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 144], 'flops': 4271696270016}, 'timestamp': '2025-09-10 02:35:04.036247', 'step': 7790, 'epoch': 2} {'type': 'loss', 'content': 0.1720697283744812, 'timestamp': '2025-09-10 02:35:04.038754', 'step': 7791, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 128], 'flops': 3797092544000}, 'timestamp': '2025-09-10 02:35:04.070478', 'step': 7791, 'epoch': 2} {'type': 'loss', 'content': 0.1635095775127411, 'timestamp': '2025-09-10 02:35:04.094365', 'step': 7792, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 128], 'flops': 3797092544000}, 'timestamp': '2025-09-10 02:35:04.125746', 'step': 7792, 'epoch': 2} {'type': 'loss', 'content': 0.09510866552591324, 'timestamp': '2025-09-10 02:35:04.128448', 'step': 7793, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 3322488817984}, 'timestamp': '2025-09-10 02:35:04.158747', 'step': 7793, 'epoch': 2} {'type': 'loss', 'content': 0.18741297721862793, 'timestamp': '2025-09-10 02:35:04.161573', 'step': 7794, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 128], 'flops': 3797092544000}, 'timestamp': '2025-09-10 02:35:04.193320', 'step': 7794, 'epoch': 2} {'type': 'loss', 'content': 0.07316791266202927, 'timestamp': '2025-09-10 02:35:04.195966', 'step': 7795, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 2847885091968}, 'timestamp': '2025-09-10 02:35:04.227821', 'step': 7795, 'epoch': 2} {'type': 'loss', 'content': 0.12421349436044693, 'timestamp': '2025-09-10 02:35:04.251561', 'step': 7796, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 3322488817984}, 'timestamp': '2025-09-10 02:35:04.284323', 'step': 7796, 'epoch': 2} {'type': 'loss', 'content': 0.14982956647872925, 'timestamp': '2025-09-10 02:35:04.286563', 'step': 7797, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 144], 'flops': 4271696270016}, 'timestamp': '2025-09-10 02:35:04.317363', 'step': 7797, 'epoch': 2} {'type': 'loss', 'content': 0.09821944683790207, 'timestamp': '2025-09-10 02:35:04.320772', 'step': 7798, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 144], 'flops': 4271696270016}, 'timestamp': '2025-09-10 02:35:04.351016', 'step': 7798, 'epoch': 2} {'type': 'loss', 'content': 0.13155974447727203, 'timestamp': '2025-09-10 02:35:04.353554', 'step': 7799, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 3322488817984}, 'timestamp': '2025-09-10 02:35:04.384141', 'step': 7799, 'epoch': 2} {'type': 'loss', 'content': 0.14377398788928986, 'timestamp': '2025-09-10 02:35:04.407716', 'step': 7800, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 2847885091968}, 'timestamp': '2025-09-10 02:35:04.438498', 'step': 7800, 'epoch': 2} {'type': 'loss', 'content': 0.1240958720445633, 'timestamp': '2025-09-10 02:35:04.440888', 'step': 7801, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 2847885091968}, 'timestamp': '2025-09-10 02:35:04.470773', 'step': 7801, 'epoch': 2} {'type': 'loss', 'content': 0.13164815306663513, 'timestamp': '2025-09-10 02:35:04.473159', 'step': 7802, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 3322488817984}, 'timestamp': '2025-09-10 02:35:04.504336', 'step': 7802, 'epoch': 2} {'type': 'loss', 'content': 0.10536995530128479, 'timestamp': '2025-09-10 02:35:04.506379', 'step': 7803, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 3322488817984}, 'timestamp': '2025-09-10 02:35:04.536216', 'step': 7803, 'epoch': 2} {'type': 'loss', 'content': 0.1237318366765976, 'timestamp': '2025-09-10 02:35:04.559884', 'step': 7804, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 144], 'flops': 4271696270016}, 'timestamp': '2025-09-10 02:35:04.590302', 'step': 7804, 'epoch': 2} {'type': 'loss', 'content': 0.18434467911720276, 'timestamp': '2025-09-10 02:35:04.592911', 'step': 7805, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 128], 'flops': 3797092544000}, 'timestamp': '2025-09-10 02:35:04.623725', 'step': 7805, 'epoch': 2} {'type': 'loss', 'content': 0.19994215667247772, 'timestamp': '2025-09-10 02:35:04.626128', 'step': 7806, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 128], 'flops': 3797092544000}, 'timestamp': '2025-09-10 02:35:04.656367', 'step': 7806, 'epoch': 2} {'type': 'loss', 'content': 0.1847873032093048, 'timestamp': '2025-09-10 02:35:04.658429', 'step': 7807, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 3322488817984}, 'timestamp': '2025-09-10 02:35:04.688599', 'step': 7807, 'epoch': 2} {'type': 'loss', 'content': 0.0928962230682373, 'timestamp': '2025-09-10 02:35:04.712177', 'step': 7808, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 128], 'flops': 3797092544000}, 'timestamp': '2025-09-10 02:35:04.743164', 'step': 7808, 'epoch': 2} {'type': 'loss', 'content': 0.11163751035928726, 'timestamp': '2025-09-10 02:35:04.745612', 'step': 7809, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 2847885091968}, 'timestamp': '2025-09-10 02:35:04.776472', 'step': 7809, 'epoch': 2} {'type': 'loss', 'content': 0.08589909970760345, 'timestamp': '2025-09-10 02:35:04.779124', 'step': 7810, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 2847885091968}, 'timestamp': '2025-09-10 02:35:04.809288', 'step': 7810, 'epoch': 2} {'type': 'loss', 'content': 0.1332974135875702, 'timestamp': '2025-09-10 02:35:04.811653', 'step': 7811, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 128], 'flops': 3797092544000}, 'timestamp': '2025-09-10 02:35:04.841501', 'step': 7811, 'epoch': 2} {'type': 'loss', 'content': 0.15419895946979523, 'timestamp': '2025-09-10 02:35:04.865612', 'step': 7812, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 144], 'flops': 4271696270016}, 'timestamp': '2025-09-10 02:35:04.898739', 'step': 7812, 'epoch': 2} {'type': 'loss', 'content': 0.06820179522037506, 'timestamp': '2025-09-10 02:35:04.901185', 'step': 7813, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 3322488817984}, 'timestamp': '2025-09-10 02:35:04.931124', 'step': 7813, 'epoch': 2} {'type': 'loss', 'content': 0.07655912637710571, 'timestamp': '2025-09-10 02:35:04.934387', 'step': 7814, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 2847885091968}, 'timestamp': '2025-09-10 02:35:04.966256', 'step': 7814, 'epoch': 2} {'type': 'loss', 'content': 0.06926395744085312, 'timestamp': '2025-09-10 02:35:04.971395', 'step': 7815, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 128], 'flops': 3797092544000}, 'timestamp': '2025-09-10 02:35:05.006810', 'step': 7815, 'epoch': 2} {'type': 'loss', 'content': 0.2645775079727173, 'timestamp': '2025-09-10 02:35:05.030283', 'step': 7816, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 144], 'flops': 4271696270016}, 'timestamp': '2025-09-10 02:35:05.060628', 'step': 7816, 'epoch': 2} {'type': 'loss', 'content': 0.14970973134040833, 'timestamp': '2025-09-10 02:35:05.062690', 'step': 7817, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 128], 'flops': 3797092544000}, 'timestamp': '2025-09-10 02:35:05.092759', 'step': 7817, 'epoch': 2} {'type': 'loss', 'content': 0.06024515628814697, 'timestamp': '2025-09-10 02:35:05.094921', 'step': 7818, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 3322488817984}, 'timestamp': '2025-09-10 02:35:05.125287', 'step': 7818, 'epoch': 2} {'type': 'loss', 'content': 0.08711740374565125, 'timestamp': '2025-09-10 02:35:05.127427', 'step': 7819, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 2847885091968}, 'timestamp': '2025-09-10 02:35:05.159652', 'step': 7819, 'epoch': 2} {'type': 'loss', 'content': 0.17384691536426544, 'timestamp': '2025-09-10 02:35:05.183048', 'step': 7820, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 3322488817984}, 'timestamp': '2025-09-10 02:35:05.213603', 'step': 7820, 'epoch': 2} {'type': 'loss', 'content': 0.07535801082849503, 'timestamp': '2025-09-10 02:35:05.215899', 'step': 7821, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 2847885091968}, 'timestamp': '2025-09-10 02:35:05.245687', 'step': 7821, 'epoch': 2} {'type': 'loss', 'content': 0.1002751812338829, 'timestamp': '2025-09-10 02:35:05.248231', 'step': 7822, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 128], 'flops': 3797092544000}, 'timestamp': '2025-09-10 02:35:05.277957', 'step': 7822, 'epoch': 2} {'type': 'loss', 'content': 0.06347166001796722, 'timestamp': '2025-09-10 02:35:05.280501', 'step': 7823, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 3322488817984}, 'timestamp': '2025-09-10 02:35:05.311191', 'step': 7823, 'epoch': 2} {'type': 'loss', 'content': 0.10508117824792862, 'timestamp': '2025-09-10 02:35:05.334522', 'step': 7824, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 160], 'flops': 4746299996032}, 'timestamp': '2025-09-10 02:35:05.365254', 'step': 7824, 'epoch': 2} {'type': 'loss', 'content': 0.20285505056381226, 'timestamp': '2025-09-10 02:35:05.367486', 'step': 7825, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 2847885091968}, 'timestamp': '2025-09-10 02:35:05.397567', 'step': 7825, 'epoch': 2} {'type': 'loss', 'content': 0.11978472024202347, 'timestamp': '2025-09-10 02:35:05.399687', 'step': 7826, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 144], 'flops': 4271696270016}, 'timestamp': '2025-09-10 02:35:05.431203', 'step': 7826, 'epoch': 2} {'type': 'loss', 'content': 0.29532095789909363, 'timestamp': '2025-09-10 02:35:05.434665', 'step': 7827, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 2847885091968}, 'timestamp': '2025-09-10 02:35:05.464858', 'step': 7827, 'epoch': 2} {'type': 'loss', 'content': 0.10894200205802917, 'timestamp': '2025-09-10 02:35:05.488330', 'step': 7828, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 3322488817984}, 'timestamp': '2025-09-10 02:35:05.519824', 'step': 7828, 'epoch': 2} {'type': 'loss', 'content': 0.1857864409685135, 'timestamp': '2025-09-10 02:35:05.523314', 'step': 7829, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 2847885091968}, 'timestamp': '2025-09-10 02:35:05.553140', 'step': 7829, 'epoch': 2} {'type': 'loss', 'content': 0.057171303778886795, 'timestamp': '2025-09-10 02:35:05.555354', 'step': 7830, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 3322488817984}, 'timestamp': '2025-09-10 02:35:05.585773', 'step': 7830, 'epoch': 2} {'type': 'loss', 'content': 0.12873245775699615, 'timestamp': '2025-09-10 02:35:05.588373', 'step': 7831, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 3322488817984}, 'timestamp': '2025-09-10 02:35:05.618612', 'step': 7831, 'epoch': 2} {'type': 'loss', 'content': 0.18357153236865997, 'timestamp': '2025-09-10 02:35:05.641959', 'step': 7832, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 176], 'flops': 5220903722048}, 'timestamp': '2025-09-10 02:35:05.672164', 'step': 7832, 'epoch': 2} {'type': 'loss', 'content': 0.11489406228065491, 'timestamp': '2025-09-10 02:35:05.674404', 'step': 7833, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 2847885091968}, 'timestamp': '2025-09-10 02:35:05.704826', 'step': 7833, 'epoch': 2} {'type': 'loss', 'content': 0.13711929321289062, 'timestamp': '2025-09-10 02:35:05.707301', 'step': 7834, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 2847885091968}, 'timestamp': '2025-09-10 02:35:05.739403', 'step': 7834, 'epoch': 2} {'type': 'loss', 'content': 0.12371775507926941, 'timestamp': '2025-09-10 02:35:05.741561', 'step': 7835, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 2847885091968}, 'timestamp': '2025-09-10 02:35:05.771767', 'step': 7835, 'epoch': 2} {'type': 'loss', 'content': 0.08038804680109024, 'timestamp': '2025-09-10 02:35:05.795341', 'step': 7836, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 3322488817984}, 'timestamp': '2025-09-10 02:35:05.826166', 'step': 7836, 'epoch': 2} {'type': 'loss', 'content': 0.14701810479164124, 'timestamp': '2025-09-10 02:35:05.828298', 'step': 7837, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 3322488817984}, 'timestamp': '2025-09-10 02:35:05.858123', 'step': 7837, 'epoch': 2} {'type': 'loss', 'content': 0.10291272401809692, 'timestamp': '2025-09-10 02:35:05.866050', 'step': 7838, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 3322488817984}, 'timestamp': '2025-09-10 02:35:05.904477', 'step': 7838, 'epoch': 2} {'type': 'loss', 'content': 0.14704594016075134, 'timestamp': '2025-09-10 02:35:05.906909', 'step': 7839, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 128], 'flops': 3797092544000}, 'timestamp': '2025-09-10 02:35:05.938909', 'step': 7839, 'epoch': 2} {'type': 'loss', 'content': 0.2380610853433609, 'timestamp': '2025-09-10 02:35:05.963689', 'step': 7840, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 2847885091968}, 'timestamp': '2025-09-10 02:35:05.993730', 'step': 7840, 'epoch': 2} {'type': 'loss', 'content': 0.14783290028572083, 'timestamp': '2025-09-10 02:35:05.995974', 'step': 7841, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 3322488817984}, 'timestamp': '2025-09-10 02:35:06.027131', 'step': 7841, 'epoch': 2} {'type': 'loss', 'content': 0.27243727445602417, 'timestamp': '2025-09-10 02:35:06.029455', 'step': 7842, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 128], 'flops': 3797092544000}, 'timestamp': '2025-09-10 02:35:06.059311', 'step': 7842, 'epoch': 2} {'type': 'loss', 'content': 0.061420612037181854, 'timestamp': '2025-09-10 02:35:06.061452', 'step': 7843, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 2847885091968}, 'timestamp': '2025-09-10 02:35:06.090994', 'step': 7843, 'epoch': 2} {'type': 'loss', 'content': 0.11508168280124664, 'timestamp': '2025-09-10 02:35:06.114852', 'step': 7844, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 128], 'flops': 3797092544000}, 'timestamp': '2025-09-10 02:35:06.144410', 'step': 7844, 'epoch': 2} {'type': 'loss', 'content': 0.1514357030391693, 'timestamp': '2025-09-10 02:35:06.146674', 'step': 7845, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 3322488817984}, 'timestamp': '2025-09-10 02:35:06.176635', 'step': 7845, 'epoch': 2} {'type': 'loss', 'content': 0.08366026729345322, 'timestamp': '2025-09-10 02:35:06.178890', 'step': 7846, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 2847885091968}, 'timestamp': '2025-09-10 02:35:06.208824', 'step': 7846, 'epoch': 2} {'type': 'loss', 'content': 0.057078443467617035, 'timestamp': '2025-09-10 02:35:06.211336', 'step': 7847, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 160], 'flops': 4746299996032}, 'timestamp': '2025-09-10 02:35:06.244200', 'step': 7847, 'epoch': 2} {'type': 'loss', 'content': 0.17505571246147156, 'timestamp': '2025-09-10 02:35:06.267814', 'step': 7848, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 2847885091968}, 'timestamp': '2025-09-10 02:35:06.298317', 'step': 7848, 'epoch': 2} {'type': 'loss', 'content': 0.17923329770565033, 'timestamp': '2025-09-10 02:35:06.300473', 'step': 7849, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 80], 'flops': 2373281365952}, 'timestamp': '2025-09-10 02:35:06.330968', 'step': 7849, 'epoch': 2} {'type': 'loss', 'content': 0.1178288459777832, 'timestamp': '2025-09-10 02:35:06.334335', 'step': 7850, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 2847885091968}, 'timestamp': '2025-09-10 02:35:06.365862', 'step': 7850, 'epoch': 2} {'type': 'loss', 'content': 0.14911209046840668, 'timestamp': '2025-09-10 02:35:06.367980', 'step': 7851, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 144], 'flops': 4271696270016}, 'timestamp': '2025-09-10 02:35:06.398199', 'step': 7851, 'epoch': 2} {'type': 'loss', 'content': 0.14942051470279694, 'timestamp': '2025-09-10 02:35:06.421922', 'step': 7852, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 3322488817984}, 'timestamp': '2025-09-10 02:35:06.452801', 'step': 7852, 'epoch': 2} {'type': 'loss', 'content': 0.09577017277479172, 'timestamp': '2025-09-10 02:35:06.455495', 'step': 7853, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 2847885091968}, 'timestamp': '2025-09-10 02:35:06.486905', 'step': 7853, 'epoch': 2} {'type': 'loss', 'content': 0.08388921618461609, 'timestamp': '2025-09-10 02:35:06.489007', 'step': 7854, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 144], 'flops': 4271696270016}, 'timestamp': '2025-09-10 02:35:06.518815', 'step': 7854, 'epoch': 2} {'type': 'loss', 'content': 0.21741290390491486, 'timestamp': '2025-09-10 02:35:06.521458', 'step': 7855, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 3322488817984}, 'timestamp': '2025-09-10 02:35:06.551036', 'step': 7855, 'epoch': 2} {'type': 'loss', 'content': 0.09838024526834488, 'timestamp': '2025-09-10 02:35:06.574456', 'step': 7856, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 128], 'flops': 3797092544000}, 'timestamp': '2025-09-10 02:35:06.605569', 'step': 7856, 'epoch': 2} {'type': 'loss', 'content': 0.10708391666412354, 'timestamp': '2025-09-10 02:35:06.607670', 'step': 7857, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 2847885091968}, 'timestamp': '2025-09-10 02:35:06.637803', 'step': 7857, 'epoch': 2} {'type': 'loss', 'content': 0.2428285777568817, 'timestamp': '2025-09-10 02:35:06.639935', 'step': 7858, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 3322488817984}, 'timestamp': '2025-09-10 02:35:06.669718', 'step': 7858, 'epoch': 2} {'type': 'loss', 'content': 0.12727244198322296, 'timestamp': '2025-09-10 02:35:06.671958', 'step': 7859, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 128], 'flops': 3797092544000}, 'timestamp': '2025-09-10 02:35:06.701594', 'step': 7859, 'epoch': 2} {'type': 'loss', 'content': 0.12395273894071579, 'timestamp': '2025-09-10 02:35:06.725332', 'step': 7860, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 80], 'flops': 2373281365952}, 'timestamp': '2025-09-10 02:35:06.754885', 'step': 7860, 'epoch': 2} {'type': 'loss', 'content': 0.16237995028495789, 'timestamp': '2025-09-10 02:35:06.757093', 'step': 7861, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 2847885091968}, 'timestamp': '2025-09-10 02:35:06.787527', 'step': 7861, 'epoch': 2} {'type': 'loss', 'content': 0.11103933304548264, 'timestamp': '2025-09-10 02:35:06.790087', 'step': 7862, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 144], 'flops': 4271696270016}, 'timestamp': '2025-09-10 02:35:06.820232', 'step': 7862, 'epoch': 2} {'type': 'loss', 'content': 0.1479656994342804, 'timestamp': '2025-09-10 02:35:06.822647', 'step': 7863, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 3322488817984}, 'timestamp': '2025-09-10 02:35:06.852221', 'step': 7863, 'epoch': 2} {'type': 'loss', 'content': 0.07480333000421524, 'timestamp': '2025-09-10 02:35:06.875559', 'step': 7864, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 3322488817984}, 'timestamp': '2025-09-10 02:35:06.905626', 'step': 7864, 'epoch': 2} {'type': 'loss', 'content': 0.17463627457618713, 'timestamp': '2025-09-10 02:35:06.908227', 'step': 7865, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 144], 'flops': 4271696270016}, 'timestamp': '2025-09-10 02:35:06.938649', 'step': 7865, 'epoch': 2} {'type': 'loss', 'content': 0.10764192044734955, 'timestamp': '2025-09-10 02:35:06.941167', 'step': 7866, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 3322488817984}, 'timestamp': '2025-09-10 02:35:06.970798', 'step': 7866, 'epoch': 2} {'type': 'loss', 'content': 0.0740327462553978, 'timestamp': '2025-09-10 02:35:06.973164', 'step': 7867, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 80], 'flops': 2373281365952}, 'timestamp': '2025-09-10 02:35:07.003691', 'step': 7867, 'epoch': 2} {'type': 'loss', 'content': 0.11364998668432236, 'timestamp': '2025-09-10 02:35:07.027365', 'step': 7868, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 80], 'flops': 2373281365952}, 'timestamp': '2025-09-10 02:35:07.057187', 'step': 7868, 'epoch': 2} {'type': 'loss', 'content': 0.128244549036026, 'timestamp': '2025-09-10 02:35:07.059408', 'step': 7869, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 3322488817984}, 'timestamp': '2025-09-10 02:35:07.089083', 'step': 7869, 'epoch': 2} {'type': 'loss', 'content': 0.1356099545955658, 'timestamp': '2025-09-10 02:35:07.091415', 'step': 7870, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 3322488817984}, 'timestamp': '2025-09-10 02:35:07.121651', 'step': 7870, 'epoch': 2} {'type': 'loss', 'content': 0.19504117965698242, 'timestamp': '2025-09-10 02:35:07.124301', 'step': 7871, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 144], 'flops': 4271696270016}, 'timestamp': '2025-09-10 02:35:07.154542', 'step': 7871, 'epoch': 2} {'type': 'loss', 'content': 0.1507827341556549, 'timestamp': '2025-09-10 02:35:07.178701', 'step': 7872, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 3322488817984}, 'timestamp': '2025-09-10 02:35:07.208982', 'step': 7872, 'epoch': 2} {'type': 'loss', 'content': 0.12165416777133942, 'timestamp': '2025-09-10 02:35:07.210852', 'step': 7873, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 3322488817984}, 'timestamp': '2025-09-10 02:35:07.240478', 'step': 7873, 'epoch': 2} {'type': 'loss', 'content': 0.08905628323554993, 'timestamp': '2025-09-10 02:35:07.242918', 'step': 7874, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 128], 'flops': 3797092544000}, 'timestamp': '2025-09-10 02:35:07.272932', 'step': 7874, 'epoch': 2} {'type': 'loss', 'content': 0.09220031648874283, 'timestamp': '2025-09-10 02:35:07.275177', 'step': 7875, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 2847885091968}, 'timestamp': '2025-09-10 02:35:07.305826', 'step': 7875, 'epoch': 2} {'type': 'loss', 'content': 0.08466494083404541, 'timestamp': '2025-09-10 02:35:07.329242', 'step': 7876, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 128], 'flops': 3797092544000}, 'timestamp': '2025-09-10 02:35:07.360440', 'step': 7876, 'epoch': 2} {'type': 'loss', 'content': 0.0766596719622612, 'timestamp': '2025-09-10 02:35:07.362704', 'step': 7877, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 128], 'flops': 3797092544000}, 'timestamp': '2025-09-10 02:35:07.392389', 'step': 7877, 'epoch': 2} {'type': 'loss', 'content': 0.08374323695898056, 'timestamp': '2025-09-10 02:35:07.394627', 'step': 7878, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 144], 'flops': 4271696270016}, 'timestamp': '2025-09-10 02:35:07.424720', 'step': 7878, 'epoch': 2} {'type': 'loss', 'content': 0.07632619142532349, 'timestamp': '2025-09-10 02:35:07.427201', 'step': 7879, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 128], 'flops': 3797092544000}, 'timestamp': '2025-09-10 02:35:07.457692', 'step': 7879, 'epoch': 2} {'type': 'loss', 'content': 0.12052921950817108, 'timestamp': '2025-09-10 02:35:07.482137', 'step': 7880, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 2847885091968}, 'timestamp': '2025-09-10 02:35:07.512282', 'step': 7880, 'epoch': 2} {'type': 'loss', 'content': 0.09063486754894257, 'timestamp': '2025-09-10 02:35:07.514499', 'step': 7881, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 3322488817984}, 'timestamp': '2025-09-10 02:35:07.544884', 'step': 7881, 'epoch': 2} {'type': 'loss', 'content': 0.18321989476680756, 'timestamp': '2025-09-10 02:35:07.547203', 'step': 7882, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 2847885091968}, 'timestamp': '2025-09-10 02:35:07.577131', 'step': 7882, 'epoch': 2} {'type': 'loss', 'content': 0.1202516108751297, 'timestamp': '2025-09-10 02:35:07.580130', 'step': 7883, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 2847885091968}, 'timestamp': '2025-09-10 02:35:07.610194', 'step': 7883, 'epoch': 2} {'type': 'loss', 'content': 0.13248580694198608, 'timestamp': '2025-09-10 02:35:07.633478', 'step': 7884, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 3322488817984}, 'timestamp': '2025-09-10 02:35:07.664031', 'step': 7884, 'epoch': 2} {'type': 'loss', 'content': 0.07623953372240067, 'timestamp': '2025-09-10 02:35:07.666283', 'step': 7885, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 2847885091968}, 'timestamp': '2025-09-10 02:35:07.696254', 'step': 7885, 'epoch': 2} {'type': 'loss', 'content': 0.13300924003124237, 'timestamp': '2025-09-10 02:35:07.699788', 'step': 7886, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 2847885091968}, 'timestamp': '2025-09-10 02:35:07.730992', 'step': 7886, 'epoch': 2} {'type': 'loss', 'content': 0.2275293618440628, 'timestamp': '2025-09-10 02:35:07.733318', 'step': 7887, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 144], 'flops': 4271696270016}, 'timestamp': '2025-09-10 02:35:07.764529', 'step': 7887, 'epoch': 2} {'type': 'loss', 'content': 0.213819682598114, 'timestamp': '2025-09-10 02:35:07.788326', 'step': 7888, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 3322488817984}, 'timestamp': '2025-09-10 02:35:07.818315', 'step': 7888, 'epoch': 2} {'type': 'loss', 'content': 0.10941044241189957, 'timestamp': '2025-09-10 02:35:07.820425', 'step': 7889, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 128], 'flops': 3797092544000}, 'timestamp': '2025-09-10 02:35:07.850846', 'step': 7889, 'epoch': 2} {'type': 'loss', 'content': 0.03261478990316391, 'timestamp': '2025-09-10 02:35:07.852979', 'step': 7890, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 160], 'flops': 4746299996032}, 'timestamp': '2025-09-10 02:35:07.883607', 'step': 7890, 'epoch': 2} {'type': 'loss', 'content': 0.11651746928691864, 'timestamp': '2025-09-10 02:35:07.886332', 'step': 7891, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 128], 'flops': 3797092544000}, 'timestamp': '2025-09-10 02:35:07.916559', 'step': 7891, 'epoch': 2} {'type': 'loss', 'content': 0.17283281683921814, 'timestamp': '2025-09-10 02:35:07.939950', 'step': 7892, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 3322488817984}, 'timestamp': '2025-09-10 02:35:07.971562', 'step': 7892, 'epoch': 2} {'type': 'loss', 'content': 0.09734085202217102, 'timestamp': '2025-09-10 02:35:07.973788', 'step': 7893, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 3322488817984}, 'timestamp': '2025-09-10 02:35:08.003479', 'step': 7893, 'epoch': 2} {'type': 'loss', 'content': 0.06845586001873016, 'timestamp': '2025-09-10 02:35:08.005586', 'step': 7894, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 128], 'flops': 3797092544000}, 'timestamp': '2025-09-10 02:35:08.035976', 'step': 7894, 'epoch': 2} {'type': 'loss', 'content': 0.16769440472126007, 'timestamp': '2025-09-10 02:35:08.038340', 'step': 7895, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 2847885091968}, 'timestamp': '2025-09-10 02:35:08.068016', 'step': 7895, 'epoch': 2} {'type': 'loss', 'content': 0.09473484009504318, 'timestamp': '2025-09-10 02:35:08.091470', 'step': 7896, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 2847885091968}, 'timestamp': '2025-09-10 02:35:08.121791', 'step': 7896, 'epoch': 2} {'type': 'loss', 'content': 0.1696508824825287, 'timestamp': '2025-09-10 02:35:08.123986', 'step': 7897, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 2847885091968}, 'timestamp': '2025-09-10 02:35:08.153877', 'step': 7897, 'epoch': 2} {'type': 'loss', 'content': 0.14298215508460999, 'timestamp': '2025-09-10 02:35:08.156271', 'step': 7898, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 144], 'flops': 4271696270016}, 'timestamp': '2025-09-10 02:35:08.190515', 'step': 7898, 'epoch': 2} {'type': 'loss', 'content': 0.07871724665164948, 'timestamp': '2025-09-10 02:35:08.193056', 'step': 7899, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 3322488817984}, 'timestamp': '2025-09-10 02:35:08.222729', 'step': 7899, 'epoch': 2} {'type': 'loss', 'content': 0.19002170860767365, 'timestamp': '2025-09-10 02:35:08.245991', 'step': 7900, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 3322488817984}, 'timestamp': '2025-09-10 02:35:08.275870', 'step': 7900, 'epoch': 2} {'type': 'loss', 'content': 0.11898909509181976, 'timestamp': '2025-09-10 02:35:08.278435', 'step': 7901, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 128], 'flops': 3797092544000}, 'timestamp': '2025-09-10 02:35:08.308758', 'step': 7901, 'epoch': 2} {'type': 'loss', 'content': 0.0931486040353775, 'timestamp': '2025-09-10 02:35:08.311086', 'step': 7902, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 128], 'flops': 3797092544000}, 'timestamp': '2025-09-10 02:35:08.342731', 'step': 7902, 'epoch': 2} {'type': 'loss', 'content': 0.1431429535150528, 'timestamp': '2025-09-10 02:35:08.344924', 'step': 7903, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 128], 'flops': 3797092544000}, 'timestamp': '2025-09-10 02:35:08.374415', 'step': 7903, 'epoch': 2} {'type': 'loss', 'content': 0.166542649269104, 'timestamp': '2025-09-10 02:35:08.398003', 'step': 7904, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 128], 'flops': 3797092544000}, 'timestamp': '2025-09-10 02:35:08.428247', 'step': 7904, 'epoch': 2} {'type': 'loss', 'content': 0.1557805985212326, 'timestamp': '2025-09-10 02:35:08.430329', 'step': 7905, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 128], 'flops': 3797092544000}, 'timestamp': '2025-09-10 02:35:08.460966', 'step': 7905, 'epoch': 2} {'type': 'loss', 'content': 0.14063239097595215, 'timestamp': '2025-09-10 02:35:08.463066', 'step': 7906, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 80], 'flops': 2373281365952}, 'timestamp': '2025-09-10 02:35:08.492754', 'step': 7906, 'epoch': 2} {'type': 'loss', 'content': 0.1716708093881607, 'timestamp': '2025-09-10 02:35:08.495237', 'step': 7907, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 160], 'flops': 4746299996032}, 'timestamp': '2025-09-10 02:35:08.525945', 'step': 7907, 'epoch': 2} {'type': 'loss', 'content': 0.12031657248735428, 'timestamp': '2025-09-10 02:35:08.549593', 'step': 7908, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 3322488817984}, 'timestamp': '2025-09-10 02:35:08.579736', 'step': 7908, 'epoch': 2} {'type': 'loss', 'content': 0.14754818379878998, 'timestamp': '2025-09-10 02:35:08.582010', 'step': 7909, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 3322488817984}, 'timestamp': '2025-09-10 02:35:08.612533', 'step': 7909, 'epoch': 2} {'type': 'loss', 'content': 0.10661827772855759, 'timestamp': '2025-09-10 02:35:08.614744', 'step': 7910, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 3322488817984}, 'timestamp': '2025-09-10 02:35:08.643997', 'step': 7910, 'epoch': 2} {'type': 'loss', 'content': 0.11747100204229355, 'timestamp': '2025-09-10 02:35:08.646479', 'step': 7911, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 144], 'flops': 4271696270016}, 'timestamp': '2025-09-10 02:35:08.676742', 'step': 7911, 'epoch': 2} {'type': 'loss', 'content': 0.10895533859729767, 'timestamp': '2025-09-10 02:35:08.700326', 'step': 7912, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 2847885091968}, 'timestamp': '2025-09-10 02:35:08.731407', 'step': 7912, 'epoch': 2} {'type': 'loss', 'content': 0.14608366787433624, 'timestamp': '2025-09-10 02:35:08.734238', 'step': 7913, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 160], 'flops': 4746299996032}, 'timestamp': '2025-09-10 02:35:08.764595', 'step': 7913, 'epoch': 2} {'type': 'loss', 'content': 0.1715078204870224, 'timestamp': '2025-09-10 02:35:08.767314', 'step': 7914, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 144], 'flops': 4271696270016}, 'timestamp': '2025-09-10 02:35:08.797521', 'step': 7914, 'epoch': 2} {'type': 'loss', 'content': 0.13814613223075867, 'timestamp': '2025-09-10 02:35:08.799981', 'step': 7915, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 2847885091968}, 'timestamp': '2025-09-10 02:35:08.829847', 'step': 7915, 'epoch': 2} {'type': 'loss', 'content': 0.09849349409341812, 'timestamp': '2025-09-10 02:35:08.853237', 'step': 7916, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 160], 'flops': 4746299996032}, 'timestamp': '2025-09-10 02:35:08.884148', 'step': 7916, 'epoch': 2} {'type': 'loss', 'content': 0.14886802434921265, 'timestamp': '2025-09-10 02:35:08.887107', 'step': 7917, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 2847885091968}, 'timestamp': '2025-09-10 02:35:08.918193', 'step': 7917, 'epoch': 2} {'type': 'loss', 'content': 0.08943699300289154, 'timestamp': '2025-09-10 02:35:08.920825', 'step': 7918, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 128], 'flops': 3797092544000}, 'timestamp': '2025-09-10 02:35:08.951242', 'step': 7918, 'epoch': 2} {'type': 'loss', 'content': 0.11825490742921829, 'timestamp': '2025-09-10 02:35:08.953412', 'step': 7919, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 128], 'flops': 3797092544000}, 'timestamp': '2025-09-10 02:35:08.983476', 'step': 7919, 'epoch': 2} {'type': 'loss', 'content': 0.17692385613918304, 'timestamp': '2025-09-10 02:35:09.007469', 'step': 7920, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 128], 'flops': 3797092544000}, 'timestamp': '2025-09-10 02:35:09.037872', 'step': 7920, 'epoch': 2} {'type': 'loss', 'content': 0.11498235911130905, 'timestamp': '2025-09-10 02:35:09.040470', 'step': 7921, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 3322488817984}, 'timestamp': '2025-09-10 02:35:09.070396', 'step': 7921, 'epoch': 2} {'type': 'loss', 'content': 0.15288479626178741, 'timestamp': '2025-09-10 02:35:09.073806', 'step': 7922, 'epoch': 2} {'type': 'flops', 'content': [{'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1582003754624}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1898404492032}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1898404492032}, {'type': 'perplexity', 'in_batch_dim': [8, 144], 'batch_size': 8, 'flops': 2847606704256}, {'type': 'perplexity', 'in_batch_dim': [8, 64], 'batch_size': 8, 'flops': 1265603017216}, {'type': 'perplexity', 'in_batch_dim': [8, 112], 'batch_size': 8, 'flops': 2214805229440}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1898404492032}, {'type': 'perplexity', 'in_batch_dim': [8, 112], 'batch_size': 8, 'flops': 2214805229440}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1898404492032}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1898404492032}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1898404492032}, {'type': 'perplexity', 'in_batch_dim': [8, 112], 'batch_size': 8, 'flops': 2214805229440}, {'type': 'perplexity', 'in_batch_dim': [8, 112], 'batch_size': 8, 'flops': 2214805229440}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1582003754624}, {'type': 'perplexity', 'in_batch_dim': [8, 144], 'batch_size': 8, 'flops': 2847606704256}, {'type': 'perplexity', 'in_batch_dim': [8, 112], 'batch_size': 8, 'flops': 2214805229440}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1582003754624}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1898404492032}, {'type': 'perplexity', 'in_batch_dim': [8, 64], 'batch_size': 8, 'flops': 1265603017216}, {'type': 'perplexity', 'in_batch_dim': [8, 64], 'batch_size': 8, 'flops': 1265603017216}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1898404492032}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1582003754624}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1582003754624}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1582003754624}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1582003754624}, {'type': 'perplexity', 'in_batch_dim': [8, 64], 'batch_size': 8, 'flops': 1265603017216}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1898404492032}, {'type': 'perplexity', 'in_batch_dim': [8, 64], 'batch_size': 8, 'flops': 1265603017216}, {'type': 'perplexity', 'in_batch_dim': [8, 64], 'batch_size': 8, 'flops': 1265603017216}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1582003754624}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1582003754624}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1582003754624}, {'type': 'perplexity', 'in_batch_dim': [8, 112], 'batch_size': 8, 'flops': 2214805229440}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1582003754624}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1582003754624}, {'type': 'perplexity', 'in_batch_dim': [8, 160], 'batch_size': 8, 'flops': 3164007441664}, {'type': 'perplexity', 'in_batch_dim': [8, 64], 'batch_size': 8, 'flops': 1265603017216}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1898404492032}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1898404492032}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1898404492032}, {'type': 'perplexity', 'in_batch_dim': [8, 64], 'batch_size': 8, 'flops': 1265603017216}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1582003754624}, {'type': 'perplexity', 'in_batch_dim': [8, 64], 'batch_size': 8, 'flops': 1265603017216}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1582003754624}, {'type': 'perplexity', 'in_batch_dim': [8, 64], 'batch_size': 8, 'flops': 1265603017216}, {'type': 'perplexity', 'in_batch_dim': [8, 112], 'batch_size': 8, 'flops': 2214805229440}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1582003754624}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1898404492032}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1898404492032}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1898404492032}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1898404492032}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1898404492032}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1898404492032}, {'type': 'perplexity', 'in_batch_dim': [8, 64], 'batch_size': 8, 'flops': 1265603017216}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1898404492032}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1582003754624}, {'type': 'perplexity', 'in_batch_dim': [8, 64], 'batch_size': 8, 'flops': 1265603017216}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1898404492032}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1898404492032}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1898404492032}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1898404492032}, {'type': 'perplexity', 'in_batch_dim': [8, 128], 'batch_size': 8, 'flops': 2531205966848}, {'type': 'perplexity', 'in_batch_dim': [8, 112], 'batch_size': 8, 'flops': 2214805229440}, {'type': 'perplexity', 'in_batch_dim': [8, 64], 'batch_size': 8, 'flops': 1265603017216}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1582003754624}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1898404492032}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1582003754624}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1898404492032}, {'type': 'perplexity', 'in_batch_dim': [8, 64], 'batch_size': 8, 'flops': 1265603017216}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1898404492032}, {'type': 'perplexity', 'in_batch_dim': [8, 112], 'batch_size': 8, 'flops': 2214805229440}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1898404492032}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1582003754624}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1582003754624}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1582003754624}, {'type': 'perplexity', 'in_batch_dim': [8, 64], 'batch_size': 8, 'flops': 1265603017216}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1582003754624}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1898404492032}, {'type': 'perplexity', 'in_batch_dim': [8, 64], 'batch_size': 8, 'flops': 1265603017216}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1582003754624}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1898404492032}, {'type': 'perplexity', 'in_batch_dim': [8, 64], 'batch_size': 8, 'flops': 1265603017216}, {'type': 'perplexity', 'in_batch_dim': [8, 112], 'batch_size': 8, 'flops': 2214805229440}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1582003754624}, {'type': 'perplexity', 'in_batch_dim': [8, 144], 'batch_size': 8, 'flops': 2847606704256}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1582003754624}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1582003754624}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1582003754624}, {'type': 'perplexity', 'in_batch_dim': [8, 64], 'batch_size': 8, 'flops': 1265603017216}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1898404492032}, {'type': 'perplexity', 'in_batch_dim': [8, 112], 'batch_size': 8, 'flops': 2214805229440}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1898404492032}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1582003754624}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1582003754624}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1582003754624}, {'type': 'perplexity', 'in_batch_dim': [8, 112], 'batch_size': 8, 'flops': 2214805229440}, {'type': 'perplexity', 'in_batch_dim': [8, 64], 'batch_size': 8, 'flops': 1265603017216}, {'type': 'perplexity', 'in_batch_dim': [8, 112], 'batch_size': 8, 'flops': 2214805229440}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1582003754624}, {'type': 'perplexity', 'in_batch_dim': [8, 64], 'batch_size': 8, 'flops': 1265603017216}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1582003754624}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1582003754624}, {'type': 'perplexity', 'in_batch_dim': [8, 64], 'batch_size': 8, 'flops': 1265603017216}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1582003754624}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1582003754624}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1582003754624}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1898404492032}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1582003754624}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1582003754624}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1898404492032}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1898404492032}, {'type': 'perplexity', 'in_batch_dim': [8, 112], 'batch_size': 8, 'flops': 2214805229440}, {'type': 'perplexity', 'in_batch_dim': [8, 64], 'batch_size': 8, 'flops': 1265603017216}, {'type': 'perplexity', 'in_batch_dim': [8, 112], 'batch_size': 8, 'flops': 2214805229440}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1898404492032}, {'type': 'perplexity', 'in_batch_dim': [8, 112], 'batch_size': 8, 'flops': 2214805229440}, {'type': 'perplexity', 'in_batch_dim': [8, 128], 'batch_size': 8, 'flops': 2531205966848}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1582003754624}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1898404492032}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1898404492032}, {'type': 'perplexity', 'in_batch_dim': [8, 192], 'batch_size': 8, 'flops': 3796808916480}, {'type': 'perplexity', 'in_batch_dim': [8, 64], 'batch_size': 8, 'flops': 1265603017216}, {'type': 'perplexity', 'in_batch_dim': [8, 144], 'batch_size': 8, 'flops': 2847606704256}, {'type': 'perplexity', 'in_batch_dim': [8, 64], 'batch_size': 8, 'flops': 1265603017216}, {'type': 'perplexity', 'in_batch_dim': [8, 144], 'batch_size': 8, 'flops': 2847606704256}, {'type': 'perplexity', 'in_batch_dim': [8, 64], 'batch_size': 8, 'flops': 1265603017216}, {'type': 'perplexity', 'in_batch_dim': [8, 64], 'batch_size': 8, 'flops': 1265603017216}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1898404492032}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1898404492032}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1582003754624}, {'type': 'perplexity', 'in_batch_dim': [8, 128], 'batch_size': 8, 'flops': 2531205966848}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1898404492032}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1898404492032}, {'type': 'perplexity', 'in_batch_dim': [8, 112], 'batch_size': 8, 'flops': 2214805229440}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1582003754624}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1582003754624}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1898404492032}, {'type': 'perplexity', 'in_batch_dim': [8, 64], 'batch_size': 8, 'flops': 1265603017216}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1898404492032}, {'type': 'perplexity', 'in_batch_dim': [8, 112], 'batch_size': 8, 'flops': 2214805229440}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1898404492032}, {'type': 'perplexity', 'in_batch_dim': [8, 64], 'batch_size': 8, 'flops': 1265603017216}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1582003754624}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1898404492032}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1898404492032}, {'type': 'perplexity', 'in_batch_dim': [8, 64], 'batch_size': 8, 'flops': 1265603017216}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1898404492032}, {'type': 'perplexity', 'in_batch_dim': [8, 64], 'batch_size': 8, 'flops': 1265603017216}, {'type': 'perplexity', 'in_batch_dim': [8, 112], 'batch_size': 8, 'flops': 2214805229440}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1582003754624}, {'type': 'perplexity', 'in_batch_dim': [8, 128], 'batch_size': 8, 'flops': 2531205966848}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1582003754624}, {'type': 'perplexity', 'in_batch_dim': [8, 112], 'batch_size': 8, 'flops': 2214805229440}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1898404492032}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1582003754624}, {'type': 'perplexity', 'in_batch_dim': [8, 112], 'batch_size': 8, 'flops': 2214805229440}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1582003754624}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1898404492032}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1898404492032}, {'type': 'perplexity', 'in_batch_dim': [8, 128], 'batch_size': 8, 'flops': 2531205966848}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1898404492032}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1898404492032}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1582003754624}, {'type': 'perplexity', 'in_batch_dim': [8, 112], 'batch_size': 8, 'flops': 2214805229440}, {'type': 'perplexity', 'in_batch_dim': [8, 128], 'batch_size': 8, 'flops': 2531205966848}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1898404492032}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1582003754624}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1582003754624}, {'type': 'perplexity', 'in_batch_dim': [8, 128], 'batch_size': 8, 'flops': 2531205966848}, {'type': 'perplexity', 'in_batch_dim': [8, 112], 'batch_size': 8, 'flops': 2214805229440}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1582003754624}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1582003754624}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1898404492032}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1582003754624}, {'type': 'perplexity', 'in_batch_dim': [8, 64], 'batch_size': 8, 'flops': 1265603017216}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1582003754624}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1582003754624}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1582003754624}, {'type': 'perplexity', 'in_batch_dim': [8, 112], 'batch_size': 8, 'flops': 2214805229440}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1582003754624}, {'type': 'perplexity', 'in_batch_dim': [8, 64], 'batch_size': 8, 'flops': 1265603017216}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1582003754624}, {'type': 'perplexity', 'in_batch_dim': [8, 128], 'batch_size': 8, 'flops': 2531205966848}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1898404492032}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1898404492032}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1582003754624}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1898404492032}, {'type': 'perplexity', 'in_batch_dim': [8, 64], 'batch_size': 8, 'flops': 1265603017216}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1898404492032}, {'type': 'perplexity', 'in_batch_dim': [8, 112], 'batch_size': 8, 'flops': 2214805229440}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1898404492032}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1582003754624}, {'type': 'perplexity', 'in_batch_dim': [8, 112], 'batch_size': 8, 'flops': 2214805229440}, {'type': 'perplexity', 'in_batch_dim': [8, 112], 'batch_size': 8, 'flops': 2214805229440}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1898404492032}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1898404492032}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1898404492032}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1898404492032}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1582003754624}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1582003754624}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1582003754624}, {'type': 'perplexity', 'in_batch_dim': [8, 112], 'batch_size': 8, 'flops': 2214805229440}, {'type': 'perplexity', 'in_batch_dim': [8, 64], 'batch_size': 8, 'flops': 1265603017216}, {'type': 'perplexity', 'in_batch_dim': [8, 144], 'batch_size': 8, 'flops': 2847606704256}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1898404492032}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1898404492032}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1898404492032}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1898404492032}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1898404492032}, {'type': 'perplexity', 'in_batch_dim': [8, 112], 'batch_size': 8, 'flops': 2214805229440}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1582003754624}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1582003754624}, {'type': 'perplexity', 'in_batch_dim': [8, 64], 'batch_size': 8, 'flops': 1265603017216}, {'type': 'perplexity', 'in_batch_dim': [8, 112], 'batch_size': 8, 'flops': 2214805229440}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1582003754624}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1898404492032}, {'type': 'perplexity', 'in_batch_dim': [8, 64], 'batch_size': 8, 'flops': 1265603017216}, {'type': 'perplexity', 'in_batch_dim': [8, 112], 'batch_size': 8, 'flops': 2214805229440}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1898404492032}, {'type': 'perplexity', 'in_batch_dim': [8, 64], 'batch_size': 8, 'flops': 1265603017216}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1582003754624}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1582003754624}, {'type': 'perplexity', 'in_batch_dim': [8, 64], 'batch_size': 8, 'flops': 1265603017216}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1898404492032}, {'type': 'perplexity', 'in_batch_dim': [8, 64], 'batch_size': 8, 'flops': 1265603017216}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1582003754624}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1898404492032}, {'type': 'perplexity', 'in_batch_dim': [8, 112], 'batch_size': 8, 'flops': 2214805229440}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1582003754624}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1898404492032}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1582003754624}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1898404492032}, {'type': 'perplexity', 'in_batch_dim': [8, 112], 'batch_size': 8, 'flops': 2214805229440}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1898404492032}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1898404492032}, {'type': 'perplexity', 'in_batch_dim': [8, 64], 'batch_size': 8, 'flops': 1265603017216}, {'type': 'perplexity', 'in_batch_dim': [8, 64], 'batch_size': 8, 'flops': 1265603017216}, {'type': 'perplexity', 'in_batch_dim': [8, 128], 'batch_size': 8, 'flops': 2531205966848}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1582003754624}, {'type': 'perplexity', 'in_batch_dim': [8, 64], 'batch_size': 8, 'flops': 1265603017216}, {'type': 'perplexity', 'in_batch_dim': [8, 112], 'batch_size': 8, 'flops': 2214805229440}, {'type': 'perplexity', 'in_batch_dim': [8, 112], 'batch_size': 8, 'flops': 2214805229440}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1898404492032}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1582003754624}, {'type': 'perplexity', 'in_batch_dim': [8, 128], 'batch_size': 8, 'flops': 2531205966848}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1898404492032}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1582003754624}, {'type': 'perplexity', 'in_batch_dim': [8, 112], 'batch_size': 8, 'flops': 2214805229440}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1898404492032}, {'type': 'perplexity', 'in_batch_dim': [8, 64], 'batch_size': 8, 'flops': 1265603017216}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1582003754624}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1898404492032}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1898404492032}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1582003754624}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1582003754624}, {'type': 'perplexity', 'in_batch_dim': [8, 64], 'batch_size': 8, 'flops': 1265603017216}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1898404492032}, {'type': 'perplexity', 'in_batch_dim': [8, 112], 'batch_size': 8, 'flops': 2214805229440}, {'type': 'perplexity', 'in_batch_dim': [8, 64], 'batch_size': 8, 'flops': 1265603017216}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1898404492032}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1582003754624}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1582003754624}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1898404492032}, {'type': 'perplexity', 'in_batch_dim': [8, 112], 'batch_size': 8, 'flops': 2214805229440}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1898404492032}, {'type': 'perplexity', 'in_batch_dim': [8, 112], 'batch_size': 8, 'flops': 2214805229440}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1582003754624}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1582003754624}, {'type': 'perplexity', 'in_batch_dim': [8, 64], 'batch_size': 8, 'flops': 1265603017216}, {'type': 'perplexity', 'in_batch_dim': [8, 112], 'batch_size': 8, 'flops': 2214805229440}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1582003754624}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1898404492032}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1582003754624}, {'type': 'perplexity', 'in_batch_dim': [8, 112], 'batch_size': 8, 'flops': 2214805229440}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1898404492032}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1898404492032}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1582003754624}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1898404492032}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1582003754624}, {'type': 'perplexity', 'in_batch_dim': [8, 112], 'batch_size': 8, 'flops': 2214805229440}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1582003754624}, {'type': 'perplexity', 'in_batch_dim': [8, 64], 'batch_size': 8, 'flops': 1265603017216}, {'type': 'perplexity', 'in_batch_dim': [8, 64], 'batch_size': 8, 'flops': 1265603017216}, {'type': 'perplexity', 'in_batch_dim': [8, 48], 'batch_size': 8, 'flops': 949202279808}, {'type': 'perplexity', 'in_batch_dim': [8, 112], 'batch_size': 8, 'flops': 2214805229440}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1582003754624}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1898404492032}, {'type': 'perplexity', 'in_batch_dim': [8, 128], 'batch_size': 8, 'flops': 2531205966848}, {'type': 'perplexity', 'in_batch_dim': [8, 112], 'batch_size': 8, 'flops': 2214805229440}, {'type': 'perplexity', 'in_batch_dim': [8, 128], 'batch_size': 8, 'flops': 2531205966848}, {'type': 'perplexity', 'in_batch_dim': [8, 112], 'batch_size': 8, 'flops': 2214805229440}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1582003754624}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1898404492032}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1582003754624}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1582003754624}, {'type': 'perplexity', 'in_batch_dim': [8, 112], 'batch_size': 8, 'flops': 2214805229440}, {'type': 'perplexity', 'in_batch_dim': [8, 112], 'batch_size': 8, 'flops': 2214805229440}, {'type': 'perplexity', 'in_batch_dim': [8, 64], 'batch_size': 8, 'flops': 1265603017216}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1898404492032}, {'type': 'perplexity', 'in_batch_dim': [8, 112], 'batch_size': 8, 'flops': 2214805229440}, {'type': 'perplexity', 'in_batch_dim': [8, 64], 'batch_size': 8, 'flops': 1265603017216}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1898404492032}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1582003754624}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1898404492032}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1898404492032}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1898404492032}, {'type': 'perplexity', 'in_batch_dim': [8, 48], 'batch_size': 8, 'flops': 949202279808}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1898404492032}, {'type': 'perplexity', 'in_batch_dim': [8, 112], 'batch_size': 8, 'flops': 2214805229440}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1582003754624}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1898404492032}, {'type': 'perplexity', 'in_batch_dim': [8, 64], 'batch_size': 8, 'flops': 1265603017216}, {'type': 'perplexity', 'in_batch_dim': [8, 112], 'batch_size': 8, 'flops': 2214805229440}, {'type': 'perplexity', 'in_batch_dim': [8, 64], 'batch_size': 8, 'flops': 1265603017216}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1582003754624}, {'type': 'perplexity', 'in_batch_dim': [8, 64], 'batch_size': 8, 'flops': 1265603017216}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1582003754624}, {'type': 'perplexity', 'in_batch_dim': [8, 112], 'batch_size': 8, 'flops': 2214805229440}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1582003754624}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1582003754624}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1582003754624}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1582003754624}, {'type': 'perplexity', 'in_batch_dim': [8, 112], 'batch_size': 8, 'flops': 2214805229440}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1582003754624}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1898404492032}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1898404492032}, {'type': 'perplexity', 'in_batch_dim': [8, 112], 'batch_size': 8, 'flops': 2214805229440}, {'type': 'perplexity', 'in_batch_dim': [8, 144], 'batch_size': 8, 'flops': 2847606704256}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1582003754624}, {'type': 'perplexity', 'in_batch_dim': [8, 64], 'batch_size': 8, 'flops': 1265603017216}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1898404492032}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1898404492032}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1582003754624}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1898404492032}, {'type': 'perplexity', 'in_batch_dim': [8, 128], 'batch_size': 8, 'flops': 2531205966848}, {'type': 'perplexity', 'in_batch_dim': [8, 112], 'batch_size': 8, 'flops': 2214805229440}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1898404492032}, {'type': 'perplexity', 'in_batch_dim': [8, 64], 'batch_size': 8, 'flops': 1265603017216}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1898404492032}, {'type': 'perplexity', 'in_batch_dim': [8, 160], 'batch_size': 8, 'flops': 3164007441664}, {'type': 'perplexity', 'in_batch_dim': [8, 64], 'batch_size': 8, 'flops': 1265603017216}, {'type': 'perplexity', 'in_batch_dim': [8, 112], 'batch_size': 8, 'flops': 2214805229440}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1898404492032}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1898404492032}, {'type': 'perplexity', 'in_batch_dim': [8, 144], 'batch_size': 8, 'flops': 2847606704256}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1582003754624}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1898404492032}, {'type': 'perplexity', 'in_batch_dim': [8, 64], 'batch_size': 8, 'flops': 1265603017216}, {'type': 'perplexity', 'in_batch_dim': [8, 64], 'batch_size': 8, 'flops': 1265603017216}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1898404492032}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1898404492032}, {'type': 'perplexity', 'in_batch_dim': [8, 112], 'batch_size': 8, 'flops': 2214805229440}, {'type': 'perplexity', 'in_batch_dim': [8, 112], 'batch_size': 8, 'flops': 2214805229440}, {'type': 'perplexity', 'in_batch_dim': [8, 128], 'batch_size': 8, 'flops': 2531205966848}, {'type': 'perplexity', 'in_batch_dim': [8, 112], 'batch_size': 8, 'flops': 2214805229440}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1582003754624}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1582003754624}, {'type': 'perplexity', 'in_batch_dim': [8, 112], 'batch_size': 8, 'flops': 2214805229440}, {'type': 'perplexity', 'in_batch_dim': [8, 128], 'batch_size': 8, 'flops': 2531205966848}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1582003754624}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1582003754624}, {'type': 'perplexity', 'in_batch_dim': [8, 64], 'batch_size': 8, 'flops': 1265603017216}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1898404492032}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1582003754624}, {'type': 'perplexity', 'in_batch_dim': [8, 112], 'batch_size': 8, 'flops': 2214805229440}, {'type': 'perplexity', 'in_batch_dim': [8, 64], 'batch_size': 8, 'flops': 1265603017216}, {'type': 'perplexity', 'in_batch_dim': [8, 64], 'batch_size': 8, 'flops': 1265603017216}, {'type': 'perplexity', 'in_batch_dim': [8, 128], 'batch_size': 8, 'flops': 2531205966848}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1898404492032}, {'type': 'perplexity', 'in_batch_dim': [8, 112], 'batch_size': 8, 'flops': 2214805229440}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1582003754624}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1582003754624}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1582003754624}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1582003754624}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1582003754624}, {'type': 'perplexity', 'in_batch_dim': [8, 112], 'batch_size': 8, 'flops': 2214805229440}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1898404492032}, {'type': 'perplexity', 'in_batch_dim': [8, 112], 'batch_size': 8, 'flops': 2214805229440}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1898404492032}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1582003754624}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1582003754624}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1898404492032}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1898404492032}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1582003754624}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1582003754624}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1582003754624}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1582003754624}, {'type': 'perplexity', 'in_batch_dim': [8, 48], 'batch_size': 8, 'flops': 949202279808}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1582003754624}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1898404492032}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1582003754624}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1898404492032}, {'type': 'perplexity', 'in_batch_dim': [8, 112], 'batch_size': 8, 'flops': 2214805229440}, {'type': 'perplexity', 'in_batch_dim': [8, 128], 'batch_size': 8, 'flops': 2531205966848}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1898404492032}, {'type': 'perplexity', 'in_batch_dim': [8, 64], 'batch_size': 8, 'flops': 1265603017216}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1898404492032}, {'type': 'perplexity', 'in_batch_dim': [8, 64], 'batch_size': 8, 'flops': 1265603017216}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1582003754624}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1582003754624}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1582003754624}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1582003754624}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1898404492032}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1582003754624}, {'type': 'perplexity', 'in_batch_dim': [8, 112], 'batch_size': 8, 'flops': 2214805229440}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1898404492032}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1582003754624}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1898404492032}, {'type': 'perplexity', 'in_batch_dim': [8, 64], 'batch_size': 8, 'flops': 1265603017216}, {'type': 'perplexity', 'in_batch_dim': [8, 64], 'batch_size': 8, 'flops': 1265603017216}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1898404492032}, {'type': 'perplexity', 'in_batch_dim': [8, 64], 'batch_size': 8, 'flops': 1265603017216}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1582003754624}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1582003754624}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1582003754624}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1582003754624}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1582003754624}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1582003754624}, {'type': 'perplexity', 'in_batch_dim': [8, 64], 'batch_size': 8, 'flops': 1265603017216}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1582003754624}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1582003754624}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1898404492032}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1898404492032}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1898404492032}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1898404492032}, {'type': 'perplexity', 'in_batch_dim': [8, 128], 'batch_size': 8, 'flops': 2531205966848}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1582003754624}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1898404492032}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1898404492032}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1898404492032}, {'type': 'perplexity', 'in_batch_dim': [8, 64], 'batch_size': 8, 'flops': 1265603017216}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1898404492032}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1898404492032}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1582003754624}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1898404492032}, {'type': 'perplexity', 'in_batch_dim': [8, 112], 'batch_size': 8, 'flops': 2214805229440}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1898404492032}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1582003754624}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1898404492032}, {'type': 'perplexity', 'in_batch_dim': [8, 64], 'batch_size': 8, 'flops': 1265603017216}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1898404492032}, {'type': 'perplexity', 'in_batch_dim': [8, 112], 'batch_size': 8, 'flops': 2214805229440}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1898404492032}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1898404492032}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1582003754624}, {'type': 'perplexity', 'in_batch_dim': [8, 64], 'batch_size': 8, 'flops': 1265603017216}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1582003754624}, {'type': 'perplexity', 'in_batch_dim': [8, 128], 'batch_size': 8, 'flops': 2531205966848}, {'type': 'perplexity', 'in_batch_dim': [8, 64], 'batch_size': 8, 'flops': 1265603017216}, {'type': 'perplexity', 'in_batch_dim': [8, 128], 'batch_size': 8, 'flops': 2531205966848}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1898404492032}, {'type': 'perplexity', 'in_batch_dim': [8, 112], 'batch_size': 8, 'flops': 2214805229440}, {'type': 'perplexity', 'in_batch_dim': [8, 112], 'batch_size': 8, 'flops': 2214805229440}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1898404492032}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1898404492032}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1582003754624}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1582003754624}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1582003754624}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1898404492032}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1582003754624}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1582003754624}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1898404492032}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1582003754624}, {'type': 'perplexity', 'in_batch_dim': [8, 64], 'batch_size': 8, 'flops': 1265603017216}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1898404492032}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1898404492032}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1898404492032}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1898404492032}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1582003754624}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1898404492032}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1582003754624}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1582003754624}, {'type': 'perplexity', 'in_batch_dim': [8, 112], 'batch_size': 8, 'flops': 2214805229440}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1582003754624}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1898404492032}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1582003754624}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1582003754624}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1582003754624}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1898404492032}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1582003754624}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1898404492032}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1582003754624}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1582003754624}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1898404492032}, {'type': 'perplexity', 'in_batch_dim': [8, 112], 'batch_size': 8, 'flops': 2214805229440}, {'type': 'perplexity', 'in_batch_dim': [8, 128], 'batch_size': 8, 'flops': 2531205966848}, {'type': 'perplexity', 'in_batch_dim': [8, 112], 'batch_size': 8, 'flops': 2214805229440}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1582003754624}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1898404492032}, {'type': 'perplexity', 'in_batch_dim': [8, 64], 'batch_size': 8, 'flops': 1265603017216}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1898404492032}, {'type': 'perplexity', 'in_batch_dim': [8, 64], 'batch_size': 8, 'flops': 1265603017216}, {'type': 'perplexity', 'in_batch_dim': [8, 144], 'batch_size': 8, 'flops': 2847606704256}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1582003754624}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1898404492032}, {'type': 'perplexity', 'in_batch_dim': [8, 112], 'batch_size': 8, 'flops': 2214805229440}, {'type': 'perplexity', 'in_batch_dim': [8, 144], 'batch_size': 8, 'flops': 2847606704256}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1582003754624}, {'type': 'perplexity', 'in_batch_dim': [8, 112], 'batch_size': 8, 'flops': 2214805229440}, {'type': 'perplexity', 'in_batch_dim': [8, 64], 'batch_size': 8, 'flops': 1265603017216}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1582003754624}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1898404492032}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1898404492032}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1582003754624}, {'type': 'perplexity', 'in_batch_dim': [8, 112], 'batch_size': 8, 'flops': 2214805229440}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1582003754624}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1582003754624}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1582003754624}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1582003754624}, {'type': 'perplexity', 'in_batch_dim': [8, 112], 'batch_size': 8, 'flops': 2214805229440}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1898404492032}, {'type': 'perplexity', 'in_batch_dim': [8, 128], 'batch_size': 8, 'flops': 2531205966848}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1582003754624}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1582003754624}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1898404492032}, {'type': 'perplexity', 'in_batch_dim': [8, 64], 'batch_size': 8, 'flops': 1265603017216}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1898404492032}, {'type': 'perplexity', 'in_batch_dim': [8, 64], 'batch_size': 8, 'flops': 1265603017216}, {'type': 'perplexity', 'in_batch_dim': [8, 112], 'batch_size': 8, 'flops': 2214805229440}, {'type': 'perplexity', 'in_batch_dim': [8, 112], 'batch_size': 8, 'flops': 2214805229440}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1582003754624}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1898404492032}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1582003754624}, {'type': 'perplexity', 'in_batch_dim': [8, 64], 'batch_size': 8, 'flops': 1265603017216}, {'type': 'perplexity', 'in_batch_dim': [8, 64], 'batch_size': 8, 'flops': 1265603017216}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1898404492032}, {'type': 'perplexity', 'in_batch_dim': [8, 64], 'batch_size': 8, 'flops': 1265603017216}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1898404492032}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1898404492032}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1582003754624}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1898404492032}, {'type': 'perplexity', 'in_batch_dim': [8, 144], 'batch_size': 8, 'flops': 2847606704256}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1898404492032}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1582003754624}, {'type': 'perplexity', 'in_batch_dim': [8, 128], 'batch_size': 8, 'flops': 2531205966848}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1898404492032}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1898404492032}, {'type': 'perplexity', 'in_batch_dim': [8, 112], 'batch_size': 8, 'flops': 2214805229440}, {'type': 'perplexity', 'in_batch_dim': [8, 112], 'batch_size': 8, 'flops': 2214805229440}, {'type': 'perplexity', 'in_batch_dim': [8, 64], 'batch_size': 8, 'flops': 1265603017216}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1582003754624}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1898404492032}, {'type': 'perplexity', 'in_batch_dim': [8, 128], 'batch_size': 8, 'flops': 2531205966848}, {'type': 'perplexity', 'in_batch_dim': [8, 144], 'batch_size': 8, 'flops': 2847606704256}, {'type': 'perplexity', 'in_batch_dim': [8, 112], 'batch_size': 8, 'flops': 2214805229440}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1582003754624}, {'type': 'perplexity', 'in_batch_dim': [8, 64], 'batch_size': 8, 'flops': 1265603017216}, {'type': 'perplexity', 'in_batch_dim': [8, 64], 'batch_size': 8, 'flops': 1265603017216}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1898404492032}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1582003754624}, {'type': 'perplexity', 'in_batch_dim': [8, 64], 'batch_size': 8, 'flops': 1265603017216}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1898404492032}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1582003754624}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1898404492032}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1898404492032}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1582003754624}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1582003754624}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1582003754624}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1582003754624}, {'type': 'perplexity', 'in_batch_dim': [3, 48], 'batch_size': 8, 'flops': 949202279808}], 'timestamp': '2025-09-10 02:35:17.104636', 'step': 7922, 'epoch': 2} {'type': 'pplx', 'content': 10766.819977919582, 'timestamp': '2025-09-10 02:35:17.107992', 'step': 7922, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 2847885091968}, 'timestamp': '2025-09-10 02:35:17.137159', 'step': 7922, 'epoch': 2} {'type': 'loss', 'content': 0.06439675390720367, 'timestamp': '2025-09-10 02:35:17.139626', 'step': 7923, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 3322488817984}, 'timestamp': '2025-09-10 02:35:17.170750', 'step': 7923, 'epoch': 2} {'type': 'loss', 'content': 0.15337267518043518, 'timestamp': '2025-09-10 02:35:17.194108', 'step': 7924, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 144], 'flops': 4271696270016}, 'timestamp': '2025-09-10 02:35:17.236800', 'step': 7924, 'epoch': 2} {'type': 'loss', 'content': 0.0989186018705368, 'timestamp': '2025-09-10 02:35:17.242379', 'step': 7925, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 3322488817984}, 'timestamp': '2025-09-10 02:35:17.278698', 'step': 7925, 'epoch': 2} {'type': 'loss', 'content': 0.08451154083013535, 'timestamp': '2025-09-10 02:35:17.281181', 'step': 7926, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 3322488817984}, 'timestamp': '2025-09-10 02:35:17.313236', 'step': 7926, 'epoch': 2} {'type': 'loss', 'content': 0.09762044250965118, 'timestamp': '2025-09-10 02:35:17.315587', 'step': 7927, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 3322488817984}, 'timestamp': '2025-09-10 02:35:17.345581', 'step': 7927, 'epoch': 2} {'type': 'loss', 'content': 0.13877511024475098, 'timestamp': '2025-09-10 02:35:17.369043', 'step': 7928, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 3322488817984}, 'timestamp': '2025-09-10 02:35:17.403045', 'step': 7928, 'epoch': 2} {'type': 'loss', 'content': 0.16484926640987396, 'timestamp': '2025-09-10 02:35:17.405290', 'step': 7929, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 3322488817984}, 'timestamp': '2025-09-10 02:35:17.437284', 'step': 7929, 'epoch': 2} {'type': 'loss', 'content': 0.0651295855641365, 'timestamp': '2025-09-10 02:35:17.439425', 'step': 7930, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 2847885091968}, 'timestamp': '2025-09-10 02:35:17.483908', 'step': 7930, 'epoch': 2} {'type': 'loss', 'content': 0.18669123947620392, 'timestamp': '2025-09-10 02:35:17.486235', 'step': 7931, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 3322488817984}, 'timestamp': '2025-09-10 02:35:17.520056', 'step': 7931, 'epoch': 2} {'type': 'loss', 'content': 0.13871152698993683, 'timestamp': '2025-09-10 02:35:17.543584', 'step': 7932, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 128], 'flops': 3797092544000}, 'timestamp': '2025-09-10 02:35:17.573677', 'step': 7932, 'epoch': 2} {'type': 'loss', 'content': 0.09586146473884583, 'timestamp': '2025-09-10 02:35:17.577081', 'step': 7933, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 2847885091968}, 'timestamp': '2025-09-10 02:35:17.623492', 'step': 7933, 'epoch': 2} {'type': 'loss', 'content': 0.05813094973564148, 'timestamp': '2025-09-10 02:35:17.625905', 'step': 7934, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 128], 'flops': 3797092544000}, 'timestamp': '2025-09-10 02:35:17.656969', 'step': 7934, 'epoch': 2} {'type': 'loss', 'content': 0.19223348796367645, 'timestamp': '2025-09-10 02:35:17.659150', 'step': 7935, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 3322488817984}, 'timestamp': '2025-09-10 02:35:17.689279', 'step': 7935, 'epoch': 2} {'type': 'loss', 'content': 0.05763409286737442, 'timestamp': '2025-09-10 02:35:17.716977', 'step': 7936, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 3322488817984}, 'timestamp': '2025-09-10 02:35:17.748533', 'step': 7936, 'epoch': 2} {'type': 'loss', 'content': 0.13496580719947815, 'timestamp': '2025-09-10 02:35:17.750793', 'step': 7937, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 2847885091968}, 'timestamp': '2025-09-10 02:35:17.792513', 'step': 7937, 'epoch': 2} {'type': 'loss', 'content': 0.10188925266265869, 'timestamp': '2025-09-10 02:35:17.795243', 'step': 7938, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 160], 'flops': 4746299996032}, 'timestamp': '2025-09-10 02:35:17.827089', 'step': 7938, 'epoch': 2} {'type': 'loss', 'content': 0.11869747191667557, 'timestamp': '2025-09-10 02:35:17.829725', 'step': 7939, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 2847885091968}, 'timestamp': '2025-09-10 02:35:17.860017', 'step': 7939, 'epoch': 2} {'type': 'loss', 'content': 0.259259968996048, 'timestamp': '2025-09-10 02:35:17.884083', 'step': 7940, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 176], 'flops': 5220903722048}, 'timestamp': '2025-09-10 02:35:17.915695', 'step': 7940, 'epoch': 2} {'type': 'loss', 'content': 0.14367452263832092, 'timestamp': '2025-09-10 02:35:17.921854', 'step': 7941, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 128], 'flops': 3797092544000}, 'timestamp': '2025-09-10 02:35:17.953962', 'step': 7941, 'epoch': 2} {'type': 'loss', 'content': 0.13866391777992249, 'timestamp': '2025-09-10 02:35:17.956233', 'step': 7942, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 80], 'flops': 2373281365952}, 'timestamp': '2025-09-10 02:35:17.988187', 'step': 7942, 'epoch': 2} {'type': 'loss', 'content': 0.09901859611272812, 'timestamp': '2025-09-10 02:35:17.990406', 'step': 7943, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 2847885091968}, 'timestamp': '2025-09-10 02:35:18.021884', 'step': 7943, 'epoch': 2} {'type': 'loss', 'content': 0.16308937966823578, 'timestamp': '2025-09-10 02:35:18.045516', 'step': 7944, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 3322488817984}, 'timestamp': '2025-09-10 02:35:18.075951', 'step': 7944, 'epoch': 2} {'type': 'loss', 'content': 0.08006000518798828, 'timestamp': '2025-09-10 02:35:18.078286', 'step': 7945, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 128], 'flops': 3797092544000}, 'timestamp': '2025-09-10 02:35:18.108736', 'step': 7945, 'epoch': 2} {'type': 'loss', 'content': 0.08876915276050568, 'timestamp': '2025-09-10 02:35:18.113355', 'step': 7946, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 144], 'flops': 4271696270016}, 'timestamp': '2025-09-10 02:35:18.146939', 'step': 7946, 'epoch': 2} {'type': 'loss', 'content': 0.03689694032073021, 'timestamp': '2025-09-10 02:35:18.150981', 'step': 7947, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 128], 'flops': 3797092544000}, 'timestamp': '2025-09-10 02:35:18.186284', 'step': 7947, 'epoch': 2} {'type': 'loss', 'content': 0.15968935191631317, 'timestamp': '2025-09-10 02:35:18.209821', 'step': 7948, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 2847885091968}, 'timestamp': '2025-09-10 02:35:18.239792', 'step': 7948, 'epoch': 2} {'type': 'loss', 'content': 0.14145933091640472, 'timestamp': '2025-09-10 02:35:18.242246', 'step': 7949, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 128], 'flops': 3797092544000}, 'timestamp': '2025-09-10 02:35:18.273223', 'step': 7949, 'epoch': 2} {'type': 'loss', 'content': 0.17182546854019165, 'timestamp': '2025-09-10 02:35:18.275583', 'step': 7950, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 3322488817984}, 'timestamp': '2025-09-10 02:35:18.305478', 'step': 7950, 'epoch': 2} {'type': 'loss', 'content': 0.18611139059066772, 'timestamp': '2025-09-10 02:35:18.307712', 'step': 7951, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 128], 'flops': 3797092544000}, 'timestamp': '2025-09-10 02:35:18.342781', 'step': 7951, 'epoch': 2} {'type': 'loss', 'content': 0.09649676829576492, 'timestamp': '2025-09-10 02:35:18.366025', 'step': 7952, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 128], 'flops': 3797092544000}, 'timestamp': '2025-09-10 02:35:18.396185', 'step': 7952, 'epoch': 2} {'type': 'loss', 'content': 0.09439653903245926, 'timestamp': '2025-09-10 02:35:18.399892', 'step': 7953, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 2847885091968}, 'timestamp': '2025-09-10 02:35:18.429996', 'step': 7953, 'epoch': 2} {'type': 'loss', 'content': 0.1450764685869217, 'timestamp': '2025-09-10 02:35:18.432448', 'step': 7954, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 128], 'flops': 3797092544000}, 'timestamp': '2025-09-10 02:35:18.462639', 'step': 7954, 'epoch': 2} {'type': 'loss', 'content': 0.15275831520557404, 'timestamp': '2025-09-10 02:35:18.464853', 'step': 7955, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 80], 'flops': 2373281365952}, 'timestamp': '2025-09-10 02:35:18.495264', 'step': 7955, 'epoch': 2} {'type': 'loss', 'content': 0.18602794408798218, 'timestamp': '2025-09-10 02:35:18.518721', 'step': 7956, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 160], 'flops': 4746299996032}, 'timestamp': '2025-09-10 02:35:18.549783', 'step': 7956, 'epoch': 2} {'type': 'loss', 'content': 0.1262282282114029, 'timestamp': '2025-09-10 02:35:18.551977', 'step': 7957, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 2847885091968}, 'timestamp': '2025-09-10 02:35:18.581987', 'step': 7957, 'epoch': 2} {'type': 'loss', 'content': 0.12498012185096741, 'timestamp': '2025-09-10 02:35:18.584218', 'step': 7958, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 128], 'flops': 3797092544000}, 'timestamp': '2025-09-10 02:35:18.617197', 'step': 7958, 'epoch': 2} {'type': 'loss', 'content': 0.04677126184105873, 'timestamp': '2025-09-10 02:35:18.619521', 'step': 7959, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 3322488817984}, 'timestamp': '2025-09-10 02:35:18.650527', 'step': 7959, 'epoch': 2} {'type': 'loss', 'content': 0.06104226037859917, 'timestamp': '2025-09-10 02:35:18.674069', 'step': 7960, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 144], 'flops': 4271696270016}, 'timestamp': '2025-09-10 02:35:18.704485', 'step': 7960, 'epoch': 2} {'type': 'loss', 'content': 0.05395980179309845, 'timestamp': '2025-09-10 02:35:18.706802', 'step': 7961, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 2847885091968}, 'timestamp': '2025-09-10 02:35:18.736961', 'step': 7961, 'epoch': 2} {'type': 'loss', 'content': 0.16415441036224365, 'timestamp': '2025-09-10 02:35:18.739074', 'step': 7962, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 3322488817984}, 'timestamp': '2025-09-10 02:35:18.768407', 'step': 7962, 'epoch': 2} {'type': 'loss', 'content': 0.09602735936641693, 'timestamp': '2025-09-10 02:35:18.770826', 'step': 7963, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 128], 'flops': 3797092544000}, 'timestamp': '2025-09-10 02:35:18.801189', 'step': 7963, 'epoch': 2} {'type': 'loss', 'content': 0.06903168559074402, 'timestamp': '2025-09-10 02:35:18.827014', 'step': 7964, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 144], 'flops': 4271696270016}, 'timestamp': '2025-09-10 02:35:18.858309', 'step': 7964, 'epoch': 2} {'type': 'loss', 'content': 0.06801041960716248, 'timestamp': '2025-09-10 02:35:18.860583', 'step': 7965, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 128], 'flops': 3797092544000}, 'timestamp': '2025-09-10 02:35:18.890637', 'step': 7965, 'epoch': 2} {'type': 'loss', 'content': 0.13697008788585663, 'timestamp': '2025-09-10 02:35:18.893193', 'step': 7966, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 2847885091968}, 'timestamp': '2025-09-10 02:35:18.923552', 'step': 7966, 'epoch': 2} {'type': 'loss', 'content': 0.18594476580619812, 'timestamp': '2025-09-10 02:35:18.925892', 'step': 7967, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 2847885091968}, 'timestamp': '2025-09-10 02:35:18.957076', 'step': 7967, 'epoch': 2} {'type': 'loss', 'content': 0.07889854162931442, 'timestamp': '2025-09-10 02:35:18.980232', 'step': 7968, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 3322488817984}, 'timestamp': '2025-09-10 02:35:19.011136', 'step': 7968, 'epoch': 2} {'type': 'loss', 'content': 0.1336614191532135, 'timestamp': '2025-09-10 02:35:19.014091', 'step': 7969, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 2847885091968}, 'timestamp': '2025-09-10 02:35:19.044214', 'step': 7969, 'epoch': 2} {'type': 'loss', 'content': 0.08504971861839294, 'timestamp': '2025-09-10 02:35:19.046665', 'step': 7970, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 80], 'flops': 2373281365952}, 'timestamp': '2025-09-10 02:35:19.077295', 'step': 7970, 'epoch': 2} {'type': 'loss', 'content': 0.06580179929733276, 'timestamp': '2025-09-10 02:35:19.079857', 'step': 7971, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 2847885091968}, 'timestamp': '2025-09-10 02:35:19.110308', 'step': 7971, 'epoch': 2} {'type': 'loss', 'content': 0.08431702852249146, 'timestamp': '2025-09-10 02:35:19.135486', 'step': 7972, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 2847885091968}, 'timestamp': '2025-09-10 02:35:19.166599', 'step': 7972, 'epoch': 2} {'type': 'loss', 'content': 0.20143835246562958, 'timestamp': '2025-09-10 02:35:19.173515', 'step': 7973, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 128], 'flops': 3797092544000}, 'timestamp': '2025-09-10 02:35:19.205909', 'step': 7973, 'epoch': 2} {'type': 'loss', 'content': 0.13498543202877045, 'timestamp': '2025-09-10 02:35:19.209698', 'step': 7974, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 128], 'flops': 3797092544000}, 'timestamp': '2025-09-10 02:35:19.240240', 'step': 7974, 'epoch': 2} {'type': 'loss', 'content': 0.1356506049633026, 'timestamp': '2025-09-10 02:35:19.242594', 'step': 7975, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 128], 'flops': 3797092544000}, 'timestamp': '2025-09-10 02:35:19.272954', 'step': 7975, 'epoch': 2} {'type': 'loss', 'content': 0.10177556425333023, 'timestamp': '2025-09-10 02:35:19.296587', 'step': 7976, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 2847885091968}, 'timestamp': '2025-09-10 02:35:19.326790', 'step': 7976, 'epoch': 2} {'type': 'loss', 'content': 0.15410445630550385, 'timestamp': '2025-09-10 02:35:19.329076', 'step': 7977, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 2847885091968}, 'timestamp': '2025-09-10 02:35:19.359688', 'step': 7977, 'epoch': 2} {'type': 'loss', 'content': 0.0825304463505745, 'timestamp': '2025-09-10 02:35:19.362096', 'step': 7978, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 3322488817984}, 'timestamp': '2025-09-10 02:35:19.393018', 'step': 7978, 'epoch': 2} {'type': 'loss', 'content': 0.14701774716377258, 'timestamp': '2025-09-10 02:35:19.395467', 'step': 7979, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 144], 'flops': 4271696270016}, 'timestamp': '2025-09-10 02:35:19.425713', 'step': 7979, 'epoch': 2} {'type': 'loss', 'content': 0.07488248497247696, 'timestamp': '2025-09-10 02:35:19.449149', 'step': 7980, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 80], 'flops': 2373281365952}, 'timestamp': '2025-09-10 02:35:19.478773', 'step': 7980, 'epoch': 2} {'type': 'loss', 'content': 0.09252927452325821, 'timestamp': '2025-09-10 02:35:19.481179', 'step': 7981, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 3322488817984}, 'timestamp': '2025-09-10 02:35:19.511694', 'step': 7981, 'epoch': 2} {'type': 'loss', 'content': 0.2562251091003418, 'timestamp': '2025-09-10 02:35:19.516117', 'step': 7982, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 3322488817984}, 'timestamp': '2025-09-10 02:35:19.546321', 'step': 7982, 'epoch': 2} {'type': 'loss', 'content': 0.10429184883832932, 'timestamp': '2025-09-10 02:35:19.548459', 'step': 7983, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 2847885091968}, 'timestamp': '2025-09-10 02:35:19.578235', 'step': 7983, 'epoch': 2} {'type': 'loss', 'content': 0.04984594136476517, 'timestamp': '2025-09-10 02:35:19.601659', 'step': 7984, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 2847885091968}, 'timestamp': '2025-09-10 02:35:19.631956', 'step': 7984, 'epoch': 2} {'type': 'loss', 'content': 0.05593819171190262, 'timestamp': '2025-09-10 02:35:19.634325', 'step': 7985, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 144], 'flops': 4271696270016}, 'timestamp': '2025-09-10 02:35:19.664387', 'step': 7985, 'epoch': 2} {'type': 'loss', 'content': 0.16780930757522583, 'timestamp': '2025-09-10 02:35:19.666628', 'step': 7986, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 2847885091968}, 'timestamp': '2025-09-10 02:35:19.696434', 'step': 7986, 'epoch': 2} {'type': 'loss', 'content': 0.1353367269039154, 'timestamp': '2025-09-10 02:35:19.698967', 'step': 7987, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 176], 'flops': 5220903722048}, 'timestamp': '2025-09-10 02:35:19.729662', 'step': 7987, 'epoch': 2} {'type': 'loss', 'content': 0.1777905523777008, 'timestamp': '2025-09-10 02:35:19.754640', 'step': 7988, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 128], 'flops': 3797092544000}, 'timestamp': '2025-09-10 02:35:19.784907', 'step': 7988, 'epoch': 2} {'type': 'loss', 'content': 0.10983887314796448, 'timestamp': '2025-09-10 02:35:19.787326', 'step': 7989, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 3322488817984}, 'timestamp': '2025-09-10 02:35:19.818342', 'step': 7989, 'epoch': 2} {'type': 'loss', 'content': 0.140898659825325, 'timestamp': '2025-09-10 02:35:19.820584', 'step': 7990, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 128], 'flops': 3797092544000}, 'timestamp': '2025-09-10 02:35:19.851293', 'step': 7990, 'epoch': 2} {'type': 'loss', 'content': 0.09806334972381592, 'timestamp': '2025-09-10 02:35:19.853525', 'step': 7991, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 2847885091968}, 'timestamp': '2025-09-10 02:35:19.882956', 'step': 7991, 'epoch': 2} {'type': 'loss', 'content': 0.06013879552483559, 'timestamp': '2025-09-10 02:35:19.906636', 'step': 7992, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 192], 'flops': 5695507448064}, 'timestamp': '2025-09-10 02:35:19.940297', 'step': 7992, 'epoch': 2} {'type': 'loss', 'content': 0.12449812144041061, 'timestamp': '2025-09-10 02:35:19.942706', 'step': 7993, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 176], 'flops': 5220903722048}, 'timestamp': '2025-09-10 02:35:19.973556', 'step': 7993, 'epoch': 2} {'type': 'loss', 'content': 0.10446386784315109, 'timestamp': '2025-09-10 02:35:19.977734', 'step': 7994, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 3322488817984}, 'timestamp': '2025-09-10 02:35:20.009474', 'step': 7994, 'epoch': 2} {'type': 'loss', 'content': 0.19648875296115875, 'timestamp': '2025-09-10 02:35:20.012589', 'step': 7995, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 144], 'flops': 4271696270016}, 'timestamp': '2025-09-10 02:35:20.044800', 'step': 7995, 'epoch': 2} {'type': 'loss', 'content': 0.17926691472530365, 'timestamp': '2025-09-10 02:35:20.073500', 'step': 7996, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 3322488817984}, 'timestamp': '2025-09-10 02:35:20.108963', 'step': 7996, 'epoch': 2} {'type': 'loss', 'content': 0.07305103540420532, 'timestamp': '2025-09-10 02:35:20.114358', 'step': 7997, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 3322488817984}, 'timestamp': '2025-09-10 02:35:20.156449', 'step': 7997, 'epoch': 2} {'type': 'loss', 'content': 0.08906116336584091, 'timestamp': '2025-09-10 02:35:20.159785', 'step': 7998, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 3322488817984}, 'timestamp': '2025-09-10 02:35:20.195395', 'step': 7998, 'epoch': 2} {'type': 'loss', 'content': 0.19747576117515564, 'timestamp': '2025-09-10 02:35:20.197480', 'step': 7999, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 2847885091968}, 'timestamp': '2025-09-10 02:35:20.229492', 'step': 7999, 'epoch': 2} {'type': 'loss', 'content': 0.1501264125108719, 'timestamp': '2025-09-10 02:35:20.254555', 'step': 8000, 'epoch': 2} {'type': 'info', 'content': 'Checkpoint saved at step 8000', 'timestamp': '2025-09-10 02:35:26.523740', 'step': 8000, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 3322488817984}, 'timestamp': '2025-09-10 02:35:26.563326', 'step': 8000, 'epoch': 2} {'type': 'loss', 'content': 0.11711043864488602, 'timestamp': '2025-09-10 02:35:26.565607', 'step': 8001, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 3322488817984}, 'timestamp': '2025-09-10 02:35:26.597610', 'step': 8001, 'epoch': 2} {'type': 'loss', 'content': 0.10513573884963989, 'timestamp': '2025-09-10 02:35:26.600111', 'step': 8002, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 3322488817984}, 'timestamp': '2025-09-10 02:35:26.632022', 'step': 8002, 'epoch': 2} {'type': 'loss', 'content': 0.07588793337345123, 'timestamp': '2025-09-10 02:35:26.634504', 'step': 8003, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 128], 'flops': 3797092544000}, 'timestamp': '2025-09-10 02:35:26.664532', 'step': 8003, 'epoch': 2} {'type': 'loss', 'content': 0.07858744263648987, 'timestamp': '2025-09-10 02:35:26.688314', 'step': 8004, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 2847885091968}, 'timestamp': '2025-09-10 02:35:26.719116', 'step': 8004, 'epoch': 2} {'type': 'loss', 'content': 0.11387301981449127, 'timestamp': '2025-09-10 02:35:26.721300', 'step': 8005, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 3322488817984}, 'timestamp': '2025-09-10 02:35:26.751288', 'step': 8005, 'epoch': 2} {'type': 'loss', 'content': 0.13772456347942352, 'timestamp': '2025-09-10 02:35:26.753697', 'step': 8006, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 2847885091968}, 'timestamp': '2025-09-10 02:35:26.783828', 'step': 8006, 'epoch': 2} {'type': 'loss', 'content': 0.11555995047092438, 'timestamp': '2025-09-10 02:35:26.786330', 'step': 8007, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 144], 'flops': 4271696270016}, 'timestamp': '2025-09-10 02:35:26.816677', 'step': 8007, 'epoch': 2} {'type': 'loss', 'content': 0.15918992459774017, 'timestamp': '2025-09-10 02:35:26.840097', 'step': 8008, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 2847885091968}, 'timestamp': '2025-09-10 02:35:26.870503', 'step': 8008, 'epoch': 2} {'type': 'loss', 'content': 0.14370310306549072, 'timestamp': '2025-09-10 02:35:26.872645', 'step': 8009, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 3322488817984}, 'timestamp': '2025-09-10 02:35:26.902389', 'step': 8009, 'epoch': 2} {'type': 'loss', 'content': 0.075681671500206, 'timestamp': '2025-09-10 02:35:26.904590', 'step': 8010, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 3322488817984}, 'timestamp': '2025-09-10 02:35:26.936717', 'step': 8010, 'epoch': 2} {'type': 'loss', 'content': 0.07904897630214691, 'timestamp': '2025-09-10 02:35:26.939344', 'step': 8011, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 3322488817984}, 'timestamp': '2025-09-10 02:35:26.969375', 'step': 8011, 'epoch': 2} {'type': 'loss', 'content': 0.15798285603523254, 'timestamp': '2025-09-10 02:35:26.992863', 'step': 8012, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 2847885091968}, 'timestamp': '2025-09-10 02:35:27.024028', 'step': 8012, 'epoch': 2} {'type': 'loss', 'content': 0.14232569932937622, 'timestamp': '2025-09-10 02:35:27.027000', 'step': 8013, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 2847885091968}, 'timestamp': '2025-09-10 02:35:27.057837', 'step': 8013, 'epoch': 2} {'type': 'loss', 'content': 0.12238762527704239, 'timestamp': '2025-09-10 02:35:27.060197', 'step': 8014, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 3322488817984}, 'timestamp': '2025-09-10 02:35:27.090407', 'step': 8014, 'epoch': 2} {'type': 'loss', 'content': 0.11009003221988678, 'timestamp': '2025-09-10 02:35:27.093968', 'step': 8015, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 3322488817984}, 'timestamp': '2025-09-10 02:35:27.126462', 'step': 8015, 'epoch': 2} {'type': 'loss', 'content': 0.1071920171380043, 'timestamp': '2025-09-10 02:35:27.149950', 'step': 8016, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 3322488817984}, 'timestamp': '2025-09-10 02:35:27.180511', 'step': 8016, 'epoch': 2} {'type': 'loss', 'content': 0.06847653537988663, 'timestamp': '2025-09-10 02:35:27.182965', 'step': 8017, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 128], 'flops': 3797092544000}, 'timestamp': '2025-09-10 02:35:27.213350', 'step': 8017, 'epoch': 2} {'type': 'loss', 'content': 0.06319503486156464, 'timestamp': '2025-09-10 02:35:27.215749', 'step': 8018, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 3322488817984}, 'timestamp': '2025-09-10 02:35:27.247116', 'step': 8018, 'epoch': 2} {'type': 'loss', 'content': 0.09517364203929901, 'timestamp': '2025-09-10 02:35:27.249401', 'step': 8019, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 128], 'flops': 3797092544000}, 'timestamp': '2025-09-10 02:35:27.279862', 'step': 8019, 'epoch': 2} {'type': 'loss', 'content': 0.12275416404008865, 'timestamp': '2025-09-10 02:35:27.303148', 'step': 8020, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 3322488817984}, 'timestamp': '2025-09-10 02:35:27.333278', 'step': 8020, 'epoch': 2} {'type': 'loss', 'content': 0.16808699071407318, 'timestamp': '2025-09-10 02:35:27.335723', 'step': 8021, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 2847885091968}, 'timestamp': '2025-09-10 02:35:27.365678', 'step': 8021, 'epoch': 2} {'type': 'loss', 'content': 0.06162087246775627, 'timestamp': '2025-09-10 02:35:27.367941', 'step': 8022, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 144], 'flops': 4271696270016}, 'timestamp': '2025-09-10 02:35:27.399771', 'step': 8022, 'epoch': 2} {'type': 'loss', 'content': 0.10324589908123016, 'timestamp': '2025-09-10 02:35:27.402286', 'step': 8023, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 2847885091968}, 'timestamp': '2025-09-10 02:35:27.432213', 'step': 8023, 'epoch': 2} {'type': 'loss', 'content': 0.23484529554843903, 'timestamp': '2025-09-10 02:35:27.455867', 'step': 8024, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 192], 'flops': 5695507448064}, 'timestamp': '2025-09-10 02:35:27.487202', 'step': 8024, 'epoch': 2} {'type': 'loss', 'content': 0.16857385635375977, 'timestamp': '2025-09-10 02:35:27.489446', 'step': 8025, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 3322488817984}, 'timestamp': '2025-09-10 02:35:27.519122', 'step': 8025, 'epoch': 2} {'type': 'loss', 'content': 0.1360621303319931, 'timestamp': '2025-09-10 02:35:27.521369', 'step': 8026, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 2847885091968}, 'timestamp': '2025-09-10 02:35:27.551117', 'step': 8026, 'epoch': 2} {'type': 'loss', 'content': 0.164216086268425, 'timestamp': '2025-09-10 02:35:27.553263', 'step': 8027, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 128], 'flops': 3797092544000}, 'timestamp': '2025-09-10 02:35:27.583750', 'step': 8027, 'epoch': 2} {'type': 'loss', 'content': 0.08478499203920364, 'timestamp': '2025-09-10 02:35:27.607331', 'step': 8028, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 3322488817984}, 'timestamp': '2025-09-10 02:35:27.637929', 'step': 8028, 'epoch': 2} {'type': 'loss', 'content': 0.11923660337924957, 'timestamp': '2025-09-10 02:35:27.640674', 'step': 8029, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 3322488817984}, 'timestamp': '2025-09-10 02:35:27.670445', 'step': 8029, 'epoch': 2} {'type': 'loss', 'content': 0.13423582911491394, 'timestamp': '2025-09-10 02:35:27.672740', 'step': 8030, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 2847885091968}, 'timestamp': '2025-09-10 02:35:27.702821', 'step': 8030, 'epoch': 2} {'type': 'loss', 'content': 0.13876213133335114, 'timestamp': '2025-09-10 02:35:27.705028', 'step': 8031, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 3322488817984}, 'timestamp': '2025-09-10 02:35:27.735280', 'step': 8031, 'epoch': 2} {'type': 'loss', 'content': 0.08171947300434113, 'timestamp': '2025-09-10 02:35:27.758649', 'step': 8032, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 3322488817984}, 'timestamp': '2025-09-10 02:35:27.789394', 'step': 8032, 'epoch': 2} {'type': 'loss', 'content': 0.058539606630802155, 'timestamp': '2025-09-10 02:35:27.791589', 'step': 8033, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 144], 'flops': 4271696270016}, 'timestamp': '2025-09-10 02:35:27.822795', 'step': 8033, 'epoch': 2} {'type': 'loss', 'content': 0.286714643239975, 'timestamp': '2025-09-10 02:35:27.825052', 'step': 8034, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 3322488817984}, 'timestamp': '2025-09-10 02:35:27.854880', 'step': 8034, 'epoch': 2} {'type': 'loss', 'content': 0.07368244975805283, 'timestamp': '2025-09-10 02:35:27.857127', 'step': 8035, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 128], 'flops': 3797092544000}, 'timestamp': '2025-09-10 02:35:27.888956', 'step': 8035, 'epoch': 2} {'type': 'loss', 'content': 0.10191936045885086, 'timestamp': '2025-09-10 02:35:27.912275', 'step': 8036, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 2847885091968}, 'timestamp': '2025-09-10 02:35:27.942662', 'step': 8036, 'epoch': 2} {'type': 'loss', 'content': 0.06287332624197006, 'timestamp': '2025-09-10 02:35:27.945413', 'step': 8037, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 128], 'flops': 3797092544000}, 'timestamp': '2025-09-10 02:35:27.975131', 'step': 8037, 'epoch': 2} {'type': 'loss', 'content': 0.228152334690094, 'timestamp': '2025-09-10 02:35:27.977653', 'step': 8038, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 3322488817984}, 'timestamp': '2025-09-10 02:35:28.008957', 'step': 8038, 'epoch': 2} {'type': 'loss', 'content': 0.19708064198493958, 'timestamp': '2025-09-10 02:35:28.012286', 'step': 8039, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 144], 'flops': 4271696270016}, 'timestamp': '2025-09-10 02:35:28.042889', 'step': 8039, 'epoch': 2} {'type': 'loss', 'content': 0.14556628465652466, 'timestamp': '2025-09-10 02:35:28.066571', 'step': 8040, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 128], 'flops': 3797092544000}, 'timestamp': '2025-09-10 02:35:28.097007', 'step': 8040, 'epoch': 2} {'type': 'loss', 'content': 0.08633075654506683, 'timestamp': '2025-09-10 02:35:28.099091', 'step': 8041, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 128], 'flops': 3797092544000}, 'timestamp': '2025-09-10 02:35:28.128578', 'step': 8041, 'epoch': 2} {'type': 'loss', 'content': 0.0819757878780365, 'timestamp': '2025-09-10 02:35:28.130879', 'step': 8042, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 3322488817984}, 'timestamp': '2025-09-10 02:35:28.162128', 'step': 8042, 'epoch': 2} {'type': 'loss', 'content': 0.09111200273036957, 'timestamp': '2025-09-10 02:35:28.164362', 'step': 8043, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 3322488817984}, 'timestamp': '2025-09-10 02:35:28.193983', 'step': 8043, 'epoch': 2} {'type': 'loss', 'content': 0.1922103613615036, 'timestamp': '2025-09-10 02:35:28.217672', 'step': 8044, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 144], 'flops': 4271696270016}, 'timestamp': '2025-09-10 02:35:28.250224', 'step': 8044, 'epoch': 2} {'type': 'loss', 'content': 0.15763624012470245, 'timestamp': '2025-09-10 02:35:28.252329', 'step': 8045, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 224], 'flops': 6644714900096}, 'timestamp': '2025-09-10 02:35:28.282375', 'step': 8045, 'epoch': 2} {'type': 'loss', 'content': 0.08885146677494049, 'timestamp': '2025-09-10 02:35:28.289768', 'step': 8046, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 128], 'flops': 3797092544000}, 'timestamp': '2025-09-10 02:35:28.320351', 'step': 8046, 'epoch': 2} {'type': 'loss', 'content': 0.11913053691387177, 'timestamp': '2025-09-10 02:35:28.322394', 'step': 8047, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 2847885091968}, 'timestamp': '2025-09-10 02:35:28.353537', 'step': 8047, 'epoch': 2} {'type': 'loss', 'content': 0.12550626695156097, 'timestamp': '2025-09-10 02:35:28.377153', 'step': 8048, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 2847885091968}, 'timestamp': '2025-09-10 02:35:28.407815', 'step': 8048, 'epoch': 2} {'type': 'loss', 'content': 0.20762239396572113, 'timestamp': '2025-09-10 02:35:28.410360', 'step': 8049, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 3322488817984}, 'timestamp': '2025-09-10 02:35:28.441441', 'step': 8049, 'epoch': 2} {'type': 'loss', 'content': 0.11760586500167847, 'timestamp': '2025-09-10 02:35:28.443875', 'step': 8050, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 160], 'flops': 4746299996032}, 'timestamp': '2025-09-10 02:35:28.474196', 'step': 8050, 'epoch': 2} {'type': 'loss', 'content': 0.1268884837627411, 'timestamp': '2025-09-10 02:35:28.476932', 'step': 8051, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 224], 'flops': 6644714900096}, 'timestamp': '2025-09-10 02:35:28.508129', 'step': 8051, 'epoch': 2} {'type': 'loss', 'content': 0.13079720735549927, 'timestamp': '2025-09-10 02:35:28.536188', 'step': 8052, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 2847885091968}, 'timestamp': '2025-09-10 02:35:28.568839', 'step': 8052, 'epoch': 2} {'type': 'loss', 'content': 0.1415143460035324, 'timestamp': '2025-09-10 02:35:28.570986', 'step': 8053, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 128], 'flops': 3797092544000}, 'timestamp': '2025-09-10 02:35:28.601532', 'step': 8053, 'epoch': 2} {'type': 'loss', 'content': 0.09211014211177826, 'timestamp': '2025-09-10 02:35:28.603766', 'step': 8054, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 3322488817984}, 'timestamp': '2025-09-10 02:35:28.634019', 'step': 8054, 'epoch': 2} {'type': 'loss', 'content': 0.1200605258345604, 'timestamp': '2025-09-10 02:35:28.637198', 'step': 8055, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 3322488817984}, 'timestamp': '2025-09-10 02:35:28.666774', 'step': 8055, 'epoch': 2} {'type': 'loss', 'content': 0.2497985064983368, 'timestamp': '2025-09-10 02:35:28.690502', 'step': 8056, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 144], 'flops': 4271696270016}, 'timestamp': '2025-09-10 02:35:28.720816', 'step': 8056, 'epoch': 2} {'type': 'loss', 'content': 0.13306377828121185, 'timestamp': '2025-09-10 02:35:28.722950', 'step': 8057, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 2847885091968}, 'timestamp': '2025-09-10 02:35:28.752782', 'step': 8057, 'epoch': 2} {'type': 'loss', 'content': 0.1404944658279419, 'timestamp': '2025-09-10 02:35:28.754966', 'step': 8058, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 128], 'flops': 3797092544000}, 'timestamp': '2025-09-10 02:35:28.785025', 'step': 8058, 'epoch': 2} {'type': 'loss', 'content': 0.12130942940711975, 'timestamp': '2025-09-10 02:35:28.787322', 'step': 8059, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 160], 'flops': 4746299996032}, 'timestamp': '2025-09-10 02:35:28.817453', 'step': 8059, 'epoch': 2} {'type': 'loss', 'content': 0.1255791038274765, 'timestamp': '2025-09-10 02:35:28.841039', 'step': 8060, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 3322488817984}, 'timestamp': '2025-09-10 02:35:28.871678', 'step': 8060, 'epoch': 2} {'type': 'loss', 'content': 0.09052511304616928, 'timestamp': '2025-09-10 02:35:28.873889', 'step': 8061, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 144], 'flops': 4271696270016}, 'timestamp': '2025-09-10 02:35:28.904342', 'step': 8061, 'epoch': 2} {'type': 'loss', 'content': 0.2335635870695114, 'timestamp': '2025-09-10 02:35:28.906748', 'step': 8062, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 2847885091968}, 'timestamp': '2025-09-10 02:35:28.936876', 'step': 8062, 'epoch': 2} {'type': 'loss', 'content': 0.141896590590477, 'timestamp': '2025-09-10 02:35:28.938859', 'step': 8063, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 3322488817984}, 'timestamp': '2025-09-10 02:35:28.968450', 'step': 8063, 'epoch': 2} {'type': 'loss', 'content': 0.13040362298488617, 'timestamp': '2025-09-10 02:35:28.991900', 'step': 8064, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 144], 'flops': 4271696270016}, 'timestamp': '2025-09-10 02:35:29.022479', 'step': 8064, 'epoch': 2} {'type': 'loss', 'content': 0.1322021335363388, 'timestamp': '2025-09-10 02:35:29.024840', 'step': 8065, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 3322488817984}, 'timestamp': '2025-09-10 02:35:29.054755', 'step': 8065, 'epoch': 2} {'type': 'loss', 'content': 0.07949093729257584, 'timestamp': '2025-09-10 02:35:29.057206', 'step': 8066, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 3322488817984}, 'timestamp': '2025-09-10 02:35:29.087554', 'step': 8066, 'epoch': 2} {'type': 'loss', 'content': 0.06421338766813278, 'timestamp': '2025-09-10 02:35:29.089807', 'step': 8067, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 2847885091968}, 'timestamp': '2025-09-10 02:35:29.120747', 'step': 8067, 'epoch': 2} {'type': 'loss', 'content': 0.157235249876976, 'timestamp': '2025-09-10 02:35:29.144340', 'step': 8068, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 2847885091968}, 'timestamp': '2025-09-10 02:35:29.174634', 'step': 8068, 'epoch': 2} {'type': 'loss', 'content': 0.03596925362944603, 'timestamp': '2025-09-10 02:35:29.177239', 'step': 8069, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 128], 'flops': 3797092544000}, 'timestamp': '2025-09-10 02:35:29.207134', 'step': 8069, 'epoch': 2} {'type': 'loss', 'content': 0.13928882777690887, 'timestamp': '2025-09-10 02:35:29.209390', 'step': 8070, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 128], 'flops': 3797092544000}, 'timestamp': '2025-09-10 02:35:29.239764', 'step': 8070, 'epoch': 2} {'type': 'loss', 'content': 0.1860787719488144, 'timestamp': '2025-09-10 02:35:29.247562', 'step': 8071, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 128], 'flops': 3797092544000}, 'timestamp': '2025-09-10 02:35:29.278440', 'step': 8071, 'epoch': 2} {'type': 'loss', 'content': 0.15719039738178253, 'timestamp': '2025-09-10 02:35:29.302016', 'step': 8072, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 3322488817984}, 'timestamp': '2025-09-10 02:35:29.332327', 'step': 8072, 'epoch': 2} {'type': 'loss', 'content': 0.08353747427463531, 'timestamp': '2025-09-10 02:35:29.339776', 'step': 8073, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 3322488817984}, 'timestamp': '2025-09-10 02:35:29.374615', 'step': 8073, 'epoch': 2} {'type': 'loss', 'content': 0.09775883704423904, 'timestamp': '2025-09-10 02:35:29.376856', 'step': 8074, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 128], 'flops': 3797092544000}, 'timestamp': '2025-09-10 02:35:29.407637', 'step': 8074, 'epoch': 2} {'type': 'loss', 'content': 0.18719525635242462, 'timestamp': '2025-09-10 02:35:29.412482', 'step': 8075, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 3322488817984}, 'timestamp': '2025-09-10 02:35:29.445617', 'step': 8075, 'epoch': 2} {'type': 'loss', 'content': 0.14201997220516205, 'timestamp': '2025-09-10 02:35:29.471173', 'step': 8076, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 128], 'flops': 3797092544000}, 'timestamp': '2025-09-10 02:35:29.515960', 'step': 8076, 'epoch': 2} {'type': 'loss', 'content': 0.14826805889606476, 'timestamp': '2025-09-10 02:35:29.518695', 'step': 8077, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 2847885091968}, 'timestamp': '2025-09-10 02:35:29.549153', 'step': 8077, 'epoch': 2} {'type': 'loss', 'content': 0.1829831600189209, 'timestamp': '2025-09-10 02:35:29.551622', 'step': 8078, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 128], 'flops': 3797092544000}, 'timestamp': '2025-09-10 02:35:29.582324', 'step': 8078, 'epoch': 2} {'type': 'loss', 'content': 0.14535291492938995, 'timestamp': '2025-09-10 02:35:29.584509', 'step': 8079, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 80], 'flops': 2373281365952}, 'timestamp': '2025-09-10 02:35:29.617676', 'step': 8079, 'epoch': 2} {'type': 'loss', 'content': 0.1791960895061493, 'timestamp': '2025-09-10 02:35:29.641230', 'step': 8080, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 128], 'flops': 3797092544000}, 'timestamp': '2025-09-10 02:35:29.683500', 'step': 8080, 'epoch': 2} {'type': 'loss', 'content': 0.1365203559398651, 'timestamp': '2025-09-10 02:35:29.685674', 'step': 8081, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 144], 'flops': 4271696270016}, 'timestamp': '2025-09-10 02:35:29.716604', 'step': 8081, 'epoch': 2} {'type': 'loss', 'content': 0.1745753139257431, 'timestamp': '2025-09-10 02:35:29.718942', 'step': 8082, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 3322488817984}, 'timestamp': '2025-09-10 02:35:29.750063', 'step': 8082, 'epoch': 2} {'type': 'loss', 'content': 0.1706235408782959, 'timestamp': '2025-09-10 02:35:29.752719', 'step': 8083, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 2847885091968}, 'timestamp': '2025-09-10 02:35:29.783485', 'step': 8083, 'epoch': 2} {'type': 'loss', 'content': 0.15565964579582214, 'timestamp': '2025-09-10 02:35:29.806652', 'step': 8084, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 3322488817984}, 'timestamp': '2025-09-10 02:35:29.837065', 'step': 8084, 'epoch': 2} {'type': 'loss', 'content': 0.10865513235330582, 'timestamp': '2025-09-10 02:35:29.839250', 'step': 8085, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 2847885091968}, 'timestamp': '2025-09-10 02:35:29.870963', 'step': 8085, 'epoch': 2} {'type': 'loss', 'content': 0.2070627212524414, 'timestamp': '2025-09-10 02:35:29.873285', 'step': 8086, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 3322488817984}, 'timestamp': '2025-09-10 02:35:29.903784', 'step': 8086, 'epoch': 2} {'type': 'loss', 'content': 0.21631428599357605, 'timestamp': '2025-09-10 02:35:29.907588', 'step': 8087, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 80], 'flops': 2373281365952}, 'timestamp': '2025-09-10 02:35:29.944343', 'step': 8087, 'epoch': 2} {'type': 'loss', 'content': 0.10946745425462723, 'timestamp': '2025-09-10 02:35:29.969224', 'step': 8088, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 128], 'flops': 3797092544000}, 'timestamp': '2025-09-10 02:35:30.014293', 'step': 8088, 'epoch': 2} {'type': 'loss', 'content': 0.1567525863647461, 'timestamp': '2025-09-10 02:35:30.016623', 'step': 8089, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 128], 'flops': 3797092544000}, 'timestamp': '2025-09-10 02:35:30.057238', 'step': 8089, 'epoch': 2} {'type': 'loss', 'content': 0.057982273399829865, 'timestamp': '2025-09-10 02:35:30.059510', 'step': 8090, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 3322488817984}, 'timestamp': '2025-09-10 02:35:30.089592', 'step': 8090, 'epoch': 2} {'type': 'loss', 'content': 0.12209701538085938, 'timestamp': '2025-09-10 02:35:30.091942', 'step': 8091, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 3322488817984}, 'timestamp': '2025-09-10 02:35:30.123251', 'step': 8091, 'epoch': 2} {'type': 'loss', 'content': 0.11244718730449677, 'timestamp': '2025-09-10 02:35:30.146753', 'step': 8092, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 128], 'flops': 3797092544000}, 'timestamp': '2025-09-10 02:35:30.180225', 'step': 8092, 'epoch': 2} {'type': 'loss', 'content': 0.140837162733078, 'timestamp': '2025-09-10 02:35:30.183980', 'step': 8093, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 128], 'flops': 3797092544000}, 'timestamp': '2025-09-10 02:35:30.214134', 'step': 8093, 'epoch': 2} {'type': 'loss', 'content': 0.15893803536891937, 'timestamp': '2025-09-10 02:35:30.217156', 'step': 8094, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 144], 'flops': 4271696270016}, 'timestamp': '2025-09-10 02:35:30.252671', 'step': 8094, 'epoch': 2} {'type': 'loss', 'content': 0.108072929084301, 'timestamp': '2025-09-10 02:35:30.255398', 'step': 8095, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 2847885091968}, 'timestamp': '2025-09-10 02:35:30.285893', 'step': 8095, 'epoch': 2} {'type': 'loss', 'content': 0.12539096176624298, 'timestamp': '2025-09-10 02:35:30.309307', 'step': 8096, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 128], 'flops': 3797092544000}, 'timestamp': '2025-09-10 02:35:30.339549', 'step': 8096, 'epoch': 2} {'type': 'loss', 'content': 0.10501203685998917, 'timestamp': '2025-09-10 02:35:30.341896', 'step': 8097, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 2847885091968}, 'timestamp': '2025-09-10 02:35:30.371802', 'step': 8097, 'epoch': 2} {'type': 'loss', 'content': 0.12555785477161407, 'timestamp': '2025-09-10 02:35:30.374333', 'step': 8098, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 3322488817984}, 'timestamp': '2025-09-10 02:35:30.405315', 'step': 8098, 'epoch': 2} {'type': 'loss', 'content': 0.1566930115222931, 'timestamp': '2025-09-10 02:35:30.407484', 'step': 8099, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 144], 'flops': 4271696270016}, 'timestamp': '2025-09-10 02:35:30.437990', 'step': 8099, 'epoch': 2} {'type': 'loss', 'content': 0.08949551731348038, 'timestamp': '2025-09-10 02:35:30.461895', 'step': 8100, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 3322488817984}, 'timestamp': '2025-09-10 02:35:30.492678', 'step': 8100, 'epoch': 2} {'type': 'loss', 'content': 0.07897763699293137, 'timestamp': '2025-09-10 02:35:30.495394', 'step': 8101, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 3322488817984}, 'timestamp': '2025-09-10 02:35:30.525473', 'step': 8101, 'epoch': 2} {'type': 'loss', 'content': 0.060858357697725296, 'timestamp': '2025-09-10 02:35:30.528009', 'step': 8102, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 3322488817984}, 'timestamp': '2025-09-10 02:35:30.557956', 'step': 8102, 'epoch': 2} {'type': 'loss', 'content': 0.13628675043582916, 'timestamp': '2025-09-10 02:35:30.560497', 'step': 8103, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 3322488817984}, 'timestamp': '2025-09-10 02:35:30.590319', 'step': 8103, 'epoch': 2} {'type': 'loss', 'content': 0.12269160151481628, 'timestamp': '2025-09-10 02:35:30.614303', 'step': 8104, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 128], 'flops': 3797092544000}, 'timestamp': '2025-09-10 02:35:30.645024', 'step': 8104, 'epoch': 2} {'type': 'loss', 'content': 0.14415207505226135, 'timestamp': '2025-09-10 02:35:30.647417', 'step': 8105, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 3322488817984}, 'timestamp': '2025-09-10 02:35:30.677820', 'step': 8105, 'epoch': 2} {'type': 'loss', 'content': 0.09138987213373184, 'timestamp': '2025-09-10 02:35:30.680404', 'step': 8106, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 128], 'flops': 3797092544000}, 'timestamp': '2025-09-10 02:35:30.710372', 'step': 8106, 'epoch': 2} {'type': 'loss', 'content': 0.17309792339801788, 'timestamp': '2025-09-10 02:35:30.712579', 'step': 8107, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 128], 'flops': 3797092544000}, 'timestamp': '2025-09-10 02:35:30.742328', 'step': 8107, 'epoch': 2} {'type': 'loss', 'content': 0.1705635040998459, 'timestamp': '2025-09-10 02:35:30.765861', 'step': 8108, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 3322488817984}, 'timestamp': '2025-09-10 02:35:30.795963', 'step': 8108, 'epoch': 2} {'type': 'loss', 'content': 0.17043191194534302, 'timestamp': '2025-09-10 02:35:30.798128', 'step': 8109, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 128], 'flops': 3797092544000}, 'timestamp': '2025-09-10 02:35:30.828169', 'step': 8109, 'epoch': 2} {'type': 'loss', 'content': 0.09496653825044632, 'timestamp': '2025-09-10 02:35:30.830504', 'step': 8110, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 160], 'flops': 4746299996032}, 'timestamp': '2025-09-10 02:35:30.861356', 'step': 8110, 'epoch': 2} {'type': 'loss', 'content': 0.1613730788230896, 'timestamp': '2025-09-10 02:35:30.864247', 'step': 8111, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 2847885091968}, 'timestamp': '2025-09-10 02:35:30.894157', 'step': 8111, 'epoch': 2} {'type': 'loss', 'content': 0.05298392474651337, 'timestamp': '2025-09-10 02:35:30.917549', 'step': 8112, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 144], 'flops': 4271696270016}, 'timestamp': '2025-09-10 02:35:30.948155', 'step': 8112, 'epoch': 2} {'type': 'loss', 'content': 0.13100838661193848, 'timestamp': '2025-09-10 02:35:30.950240', 'step': 8113, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 3322488817984}, 'timestamp': '2025-09-10 02:35:30.980185', 'step': 8113, 'epoch': 2} {'type': 'loss', 'content': 0.12600383162498474, 'timestamp': '2025-09-10 02:35:30.982727', 'step': 8114, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 144], 'flops': 4271696270016}, 'timestamp': '2025-09-10 02:35:31.014706', 'step': 8114, 'epoch': 2} {'type': 'loss', 'content': 0.175247922539711, 'timestamp': '2025-09-10 02:35:31.016989', 'step': 8115, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 3322488817984}, 'timestamp': '2025-09-10 02:35:31.047628', 'step': 8115, 'epoch': 2} {'type': 'loss', 'content': 0.07962021976709366, 'timestamp': '2025-09-10 02:35:31.071322', 'step': 8116, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 2847885091968}, 'timestamp': '2025-09-10 02:35:31.103706', 'step': 8116, 'epoch': 2} {'type': 'loss', 'content': 0.10232146084308624, 'timestamp': '2025-09-10 02:35:31.106111', 'step': 8117, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 3322488817984}, 'timestamp': '2025-09-10 02:35:31.136177', 'step': 8117, 'epoch': 2} {'type': 'loss', 'content': 0.0978061705827713, 'timestamp': '2025-09-10 02:35:31.138435', 'step': 8118, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 2847885091968}, 'timestamp': '2025-09-10 02:35:31.169481', 'step': 8118, 'epoch': 2} {'type': 'loss', 'content': 0.09793012589216232, 'timestamp': '2025-09-10 02:35:31.171751', 'step': 8119, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 3322488817984}, 'timestamp': '2025-09-10 02:35:31.202219', 'step': 8119, 'epoch': 2} {'type': 'loss', 'content': 0.11651819199323654, 'timestamp': '2025-09-10 02:35:31.225851', 'step': 8120, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 3322488817984}, 'timestamp': '2025-09-10 02:35:31.257257', 'step': 8120, 'epoch': 2} {'type': 'loss', 'content': 0.16536273062229156, 'timestamp': '2025-09-10 02:35:31.259441', 'step': 8121, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 3322488817984}, 'timestamp': '2025-09-10 02:35:31.291566', 'step': 8121, 'epoch': 2} {'type': 'loss', 'content': 0.1461816430091858, 'timestamp': '2025-09-10 02:35:31.293996', 'step': 8122, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 3322488817984}, 'timestamp': '2025-09-10 02:35:31.325891', 'step': 8122, 'epoch': 2} {'type': 'loss', 'content': 0.19661082327365875, 'timestamp': '2025-09-10 02:35:31.328267', 'step': 8123, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 3322488817984}, 'timestamp': '2025-09-10 02:35:31.358601', 'step': 8123, 'epoch': 2} {'type': 'loss', 'content': 0.09997537732124329, 'timestamp': '2025-09-10 02:35:31.382310', 'step': 8124, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 3322488817984}, 'timestamp': '2025-09-10 02:35:31.411850', 'step': 8124, 'epoch': 2} {'type': 'loss', 'content': 0.1248064935207367, 'timestamp': '2025-09-10 02:35:31.414288', 'step': 8125, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 128], 'flops': 3797092544000}, 'timestamp': '2025-09-10 02:35:31.444365', 'step': 8125, 'epoch': 2} {'type': 'loss', 'content': 0.11784330010414124, 'timestamp': '2025-09-10 02:35:31.446601', 'step': 8126, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 144], 'flops': 4271696270016}, 'timestamp': '2025-09-10 02:35:31.476964', 'step': 8126, 'epoch': 2} {'type': 'loss', 'content': 0.18320560455322266, 'timestamp': '2025-09-10 02:35:31.479502', 'step': 8127, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 2847885091968}, 'timestamp': '2025-09-10 02:35:31.511113', 'step': 8127, 'epoch': 2} {'type': 'loss', 'content': 0.1262969821691513, 'timestamp': '2025-09-10 02:35:31.534727', 'step': 8128, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 128], 'flops': 3797092544000}, 'timestamp': '2025-09-10 02:35:31.565533', 'step': 8128, 'epoch': 2} {'type': 'loss', 'content': 0.23589517176151276, 'timestamp': '2025-09-10 02:35:31.569275', 'step': 8129, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 176], 'flops': 5220903722048}, 'timestamp': '2025-09-10 02:35:31.599599', 'step': 8129, 'epoch': 2} {'type': 'loss', 'content': 0.13287270069122314, 'timestamp': '2025-09-10 02:35:31.603863', 'step': 8130, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 3322488817984}, 'timestamp': '2025-09-10 02:35:31.633654', 'step': 8130, 'epoch': 2} {'type': 'loss', 'content': 0.10959652811288834, 'timestamp': '2025-09-10 02:35:31.635922', 'step': 8131, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 128], 'flops': 3797092544000}, 'timestamp': '2025-09-10 02:35:31.667056', 'step': 8131, 'epoch': 2} {'type': 'loss', 'content': 0.07727108150720596, 'timestamp': '2025-09-10 02:35:31.691797', 'step': 8132, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 2847885091968}, 'timestamp': '2025-09-10 02:35:31.721853', 'step': 8132, 'epoch': 2} {'type': 'loss', 'content': 0.1292504221200943, 'timestamp': '2025-09-10 02:35:31.723953', 'step': 8133, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 3322488817984}, 'timestamp': '2025-09-10 02:35:31.754307', 'step': 8133, 'epoch': 2} {'type': 'loss', 'content': 0.11428376287221909, 'timestamp': '2025-09-10 02:35:31.756737', 'step': 8134, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 3322488817984}, 'timestamp': '2025-09-10 02:35:31.787324', 'step': 8134, 'epoch': 2} {'type': 'loss', 'content': 0.16597306728363037, 'timestamp': '2025-09-10 02:35:31.789469', 'step': 8135, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 80], 'flops': 2373281365952}, 'timestamp': '2025-09-10 02:35:31.819189', 'step': 8135, 'epoch': 2} {'type': 'loss', 'content': 0.08566420525312424, 'timestamp': '2025-09-10 02:35:31.842645', 'step': 8136, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 3322488817984}, 'timestamp': '2025-09-10 02:35:31.873281', 'step': 8136, 'epoch': 2} {'type': 'loss', 'content': 0.11765114217996597, 'timestamp': '2025-09-10 02:35:31.875579', 'step': 8137, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 3322488817984}, 'timestamp': '2025-09-10 02:35:31.907136', 'step': 8137, 'epoch': 2} {'type': 'loss', 'content': 0.15478429198265076, 'timestamp': '2025-09-10 02:35:31.909532', 'step': 8138, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 128], 'flops': 3797092544000}, 'timestamp': '2025-09-10 02:35:31.939488', 'step': 8138, 'epoch': 2} {'type': 'loss', 'content': 0.12873026728630066, 'timestamp': '2025-09-10 02:35:31.942400', 'step': 8139, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 144], 'flops': 4271696270016}, 'timestamp': '2025-09-10 02:35:31.972171', 'step': 8139, 'epoch': 2} {'type': 'loss', 'content': 0.11977563053369522, 'timestamp': '2025-09-10 02:35:31.997321', 'step': 8140, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 2847885091968}, 'timestamp': '2025-09-10 02:35:32.027994', 'step': 8140, 'epoch': 2} {'type': 'loss', 'content': 0.110658198595047, 'timestamp': '2025-09-10 02:35:32.030090', 'step': 8141, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 128], 'flops': 3797092544000}, 'timestamp': '2025-09-10 02:35:32.059869', 'step': 8141, 'epoch': 2} {'type': 'loss', 'content': 0.1269223839044571, 'timestamp': '2025-09-10 02:35:32.062270', 'step': 8142, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 2847885091968}, 'timestamp': '2025-09-10 02:35:32.092274', 'step': 8142, 'epoch': 2} {'type': 'loss', 'content': 0.14078576862812042, 'timestamp': '2025-09-10 02:35:32.094408', 'step': 8143, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 2847885091968}, 'timestamp': '2025-09-10 02:35:32.124324', 'step': 8143, 'epoch': 2} {'type': 'loss', 'content': 0.0725056603550911, 'timestamp': '2025-09-10 02:35:32.148578', 'step': 8144, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 2847885091968}, 'timestamp': '2025-09-10 02:35:32.178945', 'step': 8144, 'epoch': 2} {'type': 'loss', 'content': 0.15123356878757477, 'timestamp': '2025-09-10 02:35:32.181290', 'step': 8145, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 3322488817984}, 'timestamp': '2025-09-10 02:35:32.211573', 'step': 8145, 'epoch': 2} {'type': 'loss', 'content': 0.13099227845668793, 'timestamp': '2025-09-10 02:35:32.213974', 'step': 8146, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 2847885091968}, 'timestamp': '2025-09-10 02:35:32.244265', 'step': 8146, 'epoch': 2} {'type': 'loss', 'content': 0.08137372881174088, 'timestamp': '2025-09-10 02:35:32.246534', 'step': 8147, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 128], 'flops': 3797092544000}, 'timestamp': '2025-09-10 02:35:32.276497', 'step': 8147, 'epoch': 2} {'type': 'loss', 'content': 0.08057025074958801, 'timestamp': '2025-09-10 02:35:32.300403', 'step': 8148, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 2847885091968}, 'timestamp': '2025-09-10 02:35:32.331658', 'step': 8148, 'epoch': 2} {'type': 'loss', 'content': 0.15933556854724884, 'timestamp': '2025-09-10 02:35:32.333877', 'step': 8149, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 144], 'flops': 4271696270016}, 'timestamp': '2025-09-10 02:35:32.365353', 'step': 8149, 'epoch': 2} {'type': 'loss', 'content': 0.17161954939365387, 'timestamp': '2025-09-10 02:35:32.367852', 'step': 8150, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 3322488817984}, 'timestamp': '2025-09-10 02:35:32.398006', 'step': 8150, 'epoch': 2} {'type': 'loss', 'content': 0.11909079551696777, 'timestamp': '2025-09-10 02:35:32.400225', 'step': 8151, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 2847885091968}, 'timestamp': '2025-09-10 02:35:32.431479', 'step': 8151, 'epoch': 2} {'type': 'loss', 'content': 0.14520397782325745, 'timestamp': '2025-09-10 02:35:32.455208', 'step': 8152, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 3322488817984}, 'timestamp': '2025-09-10 02:35:32.485584', 'step': 8152, 'epoch': 2} {'type': 'loss', 'content': 0.15888109803199768, 'timestamp': '2025-09-10 02:35:32.488438', 'step': 8153, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 3322488817984}, 'timestamp': '2025-09-10 02:35:32.518855', 'step': 8153, 'epoch': 2} {'type': 'loss', 'content': 0.24290835857391357, 'timestamp': '2025-09-10 02:35:32.521107', 'step': 8154, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 128], 'flops': 3797092544000}, 'timestamp': '2025-09-10 02:35:32.551409', 'step': 8154, 'epoch': 2} {'type': 'loss', 'content': 0.1690400242805481, 'timestamp': '2025-09-10 02:35:32.556186', 'step': 8155, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 128], 'flops': 3797092544000}, 'timestamp': '2025-09-10 02:35:32.588465', 'step': 8155, 'epoch': 2} {'type': 'loss', 'content': 0.11477158963680267, 'timestamp': '2025-09-10 02:35:32.612103', 'step': 8156, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 3322488817984}, 'timestamp': '2025-09-10 02:35:32.643468', 'step': 8156, 'epoch': 2} {'type': 'loss', 'content': 0.11799057573080063, 'timestamp': '2025-09-10 02:35:32.645585', 'step': 8157, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 144], 'flops': 4271696270016}, 'timestamp': '2025-09-10 02:35:32.677656', 'step': 8157, 'epoch': 2} {'type': 'loss', 'content': 0.11128905415534973, 'timestamp': '2025-09-10 02:35:32.680121', 'step': 8158, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 3322488817984}, 'timestamp': '2025-09-10 02:35:32.711479', 'step': 8158, 'epoch': 2} {'type': 'loss', 'content': 0.1382637619972229, 'timestamp': '2025-09-10 02:35:32.713843', 'step': 8159, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 3322488817984}, 'timestamp': '2025-09-10 02:35:32.743900', 'step': 8159, 'epoch': 2} {'type': 'loss', 'content': 0.06693369895219803, 'timestamp': '2025-09-10 02:35:32.768021', 'step': 8160, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 2847885091968}, 'timestamp': '2025-09-10 02:35:32.798237', 'step': 8160, 'epoch': 2} {'type': 'loss', 'content': 0.07700785249471664, 'timestamp': '2025-09-10 02:35:32.800255', 'step': 8161, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 128], 'flops': 3797092544000}, 'timestamp': '2025-09-10 02:35:32.830561', 'step': 8161, 'epoch': 2} {'type': 'loss', 'content': 0.046948160976171494, 'timestamp': '2025-09-10 02:35:32.832805', 'step': 8162, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 3322488817984}, 'timestamp': '2025-09-10 02:35:32.863190', 'step': 8162, 'epoch': 2} {'type': 'loss', 'content': 0.13983872532844543, 'timestamp': '2025-09-10 02:35:32.865664', 'step': 8163, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 128], 'flops': 3797092544000}, 'timestamp': '2025-09-10 02:35:32.896916', 'step': 8163, 'epoch': 2} {'type': 'loss', 'content': 0.201206773519516, 'timestamp': '2025-09-10 02:35:32.921272', 'step': 8164, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 128], 'flops': 3797092544000}, 'timestamp': '2025-09-10 02:35:32.951494', 'step': 8164, 'epoch': 2} {'type': 'loss', 'content': 0.1396111249923706, 'timestamp': '2025-09-10 02:35:32.953974', 'step': 8165, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 3322488817984}, 'timestamp': '2025-09-10 02:35:32.984371', 'step': 8165, 'epoch': 2} {'type': 'loss', 'content': 0.1634623110294342, 'timestamp': '2025-09-10 02:35:32.990662', 'step': 8166, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 144], 'flops': 4271696270016}, 'timestamp': '2025-09-10 02:35:33.021302', 'step': 8166, 'epoch': 2} {'type': 'loss', 'content': 0.2205132693052292, 'timestamp': '2025-09-10 02:35:33.023797', 'step': 8167, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 3322488817984}, 'timestamp': '2025-09-10 02:35:33.054757', 'step': 8167, 'epoch': 2} {'type': 'loss', 'content': 0.09539152681827545, 'timestamp': '2025-09-10 02:35:33.078461', 'step': 8168, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 128], 'flops': 3797092544000}, 'timestamp': '2025-09-10 02:35:33.108467', 'step': 8168, 'epoch': 2} {'type': 'loss', 'content': 0.08031023293733597, 'timestamp': '2025-09-10 02:35:33.110605', 'step': 8169, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 3322488817984}, 'timestamp': '2025-09-10 02:35:33.140893', 'step': 8169, 'epoch': 2} {'type': 'loss', 'content': 0.12617500126361847, 'timestamp': '2025-09-10 02:35:33.143185', 'step': 8170, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 3322488817984}, 'timestamp': '2025-09-10 02:35:33.175869', 'step': 8170, 'epoch': 2} {'type': 'loss', 'content': 0.14723433554172516, 'timestamp': '2025-09-10 02:35:33.178082', 'step': 8171, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 2847885091968}, 'timestamp': '2025-09-10 02:35:33.208915', 'step': 8171, 'epoch': 2} {'type': 'loss', 'content': 0.11253656446933746, 'timestamp': '2025-09-10 02:35:33.232332', 'step': 8172, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 144], 'flops': 4271696270016}, 'timestamp': '2025-09-10 02:35:33.262998', 'step': 8172, 'epoch': 2} {'type': 'loss', 'content': 0.12195031344890594, 'timestamp': '2025-09-10 02:35:33.264991', 'step': 8173, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 3322488817984}, 'timestamp': '2025-09-10 02:35:33.294822', 'step': 8173, 'epoch': 2} {'type': 'loss', 'content': 0.1447949856519699, 'timestamp': '2025-09-10 02:35:33.297206', 'step': 8174, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 3322488817984}, 'timestamp': '2025-09-10 02:35:33.327117', 'step': 8174, 'epoch': 2} {'type': 'loss', 'content': 0.08300803601741791, 'timestamp': '2025-09-10 02:35:33.329298', 'step': 8175, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 128], 'flops': 3797092544000}, 'timestamp': '2025-09-10 02:35:33.359335', 'step': 8175, 'epoch': 2} {'type': 'loss', 'content': 0.08610697090625763, 'timestamp': '2025-09-10 02:35:33.382988', 'step': 8176, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 80], 'flops': 2373281365952}, 'timestamp': '2025-09-10 02:35:33.412788', 'step': 8176, 'epoch': 2} {'type': 'loss', 'content': 0.03721940144896507, 'timestamp': '2025-09-10 02:35:33.416076', 'step': 8177, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 3322488817984}, 'timestamp': '2025-09-10 02:35:33.446349', 'step': 8177, 'epoch': 2} {'type': 'loss', 'content': 0.16343118250370026, 'timestamp': '2025-09-10 02:35:33.448506', 'step': 8178, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 2847885091968}, 'timestamp': '2025-09-10 02:35:33.481042', 'step': 8178, 'epoch': 2} {'type': 'loss', 'content': 0.16082797944545746, 'timestamp': '2025-09-10 02:35:33.483285', 'step': 8179, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 2847885091968}, 'timestamp': '2025-09-10 02:35:33.513182', 'step': 8179, 'epoch': 2} {'type': 'loss', 'content': 0.05414240434765816, 'timestamp': '2025-09-10 02:35:33.536414', 'step': 8180, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 3322488817984}, 'timestamp': '2025-09-10 02:35:33.566803', 'step': 8180, 'epoch': 2} {'type': 'loss', 'content': 0.06879551708698273, 'timestamp': '2025-09-10 02:35:33.569817', 'step': 8181, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 2847885091968}, 'timestamp': '2025-09-10 02:35:33.600409', 'step': 8181, 'epoch': 2} {'type': 'loss', 'content': 0.16061456501483917, 'timestamp': '2025-09-10 02:35:33.602964', 'step': 8182, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 2847885091968}, 'timestamp': '2025-09-10 02:35:33.633336', 'step': 8182, 'epoch': 2} {'type': 'loss', 'content': 0.14920875430107117, 'timestamp': '2025-09-10 02:35:33.635768', 'step': 8183, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 80], 'flops': 2373281365952}, 'timestamp': '2025-09-10 02:35:33.666174', 'step': 8183, 'epoch': 2} {'type': 'loss', 'content': 0.045864954590797424, 'timestamp': '2025-09-10 02:35:33.689823', 'step': 8184, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 3322488817984}, 'timestamp': '2025-09-10 02:35:33.722191', 'step': 8184, 'epoch': 2} {'type': 'loss', 'content': 0.1297379583120346, 'timestamp': '2025-09-10 02:35:33.725484', 'step': 8185, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 3322488817984}, 'timestamp': '2025-09-10 02:35:33.757447', 'step': 8185, 'epoch': 2} {'type': 'loss', 'content': 0.136630117893219, 'timestamp': '2025-09-10 02:35:33.759913', 'step': 8186, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 3322488817984}, 'timestamp': '2025-09-10 02:35:33.790967', 'step': 8186, 'epoch': 2} {'type': 'loss', 'content': 0.1478600800037384, 'timestamp': '2025-09-10 02:35:33.793308', 'step': 8187, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 3322488817984}, 'timestamp': '2025-09-10 02:35:33.825443', 'step': 8187, 'epoch': 2} {'type': 'loss', 'content': 0.06085449084639549, 'timestamp': '2025-09-10 02:35:33.849091', 'step': 8188, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 2847885091968}, 'timestamp': '2025-09-10 02:35:33.879780', 'step': 8188, 'epoch': 2} {'type': 'loss', 'content': 0.11825781315565109, 'timestamp': '2025-09-10 02:35:33.882114', 'step': 8189, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 128], 'flops': 3797092544000}, 'timestamp': '2025-09-10 02:35:33.911870', 'step': 8189, 'epoch': 2} {'type': 'loss', 'content': 0.15287084877490997, 'timestamp': '2025-09-10 02:35:33.914059', 'step': 8190, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 2847885091968}, 'timestamp': '2025-09-10 02:35:33.944627', 'step': 8190, 'epoch': 2} {'type': 'loss', 'content': 0.12099439650774002, 'timestamp': '2025-09-10 02:35:33.947203', 'step': 8191, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 128], 'flops': 3797092544000}, 'timestamp': '2025-09-10 02:35:33.976967', 'step': 8191, 'epoch': 2} {'type': 'loss', 'content': 0.10133986175060272, 'timestamp': '2025-09-10 02:35:34.001999', 'step': 8192, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 2847885091968}, 'timestamp': '2025-09-10 02:35:34.031957', 'step': 8192, 'epoch': 2} {'type': 'loss', 'content': 0.1554643213748932, 'timestamp': '2025-09-10 02:35:34.034259', 'step': 8193, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 128], 'flops': 3797092544000}, 'timestamp': '2025-09-10 02:35:34.064202', 'step': 8193, 'epoch': 2} {'type': 'loss', 'content': 0.16239191591739655, 'timestamp': '2025-09-10 02:35:34.066547', 'step': 8194, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 2847885091968}, 'timestamp': '2025-09-10 02:35:34.097508', 'step': 8194, 'epoch': 2} {'type': 'loss', 'content': 0.18627196550369263, 'timestamp': '2025-09-10 02:35:34.101483', 'step': 8195, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 80], 'flops': 2373281365952}, 'timestamp': '2025-09-10 02:35:34.131595', 'step': 8195, 'epoch': 2} {'type': 'loss', 'content': 0.06209446117281914, 'timestamp': '2025-09-10 02:35:34.155700', 'step': 8196, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 80], 'flops': 2373281365952}, 'timestamp': '2025-09-10 02:35:34.186249', 'step': 8196, 'epoch': 2} {'type': 'loss', 'content': 0.13436345756053925, 'timestamp': '2025-09-10 02:35:34.188386', 'step': 8197, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 2847885091968}, 'timestamp': '2025-09-10 02:35:34.218022', 'step': 8197, 'epoch': 2} {'type': 'loss', 'content': 0.09391680359840393, 'timestamp': '2025-09-10 02:35:34.220231', 'step': 8198, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 128], 'flops': 3797092544000}, 'timestamp': '2025-09-10 02:35:34.249498', 'step': 8198, 'epoch': 2} {'type': 'loss', 'content': 0.12612877786159515, 'timestamp': '2025-09-10 02:35:34.251622', 'step': 8199, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 3322488817984}, 'timestamp': '2025-09-10 02:35:34.281629', 'step': 8199, 'epoch': 2} {'type': 'loss', 'content': 0.05484632030129433, 'timestamp': '2025-09-10 02:35:34.307858', 'step': 8200, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 2847885091968}, 'timestamp': '2025-09-10 02:35:34.338578', 'step': 8200, 'epoch': 2} {'type': 'loss', 'content': 0.11036041378974915, 'timestamp': '2025-09-10 02:35:34.340704', 'step': 8201, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 2847885091968}, 'timestamp': '2025-09-10 02:35:34.370360', 'step': 8201, 'epoch': 2} {'type': 'loss', 'content': 0.08807597309350967, 'timestamp': '2025-09-10 02:35:34.372770', 'step': 8202, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 3322488817984}, 'timestamp': '2025-09-10 02:35:34.403197', 'step': 8202, 'epoch': 2} {'type': 'loss', 'content': 0.22930079698562622, 'timestamp': '2025-09-10 02:35:34.406109', 'step': 8203, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 128], 'flops': 3797092544000}, 'timestamp': '2025-09-10 02:35:34.436507', 'step': 8203, 'epoch': 2} {'type': 'loss', 'content': 0.15365539491176605, 'timestamp': '2025-09-10 02:35:34.460033', 'step': 8204, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 3322488817984}, 'timestamp': '2025-09-10 02:35:34.490056', 'step': 8204, 'epoch': 2} {'type': 'loss', 'content': 0.05146685615181923, 'timestamp': '2025-09-10 02:35:34.492342', 'step': 8205, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 2847885091968}, 'timestamp': '2025-09-10 02:35:34.522282', 'step': 8205, 'epoch': 2} {'type': 'loss', 'content': 0.09824889153242111, 'timestamp': '2025-09-10 02:35:34.524596', 'step': 8206, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 144], 'flops': 4271696270016}, 'timestamp': '2025-09-10 02:35:34.554243', 'step': 8206, 'epoch': 2} {'type': 'loss', 'content': 0.15071514248847961, 'timestamp': '2025-09-10 02:35:34.556721', 'step': 8207, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 3322488817984}, 'timestamp': '2025-09-10 02:35:34.587887', 'step': 8207, 'epoch': 2} {'type': 'loss', 'content': 0.1480972319841385, 'timestamp': '2025-09-10 02:35:34.611579', 'step': 8208, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 160], 'flops': 4746299996032}, 'timestamp': '2025-09-10 02:35:34.642863', 'step': 8208, 'epoch': 2} {'type': 'loss', 'content': 0.12419095635414124, 'timestamp': '2025-09-10 02:35:34.645334', 'step': 8209, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 2847885091968}, 'timestamp': '2025-09-10 02:35:34.675394', 'step': 8209, 'epoch': 2} {'type': 'loss', 'content': 0.12867479026317596, 'timestamp': '2025-09-10 02:35:34.678165', 'step': 8210, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 3322488817984}, 'timestamp': '2025-09-10 02:35:34.708923', 'step': 8210, 'epoch': 2} {'type': 'loss', 'content': 0.1606026291847229, 'timestamp': '2025-09-10 02:35:34.711330', 'step': 8211, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 3322488817984}, 'timestamp': '2025-09-10 02:35:34.742071', 'step': 8211, 'epoch': 2} {'type': 'loss', 'content': 0.1613147109746933, 'timestamp': '2025-09-10 02:35:34.765944', 'step': 8212, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 2847885091968}, 'timestamp': '2025-09-10 02:35:34.796410', 'step': 8212, 'epoch': 2} {'type': 'loss', 'content': 0.1517116129398346, 'timestamp': '2025-09-10 02:35:34.798467', 'step': 8213, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 2847885091968}, 'timestamp': '2025-09-10 02:35:34.828207', 'step': 8213, 'epoch': 2} {'type': 'loss', 'content': 0.18357522785663605, 'timestamp': '2025-09-10 02:35:34.830696', 'step': 8214, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 128], 'flops': 3797092544000}, 'timestamp': '2025-09-10 02:35:34.861018', 'step': 8214, 'epoch': 2} {'type': 'loss', 'content': 0.12791705131530762, 'timestamp': '2025-09-10 02:35:34.864093', 'step': 8215, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 128], 'flops': 3797092544000}, 'timestamp': '2025-09-10 02:35:34.894973', 'step': 8215, 'epoch': 2} {'type': 'loss', 'content': 0.09199485927820206, 'timestamp': '2025-09-10 02:35:34.918305', 'step': 8216, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 128], 'flops': 3797092544000}, 'timestamp': '2025-09-10 02:35:34.950460', 'step': 8216, 'epoch': 2} {'type': 'loss', 'content': 0.12878435850143433, 'timestamp': '2025-09-10 02:35:34.952835', 'step': 8217, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 3322488817984}, 'timestamp': '2025-09-10 02:35:34.982880', 'step': 8217, 'epoch': 2} {'type': 'loss', 'content': 0.2657735049724579, 'timestamp': '2025-09-10 02:35:34.988572', 'step': 8218, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 2847885091968}, 'timestamp': '2025-09-10 02:35:35.022581', 'step': 8218, 'epoch': 2} {'type': 'loss', 'content': 0.04287626966834068, 'timestamp': '2025-09-10 02:35:35.024610', 'step': 8219, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 3322488817984}, 'timestamp': '2025-09-10 02:35:35.055126', 'step': 8219, 'epoch': 2} {'type': 'loss', 'content': 0.0659627616405487, 'timestamp': '2025-09-10 02:35:35.078543', 'step': 8220, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 3322488817984}, 'timestamp': '2025-09-10 02:35:35.108671', 'step': 8220, 'epoch': 2} {'type': 'loss', 'content': 0.08155599236488342, 'timestamp': '2025-09-10 02:35:35.110973', 'step': 8221, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 3322488817984}, 'timestamp': '2025-09-10 02:35:35.142311', 'step': 8221, 'epoch': 2} {'type': 'loss', 'content': 0.09492407739162445, 'timestamp': '2025-09-10 02:35:35.144676', 'step': 8222, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 2847885091968}, 'timestamp': '2025-09-10 02:35:35.175929', 'step': 8222, 'epoch': 2} {'type': 'loss', 'content': 0.17145231366157532, 'timestamp': '2025-09-10 02:35:35.178159', 'step': 8223, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 3322488817984}, 'timestamp': '2025-09-10 02:35:35.209560', 'step': 8223, 'epoch': 2} {'type': 'loss', 'content': 0.053240206092596054, 'timestamp': '2025-09-10 02:35:35.233015', 'step': 8224, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 128], 'flops': 3797092544000}, 'timestamp': '2025-09-10 02:35:35.263060', 'step': 8224, 'epoch': 2} {'type': 'loss', 'content': 0.19518166780471802, 'timestamp': '2025-09-10 02:35:35.265705', 'step': 8225, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 3322488817984}, 'timestamp': '2025-09-10 02:35:35.295711', 'step': 8225, 'epoch': 2} {'type': 'loss', 'content': 0.1581004112958908, 'timestamp': '2025-09-10 02:35:35.297949', 'step': 8226, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 80], 'flops': 2373281365952}, 'timestamp': '2025-09-10 02:35:35.327503', 'step': 8226, 'epoch': 2} {'type': 'loss', 'content': 0.06785788387060165, 'timestamp': '2025-09-10 02:35:35.329811', 'step': 8227, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 3322488817984}, 'timestamp': '2025-09-10 02:35:35.359779', 'step': 8227, 'epoch': 2} {'type': 'loss', 'content': 0.14843276143074036, 'timestamp': '2025-09-10 02:35:35.383114', 'step': 8228, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 2847885091968}, 'timestamp': '2025-09-10 02:35:35.412457', 'step': 8228, 'epoch': 2} {'type': 'loss', 'content': 0.14304322004318237, 'timestamp': '2025-09-10 02:35:35.414719', 'step': 8229, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 3322488817984}, 'timestamp': '2025-09-10 02:35:35.446023', 'step': 8229, 'epoch': 2} {'type': 'loss', 'content': 0.09922189265489578, 'timestamp': '2025-09-10 02:35:35.448511', 'step': 8230, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 128], 'flops': 3797092544000}, 'timestamp': '2025-09-10 02:35:35.479119', 'step': 8230, 'epoch': 2} {'type': 'loss', 'content': 0.10909959673881531, 'timestamp': '2025-09-10 02:35:35.481805', 'step': 8231, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 80], 'flops': 2373281365952}, 'timestamp': '2025-09-10 02:35:35.513186', 'step': 8231, 'epoch': 2} {'type': 'loss', 'content': 0.04816799238324165, 'timestamp': '2025-09-10 02:35:35.536731', 'step': 8232, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 3322488817984}, 'timestamp': '2025-09-10 02:35:35.567375', 'step': 8232, 'epoch': 2} {'type': 'loss', 'content': 0.12270254641771317, 'timestamp': '2025-09-10 02:35:35.569842', 'step': 8233, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 128], 'flops': 3797092544000}, 'timestamp': '2025-09-10 02:35:35.600091', 'step': 8233, 'epoch': 2} {'type': 'loss', 'content': 0.12055859714746475, 'timestamp': '2025-09-10 02:35:35.602407', 'step': 8234, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 144], 'flops': 4271696270016}, 'timestamp': '2025-09-10 02:35:35.632417', 'step': 8234, 'epoch': 2} {'type': 'loss', 'content': 0.07105610519647598, 'timestamp': '2025-09-10 02:35:35.634948', 'step': 8235, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 2847885091968}, 'timestamp': '2025-09-10 02:35:35.665089', 'step': 8235, 'epoch': 2} {'type': 'loss', 'content': 0.10106611996889114, 'timestamp': '2025-09-10 02:35:35.688601', 'step': 8236, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 2847885091968}, 'timestamp': '2025-09-10 02:35:35.719407', 'step': 8236, 'epoch': 2} {'type': 'loss', 'content': 0.18049128353595734, 'timestamp': '2025-09-10 02:35:35.722931', 'step': 8237, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 80], 'flops': 2373281365952}, 'timestamp': '2025-09-10 02:35:35.752967', 'step': 8237, 'epoch': 2} {'type': 'loss', 'content': 0.0510142520070076, 'timestamp': '2025-09-10 02:35:35.755297', 'step': 8238, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 2847885091968}, 'timestamp': '2025-09-10 02:35:35.785121', 'step': 8238, 'epoch': 2} {'type': 'loss', 'content': 0.13521543145179749, 'timestamp': '2025-09-10 02:35:35.787264', 'step': 8239, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 3322488817984}, 'timestamp': '2025-09-10 02:35:35.817753', 'step': 8239, 'epoch': 2} {'type': 'loss', 'content': 0.16174007952213287, 'timestamp': '2025-09-10 02:35:35.841101', 'step': 8240, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 128], 'flops': 3797092544000}, 'timestamp': '2025-09-10 02:35:35.871181', 'step': 8240, 'epoch': 2} {'type': 'loss', 'content': 0.06066735461354256, 'timestamp': '2025-09-10 02:35:35.873386', 'step': 8241, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 144], 'flops': 4271696270016}, 'timestamp': '2025-09-10 02:35:35.903765', 'step': 8241, 'epoch': 2} {'type': 'loss', 'content': 0.11948051303625107, 'timestamp': '2025-09-10 02:35:35.906102', 'step': 8242, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 3322488817984}, 'timestamp': '2025-09-10 02:35:35.936766', 'step': 8242, 'epoch': 2} {'type': 'loss', 'content': 0.10262468457221985, 'timestamp': '2025-09-10 02:35:35.938957', 'step': 8243, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 160], 'flops': 4746299996032}, 'timestamp': '2025-09-10 02:35:35.969559', 'step': 8243, 'epoch': 2} {'type': 'loss', 'content': 0.12238813191652298, 'timestamp': '2025-09-10 02:35:35.993192', 'step': 8244, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 2847885091968}, 'timestamp': '2025-09-10 02:35:36.035992', 'step': 8244, 'epoch': 2} {'type': 'loss', 'content': 0.09440051019191742, 'timestamp': '2025-09-10 02:35:36.039252', 'step': 8245, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 2847885091968}, 'timestamp': '2025-09-10 02:35:36.069668', 'step': 8245, 'epoch': 2} {'type': 'loss', 'content': 0.196275994181633, 'timestamp': '2025-09-10 02:35:36.071698', 'step': 8246, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 128], 'flops': 3797092544000}, 'timestamp': '2025-09-10 02:35:36.102515', 'step': 8246, 'epoch': 2} {'type': 'loss', 'content': 0.1529829502105713, 'timestamp': '2025-09-10 02:35:36.104700', 'step': 8247, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 128], 'flops': 3797092544000}, 'timestamp': '2025-09-10 02:35:36.134983', 'step': 8247, 'epoch': 2} {'type': 'loss', 'content': 0.07229730486869812, 'timestamp': '2025-09-10 02:35:36.158762', 'step': 8248, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 2847885091968}, 'timestamp': '2025-09-10 02:35:36.189147', 'step': 8248, 'epoch': 2} {'type': 'loss', 'content': 0.06434674561023712, 'timestamp': '2025-09-10 02:35:36.191402', 'step': 8249, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 144], 'flops': 4271696270016}, 'timestamp': '2025-09-10 02:35:36.221410', 'step': 8249, 'epoch': 2} {'type': 'loss', 'content': 0.19885629415512085, 'timestamp': '2025-09-10 02:35:36.224389', 'step': 8250, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 3322488817984}, 'timestamp': '2025-09-10 02:35:36.256081', 'step': 8250, 'epoch': 2} {'type': 'loss', 'content': 0.1569679081439972, 'timestamp': '2025-09-10 02:35:36.258459', 'step': 8251, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 2847885091968}, 'timestamp': '2025-09-10 02:35:36.288323', 'step': 8251, 'epoch': 2} {'type': 'loss', 'content': 0.1037335991859436, 'timestamp': '2025-09-10 02:35:36.311745', 'step': 8252, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 2847885091968}, 'timestamp': '2025-09-10 02:35:36.342784', 'step': 8252, 'epoch': 2} {'type': 'loss', 'content': 0.11328253895044327, 'timestamp': '2025-09-10 02:35:36.344924', 'step': 8253, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 3322488817984}, 'timestamp': '2025-09-10 02:35:36.377946', 'step': 8253, 'epoch': 2} {'type': 'loss', 'content': 0.17320439219474792, 'timestamp': '2025-09-10 02:35:36.380161', 'step': 8254, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 128], 'flops': 3797092544000}, 'timestamp': '2025-09-10 02:35:36.411193', 'step': 8254, 'epoch': 2} {'type': 'loss', 'content': 0.062294818460941315, 'timestamp': '2025-09-10 02:35:36.413509', 'step': 8255, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 2847885091968}, 'timestamp': '2025-09-10 02:35:36.443415', 'step': 8255, 'epoch': 2} {'type': 'loss', 'content': 0.10081031918525696, 'timestamp': '2025-09-10 02:35:36.466864', 'step': 8256, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 3322488817984}, 'timestamp': '2025-09-10 02:35:36.497027', 'step': 8256, 'epoch': 2} {'type': 'loss', 'content': 0.08190404623746872, 'timestamp': '2025-09-10 02:35:36.499109', 'step': 8257, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 3322488817984}, 'timestamp': '2025-09-10 02:35:36.528729', 'step': 8257, 'epoch': 2} {'type': 'loss', 'content': 0.05976070836186409, 'timestamp': '2025-09-10 02:35:36.532246', 'step': 8258, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 128], 'flops': 3797092544000}, 'timestamp': '2025-09-10 02:35:36.562212', 'step': 8258, 'epoch': 2} {'type': 'loss', 'content': 0.12686187028884888, 'timestamp': '2025-09-10 02:35:36.564721', 'step': 8259, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 2847885091968}, 'timestamp': '2025-09-10 02:35:36.598058', 'step': 8259, 'epoch': 2} {'type': 'loss', 'content': 0.1992378532886505, 'timestamp': '2025-09-10 02:35:36.621383', 'step': 8260, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 2847885091968}, 'timestamp': '2025-09-10 02:35:36.651699', 'step': 8260, 'epoch': 2} {'type': 'loss', 'content': 0.14909574389457703, 'timestamp': '2025-09-10 02:35:36.654274', 'step': 8261, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 128], 'flops': 3797092544000}, 'timestamp': '2025-09-10 02:35:36.685276', 'step': 8261, 'epoch': 2} {'type': 'loss', 'content': 0.10335444658994675, 'timestamp': '2025-09-10 02:35:36.687397', 'step': 8262, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 3322488817984}, 'timestamp': '2025-09-10 02:35:36.718727', 'step': 8262, 'epoch': 2} {'type': 'loss', 'content': 0.2159615308046341, 'timestamp': '2025-09-10 02:35:36.721352', 'step': 8263, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 128], 'flops': 3797092544000}, 'timestamp': '2025-09-10 02:35:36.753015', 'step': 8263, 'epoch': 2} {'type': 'loss', 'content': 0.08741426467895508, 'timestamp': '2025-09-10 02:35:36.777246', 'step': 8264, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 2847885091968}, 'timestamp': '2025-09-10 02:35:36.809281', 'step': 8264, 'epoch': 2} {'type': 'loss', 'content': 0.03650917857885361, 'timestamp': '2025-09-10 02:35:36.812844', 'step': 8265, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 2847885091968}, 'timestamp': '2025-09-10 02:35:36.847790', 'step': 8265, 'epoch': 2} {'type': 'loss', 'content': 0.13104841113090515, 'timestamp': '2025-09-10 02:35:36.850199', 'step': 8266, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 144], 'flops': 4271696270016}, 'timestamp': '2025-09-10 02:35:36.880576', 'step': 8266, 'epoch': 2} {'type': 'loss', 'content': 0.14549216628074646, 'timestamp': '2025-09-10 02:35:36.882827', 'step': 8267, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 144], 'flops': 4271696270016}, 'timestamp': '2025-09-10 02:35:36.913054', 'step': 8267, 'epoch': 2} {'type': 'loss', 'content': 0.16531018912792206, 'timestamp': '2025-09-10 02:35:36.936696', 'step': 8268, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 2847885091968}, 'timestamp': '2025-09-10 02:35:36.967960', 'step': 8268, 'epoch': 2} {'type': 'loss', 'content': 0.14737623929977417, 'timestamp': '2025-09-10 02:35:36.970202', 'step': 8269, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 2847885091968}, 'timestamp': '2025-09-10 02:35:37.000912', 'step': 8269, 'epoch': 2} {'type': 'loss', 'content': 0.12404892593622208, 'timestamp': '2025-09-10 02:35:37.003683', 'step': 8270, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 2847885091968}, 'timestamp': '2025-09-10 02:35:37.033987', 'step': 8270, 'epoch': 2} {'type': 'loss', 'content': 0.09236600250005722, 'timestamp': '2025-09-10 02:35:37.037482', 'step': 8271, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 128], 'flops': 3797092544000}, 'timestamp': '2025-09-10 02:35:37.068536', 'step': 8271, 'epoch': 2} {'type': 'loss', 'content': 0.1961873173713684, 'timestamp': '2025-09-10 02:35:37.091898', 'step': 8272, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 3322488817984}, 'timestamp': '2025-09-10 02:35:37.122696', 'step': 8272, 'epoch': 2} {'type': 'loss', 'content': 0.060896482318639755, 'timestamp': '2025-09-10 02:35:37.124769', 'step': 8273, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 2847885091968}, 'timestamp': '2025-09-10 02:35:37.155193', 'step': 8273, 'epoch': 2} {'type': 'loss', 'content': 0.11916028708219528, 'timestamp': '2025-09-10 02:35:37.157393', 'step': 8274, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 176], 'flops': 5220903722048}, 'timestamp': '2025-09-10 02:35:37.187775', 'step': 8274, 'epoch': 2} {'type': 'loss', 'content': 0.0860590860247612, 'timestamp': '2025-09-10 02:35:37.191842', 'step': 8275, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 2847885091968}, 'timestamp': '2025-09-10 02:35:37.222467', 'step': 8275, 'epoch': 2} {'type': 'loss', 'content': 0.08333782106637955, 'timestamp': '2025-09-10 02:35:37.246167', 'step': 8276, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 128], 'flops': 3797092544000}, 'timestamp': '2025-09-10 02:35:37.277221', 'step': 8276, 'epoch': 2} {'type': 'loss', 'content': 0.1730339527130127, 'timestamp': '2025-09-10 02:35:37.279413', 'step': 8277, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 3322488817984}, 'timestamp': '2025-09-10 02:35:37.310127', 'step': 8277, 'epoch': 2} {'type': 'loss', 'content': 0.1514788269996643, 'timestamp': '2025-09-10 02:35:37.312783', 'step': 8278, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 128], 'flops': 3797092544000}, 'timestamp': '2025-09-10 02:35:37.343511', 'step': 8278, 'epoch': 2} {'type': 'loss', 'content': 0.1474258303642273, 'timestamp': '2025-09-10 02:35:37.345766', 'step': 8279, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 2847885091968}, 'timestamp': '2025-09-10 02:35:37.376194', 'step': 8279, 'epoch': 2} {'type': 'loss', 'content': 0.11372566223144531, 'timestamp': '2025-09-10 02:35:37.399649', 'step': 8280, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 144], 'flops': 4271696270016}, 'timestamp': '2025-09-10 02:35:37.430592', 'step': 8280, 'epoch': 2} {'type': 'loss', 'content': 0.12829278409481049, 'timestamp': '2025-09-10 02:35:37.433451', 'step': 8281, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 2847885091968}, 'timestamp': '2025-09-10 02:35:37.464392', 'step': 8281, 'epoch': 2} {'type': 'loss', 'content': 0.09508071839809418, 'timestamp': '2025-09-10 02:35:37.466595', 'step': 8282, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 3322488817984}, 'timestamp': '2025-09-10 02:35:37.496957', 'step': 8282, 'epoch': 2} {'type': 'loss', 'content': 0.09187153726816177, 'timestamp': '2025-09-10 02:35:37.499319', 'step': 8283, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 3322488817984}, 'timestamp': '2025-09-10 02:35:37.529348', 'step': 8283, 'epoch': 2} {'type': 'loss', 'content': 0.10179585963487625, 'timestamp': '2025-09-10 02:35:37.552936', 'step': 8284, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 144], 'flops': 4271696270016}, 'timestamp': '2025-09-10 02:35:37.583381', 'step': 8284, 'epoch': 2} {'type': 'loss', 'content': 0.19124358892440796, 'timestamp': '2025-09-10 02:35:37.585511', 'step': 8285, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 2847885091968}, 'timestamp': '2025-09-10 02:35:37.615736', 'step': 8285, 'epoch': 2} {'type': 'loss', 'content': 0.08828551322221756, 'timestamp': '2025-09-10 02:35:37.617931', 'step': 8286, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 3322488817984}, 'timestamp': '2025-09-10 02:35:37.648523', 'step': 8286, 'epoch': 2} {'type': 'loss', 'content': 0.2317160815000534, 'timestamp': '2025-09-10 02:35:37.650854', 'step': 8287, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 2847885091968}, 'timestamp': '2025-09-10 02:35:37.682116', 'step': 8287, 'epoch': 2} {'type': 'loss', 'content': 0.15706239640712738, 'timestamp': '2025-09-10 02:35:37.705591', 'step': 8288, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 144], 'flops': 4271696270016}, 'timestamp': '2025-09-10 02:35:37.736582', 'step': 8288, 'epoch': 2} {'type': 'loss', 'content': 0.10606712847948074, 'timestamp': '2025-09-10 02:35:37.738696', 'step': 8289, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 128], 'flops': 3797092544000}, 'timestamp': '2025-09-10 02:35:37.768898', 'step': 8289, 'epoch': 2} {'type': 'loss', 'content': 0.11019381135702133, 'timestamp': '2025-09-10 02:35:37.771219', 'step': 8290, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 3322488817984}, 'timestamp': '2025-09-10 02:35:37.801594', 'step': 8290, 'epoch': 2} {'type': 'loss', 'content': 0.15113769471645355, 'timestamp': '2025-09-10 02:35:37.804517', 'step': 8291, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 3322488817984}, 'timestamp': '2025-09-10 02:35:37.835132', 'step': 8291, 'epoch': 2} {'type': 'loss', 'content': 0.1256369650363922, 'timestamp': '2025-09-10 02:35:37.858984', 'step': 8292, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 3322488817984}, 'timestamp': '2025-09-10 02:35:37.889516', 'step': 8292, 'epoch': 2} {'type': 'loss', 'content': 0.06383447349071503, 'timestamp': '2025-09-10 02:35:37.891618', 'step': 8293, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 128], 'flops': 3797092544000}, 'timestamp': '2025-09-10 02:35:37.921885', 'step': 8293, 'epoch': 2} {'type': 'loss', 'content': 0.10916736721992493, 'timestamp': '2025-09-10 02:35:37.924152', 'step': 8294, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 144], 'flops': 4271696270016}, 'timestamp': '2025-09-10 02:35:37.954669', 'step': 8294, 'epoch': 2} {'type': 'loss', 'content': 0.07087796926498413, 'timestamp': '2025-09-10 02:35:37.957207', 'step': 8295, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 80], 'flops': 2373281365952}, 'timestamp': '2025-09-10 02:35:37.986921', 'step': 8295, 'epoch': 2} {'type': 'loss', 'content': 0.08894345909357071, 'timestamp': '2025-09-10 02:35:38.010605', 'step': 8296, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 3322488817984}, 'timestamp': '2025-09-10 02:35:38.041273', 'step': 8296, 'epoch': 2} {'type': 'loss', 'content': 0.15924523770809174, 'timestamp': '2025-09-10 02:35:38.045097', 'step': 8297, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 144], 'flops': 4271696270016}, 'timestamp': '2025-09-10 02:35:38.077501', 'step': 8297, 'epoch': 2} {'type': 'loss', 'content': 0.116614930331707, 'timestamp': '2025-09-10 02:35:38.079970', 'step': 8298, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 128], 'flops': 3797092544000}, 'timestamp': '2025-09-10 02:35:38.110122', 'step': 8298, 'epoch': 2} {'type': 'loss', 'content': 0.2401111125946045, 'timestamp': '2025-09-10 02:35:38.112794', 'step': 8299, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 3322488817984}, 'timestamp': '2025-09-10 02:35:38.143133', 'step': 8299, 'epoch': 2} {'type': 'loss', 'content': 0.18367014825344086, 'timestamp': '2025-09-10 02:35:38.166613', 'step': 8300, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 128], 'flops': 3797092544000}, 'timestamp': '2025-09-10 02:35:38.196858', 'step': 8300, 'epoch': 2} {'type': 'loss', 'content': 0.16662070155143738, 'timestamp': '2025-09-10 02:35:38.199246', 'step': 8301, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 3322488817984}, 'timestamp': '2025-09-10 02:35:38.229214', 'step': 8301, 'epoch': 2} {'type': 'loss', 'content': 0.05528658255934715, 'timestamp': '2025-09-10 02:35:38.232194', 'step': 8302, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 3322488817984}, 'timestamp': '2025-09-10 02:35:38.262181', 'step': 8302, 'epoch': 2} {'type': 'loss', 'content': 0.1660628616809845, 'timestamp': '2025-09-10 02:35:38.264346', 'step': 8303, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 3322488817984}, 'timestamp': '2025-09-10 02:35:38.293966', 'step': 8303, 'epoch': 2} {'type': 'loss', 'content': 0.14148515462875366, 'timestamp': '2025-09-10 02:35:38.317353', 'step': 8304, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 128], 'flops': 3797092544000}, 'timestamp': '2025-09-10 02:35:38.349144', 'step': 8304, 'epoch': 2} {'type': 'loss', 'content': 0.12405217438936234, 'timestamp': '2025-09-10 02:35:38.351098', 'step': 8305, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 128], 'flops': 3797092544000}, 'timestamp': '2025-09-10 02:35:38.383466', 'step': 8305, 'epoch': 2} {'type': 'loss', 'content': 0.15012408792972565, 'timestamp': '2025-09-10 02:35:38.385652', 'step': 8306, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 3322488817984}, 'timestamp': '2025-09-10 02:35:38.416727', 'step': 8306, 'epoch': 2} {'type': 'loss', 'content': 0.08756197988986969, 'timestamp': '2025-09-10 02:35:38.418939', 'step': 8307, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 160], 'flops': 4746299996032}, 'timestamp': '2025-09-10 02:35:38.449132', 'step': 8307, 'epoch': 2} {'type': 'loss', 'content': 0.13958042860031128, 'timestamp': '2025-09-10 02:35:38.472679', 'step': 8308, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 3322488817984}, 'timestamp': '2025-09-10 02:35:38.502899', 'step': 8308, 'epoch': 2} {'type': 'loss', 'content': 0.10267224162817001, 'timestamp': '2025-09-10 02:35:38.505110', 'step': 8309, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 128], 'flops': 3797092544000}, 'timestamp': '2025-09-10 02:35:38.535366', 'step': 8309, 'epoch': 2} {'type': 'loss', 'content': 0.09736296534538269, 'timestamp': '2025-09-10 02:35:38.537731', 'step': 8310, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 2847885091968}, 'timestamp': '2025-09-10 02:35:38.567867', 'step': 8310, 'epoch': 2} {'type': 'loss', 'content': 0.1372254192829132, 'timestamp': '2025-09-10 02:35:38.570282', 'step': 8311, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 3322488817984}, 'timestamp': '2025-09-10 02:35:38.601377', 'step': 8311, 'epoch': 2} {'type': 'loss', 'content': 0.10693926364183426, 'timestamp': '2025-09-10 02:35:38.625139', 'step': 8312, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 3322488817984}, 'timestamp': '2025-09-10 02:35:38.656410', 'step': 8312, 'epoch': 2} {'type': 'loss', 'content': 0.022232847288250923, 'timestamp': '2025-09-10 02:35:38.661828', 'step': 8313, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 144], 'flops': 4271696270016}, 'timestamp': '2025-09-10 02:35:38.692607', 'step': 8313, 'epoch': 2} {'type': 'loss', 'content': 0.13339263200759888, 'timestamp': '2025-09-10 02:35:38.694999', 'step': 8314, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 2847885091968}, 'timestamp': '2025-09-10 02:35:38.725646', 'step': 8314, 'epoch': 2} {'type': 'loss', 'content': 0.14704255759716034, 'timestamp': '2025-09-10 02:35:38.727822', 'step': 8315, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 3322488817984}, 'timestamp': '2025-09-10 02:35:38.757584', 'step': 8315, 'epoch': 2} {'type': 'loss', 'content': 0.1295265555381775, 'timestamp': '2025-09-10 02:35:38.782303', 'step': 8316, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 3322488817984}, 'timestamp': '2025-09-10 02:35:38.812259', 'step': 8316, 'epoch': 2} {'type': 'loss', 'content': 0.11150284856557846, 'timestamp': '2025-09-10 02:35:38.815804', 'step': 8317, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 128], 'flops': 3797092544000}, 'timestamp': '2025-09-10 02:35:38.848229', 'step': 8317, 'epoch': 2} {'type': 'loss', 'content': 0.1722559928894043, 'timestamp': '2025-09-10 02:35:38.850464', 'step': 8318, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 2847885091968}, 'timestamp': '2025-09-10 02:35:38.881128', 'step': 8318, 'epoch': 2} {'type': 'loss', 'content': 0.05852861329913139, 'timestamp': '2025-09-10 02:35:38.893213', 'step': 8319, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 80], 'flops': 2373281365952}, 'timestamp': '2025-09-10 02:35:38.925350', 'step': 8319, 'epoch': 2} {'type': 'loss', 'content': 0.11002009361982346, 'timestamp': '2025-09-10 02:35:38.948553', 'step': 8320, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 144], 'flops': 4271696270016}, 'timestamp': '2025-09-10 02:35:38.979374', 'step': 8320, 'epoch': 2} {'type': 'loss', 'content': 0.07540706545114517, 'timestamp': '2025-09-10 02:35:38.981495', 'step': 8321, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 2847885091968}, 'timestamp': '2025-09-10 02:35:39.011217', 'step': 8321, 'epoch': 2} {'type': 'loss', 'content': 0.053705330938100815, 'timestamp': '2025-09-10 02:35:39.013302', 'step': 8322, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 144], 'flops': 4271696270016}, 'timestamp': '2025-09-10 02:35:39.043365', 'step': 8322, 'epoch': 2} {'type': 'loss', 'content': 0.2282467931509018, 'timestamp': '2025-09-10 02:35:39.045876', 'step': 8323, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 80], 'flops': 2373281365952}, 'timestamp': '2025-09-10 02:35:39.076532', 'step': 8323, 'epoch': 2} {'type': 'loss', 'content': 0.07997270673513412, 'timestamp': '2025-09-10 02:35:39.099918', 'step': 8324, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 2847885091968}, 'timestamp': '2025-09-10 02:35:39.129914', 'step': 8324, 'epoch': 2} {'type': 'loss', 'content': 0.17137271165847778, 'timestamp': '2025-09-10 02:35:39.132194', 'step': 8325, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 2847885091968}, 'timestamp': '2025-09-10 02:35:39.161783', 'step': 8325, 'epoch': 2} {'type': 'loss', 'content': 0.17298300564289093, 'timestamp': '2025-09-10 02:35:39.165236', 'step': 8326, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 128], 'flops': 3797092544000}, 'timestamp': '2025-09-10 02:35:39.195286', 'step': 8326, 'epoch': 2} {'type': 'loss', 'content': 0.11427769809961319, 'timestamp': '2025-09-10 02:35:39.197362', 'step': 8327, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 2847885091968}, 'timestamp': '2025-09-10 02:35:39.226640', 'step': 8327, 'epoch': 2} {'type': 'loss', 'content': 0.1948176920413971, 'timestamp': '2025-09-10 02:35:39.250299', 'step': 8328, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 128], 'flops': 3797092544000}, 'timestamp': '2025-09-10 02:35:39.281017', 'step': 8328, 'epoch': 2} {'type': 'loss', 'content': 0.11405045539140701, 'timestamp': '2025-09-10 02:35:39.283309', 'step': 8329, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 2847885091968}, 'timestamp': '2025-09-10 02:35:39.312893', 'step': 8329, 'epoch': 2} {'type': 'loss', 'content': 0.14610755443572998, 'timestamp': '2025-09-10 02:35:39.315034', 'step': 8330, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 2847885091968}, 'timestamp': '2025-09-10 02:35:39.345340', 'step': 8330, 'epoch': 2} {'type': 'loss', 'content': 0.06513746082782745, 'timestamp': '2025-09-10 02:35:39.347908', 'step': 8331, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 144], 'flops': 4271696270016}, 'timestamp': '2025-09-10 02:35:39.377762', 'step': 8331, 'epoch': 2} {'type': 'loss', 'content': 0.1349656581878662, 'timestamp': '2025-09-10 02:35:39.401236', 'step': 8332, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 144], 'flops': 4271696270016}, 'timestamp': '2025-09-10 02:35:39.431042', 'step': 8332, 'epoch': 2} {'type': 'loss', 'content': 0.07997429370880127, 'timestamp': '2025-09-10 02:35:39.433017', 'step': 8333, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 3322488817984}, 'timestamp': '2025-09-10 02:35:39.462265', 'step': 8333, 'epoch': 2} {'type': 'loss', 'content': 0.20491068065166473, 'timestamp': '2025-09-10 02:35:39.464560', 'step': 8334, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 2847885091968}, 'timestamp': '2025-09-10 02:35:39.496828', 'step': 8334, 'epoch': 2} {'type': 'loss', 'content': 0.09576181322336197, 'timestamp': '2025-09-10 02:35:39.499340', 'step': 8335, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 3322488817984}, 'timestamp': '2025-09-10 02:35:39.529716', 'step': 8335, 'epoch': 2} {'type': 'loss', 'content': 0.11949201673269272, 'timestamp': '2025-09-10 02:35:39.553170', 'step': 8336, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 128], 'flops': 3797092544000}, 'timestamp': '2025-09-10 02:35:39.584547', 'step': 8336, 'epoch': 2} {'type': 'loss', 'content': 0.11529429256916046, 'timestamp': '2025-09-10 02:35:39.586630', 'step': 8337, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 2847885091968}, 'timestamp': '2025-09-10 02:35:39.616187', 'step': 8337, 'epoch': 2} {'type': 'loss', 'content': 0.12887440621852875, 'timestamp': '2025-09-10 02:35:39.618567', 'step': 8338, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 3322488817984}, 'timestamp': '2025-09-10 02:35:39.648440', 'step': 8338, 'epoch': 2} {'type': 'loss', 'content': 0.11041852086782455, 'timestamp': '2025-09-10 02:35:39.650632', 'step': 8339, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 3322488817984}, 'timestamp': '2025-09-10 02:35:39.680405', 'step': 8339, 'epoch': 2} {'type': 'loss', 'content': 0.07557874172925949, 'timestamp': '2025-09-10 02:35:39.703898', 'step': 8340, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 128], 'flops': 3797092544000}, 'timestamp': '2025-09-10 02:35:39.733770', 'step': 8340, 'epoch': 2} {'type': 'loss', 'content': 0.06875459849834442, 'timestamp': '2025-09-10 02:35:39.736456', 'step': 8341, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 128], 'flops': 3797092544000}, 'timestamp': '2025-09-10 02:35:39.766914', 'step': 8341, 'epoch': 2} {'type': 'loss', 'content': 0.15399600565433502, 'timestamp': '2025-09-10 02:35:39.769303', 'step': 8342, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 128], 'flops': 3797092544000}, 'timestamp': '2025-09-10 02:35:39.800193', 'step': 8342, 'epoch': 2} {'type': 'loss', 'content': 0.1144319549202919, 'timestamp': '2025-09-10 02:35:39.802471', 'step': 8343, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 128], 'flops': 3797092544000}, 'timestamp': '2025-09-10 02:35:39.832210', 'step': 8343, 'epoch': 2} {'type': 'loss', 'content': 0.14810121059417725, 'timestamp': '2025-09-10 02:35:39.855629', 'step': 8344, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 3322488817984}, 'timestamp': '2025-09-10 02:35:39.885528', 'step': 8344, 'epoch': 2} {'type': 'loss', 'content': 0.09829737991094589, 'timestamp': '2025-09-10 02:35:39.887641', 'step': 8345, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 2847885091968}, 'timestamp': '2025-09-10 02:35:39.918324', 'step': 8345, 'epoch': 2} {'type': 'loss', 'content': 0.24388115108013153, 'timestamp': '2025-09-10 02:35:39.920374', 'step': 8346, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 160], 'flops': 4746299996032}, 'timestamp': '2025-09-10 02:35:39.951129', 'step': 8346, 'epoch': 2} {'type': 'loss', 'content': 0.15745173394680023, 'timestamp': '2025-09-10 02:35:39.953851', 'step': 8347, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 3322488817984}, 'timestamp': '2025-09-10 02:35:39.983551', 'step': 8347, 'epoch': 2} {'type': 'loss', 'content': 0.17770908772945404, 'timestamp': '2025-09-10 02:35:40.006812', 'step': 8348, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 3322488817984}, 'timestamp': '2025-09-10 02:35:40.043221', 'step': 8348, 'epoch': 2} {'type': 'loss', 'content': 0.2265542894601822, 'timestamp': '2025-09-10 02:35:40.045464', 'step': 8349, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 144], 'flops': 4271696270016}, 'timestamp': '2025-09-10 02:35:40.076119', 'step': 8349, 'epoch': 2} {'type': 'loss', 'content': 0.05301927402615547, 'timestamp': '2025-09-10 02:35:40.078612', 'step': 8350, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 3322488817984}, 'timestamp': '2025-09-10 02:35:40.108760', 'step': 8350, 'epoch': 2} {'type': 'loss', 'content': 0.14131411910057068, 'timestamp': '2025-09-10 02:35:40.110869', 'step': 8351, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 2847885091968}, 'timestamp': '2025-09-10 02:35:40.140317', 'step': 8351, 'epoch': 2} {'type': 'loss', 'content': 0.10022531449794769, 'timestamp': '2025-09-10 02:35:40.163929', 'step': 8352, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 128], 'flops': 3797092544000}, 'timestamp': '2025-09-10 02:35:40.193767', 'step': 8352, 'epoch': 2} {'type': 'loss', 'content': 0.1231207624077797, 'timestamp': '2025-09-10 02:35:40.195669', 'step': 8353, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 128], 'flops': 3797092544000}, 'timestamp': '2025-09-10 02:35:40.225057', 'step': 8353, 'epoch': 2} {'type': 'loss', 'content': 0.13211016356945038, 'timestamp': '2025-09-10 02:35:40.227301', 'step': 8354, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 2847885091968}, 'timestamp': '2025-09-10 02:35:40.257208', 'step': 8354, 'epoch': 2} {'type': 'loss', 'content': 0.10550006479024887, 'timestamp': '2025-09-10 02:35:40.259704', 'step': 8355, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 2847885091968}, 'timestamp': '2025-09-10 02:35:40.291252', 'step': 8355, 'epoch': 2} {'type': 'loss', 'content': 0.12156326323747635, 'timestamp': '2025-09-10 02:35:40.314964', 'step': 8356, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 80], 'flops': 2373281365952}, 'timestamp': '2025-09-10 02:35:40.345538', 'step': 8356, 'epoch': 2} {'type': 'loss', 'content': 0.126505509018898, 'timestamp': '2025-09-10 02:35:40.347929', 'step': 8357, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 128], 'flops': 3797092544000}, 'timestamp': '2025-09-10 02:35:40.377363', 'step': 8357, 'epoch': 2} {'type': 'loss', 'content': 0.10797880589962006, 'timestamp': '2025-09-10 02:35:40.381484', 'step': 8358, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 128], 'flops': 3797092544000}, 'timestamp': '2025-09-10 02:35:40.411725', 'step': 8358, 'epoch': 2} {'type': 'loss', 'content': 0.1059064269065857, 'timestamp': '2025-09-10 02:35:40.414349', 'step': 8359, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 2847885091968}, 'timestamp': '2025-09-10 02:35:40.444122', 'step': 8359, 'epoch': 2} {'type': 'loss', 'content': 0.24888037145137787, 'timestamp': '2025-09-10 02:35:40.467685', 'step': 8360, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 128], 'flops': 3797092544000}, 'timestamp': '2025-09-10 02:35:40.498937', 'step': 8360, 'epoch': 2} {'type': 'loss', 'content': 0.08888177573680878, 'timestamp': '2025-09-10 02:35:40.501100', 'step': 8361, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 3322488817984}, 'timestamp': '2025-09-10 02:35:40.530769', 'step': 8361, 'epoch': 2} {'type': 'loss', 'content': 0.10159369558095932, 'timestamp': '2025-09-10 02:35:40.534233', 'step': 8362, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 144], 'flops': 4271696270016}, 'timestamp': '2025-09-10 02:35:40.565599', 'step': 8362, 'epoch': 2} {'type': 'loss', 'content': 0.07818417251110077, 'timestamp': '2025-09-10 02:35:40.568008', 'step': 8363, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 3322488817984}, 'timestamp': '2025-09-10 02:35:40.598028', 'step': 8363, 'epoch': 2} {'type': 'loss', 'content': 0.09403136372566223, 'timestamp': '2025-09-10 02:35:40.621574', 'step': 8364, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 160], 'flops': 4746299996032}, 'timestamp': '2025-09-10 02:35:40.651648', 'step': 8364, 'epoch': 2} {'type': 'loss', 'content': 0.15486739575862885, 'timestamp': '2025-09-10 02:35:40.654008', 'step': 8365, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 144], 'flops': 4271696270016}, 'timestamp': '2025-09-10 02:35:40.683952', 'step': 8365, 'epoch': 2} {'type': 'loss', 'content': 0.1689281165599823, 'timestamp': '2025-09-10 02:35:40.688391', 'step': 8366, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 2847885091968}, 'timestamp': '2025-09-10 02:35:40.724844', 'step': 8366, 'epoch': 2} {'type': 'loss', 'content': 0.11721796542406082, 'timestamp': '2025-09-10 02:35:40.730302', 'step': 8367, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 128], 'flops': 3797092544000}, 'timestamp': '2025-09-10 02:35:40.764157', 'step': 8367, 'epoch': 2} {'type': 'loss', 'content': 0.08195265382528305, 'timestamp': '2025-09-10 02:35:40.792826', 'step': 8368, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 160], 'flops': 4746299996032}, 'timestamp': '2025-09-10 02:35:40.822732', 'step': 8368, 'epoch': 2} {'type': 'loss', 'content': 0.1458728313446045, 'timestamp': '2025-09-10 02:35:40.824954', 'step': 8369, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 2847885091968}, 'timestamp': '2025-09-10 02:35:40.856277', 'step': 8369, 'epoch': 2} {'type': 'loss', 'content': 0.12796886265277863, 'timestamp': '2025-09-10 02:35:40.864137', 'step': 8370, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 3322488817984}, 'timestamp': '2025-09-10 02:35:40.898868', 'step': 8370, 'epoch': 2} {'type': 'loss', 'content': 0.19119128584861755, 'timestamp': '2025-09-10 02:35:40.901386', 'step': 8371, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 128], 'flops': 3797092544000}, 'timestamp': '2025-09-10 02:35:40.931129', 'step': 8371, 'epoch': 2} {'type': 'loss', 'content': 0.12613119184970856, 'timestamp': '2025-09-10 02:35:40.954529', 'step': 8372, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 2847885091968}, 'timestamp': '2025-09-10 02:35:40.984737', 'step': 8372, 'epoch': 2} {'type': 'loss', 'content': 0.12674525380134583, 'timestamp': '2025-09-10 02:35:40.990439', 'step': 8373, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 3322488817984}, 'timestamp': '2025-09-10 02:35:41.037727', 'step': 8373, 'epoch': 2} {'type': 'loss', 'content': 0.09495177119970322, 'timestamp': '2025-09-10 02:35:41.040383', 'step': 8374, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 224], 'flops': 6644714900096}, 'timestamp': '2025-09-10 02:35:41.075646', 'step': 8374, 'epoch': 2} {'type': 'loss', 'content': 0.24160349369049072, 'timestamp': '2025-09-10 02:35:41.082649', 'step': 8375, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 128], 'flops': 3797092544000}, 'timestamp': '2025-09-10 02:35:41.123253', 'step': 8375, 'epoch': 2} {'type': 'loss', 'content': 0.12099909037351608, 'timestamp': '2025-09-10 02:35:41.146840', 'step': 8376, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 3322488817984}, 'timestamp': '2025-09-10 02:35:41.183784', 'step': 8376, 'epoch': 2} {'type': 'loss', 'content': 0.21495889127254486, 'timestamp': '2025-09-10 02:35:41.188216', 'step': 8377, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 2847885091968}, 'timestamp': '2025-09-10 02:35:41.229496', 'step': 8377, 'epoch': 2} {'type': 'loss', 'content': 0.13082514703273773, 'timestamp': '2025-09-10 02:35:41.232201', 'step': 8378, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 2847885091968}, 'timestamp': '2025-09-10 02:35:41.264738', 'step': 8378, 'epoch': 2} {'type': 'loss', 'content': 0.18343544006347656, 'timestamp': '2025-09-10 02:35:41.267793', 'step': 8379, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 2847885091968}, 'timestamp': '2025-09-10 02:35:41.297505', 'step': 8379, 'epoch': 2} {'type': 'loss', 'content': 0.12018293142318726, 'timestamp': '2025-09-10 02:35:41.320989', 'step': 8380, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 2847885091968}, 'timestamp': '2025-09-10 02:35:41.351553', 'step': 8380, 'epoch': 2} {'type': 'loss', 'content': 0.2029973268508911, 'timestamp': '2025-09-10 02:35:41.353802', 'step': 8381, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 2847885091968}, 'timestamp': '2025-09-10 02:35:41.386758', 'step': 8381, 'epoch': 2} {'type': 'loss', 'content': 0.12483207136392593, 'timestamp': '2025-09-10 02:35:41.389364', 'step': 8382, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 128], 'flops': 3797092544000}, 'timestamp': '2025-09-10 02:35:41.439446', 'step': 8382, 'epoch': 2} {'type': 'loss', 'content': 0.12499602884054184, 'timestamp': '2025-09-10 02:35:41.443672', 'step': 8383, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 128], 'flops': 3797092544000}, 'timestamp': '2025-09-10 02:35:41.479338', 'step': 8383, 'epoch': 2} {'type': 'loss', 'content': 0.20428086817264557, 'timestamp': '2025-09-10 02:35:41.506658', 'step': 8384, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 144], 'flops': 4271696270016}, 'timestamp': '2025-09-10 02:35:41.544050', 'step': 8384, 'epoch': 2} {'type': 'loss', 'content': 0.11187706142663956, 'timestamp': '2025-09-10 02:35:41.546238', 'step': 8385, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 3322488817984}, 'timestamp': '2025-09-10 02:35:41.577125', 'step': 8385, 'epoch': 2} {'type': 'loss', 'content': 0.22967529296875, 'timestamp': '2025-09-10 02:35:41.580527', 'step': 8386, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 2847885091968}, 'timestamp': '2025-09-10 02:35:41.640024', 'step': 8386, 'epoch': 2} {'type': 'loss', 'content': 0.2268170565366745, 'timestamp': '2025-09-10 02:35:41.658920', 'step': 8387, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 128], 'flops': 3797092544000}, 'timestamp': '2025-09-10 02:35:41.711502', 'step': 8387, 'epoch': 2} {'type': 'loss', 'content': 0.12061044573783875, 'timestamp': '2025-09-10 02:35:41.735003', 'step': 8388, 'epoch': 2} {'type': 'flops', 'content': [{'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1582003754624}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1898404492032}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1898404492032}, {'type': 'perplexity', 'in_batch_dim': [8, 144], 'batch_size': 8, 'flops': 2847606704256}, {'type': 'perplexity', 'in_batch_dim': [8, 64], 'batch_size': 8, 'flops': 1265603017216}, {'type': 'perplexity', 'in_batch_dim': [8, 112], 'batch_size': 8, 'flops': 2214805229440}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1898404492032}, {'type': 'perplexity', 'in_batch_dim': [8, 112], 'batch_size': 8, 'flops': 2214805229440}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1898404492032}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1898404492032}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1898404492032}, {'type': 'perplexity', 'in_batch_dim': [8, 112], 'batch_size': 8, 'flops': 2214805229440}, {'type': 'perplexity', 'in_batch_dim': [8, 112], 'batch_size': 8, 'flops': 2214805229440}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1582003754624}, {'type': 'perplexity', 'in_batch_dim': [8, 144], 'batch_size': 8, 'flops': 2847606704256}, {'type': 'perplexity', 'in_batch_dim': [8, 112], 'batch_size': 8, 'flops': 2214805229440}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1582003754624}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1898404492032}, {'type': 'perplexity', 'in_batch_dim': [8, 64], 'batch_size': 8, 'flops': 1265603017216}, {'type': 'perplexity', 'in_batch_dim': [8, 64], 'batch_size': 8, 'flops': 1265603017216}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1898404492032}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1582003754624}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1582003754624}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1582003754624}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1582003754624}, {'type': 'perplexity', 'in_batch_dim': [8, 64], 'batch_size': 8, 'flops': 1265603017216}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1898404492032}, {'type': 'perplexity', 'in_batch_dim': [8, 64], 'batch_size': 8, 'flops': 1265603017216}, {'type': 'perplexity', 'in_batch_dim': [8, 64], 'batch_size': 8, 'flops': 1265603017216}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1582003754624}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1582003754624}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1582003754624}, {'type': 'perplexity', 'in_batch_dim': [8, 112], 'batch_size': 8, 'flops': 2214805229440}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1582003754624}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1582003754624}, {'type': 'perplexity', 'in_batch_dim': [8, 160], 'batch_size': 8, 'flops': 3164007441664}, {'type': 'perplexity', 'in_batch_dim': [8, 64], 'batch_size': 8, 'flops': 1265603017216}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1898404492032}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1898404492032}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1898404492032}, {'type': 'perplexity', 'in_batch_dim': [8, 64], 'batch_size': 8, 'flops': 1265603017216}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1582003754624}, {'type': 'perplexity', 'in_batch_dim': [8, 64], 'batch_size': 8, 'flops': 1265603017216}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1582003754624}, {'type': 'perplexity', 'in_batch_dim': [8, 64], 'batch_size': 8, 'flops': 1265603017216}, {'type': 'perplexity', 'in_batch_dim': [8, 112], 'batch_size': 8, 'flops': 2214805229440}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1582003754624}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1898404492032}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1898404492032}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1898404492032}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1898404492032}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1898404492032}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1898404492032}, {'type': 'perplexity', 'in_batch_dim': [8, 64], 'batch_size': 8, 'flops': 1265603017216}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1898404492032}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1582003754624}, {'type': 'perplexity', 'in_batch_dim': [8, 64], 'batch_size': 8, 'flops': 1265603017216}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1898404492032}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1898404492032}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1898404492032}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1898404492032}, {'type': 'perplexity', 'in_batch_dim': [8, 128], 'batch_size': 8, 'flops': 2531205966848}, {'type': 'perplexity', 'in_batch_dim': [8, 112], 'batch_size': 8, 'flops': 2214805229440}, {'type': 'perplexity', 'in_batch_dim': [8, 64], 'batch_size': 8, 'flops': 1265603017216}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1582003754624}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1898404492032}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1582003754624}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1898404492032}, {'type': 'perplexity', 'in_batch_dim': [8, 64], 'batch_size': 8, 'flops': 1265603017216}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1898404492032}, {'type': 'perplexity', 'in_batch_dim': [8, 112], 'batch_size': 8, 'flops': 2214805229440}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1898404492032}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1582003754624}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1582003754624}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1582003754624}, {'type': 'perplexity', 'in_batch_dim': [8, 64], 'batch_size': 8, 'flops': 1265603017216}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1582003754624}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1898404492032}, {'type': 'perplexity', 'in_batch_dim': [8, 64], 'batch_size': 8, 'flops': 1265603017216}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1582003754624}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1898404492032}, {'type': 'perplexity', 'in_batch_dim': [8, 64], 'batch_size': 8, 'flops': 1265603017216}, {'type': 'perplexity', 'in_batch_dim': [8, 112], 'batch_size': 8, 'flops': 2214805229440}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1582003754624}, {'type': 'perplexity', 'in_batch_dim': [8, 144], 'batch_size': 8, 'flops': 2847606704256}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1582003754624}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1582003754624}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1582003754624}, {'type': 'perplexity', 'in_batch_dim': [8, 64], 'batch_size': 8, 'flops': 1265603017216}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1898404492032}, {'type': 'perplexity', 'in_batch_dim': [8, 112], 'batch_size': 8, 'flops': 2214805229440}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1898404492032}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1582003754624}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1582003754624}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1582003754624}, {'type': 'perplexity', 'in_batch_dim': [8, 112], 'batch_size': 8, 'flops': 2214805229440}, {'type': 'perplexity', 'in_batch_dim': [8, 64], 'batch_size': 8, 'flops': 1265603017216}, {'type': 'perplexity', 'in_batch_dim': [8, 112], 'batch_size': 8, 'flops': 2214805229440}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1582003754624}, {'type': 'perplexity', 'in_batch_dim': [8, 64], 'batch_size': 8, 'flops': 1265603017216}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1582003754624}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1582003754624}, {'type': 'perplexity', 'in_batch_dim': [8, 64], 'batch_size': 8, 'flops': 1265603017216}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1582003754624}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1582003754624}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1582003754624}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1898404492032}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1582003754624}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1582003754624}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1898404492032}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1898404492032}, {'type': 'perplexity', 'in_batch_dim': [8, 112], 'batch_size': 8, 'flops': 2214805229440}, {'type': 'perplexity', 'in_batch_dim': [8, 64], 'batch_size': 8, 'flops': 1265603017216}, {'type': 'perplexity', 'in_batch_dim': [8, 112], 'batch_size': 8, 'flops': 2214805229440}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1898404492032}, {'type': 'perplexity', 'in_batch_dim': [8, 112], 'batch_size': 8, 'flops': 2214805229440}, {'type': 'perplexity', 'in_batch_dim': [8, 128], 'batch_size': 8, 'flops': 2531205966848}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1582003754624}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1898404492032}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1898404492032}, {'type': 'perplexity', 'in_batch_dim': [8, 192], 'batch_size': 8, 'flops': 3796808916480}, {'type': 'perplexity', 'in_batch_dim': [8, 64], 'batch_size': 8, 'flops': 1265603017216}, {'type': 'perplexity', 'in_batch_dim': [8, 144], 'batch_size': 8, 'flops': 2847606704256}, {'type': 'perplexity', 'in_batch_dim': [8, 64], 'batch_size': 8, 'flops': 1265603017216}, {'type': 'perplexity', 'in_batch_dim': [8, 144], 'batch_size': 8, 'flops': 2847606704256}, {'type': 'perplexity', 'in_batch_dim': [8, 64], 'batch_size': 8, 'flops': 1265603017216}, {'type': 'perplexity', 'in_batch_dim': [8, 64], 'batch_size': 8, 'flops': 1265603017216}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1898404492032}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1898404492032}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1582003754624}, {'type': 'perplexity', 'in_batch_dim': [8, 128], 'batch_size': 8, 'flops': 2531205966848}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1898404492032}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1898404492032}, {'type': 'perplexity', 'in_batch_dim': [8, 112], 'batch_size': 8, 'flops': 2214805229440}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1582003754624}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1582003754624}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1898404492032}, {'type': 'perplexity', 'in_batch_dim': [8, 64], 'batch_size': 8, 'flops': 1265603017216}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1898404492032}, {'type': 'perplexity', 'in_batch_dim': [8, 112], 'batch_size': 8, 'flops': 2214805229440}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1898404492032}, {'type': 'perplexity', 'in_batch_dim': [8, 64], 'batch_size': 8, 'flops': 1265603017216}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1582003754624}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1898404492032}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1898404492032}, {'type': 'perplexity', 'in_batch_dim': [8, 64], 'batch_size': 8, 'flops': 1265603017216}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1898404492032}, {'type': 'perplexity', 'in_batch_dim': [8, 64], 'batch_size': 8, 'flops': 1265603017216}, {'type': 'perplexity', 'in_batch_dim': [8, 112], 'batch_size': 8, 'flops': 2214805229440}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1582003754624}, {'type': 'perplexity', 'in_batch_dim': [8, 128], 'batch_size': 8, 'flops': 2531205966848}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1582003754624}, {'type': 'perplexity', 'in_batch_dim': [8, 112], 'batch_size': 8, 'flops': 2214805229440}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1898404492032}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1582003754624}, {'type': 'perplexity', 'in_batch_dim': [8, 112], 'batch_size': 8, 'flops': 2214805229440}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1582003754624}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1898404492032}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1898404492032}, {'type': 'perplexity', 'in_batch_dim': [8, 128], 'batch_size': 8, 'flops': 2531205966848}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1898404492032}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1898404492032}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1582003754624}, {'type': 'perplexity', 'in_batch_dim': [8, 112], 'batch_size': 8, 'flops': 2214805229440}, {'type': 'perplexity', 'in_batch_dim': [8, 128], 'batch_size': 8, 'flops': 2531205966848}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1898404492032}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1582003754624}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1582003754624}, {'type': 'perplexity', 'in_batch_dim': [8, 128], 'batch_size': 8, 'flops': 2531205966848}, {'type': 'perplexity', 'in_batch_dim': [8, 112], 'batch_size': 8, 'flops': 2214805229440}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1582003754624}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1582003754624}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1898404492032}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1582003754624}, {'type': 'perplexity', 'in_batch_dim': [8, 64], 'batch_size': 8, 'flops': 1265603017216}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1582003754624}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1582003754624}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1582003754624}, {'type': 'perplexity', 'in_batch_dim': [8, 112], 'batch_size': 8, 'flops': 2214805229440}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1582003754624}, {'type': 'perplexity', 'in_batch_dim': [8, 64], 'batch_size': 8, 'flops': 1265603017216}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1582003754624}, {'type': 'perplexity', 'in_batch_dim': [8, 128], 'batch_size': 8, 'flops': 2531205966848}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1898404492032}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1898404492032}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1582003754624}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1898404492032}, {'type': 'perplexity', 'in_batch_dim': [8, 64], 'batch_size': 8, 'flops': 1265603017216}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1898404492032}, {'type': 'perplexity', 'in_batch_dim': [8, 112], 'batch_size': 8, 'flops': 2214805229440}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1898404492032}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1582003754624}, {'type': 'perplexity', 'in_batch_dim': [8, 112], 'batch_size': 8, 'flops': 2214805229440}, {'type': 'perplexity', 'in_batch_dim': [8, 112], 'batch_size': 8, 'flops': 2214805229440}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1898404492032}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1898404492032}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1898404492032}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1898404492032}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1582003754624}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1582003754624}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1582003754624}, {'type': 'perplexity', 'in_batch_dim': [8, 112], 'batch_size': 8, 'flops': 2214805229440}, {'type': 'perplexity', 'in_batch_dim': [8, 64], 'batch_size': 8, 'flops': 1265603017216}, {'type': 'perplexity', 'in_batch_dim': [8, 144], 'batch_size': 8, 'flops': 2847606704256}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1898404492032}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1898404492032}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1898404492032}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1898404492032}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1898404492032}, {'type': 'perplexity', 'in_batch_dim': [8, 112], 'batch_size': 8, 'flops': 2214805229440}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1582003754624}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1582003754624}, {'type': 'perplexity', 'in_batch_dim': [8, 64], 'batch_size': 8, 'flops': 1265603017216}, {'type': 'perplexity', 'in_batch_dim': [8, 112], 'batch_size': 8, 'flops': 2214805229440}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1582003754624}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1898404492032}, {'type': 'perplexity', 'in_batch_dim': [8, 64], 'batch_size': 8, 'flops': 1265603017216}, {'type': 'perplexity', 'in_batch_dim': [8, 112], 'batch_size': 8, 'flops': 2214805229440}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1898404492032}, {'type': 'perplexity', 'in_batch_dim': [8, 64], 'batch_size': 8, 'flops': 1265603017216}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1582003754624}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1582003754624}, {'type': 'perplexity', 'in_batch_dim': [8, 64], 'batch_size': 8, 'flops': 1265603017216}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1898404492032}, {'type': 'perplexity', 'in_batch_dim': [8, 64], 'batch_size': 8, 'flops': 1265603017216}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1582003754624}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1898404492032}, {'type': 'perplexity', 'in_batch_dim': [8, 112], 'batch_size': 8, 'flops': 2214805229440}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1582003754624}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1898404492032}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1582003754624}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1898404492032}, {'type': 'perplexity', 'in_batch_dim': [8, 112], 'batch_size': 8, 'flops': 2214805229440}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1898404492032}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1898404492032}, {'type': 'perplexity', 'in_batch_dim': [8, 64], 'batch_size': 8, 'flops': 1265603017216}, {'type': 'perplexity', 'in_batch_dim': [8, 64], 'batch_size': 8, 'flops': 1265603017216}, {'type': 'perplexity', 'in_batch_dim': [8, 128], 'batch_size': 8, 'flops': 2531205966848}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1582003754624}, {'type': 'perplexity', 'in_batch_dim': [8, 64], 'batch_size': 8, 'flops': 1265603017216}, {'type': 'perplexity', 'in_batch_dim': [8, 112], 'batch_size': 8, 'flops': 2214805229440}, {'type': 'perplexity', 'in_batch_dim': [8, 112], 'batch_size': 8, 'flops': 2214805229440}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1898404492032}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1582003754624}, {'type': 'perplexity', 'in_batch_dim': [8, 128], 'batch_size': 8, 'flops': 2531205966848}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1898404492032}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1582003754624}, {'type': 'perplexity', 'in_batch_dim': [8, 112], 'batch_size': 8, 'flops': 2214805229440}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1898404492032}, {'type': 'perplexity', 'in_batch_dim': [8, 64], 'batch_size': 8, 'flops': 1265603017216}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1582003754624}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1898404492032}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1898404492032}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1582003754624}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1582003754624}, {'type': 'perplexity', 'in_batch_dim': [8, 64], 'batch_size': 8, 'flops': 1265603017216}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1898404492032}, {'type': 'perplexity', 'in_batch_dim': [8, 112], 'batch_size': 8, 'flops': 2214805229440}, {'type': 'perplexity', 'in_batch_dim': [8, 64], 'batch_size': 8, 'flops': 1265603017216}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1898404492032}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1582003754624}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1582003754624}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1898404492032}, {'type': 'perplexity', 'in_batch_dim': [8, 112], 'batch_size': 8, 'flops': 2214805229440}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1898404492032}, {'type': 'perplexity', 'in_batch_dim': [8, 112], 'batch_size': 8, 'flops': 2214805229440}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1582003754624}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1582003754624}, {'type': 'perplexity', 'in_batch_dim': [8, 64], 'batch_size': 8, 'flops': 1265603017216}, {'type': 'perplexity', 'in_batch_dim': [8, 112], 'batch_size': 8, 'flops': 2214805229440}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1582003754624}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1898404492032}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1582003754624}, {'type': 'perplexity', 'in_batch_dim': [8, 112], 'batch_size': 8, 'flops': 2214805229440}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1898404492032}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1898404492032}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1582003754624}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1898404492032}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1582003754624}, {'type': 'perplexity', 'in_batch_dim': [8, 112], 'batch_size': 8, 'flops': 2214805229440}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1582003754624}, {'type': 'perplexity', 'in_batch_dim': [8, 64], 'batch_size': 8, 'flops': 1265603017216}, {'type': 'perplexity', 'in_batch_dim': [8, 64], 'batch_size': 8, 'flops': 1265603017216}, {'type': 'perplexity', 'in_batch_dim': [8, 48], 'batch_size': 8, 'flops': 949202279808}, {'type': 'perplexity', 'in_batch_dim': [8, 112], 'batch_size': 8, 'flops': 2214805229440}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1582003754624}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1898404492032}, {'type': 'perplexity', 'in_batch_dim': [8, 128], 'batch_size': 8, 'flops': 2531205966848}, {'type': 'perplexity', 'in_batch_dim': [8, 112], 'batch_size': 8, 'flops': 2214805229440}, {'type': 'perplexity', 'in_batch_dim': [8, 128], 'batch_size': 8, 'flops': 2531205966848}, {'type': 'perplexity', 'in_batch_dim': [8, 112], 'batch_size': 8, 'flops': 2214805229440}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1582003754624}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1898404492032}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1582003754624}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1582003754624}, {'type': 'perplexity', 'in_batch_dim': [8, 112], 'batch_size': 8, 'flops': 2214805229440}, {'type': 'perplexity', 'in_batch_dim': [8, 112], 'batch_size': 8, 'flops': 2214805229440}, {'type': 'perplexity', 'in_batch_dim': [8, 64], 'batch_size': 8, 'flops': 1265603017216}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1898404492032}, {'type': 'perplexity', 'in_batch_dim': [8, 112], 'batch_size': 8, 'flops': 2214805229440}, {'type': 'perplexity', 'in_batch_dim': [8, 64], 'batch_size': 8, 'flops': 1265603017216}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1898404492032}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1582003754624}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1898404492032}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1898404492032}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1898404492032}, {'type': 'perplexity', 'in_batch_dim': [8, 48], 'batch_size': 8, 'flops': 949202279808}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1898404492032}, {'type': 'perplexity', 'in_batch_dim': [8, 112], 'batch_size': 8, 'flops': 2214805229440}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1582003754624}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1898404492032}, {'type': 'perplexity', 'in_batch_dim': [8, 64], 'batch_size': 8, 'flops': 1265603017216}, {'type': 'perplexity', 'in_batch_dim': [8, 112], 'batch_size': 8, 'flops': 2214805229440}, {'type': 'perplexity', 'in_batch_dim': [8, 64], 'batch_size': 8, 'flops': 1265603017216}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1582003754624}, {'type': 'perplexity', 'in_batch_dim': [8, 64], 'batch_size': 8, 'flops': 1265603017216}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1582003754624}, {'type': 'perplexity', 'in_batch_dim': [8, 112], 'batch_size': 8, 'flops': 2214805229440}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1582003754624}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1582003754624}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1582003754624}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1582003754624}, {'type': 'perplexity', 'in_batch_dim': [8, 112], 'batch_size': 8, 'flops': 2214805229440}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1582003754624}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1898404492032}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1898404492032}, {'type': 'perplexity', 'in_batch_dim': [8, 112], 'batch_size': 8, 'flops': 2214805229440}, {'type': 'perplexity', 'in_batch_dim': [8, 144], 'batch_size': 8, 'flops': 2847606704256}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1582003754624}, {'type': 'perplexity', 'in_batch_dim': [8, 64], 'batch_size': 8, 'flops': 1265603017216}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1898404492032}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1898404492032}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1582003754624}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1898404492032}, {'type': 'perplexity', 'in_batch_dim': [8, 128], 'batch_size': 8, 'flops': 2531205966848}, {'type': 'perplexity', 'in_batch_dim': [8, 112], 'batch_size': 8, 'flops': 2214805229440}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1898404492032}, {'type': 'perplexity', 'in_batch_dim': [8, 64], 'batch_size': 8, 'flops': 1265603017216}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1898404492032}, {'type': 'perplexity', 'in_batch_dim': [8, 160], 'batch_size': 8, 'flops': 3164007441664}, {'type': 'perplexity', 'in_batch_dim': [8, 64], 'batch_size': 8, 'flops': 1265603017216}, {'type': 'perplexity', 'in_batch_dim': [8, 112], 'batch_size': 8, 'flops': 2214805229440}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1898404492032}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1898404492032}, {'type': 'perplexity', 'in_batch_dim': [8, 144], 'batch_size': 8, 'flops': 2847606704256}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1582003754624}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1898404492032}, {'type': 'perplexity', 'in_batch_dim': [8, 64], 'batch_size': 8, 'flops': 1265603017216}, {'type': 'perplexity', 'in_batch_dim': [8, 64], 'batch_size': 8, 'flops': 1265603017216}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1898404492032}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1898404492032}, {'type': 'perplexity', 'in_batch_dim': [8, 112], 'batch_size': 8, 'flops': 2214805229440}, {'type': 'perplexity', 'in_batch_dim': [8, 112], 'batch_size': 8, 'flops': 2214805229440}, {'type': 'perplexity', 'in_batch_dim': [8, 128], 'batch_size': 8, 'flops': 2531205966848}, {'type': 'perplexity', 'in_batch_dim': [8, 112], 'batch_size': 8, 'flops': 2214805229440}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1582003754624}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1582003754624}, {'type': 'perplexity', 'in_batch_dim': [8, 112], 'batch_size': 8, 'flops': 2214805229440}, {'type': 'perplexity', 'in_batch_dim': [8, 128], 'batch_size': 8, 'flops': 2531205966848}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1582003754624}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1582003754624}, {'type': 'perplexity', 'in_batch_dim': [8, 64], 'batch_size': 8, 'flops': 1265603017216}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1898404492032}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1582003754624}, {'type': 'perplexity', 'in_batch_dim': [8, 112], 'batch_size': 8, 'flops': 2214805229440}, {'type': 'perplexity', 'in_batch_dim': [8, 64], 'batch_size': 8, 'flops': 1265603017216}, {'type': 'perplexity', 'in_batch_dim': [8, 64], 'batch_size': 8, 'flops': 1265603017216}, {'type': 'perplexity', 'in_batch_dim': [8, 128], 'batch_size': 8, 'flops': 2531205966848}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1898404492032}, {'type': 'perplexity', 'in_batch_dim': [8, 112], 'batch_size': 8, 'flops': 2214805229440}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1582003754624}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1582003754624}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1582003754624}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1582003754624}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1582003754624}, {'type': 'perplexity', 'in_batch_dim': [8, 112], 'batch_size': 8, 'flops': 2214805229440}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1898404492032}, {'type': 'perplexity', 'in_batch_dim': [8, 112], 'batch_size': 8, 'flops': 2214805229440}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1898404492032}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1582003754624}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1582003754624}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1898404492032}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1898404492032}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1582003754624}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1582003754624}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1582003754624}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1582003754624}, {'type': 'perplexity', 'in_batch_dim': [8, 48], 'batch_size': 8, 'flops': 949202279808}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1582003754624}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1898404492032}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1582003754624}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1898404492032}, {'type': 'perplexity', 'in_batch_dim': [8, 112], 'batch_size': 8, 'flops': 2214805229440}, {'type': 'perplexity', 'in_batch_dim': [8, 128], 'batch_size': 8, 'flops': 2531205966848}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1898404492032}, {'type': 'perplexity', 'in_batch_dim': [8, 64], 'batch_size': 8, 'flops': 1265603017216}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1898404492032}, {'type': 'perplexity', 'in_batch_dim': [8, 64], 'batch_size': 8, 'flops': 1265603017216}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1582003754624}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1582003754624}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1582003754624}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1582003754624}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1898404492032}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1582003754624}, {'type': 'perplexity', 'in_batch_dim': [8, 112], 'batch_size': 8, 'flops': 2214805229440}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1898404492032}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1582003754624}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1898404492032}, {'type': 'perplexity', 'in_batch_dim': [8, 64], 'batch_size': 8, 'flops': 1265603017216}, {'type': 'perplexity', 'in_batch_dim': [8, 64], 'batch_size': 8, 'flops': 1265603017216}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1898404492032}, {'type': 'perplexity', 'in_batch_dim': [8, 64], 'batch_size': 8, 'flops': 1265603017216}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1582003754624}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1582003754624}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1582003754624}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1582003754624}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1582003754624}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1582003754624}, {'type': 'perplexity', 'in_batch_dim': [8, 64], 'batch_size': 8, 'flops': 1265603017216}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1582003754624}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1582003754624}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1898404492032}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1898404492032}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1898404492032}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1898404492032}, {'type': 'perplexity', 'in_batch_dim': [8, 128], 'batch_size': 8, 'flops': 2531205966848}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1582003754624}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1898404492032}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1898404492032}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1898404492032}, {'type': 'perplexity', 'in_batch_dim': [8, 64], 'batch_size': 8, 'flops': 1265603017216}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1898404492032}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1898404492032}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1582003754624}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1898404492032}, {'type': 'perplexity', 'in_batch_dim': [8, 112], 'batch_size': 8, 'flops': 2214805229440}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1898404492032}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1582003754624}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1898404492032}, {'type': 'perplexity', 'in_batch_dim': [8, 64], 'batch_size': 8, 'flops': 1265603017216}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1898404492032}, {'type': 'perplexity', 'in_batch_dim': [8, 112], 'batch_size': 8, 'flops': 2214805229440}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1898404492032}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1898404492032}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1582003754624}, {'type': 'perplexity', 'in_batch_dim': [8, 64], 'batch_size': 8, 'flops': 1265603017216}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1582003754624}, {'type': 'perplexity', 'in_batch_dim': [8, 128], 'batch_size': 8, 'flops': 2531205966848}, {'type': 'perplexity', 'in_batch_dim': [8, 64], 'batch_size': 8, 'flops': 1265603017216}, {'type': 'perplexity', 'in_batch_dim': [8, 128], 'batch_size': 8, 'flops': 2531205966848}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1898404492032}, {'type': 'perplexity', 'in_batch_dim': [8, 112], 'batch_size': 8, 'flops': 2214805229440}, {'type': 'perplexity', 'in_batch_dim': [8, 112], 'batch_size': 8, 'flops': 2214805229440}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1898404492032}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1898404492032}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1582003754624}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1582003754624}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1582003754624}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1898404492032}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1582003754624}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1582003754624}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1898404492032}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1582003754624}, {'type': 'perplexity', 'in_batch_dim': [8, 64], 'batch_size': 8, 'flops': 1265603017216}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1898404492032}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1898404492032}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1898404492032}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1898404492032}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1582003754624}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1898404492032}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1582003754624}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1582003754624}, {'type': 'perplexity', 'in_batch_dim': [8, 112], 'batch_size': 8, 'flops': 2214805229440}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1582003754624}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1898404492032}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1582003754624}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1582003754624}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1582003754624}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1898404492032}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1582003754624}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1898404492032}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1582003754624}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1582003754624}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1898404492032}, {'type': 'perplexity', 'in_batch_dim': [8, 112], 'batch_size': 8, 'flops': 2214805229440}, {'type': 'perplexity', 'in_batch_dim': [8, 128], 'batch_size': 8, 'flops': 2531205966848}, {'type': 'perplexity', 'in_batch_dim': [8, 112], 'batch_size': 8, 'flops': 2214805229440}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1582003754624}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1898404492032}, {'type': 'perplexity', 'in_batch_dim': [8, 64], 'batch_size': 8, 'flops': 1265603017216}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1898404492032}, {'type': 'perplexity', 'in_batch_dim': [8, 64], 'batch_size': 8, 'flops': 1265603017216}, {'type': 'perplexity', 'in_batch_dim': [8, 144], 'batch_size': 8, 'flops': 2847606704256}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1582003754624}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1898404492032}, {'type': 'perplexity', 'in_batch_dim': [8, 112], 'batch_size': 8, 'flops': 2214805229440}, {'type': 'perplexity', 'in_batch_dim': [8, 144], 'batch_size': 8, 'flops': 2847606704256}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1582003754624}, {'type': 'perplexity', 'in_batch_dim': [8, 112], 'batch_size': 8, 'flops': 2214805229440}, {'type': 'perplexity', 'in_batch_dim': [8, 64], 'batch_size': 8, 'flops': 1265603017216}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1582003754624}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1898404492032}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1898404492032}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1582003754624}, {'type': 'perplexity', 'in_batch_dim': [8, 112], 'batch_size': 8, 'flops': 2214805229440}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1582003754624}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1582003754624}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1582003754624}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1582003754624}, {'type': 'perplexity', 'in_batch_dim': [8, 112], 'batch_size': 8, 'flops': 2214805229440}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1898404492032}, {'type': 'perplexity', 'in_batch_dim': [8, 128], 'batch_size': 8, 'flops': 2531205966848}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1582003754624}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1582003754624}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1898404492032}, {'type': 'perplexity', 'in_batch_dim': [8, 64], 'batch_size': 8, 'flops': 1265603017216}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1898404492032}, {'type': 'perplexity', 'in_batch_dim': [8, 64], 'batch_size': 8, 'flops': 1265603017216}, {'type': 'perplexity', 'in_batch_dim': [8, 112], 'batch_size': 8, 'flops': 2214805229440}, {'type': 'perplexity', 'in_batch_dim': [8, 112], 'batch_size': 8, 'flops': 2214805229440}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1582003754624}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1898404492032}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1582003754624}, {'type': 'perplexity', 'in_batch_dim': [8, 64], 'batch_size': 8, 'flops': 1265603017216}, {'type': 'perplexity', 'in_batch_dim': [8, 64], 'batch_size': 8, 'flops': 1265603017216}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1898404492032}, {'type': 'perplexity', 'in_batch_dim': [8, 64], 'batch_size': 8, 'flops': 1265603017216}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1898404492032}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1898404492032}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1582003754624}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1898404492032}, {'type': 'perplexity', 'in_batch_dim': [8, 144], 'batch_size': 8, 'flops': 2847606704256}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1898404492032}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1582003754624}, {'type': 'perplexity', 'in_batch_dim': [8, 128], 'batch_size': 8, 'flops': 2531205966848}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1898404492032}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1898404492032}, {'type': 'perplexity', 'in_batch_dim': [8, 112], 'batch_size': 8, 'flops': 2214805229440}, {'type': 'perplexity', 'in_batch_dim': [8, 112], 'batch_size': 8, 'flops': 2214805229440}, {'type': 'perplexity', 'in_batch_dim': [8, 64], 'batch_size': 8, 'flops': 1265603017216}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1582003754624}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1898404492032}, {'type': 'perplexity', 'in_batch_dim': [8, 128], 'batch_size': 8, 'flops': 2531205966848}, {'type': 'perplexity', 'in_batch_dim': [8, 144], 'batch_size': 8, 'flops': 2847606704256}, {'type': 'perplexity', 'in_batch_dim': [8, 112], 'batch_size': 8, 'flops': 2214805229440}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1582003754624}, {'type': 'perplexity', 'in_batch_dim': [8, 64], 'batch_size': 8, 'flops': 1265603017216}, {'type': 'perplexity', 'in_batch_dim': [8, 64], 'batch_size': 8, 'flops': 1265603017216}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1898404492032}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1582003754624}, {'type': 'perplexity', 'in_batch_dim': [8, 64], 'batch_size': 8, 'flops': 1265603017216}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1898404492032}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1582003754624}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1898404492032}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1898404492032}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1582003754624}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1582003754624}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1582003754624}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1582003754624}, {'type': 'perplexity', 'in_batch_dim': [3, 48], 'batch_size': 8, 'flops': 949202279808}], 'timestamp': '2025-09-10 02:35:49.483822', 'step': 8388, 'epoch': 2} {'type': 'pplx', 'content': 11480.156052534634, 'timestamp': '2025-09-10 02:35:49.486693', 'step': 8388, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 144], 'flops': 4271696270016}, 'timestamp': '2025-09-10 02:35:49.515291', 'step': 8388, 'epoch': 2} {'type': 'loss', 'content': 0.11108940839767456, 'timestamp': '2025-09-10 02:35:49.517657', 'step': 8389, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 128], 'flops': 3797092544000}, 'timestamp': '2025-09-10 02:35:49.548961', 'step': 8389, 'epoch': 2} {'type': 'loss', 'content': 0.10014413297176361, 'timestamp': '2025-09-10 02:35:49.551296', 'step': 8390, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 3322488817984}, 'timestamp': '2025-09-10 02:35:49.581238', 'step': 8390, 'epoch': 2} {'type': 'loss', 'content': 0.07892372459173203, 'timestamp': '2025-09-10 02:35:49.583791', 'step': 8391, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 128], 'flops': 3797092544000}, 'timestamp': '2025-09-10 02:35:49.613719', 'step': 8391, 'epoch': 2} {'type': 'loss', 'content': 0.19381245970726013, 'timestamp': '2025-09-10 02:35:49.637447', 'step': 8392, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 3322488817984}, 'timestamp': '2025-09-10 02:35:49.668250', 'step': 8392, 'epoch': 2} {'type': 'loss', 'content': 0.07907454669475555, 'timestamp': '2025-09-10 02:35:49.670538', 'step': 8393, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 2847885091968}, 'timestamp': '2025-09-10 02:35:49.701158', 'step': 8393, 'epoch': 2} {'type': 'loss', 'content': 0.08865836262702942, 'timestamp': '2025-09-10 02:35:49.704348', 'step': 8394, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 2847885091968}, 'timestamp': '2025-09-10 02:35:49.734761', 'step': 8394, 'epoch': 2} {'type': 'loss', 'content': 0.12553685903549194, 'timestamp': '2025-09-10 02:35:49.737100', 'step': 8395, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 2847885091968}, 'timestamp': '2025-09-10 02:35:49.767364', 'step': 8395, 'epoch': 2} {'type': 'loss', 'content': 0.1045292466878891, 'timestamp': '2025-09-10 02:35:49.790834', 'step': 8396, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 2847885091968}, 'timestamp': '2025-09-10 02:35:49.820598', 'step': 8396, 'epoch': 2} {'type': 'loss', 'content': 0.1450500190258026, 'timestamp': '2025-09-10 02:35:49.822958', 'step': 8397, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 128], 'flops': 3797092544000}, 'timestamp': '2025-09-10 02:35:49.852249', 'step': 8397, 'epoch': 2} {'type': 'loss', 'content': 0.17341311275959015, 'timestamp': '2025-09-10 02:35:49.854607', 'step': 8398, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 176], 'flops': 5220903722048}, 'timestamp': '2025-09-10 02:35:49.885001', 'step': 8398, 'epoch': 2} {'type': 'loss', 'content': 0.1636110246181488, 'timestamp': '2025-09-10 02:35:49.889397', 'step': 8399, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 3322488817984}, 'timestamp': '2025-09-10 02:35:49.919305', 'step': 8399, 'epoch': 2} {'type': 'loss', 'content': 0.148839071393013, 'timestamp': '2025-09-10 02:35:49.943215', 'step': 8400, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 3322488817984}, 'timestamp': '2025-09-10 02:35:49.973620', 'step': 8400, 'epoch': 2} {'type': 'loss', 'content': 0.12362763285636902, 'timestamp': '2025-09-10 02:35:49.976267', 'step': 8401, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 3322488817984}, 'timestamp': '2025-09-10 02:35:50.006214', 'step': 8401, 'epoch': 2} {'type': 'loss', 'content': 0.11853259801864624, 'timestamp': '2025-09-10 02:35:50.008500', 'step': 8402, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 2847885091968}, 'timestamp': '2025-09-10 02:35:50.038273', 'step': 8402, 'epoch': 2} {'type': 'loss', 'content': 0.15358661115169525, 'timestamp': '2025-09-10 02:35:50.040508', 'step': 8403, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 3322488817984}, 'timestamp': '2025-09-10 02:35:50.069908', 'step': 8403, 'epoch': 2} {'type': 'loss', 'content': 0.09107811748981476, 'timestamp': '2025-09-10 02:35:50.093516', 'step': 8404, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 2847885091968}, 'timestamp': '2025-09-10 02:35:50.124251', 'step': 8404, 'epoch': 2} {'type': 'loss', 'content': 0.11604093760251999, 'timestamp': '2025-09-10 02:35:50.126415', 'step': 8405, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 2847885091968}, 'timestamp': '2025-09-10 02:35:50.156053', 'step': 8405, 'epoch': 2} {'type': 'loss', 'content': 0.07329881191253662, 'timestamp': '2025-09-10 02:35:50.158444', 'step': 8406, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 3322488817984}, 'timestamp': '2025-09-10 02:35:50.187959', 'step': 8406, 'epoch': 2} {'type': 'loss', 'content': 0.1562776118516922, 'timestamp': '2025-09-10 02:35:50.190267', 'step': 8407, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 2847885091968}, 'timestamp': '2025-09-10 02:35:50.219648', 'step': 8407, 'epoch': 2} {'type': 'loss', 'content': 0.15205822885036469, 'timestamp': '2025-09-10 02:35:50.243294', 'step': 8408, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 80], 'flops': 2373281365952}, 'timestamp': '2025-09-10 02:35:50.273423', 'step': 8408, 'epoch': 2} {'type': 'loss', 'content': 0.14290864765644073, 'timestamp': '2025-09-10 02:35:50.276167', 'step': 8409, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 3322488817984}, 'timestamp': '2025-09-10 02:35:50.305919', 'step': 8409, 'epoch': 2} {'type': 'loss', 'content': 0.11495376378297806, 'timestamp': '2025-09-10 02:35:50.309296', 'step': 8410, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 128], 'flops': 3797092544000}, 'timestamp': '2025-09-10 02:35:50.338739', 'step': 8410, 'epoch': 2} {'type': 'loss', 'content': 0.13489820063114166, 'timestamp': '2025-09-10 02:35:50.341058', 'step': 8411, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 128], 'flops': 3797092544000}, 'timestamp': '2025-09-10 02:35:50.374443', 'step': 8411, 'epoch': 2} {'type': 'loss', 'content': 0.1169671043753624, 'timestamp': '2025-09-10 02:35:50.398334', 'step': 8412, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 3322488817984}, 'timestamp': '2025-09-10 02:35:50.428690', 'step': 8412, 'epoch': 2} {'type': 'loss', 'content': 0.12723857164382935, 'timestamp': '2025-09-10 02:35:50.430902', 'step': 8413, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 80], 'flops': 2373281365952}, 'timestamp': '2025-09-10 02:35:50.459838', 'step': 8413, 'epoch': 2} {'type': 'loss', 'content': 0.058308195322752, 'timestamp': '2025-09-10 02:35:50.461962', 'step': 8414, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 3322488817984}, 'timestamp': '2025-09-10 02:35:50.492304', 'step': 8414, 'epoch': 2} {'type': 'loss', 'content': 0.12673041224479675, 'timestamp': '2025-09-10 02:35:50.494770', 'step': 8415, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 3322488817984}, 'timestamp': '2025-09-10 02:35:50.524991', 'step': 8415, 'epoch': 2} {'type': 'loss', 'content': 0.11767882108688354, 'timestamp': '2025-09-10 02:35:50.548384', 'step': 8416, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 3322488817984}, 'timestamp': '2025-09-10 02:35:50.578783', 'step': 8416, 'epoch': 2} {'type': 'loss', 'content': 0.15936501324176788, 'timestamp': '2025-09-10 02:35:50.581258', 'step': 8417, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 128], 'flops': 3797092544000}, 'timestamp': '2025-09-10 02:35:50.611551', 'step': 8417, 'epoch': 2} {'type': 'loss', 'content': 0.1456162929534912, 'timestamp': '2025-09-10 02:35:50.613718', 'step': 8418, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 3322488817984}, 'timestamp': '2025-09-10 02:35:50.646210', 'step': 8418, 'epoch': 2} {'type': 'loss', 'content': 0.19801627099514008, 'timestamp': '2025-09-10 02:35:50.648437', 'step': 8419, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 3322488817984}, 'timestamp': '2025-09-10 02:35:50.678138', 'step': 8419, 'epoch': 2} {'type': 'loss', 'content': 0.1628202497959137, 'timestamp': '2025-09-10 02:35:50.701751', 'step': 8420, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 3322488817984}, 'timestamp': '2025-09-10 02:35:50.731909', 'step': 8420, 'epoch': 2} {'type': 'loss', 'content': 0.09586568176746368, 'timestamp': '2025-09-10 02:35:50.733937', 'step': 8421, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 2847885091968}, 'timestamp': '2025-09-10 02:35:50.764698', 'step': 8421, 'epoch': 2} {'type': 'loss', 'content': 0.09525515884160995, 'timestamp': '2025-09-10 02:35:50.767076', 'step': 8422, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 3322488817984}, 'timestamp': '2025-09-10 02:35:50.797185', 'step': 8422, 'epoch': 2} {'type': 'loss', 'content': 0.16605530679225922, 'timestamp': '2025-09-10 02:35:50.799596', 'step': 8423, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 3322488817984}, 'timestamp': '2025-09-10 02:35:50.830033', 'step': 8423, 'epoch': 2} {'type': 'loss', 'content': 0.10266382247209549, 'timestamp': '2025-09-10 02:35:50.853746', 'step': 8424, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 80], 'flops': 2373281365952}, 'timestamp': '2025-09-10 02:35:50.883780', 'step': 8424, 'epoch': 2} {'type': 'loss', 'content': 0.14294840395450592, 'timestamp': '2025-09-10 02:35:50.886096', 'step': 8425, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 3322488817984}, 'timestamp': '2025-09-10 02:35:50.917261', 'step': 8425, 'epoch': 2} {'type': 'loss', 'content': 0.08826868236064911, 'timestamp': '2025-09-10 02:35:50.919519', 'step': 8426, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 160], 'flops': 4746299996032}, 'timestamp': '2025-09-10 02:35:50.949554', 'step': 8426, 'epoch': 2} {'type': 'loss', 'content': 0.09006704390048981, 'timestamp': '2025-09-10 02:35:50.952232', 'step': 8427, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 128], 'flops': 3797092544000}, 'timestamp': '2025-09-10 02:35:50.981886', 'step': 8427, 'epoch': 2} {'type': 'loss', 'content': 0.12238170206546783, 'timestamp': '2025-09-10 02:35:51.005519', 'step': 8428, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 3322488817984}, 'timestamp': '2025-09-10 02:35:51.035959', 'step': 8428, 'epoch': 2} {'type': 'loss', 'content': 0.09899291396141052, 'timestamp': '2025-09-10 02:35:51.038445', 'step': 8429, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 128], 'flops': 3797092544000}, 'timestamp': '2025-09-10 02:35:51.069851', 'step': 8429, 'epoch': 2} {'type': 'loss', 'content': 0.14511588215827942, 'timestamp': '2025-09-10 02:35:51.074767', 'step': 8430, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 3322488817984}, 'timestamp': '2025-09-10 02:35:51.105639', 'step': 8430, 'epoch': 2} {'type': 'loss', 'content': 0.11734025925397873, 'timestamp': '2025-09-10 02:35:51.108023', 'step': 8431, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 128], 'flops': 3797092544000}, 'timestamp': '2025-09-10 02:35:51.137354', 'step': 8431, 'epoch': 2} {'type': 'loss', 'content': 0.19742679595947266, 'timestamp': '2025-09-10 02:35:51.160782', 'step': 8432, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 2847885091968}, 'timestamp': '2025-09-10 02:35:51.191197', 'step': 8432, 'epoch': 2} {'type': 'loss', 'content': 0.08369706571102142, 'timestamp': '2025-09-10 02:35:51.193507', 'step': 8433, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 2847885091968}, 'timestamp': '2025-09-10 02:35:51.223339', 'step': 8433, 'epoch': 2} {'type': 'loss', 'content': 0.08063854277133942, 'timestamp': '2025-09-10 02:35:51.225657', 'step': 8434, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 128], 'flops': 3797092544000}, 'timestamp': '2025-09-10 02:35:51.255242', 'step': 8434, 'epoch': 2} {'type': 'loss', 'content': 0.16582335531711578, 'timestamp': '2025-09-10 02:35:51.257490', 'step': 8435, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 144], 'flops': 4271696270016}, 'timestamp': '2025-09-10 02:35:51.287335', 'step': 8435, 'epoch': 2} {'type': 'loss', 'content': 0.15038292109966278, 'timestamp': '2025-09-10 02:35:51.312713', 'step': 8436, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 128], 'flops': 3797092544000}, 'timestamp': '2025-09-10 02:35:51.344765', 'step': 8436, 'epoch': 2} {'type': 'loss', 'content': 0.11538831144571304, 'timestamp': '2025-09-10 02:35:51.347145', 'step': 8437, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 2847885091968}, 'timestamp': '2025-09-10 02:35:51.376932', 'step': 8437, 'epoch': 2} {'type': 'loss', 'content': 0.14412622153759003, 'timestamp': '2025-09-10 02:35:51.379388', 'step': 8438, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 3322488817984}, 'timestamp': '2025-09-10 02:35:51.408985', 'step': 8438, 'epoch': 2} {'type': 'loss', 'content': 0.16133828461170197, 'timestamp': '2025-09-10 02:35:51.411244', 'step': 8439, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 2847885091968}, 'timestamp': '2025-09-10 02:35:51.440275', 'step': 8439, 'epoch': 2} {'type': 'loss', 'content': 0.12352047115564346, 'timestamp': '2025-09-10 02:35:51.463670', 'step': 8440, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 2847885091968}, 'timestamp': '2025-09-10 02:35:51.494164', 'step': 8440, 'epoch': 2} {'type': 'loss', 'content': 0.09523539245128632, 'timestamp': '2025-09-10 02:35:51.496412', 'step': 8441, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 3322488817984}, 'timestamp': '2025-09-10 02:35:51.525911', 'step': 8441, 'epoch': 2} {'type': 'loss', 'content': 0.11027683317661285, 'timestamp': '2025-09-10 02:35:51.527828', 'step': 8442, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 2847885091968}, 'timestamp': '2025-09-10 02:35:51.557859', 'step': 8442, 'epoch': 2} {'type': 'loss', 'content': 0.06453815847635269, 'timestamp': '2025-09-10 02:35:51.560119', 'step': 8443, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 2847885091968}, 'timestamp': '2025-09-10 02:35:51.589897', 'step': 8443, 'epoch': 2} {'type': 'loss', 'content': 0.14684590697288513, 'timestamp': '2025-09-10 02:35:51.613148', 'step': 8444, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 128], 'flops': 3797092544000}, 'timestamp': '2025-09-10 02:35:51.645467', 'step': 8444, 'epoch': 2} {'type': 'loss', 'content': 0.14443348348140717, 'timestamp': '2025-09-10 02:35:51.647786', 'step': 8445, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 3322488817984}, 'timestamp': '2025-09-10 02:35:51.677424', 'step': 8445, 'epoch': 2} {'type': 'loss', 'content': 0.05977438762784004, 'timestamp': '2025-09-10 02:35:51.679593', 'step': 8446, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 128], 'flops': 3797092544000}, 'timestamp': '2025-09-10 02:35:51.710150', 'step': 8446, 'epoch': 2} {'type': 'loss', 'content': 0.08580762147903442, 'timestamp': '2025-09-10 02:35:51.712416', 'step': 8447, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 2847885091968}, 'timestamp': '2025-09-10 02:35:51.742597', 'step': 8447, 'epoch': 2} {'type': 'loss', 'content': 0.11793757975101471, 'timestamp': '2025-09-10 02:35:51.766239', 'step': 8448, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 3322488817984}, 'timestamp': '2025-09-10 02:35:51.796920', 'step': 8448, 'epoch': 2} {'type': 'loss', 'content': 0.16596408188343048, 'timestamp': '2025-09-10 02:35:51.799315', 'step': 8449, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 3322488817984}, 'timestamp': '2025-09-10 02:35:51.830612', 'step': 8449, 'epoch': 2} {'type': 'loss', 'content': 0.10926443338394165, 'timestamp': '2025-09-10 02:35:51.833063', 'step': 8450, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 3322488817984}, 'timestamp': '2025-09-10 02:35:51.864655', 'step': 8450, 'epoch': 2} {'type': 'loss', 'content': 0.05869714170694351, 'timestamp': '2025-09-10 02:35:51.867733', 'step': 8451, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 3322488817984}, 'timestamp': '2025-09-10 02:35:51.900258', 'step': 8451, 'epoch': 2} {'type': 'loss', 'content': 0.1617080271244049, 'timestamp': '2025-09-10 02:35:51.924576', 'step': 8452, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 128], 'flops': 3797092544000}, 'timestamp': '2025-09-10 02:35:51.959865', 'step': 8452, 'epoch': 2} {'type': 'loss', 'content': 0.09507586807012558, 'timestamp': '2025-09-10 02:35:51.962454', 'step': 8453, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 144], 'flops': 4271696270016}, 'timestamp': '2025-09-10 02:35:51.992360', 'step': 8453, 'epoch': 2} {'type': 'loss', 'content': 0.16654647886753082, 'timestamp': '2025-09-10 02:35:51.994695', 'step': 8454, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 3322488817984}, 'timestamp': '2025-09-10 02:35:52.024754', 'step': 8454, 'epoch': 2} {'type': 'loss', 'content': 0.13712851703166962, 'timestamp': '2025-09-10 02:35:52.027536', 'step': 8455, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 144], 'flops': 4271696270016}, 'timestamp': '2025-09-10 02:35:52.059397', 'step': 8455, 'epoch': 2} {'type': 'loss', 'content': 0.14103645086288452, 'timestamp': '2025-09-10 02:35:52.084299', 'step': 8456, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 128], 'flops': 3797092544000}, 'timestamp': '2025-09-10 02:35:52.114708', 'step': 8456, 'epoch': 2} {'type': 'loss', 'content': 0.12389257550239563, 'timestamp': '2025-09-10 02:35:52.116985', 'step': 8457, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 128], 'flops': 3797092544000}, 'timestamp': '2025-09-10 02:35:52.147022', 'step': 8457, 'epoch': 2} {'type': 'loss', 'content': 0.14689557254314423, 'timestamp': '2025-09-10 02:35:52.149089', 'step': 8458, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 3322488817984}, 'timestamp': '2025-09-10 02:35:52.178642', 'step': 8458, 'epoch': 2} {'type': 'loss', 'content': 0.14674127101898193, 'timestamp': '2025-09-10 02:35:52.180950', 'step': 8459, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 2847885091968}, 'timestamp': '2025-09-10 02:35:52.210823', 'step': 8459, 'epoch': 2} {'type': 'loss', 'content': 0.086077481508255, 'timestamp': '2025-09-10 02:35:52.234309', 'step': 8460, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 128], 'flops': 3797092544000}, 'timestamp': '2025-09-10 02:35:52.271063', 'step': 8460, 'epoch': 2} {'type': 'loss', 'content': 0.10751426964998245, 'timestamp': '2025-09-10 02:35:52.273591', 'step': 8461, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 128], 'flops': 3797092544000}, 'timestamp': '2025-09-10 02:35:52.303304', 'step': 8461, 'epoch': 2} {'type': 'loss', 'content': 0.1568533480167389, 'timestamp': '2025-09-10 02:35:52.305497', 'step': 8462, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 2847885091968}, 'timestamp': '2025-09-10 02:35:52.335432', 'step': 8462, 'epoch': 2} {'type': 'loss', 'content': 0.19982153177261353, 'timestamp': '2025-09-10 02:35:52.337627', 'step': 8463, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 128], 'flops': 3797092544000}, 'timestamp': '2025-09-10 02:35:52.367895', 'step': 8463, 'epoch': 2} {'type': 'loss', 'content': 0.07179008424282074, 'timestamp': '2025-09-10 02:35:52.391332', 'step': 8464, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 2847885091968}, 'timestamp': '2025-09-10 02:35:52.422126', 'step': 8464, 'epoch': 2} {'type': 'loss', 'content': 0.09011417627334595, 'timestamp': '2025-09-10 02:35:52.424424', 'step': 8465, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 2847885091968}, 'timestamp': '2025-09-10 02:35:52.454184', 'step': 8465, 'epoch': 2} {'type': 'loss', 'content': 0.10474153608083725, 'timestamp': '2025-09-10 02:35:52.466315', 'step': 8466, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 128], 'flops': 3797092544000}, 'timestamp': '2025-09-10 02:35:52.503423', 'step': 8466, 'epoch': 2} {'type': 'loss', 'content': 0.1495634764432907, 'timestamp': '2025-09-10 02:35:52.506204', 'step': 8467, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 2847885091968}, 'timestamp': '2025-09-10 02:35:52.536026', 'step': 8467, 'epoch': 2} {'type': 'loss', 'content': 0.08826608955860138, 'timestamp': '2025-09-10 02:35:52.559479', 'step': 8468, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 3322488817984}, 'timestamp': '2025-09-10 02:35:52.589894', 'step': 8468, 'epoch': 2} {'type': 'loss', 'content': 0.14247781038284302, 'timestamp': '2025-09-10 02:35:52.592111', 'step': 8469, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 128], 'flops': 3797092544000}, 'timestamp': '2025-09-10 02:35:52.621738', 'step': 8469, 'epoch': 2} {'type': 'loss', 'content': 0.07145947217941284, 'timestamp': '2025-09-10 02:35:52.625306', 'step': 8470, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 144], 'flops': 4271696270016}, 'timestamp': '2025-09-10 02:35:52.654524', 'step': 8470, 'epoch': 2} {'type': 'loss', 'content': 0.20364047586917877, 'timestamp': '2025-09-10 02:35:52.656809', 'step': 8471, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 3322488817984}, 'timestamp': '2025-09-10 02:35:52.686100', 'step': 8471, 'epoch': 2} {'type': 'loss', 'content': 0.13326026499271393, 'timestamp': '2025-09-10 02:35:52.709367', 'step': 8472, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 80], 'flops': 2373281365952}, 'timestamp': '2025-09-10 02:35:52.740043', 'step': 8472, 'epoch': 2} {'type': 'loss', 'content': 0.16495837271213531, 'timestamp': '2025-09-10 02:35:52.742216', 'step': 8473, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 2847885091968}, 'timestamp': '2025-09-10 02:35:52.772021', 'step': 8473, 'epoch': 2} {'type': 'loss', 'content': 0.10117696970701218, 'timestamp': '2025-09-10 02:35:52.774246', 'step': 8474, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 2847885091968}, 'timestamp': '2025-09-10 02:35:52.804024', 'step': 8474, 'epoch': 2} {'type': 'loss', 'content': 0.06704284995794296, 'timestamp': '2025-09-10 02:35:52.806621', 'step': 8475, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 128], 'flops': 3797092544000}, 'timestamp': '2025-09-10 02:35:52.836299', 'step': 8475, 'epoch': 2} {'type': 'loss', 'content': 0.17105732858181, 'timestamp': '2025-09-10 02:35:52.861478', 'step': 8476, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 128], 'flops': 3797092544000}, 'timestamp': '2025-09-10 02:35:52.892073', 'step': 8476, 'epoch': 2} {'type': 'loss', 'content': 0.08905941247940063, 'timestamp': '2025-09-10 02:35:52.894361', 'step': 8477, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 3322488817984}, 'timestamp': '2025-09-10 02:35:52.923632', 'step': 8477, 'epoch': 2} {'type': 'loss', 'content': 0.08982136845588684, 'timestamp': '2025-09-10 02:35:52.925726', 'step': 8478, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 2847885091968}, 'timestamp': '2025-09-10 02:35:52.957018', 'step': 8478, 'epoch': 2} {'type': 'loss', 'content': 0.0617569275200367, 'timestamp': '2025-09-10 02:35:52.959234', 'step': 8479, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 128], 'flops': 3797092544000}, 'timestamp': '2025-09-10 02:35:52.989251', 'step': 8479, 'epoch': 2} {'type': 'loss', 'content': 0.1474117785692215, 'timestamp': '2025-09-10 02:35:53.013079', 'step': 8480, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 128], 'flops': 3797092544000}, 'timestamp': '2025-09-10 02:35:53.042514', 'step': 8480, 'epoch': 2} {'type': 'loss', 'content': 0.09486735612154007, 'timestamp': '2025-09-10 02:35:53.044930', 'step': 8481, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 3322488817984}, 'timestamp': '2025-09-10 02:35:53.075004', 'step': 8481, 'epoch': 2} {'type': 'loss', 'content': 0.22672222554683685, 'timestamp': '2025-09-10 02:35:53.079668', 'step': 8482, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 128], 'flops': 3797092544000}, 'timestamp': '2025-09-10 02:35:53.110174', 'step': 8482, 'epoch': 2} {'type': 'loss', 'content': 0.1248655617237091, 'timestamp': '2025-09-10 02:35:53.112709', 'step': 8483, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 3322488817984}, 'timestamp': '2025-09-10 02:35:53.143238', 'step': 8483, 'epoch': 2} {'type': 'loss', 'content': 0.09119170159101486, 'timestamp': '2025-09-10 02:35:53.166662', 'step': 8484, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 2847885091968}, 'timestamp': '2025-09-10 02:35:53.196936', 'step': 8484, 'epoch': 2} {'type': 'loss', 'content': 0.08508521318435669, 'timestamp': '2025-09-10 02:35:53.199304', 'step': 8485, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 2847885091968}, 'timestamp': '2025-09-10 02:35:53.229081', 'step': 8485, 'epoch': 2} {'type': 'loss', 'content': 0.13998714089393616, 'timestamp': '2025-09-10 02:35:53.231769', 'step': 8486, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 3322488817984}, 'timestamp': '2025-09-10 02:35:53.261763', 'step': 8486, 'epoch': 2} {'type': 'loss', 'content': 0.09279198944568634, 'timestamp': '2025-09-10 02:35:53.264210', 'step': 8487, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 3322488817984}, 'timestamp': '2025-09-10 02:35:53.294457', 'step': 8487, 'epoch': 2} {'type': 'loss', 'content': 0.18534334003925323, 'timestamp': '2025-09-10 02:35:53.317932', 'step': 8488, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 3322488817984}, 'timestamp': '2025-09-10 02:35:53.348787', 'step': 8488, 'epoch': 2} {'type': 'loss', 'content': 0.1900702565908432, 'timestamp': '2025-09-10 02:35:53.351047', 'step': 8489, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 160], 'flops': 4746299996032}, 'timestamp': '2025-09-10 02:35:53.380887', 'step': 8489, 'epoch': 2} {'type': 'loss', 'content': 0.06446316838264465, 'timestamp': '2025-09-10 02:35:53.383638', 'step': 8490, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 2847885091968}, 'timestamp': '2025-09-10 02:35:53.413945', 'step': 8490, 'epoch': 2} {'type': 'loss', 'content': 0.23993700742721558, 'timestamp': '2025-09-10 02:35:53.416126', 'step': 8491, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 3322488817984}, 'timestamp': '2025-09-10 02:35:53.445968', 'step': 8491, 'epoch': 2} {'type': 'loss', 'content': 0.13826817274093628, 'timestamp': '2025-09-10 02:35:53.470503', 'step': 8492, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 128], 'flops': 3797092544000}, 'timestamp': '2025-09-10 02:35:53.500960', 'step': 8492, 'epoch': 2} {'type': 'loss', 'content': 0.23643997311592102, 'timestamp': '2025-09-10 02:35:53.503444', 'step': 8493, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 3322488817984}, 'timestamp': '2025-09-10 02:35:53.533328', 'step': 8493, 'epoch': 2} {'type': 'loss', 'content': 0.10774394869804382, 'timestamp': '2025-09-10 02:35:53.535422', 'step': 8494, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 128], 'flops': 3797092544000}, 'timestamp': '2025-09-10 02:35:53.565373', 'step': 8494, 'epoch': 2} {'type': 'loss', 'content': 0.18676358461380005, 'timestamp': '2025-09-10 02:35:53.567552', 'step': 8495, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 2847885091968}, 'timestamp': '2025-09-10 02:35:53.597631', 'step': 8495, 'epoch': 2} {'type': 'loss', 'content': 0.12865731120109558, 'timestamp': '2025-09-10 02:35:53.621238', 'step': 8496, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 2847885091968}, 'timestamp': '2025-09-10 02:35:53.654424', 'step': 8496, 'epoch': 2} {'type': 'loss', 'content': 0.1259704828262329, 'timestamp': '2025-09-10 02:35:53.656734', 'step': 8497, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 128], 'flops': 3797092544000}, 'timestamp': '2025-09-10 02:35:53.686897', 'step': 8497, 'epoch': 2} {'type': 'loss', 'content': 0.07924927026033401, 'timestamp': '2025-09-10 02:35:53.689013', 'step': 8498, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 3322488817984}, 'timestamp': '2025-09-10 02:35:53.718918', 'step': 8498, 'epoch': 2} {'type': 'loss', 'content': 0.10928048193454742, 'timestamp': '2025-09-10 02:35:53.720851', 'step': 8499, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 176], 'flops': 5220903722048}, 'timestamp': '2025-09-10 02:35:53.750898', 'step': 8499, 'epoch': 2} {'type': 'loss', 'content': 0.1186809316277504, 'timestamp': '2025-09-10 02:35:53.775798', 'step': 8500, 'epoch': 2} {'type': 'info', 'content': 'Checkpoint saved at step 8500', 'timestamp': '2025-09-10 02:36:00.180363', 'step': 8500, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 3322488817984}, 'timestamp': '2025-09-10 02:36:00.218828', 'step': 8500, 'epoch': 2} {'type': 'loss', 'content': 0.21417659521102905, 'timestamp': '2025-09-10 02:36:00.221337', 'step': 8501, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 3322488817984}, 'timestamp': '2025-09-10 02:36:00.251630', 'step': 8501, 'epoch': 2} {'type': 'loss', 'content': 0.1862976849079132, 'timestamp': '2025-09-10 02:36:00.253974', 'step': 8502, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 144], 'flops': 4271696270016}, 'timestamp': '2025-09-10 02:36:00.284072', 'step': 8502, 'epoch': 2} {'type': 'loss', 'content': 0.10102032870054245, 'timestamp': '2025-09-10 02:36:00.286508', 'step': 8503, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 3322488817984}, 'timestamp': '2025-09-10 02:36:00.319765', 'step': 8503, 'epoch': 2} {'type': 'loss', 'content': 0.18689721822738647, 'timestamp': '2025-09-10 02:36:00.348899', 'step': 8504, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 128], 'flops': 3797092544000}, 'timestamp': '2025-09-10 02:36:00.382377', 'step': 8504, 'epoch': 2} {'type': 'loss', 'content': 0.15179193019866943, 'timestamp': '2025-09-10 02:36:00.385454', 'step': 8505, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 2847885091968}, 'timestamp': '2025-09-10 02:36:00.418217', 'step': 8505, 'epoch': 2} {'type': 'loss', 'content': 0.04137425497174263, 'timestamp': '2025-09-10 02:36:00.421271', 'step': 8506, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 2847885091968}, 'timestamp': '2025-09-10 02:36:00.453712', 'step': 8506, 'epoch': 2} {'type': 'loss', 'content': 0.17456865310668945, 'timestamp': '2025-09-10 02:36:00.458974', 'step': 8507, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 128], 'flops': 3797092544000}, 'timestamp': '2025-09-10 02:36:00.494844', 'step': 8507, 'epoch': 2} {'type': 'loss', 'content': 0.122396320104599, 'timestamp': '2025-09-10 02:36:00.518827', 'step': 8508, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 2847885091968}, 'timestamp': '2025-09-10 02:36:00.563048', 'step': 8508, 'epoch': 2} {'type': 'loss', 'content': 0.09808273613452911, 'timestamp': '2025-09-10 02:36:00.565285', 'step': 8509, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 128], 'flops': 3797092544000}, 'timestamp': '2025-09-10 02:36:00.595434', 'step': 8509, 'epoch': 2} {'type': 'loss', 'content': 0.07532698661088943, 'timestamp': '2025-09-10 02:36:00.597732', 'step': 8510, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 2847885091968}, 'timestamp': '2025-09-10 02:36:00.628219', 'step': 8510, 'epoch': 2} {'type': 'loss', 'content': 0.08962032943964005, 'timestamp': '2025-09-10 02:36:00.631653', 'step': 8511, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 128], 'flops': 3797092544000}, 'timestamp': '2025-09-10 02:36:00.669494', 'step': 8511, 'epoch': 2} {'type': 'loss', 'content': 0.09871848672628403, 'timestamp': '2025-09-10 02:36:00.693010', 'step': 8512, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 2847885091968}, 'timestamp': '2025-09-10 02:36:00.723223', 'step': 8512, 'epoch': 2} {'type': 'loss', 'content': 0.08934444934129715, 'timestamp': '2025-09-10 02:36:00.725264', 'step': 8513, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 128], 'flops': 3797092544000}, 'timestamp': '2025-09-10 02:36:00.755411', 'step': 8513, 'epoch': 2} {'type': 'loss', 'content': 0.09052138030529022, 'timestamp': '2025-09-10 02:36:00.757576', 'step': 8514, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 3322488817984}, 'timestamp': '2025-09-10 02:36:00.787925', 'step': 8514, 'epoch': 2} {'type': 'loss', 'content': 0.09168826788663864, 'timestamp': '2025-09-10 02:36:00.790126', 'step': 8515, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 176], 'flops': 5220903722048}, 'timestamp': '2025-09-10 02:36:00.820502', 'step': 8515, 'epoch': 2} {'type': 'loss', 'content': 0.17826496064662933, 'timestamp': '2025-09-10 02:36:00.845376', 'step': 8516, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 3322488817984}, 'timestamp': '2025-09-10 02:36:00.876160', 'step': 8516, 'epoch': 2} {'type': 'loss', 'content': 0.23138628900051117, 'timestamp': '2025-09-10 02:36:00.878386', 'step': 8517, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 3322488817984}, 'timestamp': '2025-09-10 02:36:00.908147', 'step': 8517, 'epoch': 2} {'type': 'loss', 'content': 0.06849309056997299, 'timestamp': '2025-09-10 02:36:00.911151', 'step': 8518, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 2847885091968}, 'timestamp': '2025-09-10 02:36:00.941168', 'step': 8518, 'epoch': 2} {'type': 'loss', 'content': 0.10940025001764297, 'timestamp': '2025-09-10 02:36:00.944664', 'step': 8519, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 128], 'flops': 3797092544000}, 'timestamp': '2025-09-10 02:36:00.975582', 'step': 8519, 'epoch': 2} {'type': 'loss', 'content': 0.1630571484565735, 'timestamp': '2025-09-10 02:36:01.000071', 'step': 8520, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 2847885091968}, 'timestamp': '2025-09-10 02:36:01.030473', 'step': 8520, 'epoch': 2} {'type': 'loss', 'content': 0.2536992132663727, 'timestamp': '2025-09-10 02:36:01.035824', 'step': 8521, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 2847885091968}, 'timestamp': '2025-09-10 02:36:01.077452', 'step': 8521, 'epoch': 2} {'type': 'loss', 'content': 0.07673946768045425, 'timestamp': '2025-09-10 02:36:01.081102', 'step': 8522, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 3322488817984}, 'timestamp': '2025-09-10 02:36:01.110921', 'step': 8522, 'epoch': 2} {'type': 'loss', 'content': 0.1361534297466278, 'timestamp': '2025-09-10 02:36:01.119005', 'step': 8523, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 2847885091968}, 'timestamp': '2025-09-10 02:36:01.162571', 'step': 8523, 'epoch': 2} {'type': 'loss', 'content': 0.1510421633720398, 'timestamp': '2025-09-10 02:36:01.187937', 'step': 8524, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 3322488817984}, 'timestamp': '2025-09-10 02:36:01.218195', 'step': 8524, 'epoch': 2} {'type': 'loss', 'content': 0.14234314858913422, 'timestamp': '2025-09-10 02:36:01.220559', 'step': 8525, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 3322488817984}, 'timestamp': '2025-09-10 02:36:01.251376', 'step': 8525, 'epoch': 2} {'type': 'loss', 'content': 0.15636631846427917, 'timestamp': '2025-09-10 02:36:01.258969', 'step': 8526, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 144], 'flops': 4271696270016}, 'timestamp': '2025-09-10 02:36:01.295354', 'step': 8526, 'epoch': 2} {'type': 'loss', 'content': 0.10405224561691284, 'timestamp': '2025-09-10 02:36:01.297782', 'step': 8527, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 2847885091968}, 'timestamp': '2025-09-10 02:36:01.327967', 'step': 8527, 'epoch': 2} {'type': 'loss', 'content': 0.14917024970054626, 'timestamp': '2025-09-10 02:36:01.351423', 'step': 8528, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 2847885091968}, 'timestamp': '2025-09-10 02:36:01.382196', 'step': 8528, 'epoch': 2} {'type': 'loss', 'content': 0.1287589967250824, 'timestamp': '2025-09-10 02:36:01.385330', 'step': 8529, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 2847885091968}, 'timestamp': '2025-09-10 02:36:01.415312', 'step': 8529, 'epoch': 2} {'type': 'loss', 'content': 0.1396767646074295, 'timestamp': '2025-09-10 02:36:01.417338', 'step': 8530, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 3322488817984}, 'timestamp': '2025-09-10 02:36:01.447781', 'step': 8530, 'epoch': 2} {'type': 'loss', 'content': 0.1286649852991104, 'timestamp': '2025-09-10 02:36:01.450732', 'step': 8531, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 2847885091968}, 'timestamp': '2025-09-10 02:36:01.481795', 'step': 8531, 'epoch': 2} {'type': 'loss', 'content': 0.1253470629453659, 'timestamp': '2025-09-10 02:36:01.505351', 'step': 8532, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 3322488817984}, 'timestamp': '2025-09-10 02:36:01.535743', 'step': 8532, 'epoch': 2} {'type': 'loss', 'content': 0.1356310248374939, 'timestamp': '2025-09-10 02:36:01.538267', 'step': 8533, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 3322488817984}, 'timestamp': '2025-09-10 02:36:01.568295', 'step': 8533, 'epoch': 2} {'type': 'loss', 'content': 0.15619538724422455, 'timestamp': '2025-09-10 02:36:01.570499', 'step': 8534, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 128], 'flops': 3797092544000}, 'timestamp': '2025-09-10 02:36:01.600998', 'step': 8534, 'epoch': 2} {'type': 'loss', 'content': 0.12166083604097366, 'timestamp': '2025-09-10 02:36:01.603509', 'step': 8535, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 3322488817984}, 'timestamp': '2025-09-10 02:36:01.634010', 'step': 8535, 'epoch': 2} {'type': 'loss', 'content': 0.0928906574845314, 'timestamp': '2025-09-10 02:36:01.657562', 'step': 8536, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 3322488817984}, 'timestamp': '2025-09-10 02:36:01.687557', 'step': 8536, 'epoch': 2} {'type': 'loss', 'content': 0.17680972814559937, 'timestamp': '2025-09-10 02:36:01.690461', 'step': 8537, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 3322488817984}, 'timestamp': '2025-09-10 02:36:01.721156', 'step': 8537, 'epoch': 2} {'type': 'loss', 'content': 0.08792401850223541, 'timestamp': '2025-09-10 02:36:01.723595', 'step': 8538, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 128], 'flops': 3797092544000}, 'timestamp': '2025-09-10 02:36:01.754264', 'step': 8538, 'epoch': 2} {'type': 'loss', 'content': 0.04937904700636864, 'timestamp': '2025-09-10 02:36:01.756749', 'step': 8539, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 128], 'flops': 3797092544000}, 'timestamp': '2025-09-10 02:36:01.786494', 'step': 8539, 'epoch': 2} {'type': 'loss', 'content': 0.22724048793315887, 'timestamp': '2025-09-10 02:36:01.810298', 'step': 8540, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 144], 'flops': 4271696270016}, 'timestamp': '2025-09-10 02:36:01.843420', 'step': 8540, 'epoch': 2} {'type': 'loss', 'content': 0.0745375007390976, 'timestamp': '2025-09-10 02:36:01.845821', 'step': 8541, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 128], 'flops': 3797092544000}, 'timestamp': '2025-09-10 02:36:01.875878', 'step': 8541, 'epoch': 2} {'type': 'loss', 'content': 0.039332352578639984, 'timestamp': '2025-09-10 02:36:01.878579', 'step': 8542, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 2847885091968}, 'timestamp': '2025-09-10 02:36:01.909596', 'step': 8542, 'epoch': 2} {'type': 'loss', 'content': 0.19556665420532227, 'timestamp': '2025-09-10 02:36:01.912344', 'step': 8543, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 128], 'flops': 3797092544000}, 'timestamp': '2025-09-10 02:36:01.943316', 'step': 8543, 'epoch': 2} {'type': 'loss', 'content': 0.07245458662509918, 'timestamp': '2025-09-10 02:36:01.967096', 'step': 8544, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 3322488817984}, 'timestamp': '2025-09-10 02:36:01.998132', 'step': 8544, 'epoch': 2} {'type': 'loss', 'content': 0.1300794780254364, 'timestamp': '2025-09-10 02:36:02.000594', 'step': 8545, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 144], 'flops': 4271696270016}, 'timestamp': '2025-09-10 02:36:02.031463', 'step': 8545, 'epoch': 2} {'type': 'loss', 'content': 0.08413734287023544, 'timestamp': '2025-09-10 02:36:02.034249', 'step': 8546, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 128], 'flops': 3797092544000}, 'timestamp': '2025-09-10 02:36:02.065866', 'step': 8546, 'epoch': 2} {'type': 'loss', 'content': 0.12748445570468903, 'timestamp': '2025-09-10 02:36:02.068000', 'step': 8547, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 3322488817984}, 'timestamp': '2025-09-10 02:36:02.098991', 'step': 8547, 'epoch': 2} {'type': 'loss', 'content': 0.10144434124231339, 'timestamp': '2025-09-10 02:36:02.122805', 'step': 8548, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 80], 'flops': 2373281365952}, 'timestamp': '2025-09-10 02:36:02.156553', 'step': 8548, 'epoch': 2} {'type': 'loss', 'content': 0.10788707435131073, 'timestamp': '2025-09-10 02:36:02.161534', 'step': 8549, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 2847885091968}, 'timestamp': '2025-09-10 02:36:02.194606', 'step': 8549, 'epoch': 2} {'type': 'loss', 'content': 0.11641755700111389, 'timestamp': '2025-09-10 02:36:02.198353', 'step': 8550, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 3322488817984}, 'timestamp': '2025-09-10 02:36:02.234339', 'step': 8550, 'epoch': 2} {'type': 'loss', 'content': 0.08292936533689499, 'timestamp': '2025-09-10 02:36:02.236771', 'step': 8551, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 160], 'flops': 4746299996032}, 'timestamp': '2025-09-10 02:36:02.273814', 'step': 8551, 'epoch': 2} {'type': 'loss', 'content': 0.09225143492221832, 'timestamp': '2025-09-10 02:36:02.298969', 'step': 8552, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 3322488817984}, 'timestamp': '2025-09-10 02:36:02.336753', 'step': 8552, 'epoch': 2} {'type': 'loss', 'content': 0.13490383327007294, 'timestamp': '2025-09-10 02:36:02.341558', 'step': 8553, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 2847885091968}, 'timestamp': '2025-09-10 02:36:02.381181', 'step': 8553, 'epoch': 2} {'type': 'loss', 'content': 0.1103130429983139, 'timestamp': '2025-09-10 02:36:02.384501', 'step': 8554, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 2847885091968}, 'timestamp': '2025-09-10 02:36:02.419309', 'step': 8554, 'epoch': 2} {'type': 'loss', 'content': 0.15895970165729523, 'timestamp': '2025-09-10 02:36:02.421760', 'step': 8555, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 128], 'flops': 3797092544000}, 'timestamp': '2025-09-10 02:36:02.451909', 'step': 8555, 'epoch': 2} {'type': 'loss', 'content': 0.09260547161102295, 'timestamp': '2025-09-10 02:36:02.476371', 'step': 8556, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 80], 'flops': 2373281365952}, 'timestamp': '2025-09-10 02:36:02.507170', 'step': 8556, 'epoch': 2} {'type': 'loss', 'content': 0.1412232220172882, 'timestamp': '2025-09-10 02:36:02.509643', 'step': 8557, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 144], 'flops': 4271696270016}, 'timestamp': '2025-09-10 02:36:02.541105', 'step': 8557, 'epoch': 2} {'type': 'loss', 'content': 0.1626925766468048, 'timestamp': '2025-09-10 02:36:02.543637', 'step': 8558, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 128], 'flops': 3797092544000}, 'timestamp': '2025-09-10 02:36:02.573856', 'step': 8558, 'epoch': 2} {'type': 'loss', 'content': 0.1322132796049118, 'timestamp': '2025-09-10 02:36:02.576095', 'step': 8559, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 2847885091968}, 'timestamp': '2025-09-10 02:36:02.606380', 'step': 8559, 'epoch': 2} {'type': 'loss', 'content': 0.0980570912361145, 'timestamp': '2025-09-10 02:36:02.630074', 'step': 8560, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 144], 'flops': 4271696270016}, 'timestamp': '2025-09-10 02:36:02.661463', 'step': 8560, 'epoch': 2} {'type': 'loss', 'content': 0.06782171130180359, 'timestamp': '2025-09-10 02:36:02.665544', 'step': 8561, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 128], 'flops': 3797092544000}, 'timestamp': '2025-09-10 02:36:02.695685', 'step': 8561, 'epoch': 2} {'type': 'loss', 'content': 0.09321396797895432, 'timestamp': '2025-09-10 02:36:02.698073', 'step': 8562, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 80], 'flops': 2373281365952}, 'timestamp': '2025-09-10 02:36:02.728191', 'step': 8562, 'epoch': 2} {'type': 'loss', 'content': 0.15257661044597626, 'timestamp': '2025-09-10 02:36:02.730574', 'step': 8563, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 3322488817984}, 'timestamp': '2025-09-10 02:36:02.760809', 'step': 8563, 'epoch': 2} {'type': 'loss', 'content': 0.09334257990121841, 'timestamp': '2025-09-10 02:36:02.784358', 'step': 8564, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 128], 'flops': 3797092544000}, 'timestamp': '2025-09-10 02:36:02.817060', 'step': 8564, 'epoch': 2} {'type': 'loss', 'content': 0.12065025418996811, 'timestamp': '2025-09-10 02:36:02.819853', 'step': 8565, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 128], 'flops': 3797092544000}, 'timestamp': '2025-09-10 02:36:02.850768', 'step': 8565, 'epoch': 2} {'type': 'loss', 'content': 0.11479383707046509, 'timestamp': '2025-09-10 02:36:02.854868', 'step': 8566, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 2847885091968}, 'timestamp': '2025-09-10 02:36:02.890164', 'step': 8566, 'epoch': 2} {'type': 'loss', 'content': 0.14632529020309448, 'timestamp': '2025-09-10 02:36:02.892889', 'step': 8567, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 144], 'flops': 4271696270016}, 'timestamp': '2025-09-10 02:36:02.923487', 'step': 8567, 'epoch': 2} {'type': 'loss', 'content': 0.16117583215236664, 'timestamp': '2025-09-10 02:36:02.947018', 'step': 8568, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 128], 'flops': 3797092544000}, 'timestamp': '2025-09-10 02:36:02.977218', 'step': 8568, 'epoch': 2} {'type': 'loss', 'content': 0.09390474855899811, 'timestamp': '2025-09-10 02:36:02.979589', 'step': 8569, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 2847885091968}, 'timestamp': '2025-09-10 02:36:03.009176', 'step': 8569, 'epoch': 2} {'type': 'loss', 'content': 0.13305382430553436, 'timestamp': '2025-09-10 02:36:03.011351', 'step': 8570, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 160], 'flops': 4746299996032}, 'timestamp': '2025-09-10 02:36:03.044387', 'step': 8570, 'epoch': 2} {'type': 'loss', 'content': 0.11306143552064896, 'timestamp': '2025-09-10 02:36:03.047126', 'step': 8571, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 144], 'flops': 4271696270016}, 'timestamp': '2025-09-10 02:36:03.082204', 'step': 8571, 'epoch': 2} {'type': 'loss', 'content': 0.1183520182967186, 'timestamp': '2025-09-10 02:36:03.106600', 'step': 8572, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 2847885091968}, 'timestamp': '2025-09-10 02:36:03.138882', 'step': 8572, 'epoch': 2} {'type': 'loss', 'content': 0.15353620052337646, 'timestamp': '2025-09-10 02:36:03.141983', 'step': 8573, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 3322488817984}, 'timestamp': '2025-09-10 02:36:03.174039', 'step': 8573, 'epoch': 2} {'type': 'loss', 'content': 0.13939538598060608, 'timestamp': '2025-09-10 02:36:03.176477', 'step': 8574, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 80], 'flops': 2373281365952}, 'timestamp': '2025-09-10 02:36:03.211131', 'step': 8574, 'epoch': 2} {'type': 'loss', 'content': 0.07624910026788712, 'timestamp': '2025-09-10 02:36:03.213687', 'step': 8575, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 128], 'flops': 3797092544000}, 'timestamp': '2025-09-10 02:36:03.246182', 'step': 8575, 'epoch': 2} {'type': 'loss', 'content': 0.08589284121990204, 'timestamp': '2025-09-10 02:36:03.270140', 'step': 8576, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 128], 'flops': 3797092544000}, 'timestamp': '2025-09-10 02:36:03.302169', 'step': 8576, 'epoch': 2} {'type': 'loss', 'content': 0.07523001730442047, 'timestamp': '2025-09-10 02:36:03.304911', 'step': 8577, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 3322488817984}, 'timestamp': '2025-09-10 02:36:03.335253', 'step': 8577, 'epoch': 2} {'type': 'loss', 'content': 0.18494106829166412, 'timestamp': '2025-09-10 02:36:03.337774', 'step': 8578, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 128], 'flops': 3797092544000}, 'timestamp': '2025-09-10 02:36:03.370615', 'step': 8578, 'epoch': 2} {'type': 'loss', 'content': 0.17821358144283295, 'timestamp': '2025-09-10 02:36:03.375110', 'step': 8579, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 3322488817984}, 'timestamp': '2025-09-10 02:36:03.408022', 'step': 8579, 'epoch': 2} {'type': 'loss', 'content': 0.09523303061723709, 'timestamp': '2025-09-10 02:36:03.432160', 'step': 8580, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 2847885091968}, 'timestamp': '2025-09-10 02:36:03.466162', 'step': 8580, 'epoch': 2} {'type': 'loss', 'content': 0.19452990591526031, 'timestamp': '2025-09-10 02:36:03.468743', 'step': 8581, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 128], 'flops': 3797092544000}, 'timestamp': '2025-09-10 02:36:03.500890', 'step': 8581, 'epoch': 2} {'type': 'loss', 'content': 0.14174053072929382, 'timestamp': '2025-09-10 02:36:03.504999', 'step': 8582, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 128], 'flops': 3797092544000}, 'timestamp': '2025-09-10 02:36:03.548960', 'step': 8582, 'epoch': 2} {'type': 'loss', 'content': 0.10614874958992004, 'timestamp': '2025-09-10 02:36:03.563850', 'step': 8583, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 2847885091968}, 'timestamp': '2025-09-10 02:36:03.594497', 'step': 8583, 'epoch': 2} {'type': 'loss', 'content': 0.17425130307674408, 'timestamp': '2025-09-10 02:36:03.618217', 'step': 8584, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 128], 'flops': 3797092544000}, 'timestamp': '2025-09-10 02:36:03.648854', 'step': 8584, 'epoch': 2} {'type': 'loss', 'content': 0.099064402282238, 'timestamp': '2025-09-10 02:36:03.651562', 'step': 8585, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 3322488817984}, 'timestamp': '2025-09-10 02:36:03.682858', 'step': 8585, 'epoch': 2} {'type': 'loss', 'content': 0.12520848214626312, 'timestamp': '2025-09-10 02:36:03.685241', 'step': 8586, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 160], 'flops': 4746299996032}, 'timestamp': '2025-09-10 02:36:03.715654', 'step': 8586, 'epoch': 2} {'type': 'loss', 'content': 0.1285882443189621, 'timestamp': '2025-09-10 02:36:03.718554', 'step': 8587, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 3322488817984}, 'timestamp': '2025-09-10 02:36:03.749219', 'step': 8587, 'epoch': 2} {'type': 'loss', 'content': 0.13581502437591553, 'timestamp': '2025-09-10 02:36:03.772890', 'step': 8588, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 3322488817984}, 'timestamp': '2025-09-10 02:36:03.804218', 'step': 8588, 'epoch': 2} {'type': 'loss', 'content': 0.1025833785533905, 'timestamp': '2025-09-10 02:36:03.807002', 'step': 8589, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 128], 'flops': 3797092544000}, 'timestamp': '2025-09-10 02:36:03.837495', 'step': 8589, 'epoch': 2} {'type': 'loss', 'content': 0.12774761021137238, 'timestamp': '2025-09-10 02:36:03.840220', 'step': 8590, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 80], 'flops': 2373281365952}, 'timestamp': '2025-09-10 02:36:03.871208', 'step': 8590, 'epoch': 2} {'type': 'loss', 'content': 0.19030815362930298, 'timestamp': '2025-09-10 02:36:03.874447', 'step': 8591, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 3322488817984}, 'timestamp': '2025-09-10 02:36:03.907138', 'step': 8591, 'epoch': 2} {'type': 'loss', 'content': 0.11295297741889954, 'timestamp': '2025-09-10 02:36:03.931152', 'step': 8592, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 2847885091968}, 'timestamp': '2025-09-10 02:36:03.962183', 'step': 8592, 'epoch': 2} {'type': 'loss', 'content': 0.1810290515422821, 'timestamp': '2025-09-10 02:36:03.964449', 'step': 8593, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 2847885091968}, 'timestamp': '2025-09-10 02:36:03.995573', 'step': 8593, 'epoch': 2} {'type': 'loss', 'content': 0.07194789499044418, 'timestamp': '2025-09-10 02:36:03.999111', 'step': 8594, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 80], 'flops': 2373281365952}, 'timestamp': '2025-09-10 02:36:04.029796', 'step': 8594, 'epoch': 2} {'type': 'loss', 'content': 0.12104514241218567, 'timestamp': '2025-09-10 02:36:04.031986', 'step': 8595, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 2847885091968}, 'timestamp': '2025-09-10 02:36:04.063808', 'step': 8595, 'epoch': 2} {'type': 'loss', 'content': 0.2136022001504898, 'timestamp': '2025-09-10 02:36:04.087483', 'step': 8596, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 3322488817984}, 'timestamp': '2025-09-10 02:36:04.118535', 'step': 8596, 'epoch': 2} {'type': 'loss', 'content': 0.08789091557264328, 'timestamp': '2025-09-10 02:36:04.121912', 'step': 8597, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 3322488817984}, 'timestamp': '2025-09-10 02:36:04.157311', 'step': 8597, 'epoch': 2} {'type': 'loss', 'content': 0.1302817463874817, 'timestamp': '2025-09-10 02:36:04.179267', 'step': 8598, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 80], 'flops': 2373281365952}, 'timestamp': '2025-09-10 02:36:04.218348', 'step': 8598, 'epoch': 2} {'type': 'loss', 'content': 0.1289491206407547, 'timestamp': '2025-09-10 02:36:04.220511', 'step': 8599, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 2847885091968}, 'timestamp': '2025-09-10 02:36:04.251705', 'step': 8599, 'epoch': 2} {'type': 'loss', 'content': 0.04466436430811882, 'timestamp': '2025-09-10 02:36:04.278412', 'step': 8600, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 3322488817984}, 'timestamp': '2025-09-10 02:36:04.320787', 'step': 8600, 'epoch': 2} {'type': 'loss', 'content': 0.092351995408535, 'timestamp': '2025-09-10 02:36:04.323911', 'step': 8601, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 128], 'flops': 3797092544000}, 'timestamp': '2025-09-10 02:36:04.354548', 'step': 8601, 'epoch': 2} {'type': 'loss', 'content': 0.1027156189084053, 'timestamp': '2025-09-10 02:36:04.356955', 'step': 8602, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 80], 'flops': 2373281365952}, 'timestamp': '2025-09-10 02:36:04.386949', 'step': 8602, 'epoch': 2} {'type': 'loss', 'content': 0.09013363718986511, 'timestamp': '2025-09-10 02:36:04.389344', 'step': 8603, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 128], 'flops': 3797092544000}, 'timestamp': '2025-09-10 02:36:04.420316', 'step': 8603, 'epoch': 2} {'type': 'loss', 'content': 0.15925388038158417, 'timestamp': '2025-09-10 02:36:04.443901', 'step': 8604, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 176], 'flops': 5220903722048}, 'timestamp': '2025-09-10 02:36:04.477124', 'step': 8604, 'epoch': 2} {'type': 'loss', 'content': 0.14417093992233276, 'timestamp': '2025-09-10 02:36:04.480186', 'step': 8605, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 128], 'flops': 3797092544000}, 'timestamp': '2025-09-10 02:36:04.513319', 'step': 8605, 'epoch': 2} {'type': 'loss', 'content': 0.083766408264637, 'timestamp': '2025-09-10 02:36:04.516719', 'step': 8606, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 128], 'flops': 3797092544000}, 'timestamp': '2025-09-10 02:36:04.557902', 'step': 8606, 'epoch': 2} {'type': 'loss', 'content': 0.09933476150035858, 'timestamp': '2025-09-10 02:36:04.560020', 'step': 8607, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 3322488817984}, 'timestamp': '2025-09-10 02:36:04.590946', 'step': 8607, 'epoch': 2} {'type': 'loss', 'content': 0.09151513129472733, 'timestamp': '2025-09-10 02:36:04.614564', 'step': 8608, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 3322488817984}, 'timestamp': '2025-09-10 02:36:04.646380', 'step': 8608, 'epoch': 2} {'type': 'loss', 'content': 0.1882086843252182, 'timestamp': '2025-09-10 02:36:04.650237', 'step': 8609, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 128], 'flops': 3797092544000}, 'timestamp': '2025-09-10 02:36:04.681030', 'step': 8609, 'epoch': 2} {'type': 'loss', 'content': 0.09476589411497116, 'timestamp': '2025-09-10 02:36:04.686898', 'step': 8610, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 3322488817984}, 'timestamp': '2025-09-10 02:36:04.717956', 'step': 8610, 'epoch': 2} {'type': 'loss', 'content': 0.08512597531080246, 'timestamp': '2025-09-10 02:36:04.720085', 'step': 8611, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 3322488817984}, 'timestamp': '2025-09-10 02:36:04.753184', 'step': 8611, 'epoch': 2} {'type': 'loss', 'content': 0.10331454128026962, 'timestamp': '2025-09-10 02:36:04.777312', 'step': 8612, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 128], 'flops': 3797092544000}, 'timestamp': '2025-09-10 02:36:04.807054', 'step': 8612, 'epoch': 2} {'type': 'loss', 'content': 0.10972175002098083, 'timestamp': '2025-09-10 02:36:04.809425', 'step': 8613, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 128], 'flops': 3797092544000}, 'timestamp': '2025-09-10 02:36:04.845726', 'step': 8613, 'epoch': 2} {'type': 'loss', 'content': 0.10200972855091095, 'timestamp': '2025-09-10 02:36:04.848395', 'step': 8614, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 128], 'flops': 3797092544000}, 'timestamp': '2025-09-10 02:36:04.878973', 'step': 8614, 'epoch': 2} {'type': 'loss', 'content': 0.09502715617418289, 'timestamp': '2025-09-10 02:36:04.881744', 'step': 8615, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 224], 'flops': 6644714900096}, 'timestamp': '2025-09-10 02:36:04.920502', 'step': 8615, 'epoch': 2} {'type': 'loss', 'content': 0.16573916375637054, 'timestamp': '2025-09-10 02:36:04.948549', 'step': 8616, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 144], 'flops': 4271696270016}, 'timestamp': '2025-09-10 02:36:04.979498', 'step': 8616, 'epoch': 2} {'type': 'loss', 'content': 0.15702107548713684, 'timestamp': '2025-09-10 02:36:04.982077', 'step': 8617, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 2847885091968}, 'timestamp': '2025-09-10 02:36:05.013148', 'step': 8617, 'epoch': 2} {'type': 'loss', 'content': 0.16911731660366058, 'timestamp': '2025-09-10 02:36:05.015620', 'step': 8618, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 3322488817984}, 'timestamp': '2025-09-10 02:36:05.048747', 'step': 8618, 'epoch': 2} {'type': 'loss', 'content': 0.06442691385746002, 'timestamp': '2025-09-10 02:36:05.052194', 'step': 8619, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 2847885091968}, 'timestamp': '2025-09-10 02:36:05.084032', 'step': 8619, 'epoch': 2} {'type': 'loss', 'content': 0.1104266569018364, 'timestamp': '2025-09-10 02:36:05.108453', 'step': 8620, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 128], 'flops': 3797092544000}, 'timestamp': '2025-09-10 02:36:05.148272', 'step': 8620, 'epoch': 2} {'type': 'loss', 'content': 0.1242385059595108, 'timestamp': '2025-09-10 02:36:05.150552', 'step': 8621, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 144], 'flops': 4271696270016}, 'timestamp': '2025-09-10 02:36:05.181914', 'step': 8621, 'epoch': 2} {'type': 'loss', 'content': 0.16892258822917938, 'timestamp': '2025-09-10 02:36:05.184293', 'step': 8622, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 2847885091968}, 'timestamp': '2025-09-10 02:36:05.214541', 'step': 8622, 'epoch': 2} {'type': 'loss', 'content': 0.07959666103124619, 'timestamp': '2025-09-10 02:36:05.217218', 'step': 8623, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 144], 'flops': 4271696270016}, 'timestamp': '2025-09-10 02:36:05.249220', 'step': 8623, 'epoch': 2} {'type': 'loss', 'content': 0.06206478923559189, 'timestamp': '2025-09-10 02:36:05.272618', 'step': 8624, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 128], 'flops': 3797092544000}, 'timestamp': '2025-09-10 02:36:05.302921', 'step': 8624, 'epoch': 2} {'type': 'loss', 'content': 0.10789651423692703, 'timestamp': '2025-09-10 02:36:05.305111', 'step': 8625, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 3322488817984}, 'timestamp': '2025-09-10 02:36:05.334902', 'step': 8625, 'epoch': 2} {'type': 'loss', 'content': 0.10293777287006378, 'timestamp': '2025-09-10 02:36:05.337591', 'step': 8626, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 3322488817984}, 'timestamp': '2025-09-10 02:36:05.368383', 'step': 8626, 'epoch': 2} {'type': 'loss', 'content': 0.169324591755867, 'timestamp': '2025-09-10 02:36:05.370478', 'step': 8627, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 128], 'flops': 3797092544000}, 'timestamp': '2025-09-10 02:36:05.399966', 'step': 8627, 'epoch': 2} {'type': 'loss', 'content': 0.057366590946912766, 'timestamp': '2025-09-10 02:36:05.423428', 'step': 8628, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 3322488817984}, 'timestamp': '2025-09-10 02:36:05.454241', 'step': 8628, 'epoch': 2} {'type': 'loss', 'content': 0.07660345733165741, 'timestamp': '2025-09-10 02:36:05.456435', 'step': 8629, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 3322488817984}, 'timestamp': '2025-09-10 02:36:05.486547', 'step': 8629, 'epoch': 2} {'type': 'loss', 'content': 0.14918510615825653, 'timestamp': '2025-09-10 02:36:05.489139', 'step': 8630, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 2847885091968}, 'timestamp': '2025-09-10 02:36:05.519671', 'step': 8630, 'epoch': 2} {'type': 'loss', 'content': 0.11258959770202637, 'timestamp': '2025-09-10 02:36:05.522449', 'step': 8631, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 2847885091968}, 'timestamp': '2025-09-10 02:36:05.552556', 'step': 8631, 'epoch': 2} {'type': 'loss', 'content': 0.1405874490737915, 'timestamp': '2025-09-10 02:36:05.576247', 'step': 8632, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 2847885091968}, 'timestamp': '2025-09-10 02:36:05.606515', 'step': 8632, 'epoch': 2} {'type': 'loss', 'content': 0.10189169645309448, 'timestamp': '2025-09-10 02:36:05.608825', 'step': 8633, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 144], 'flops': 4271696270016}, 'timestamp': '2025-09-10 02:36:05.639139', 'step': 8633, 'epoch': 2} {'type': 'loss', 'content': 0.12546014785766602, 'timestamp': '2025-09-10 02:36:05.641365', 'step': 8634, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 3322488817984}, 'timestamp': '2025-09-10 02:36:05.671774', 'step': 8634, 'epoch': 2} {'type': 'loss', 'content': 0.10269813984632492, 'timestamp': '2025-09-10 02:36:05.674386', 'step': 8635, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 128], 'flops': 3797092544000}, 'timestamp': '2025-09-10 02:36:05.703919', 'step': 8635, 'epoch': 2} {'type': 'loss', 'content': 0.170829638838768, 'timestamp': '2025-09-10 02:36:05.727488', 'step': 8636, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 128], 'flops': 3797092544000}, 'timestamp': '2025-09-10 02:36:05.758054', 'step': 8636, 'epoch': 2} {'type': 'loss', 'content': 0.10850557684898376, 'timestamp': '2025-09-10 02:36:05.760639', 'step': 8637, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 128], 'flops': 3797092544000}, 'timestamp': '2025-09-10 02:36:05.790721', 'step': 8637, 'epoch': 2} {'type': 'loss', 'content': 0.13903085887432098, 'timestamp': '2025-09-10 02:36:05.793190', 'step': 8638, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 3322488817984}, 'timestamp': '2025-09-10 02:36:05.823503', 'step': 8638, 'epoch': 2} {'type': 'loss', 'content': 0.11760266125202179, 'timestamp': '2025-09-10 02:36:05.825742', 'step': 8639, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 3322488817984}, 'timestamp': '2025-09-10 02:36:05.856179', 'step': 8639, 'epoch': 2} {'type': 'loss', 'content': 0.17460322380065918, 'timestamp': '2025-09-10 02:36:05.880005', 'step': 8640, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 2847885091968}, 'timestamp': '2025-09-10 02:36:05.910903', 'step': 8640, 'epoch': 2} {'type': 'loss', 'content': 0.11731720715761185, 'timestamp': '2025-09-10 02:36:05.913039', 'step': 8641, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 2847885091968}, 'timestamp': '2025-09-10 02:36:05.943171', 'step': 8641, 'epoch': 2} {'type': 'loss', 'content': 0.1207987442612648, 'timestamp': '2025-09-10 02:36:05.945659', 'step': 8642, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 3322488817984}, 'timestamp': '2025-09-10 02:36:05.976942', 'step': 8642, 'epoch': 2} {'type': 'loss', 'content': 0.1547291874885559, 'timestamp': '2025-09-10 02:36:05.979338', 'step': 8643, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 144], 'flops': 4271696270016}, 'timestamp': '2025-09-10 02:36:06.010480', 'step': 8643, 'epoch': 2} {'type': 'loss', 'content': 0.07675716280937195, 'timestamp': '2025-09-10 02:36:06.034639', 'step': 8644, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 144], 'flops': 4271696270016}, 'timestamp': '2025-09-10 02:36:06.066322', 'step': 8644, 'epoch': 2} {'type': 'loss', 'content': 0.038825906813144684, 'timestamp': '2025-09-10 02:36:06.068394', 'step': 8645, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 3322488817984}, 'timestamp': '2025-09-10 02:36:06.098252', 'step': 8645, 'epoch': 2} {'type': 'loss', 'content': 0.1093854159116745, 'timestamp': '2025-09-10 02:36:06.101009', 'step': 8646, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 128], 'flops': 3797092544000}, 'timestamp': '2025-09-10 02:36:06.133292', 'step': 8646, 'epoch': 2} {'type': 'loss', 'content': 0.17498958110809326, 'timestamp': '2025-09-10 02:36:06.141189', 'step': 8647, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 3322488817984}, 'timestamp': '2025-09-10 02:36:06.180033', 'step': 8647, 'epoch': 2} {'type': 'loss', 'content': 0.15233610570430756, 'timestamp': '2025-09-10 02:36:06.203445', 'step': 8648, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 3322488817984}, 'timestamp': '2025-09-10 02:36:06.234929', 'step': 8648, 'epoch': 2} {'type': 'loss', 'content': 0.10236775875091553, 'timestamp': '2025-09-10 02:36:06.237333', 'step': 8649, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 128], 'flops': 3797092544000}, 'timestamp': '2025-09-10 02:36:06.267464', 'step': 8649, 'epoch': 2} {'type': 'loss', 'content': 0.09759458154439926, 'timestamp': '2025-09-10 02:36:06.269551', 'step': 8650, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 128], 'flops': 3797092544000}, 'timestamp': '2025-09-10 02:36:06.299625', 'step': 8650, 'epoch': 2} {'type': 'loss', 'content': 0.17178362607955933, 'timestamp': '2025-09-10 02:36:06.308095', 'step': 8651, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 2847885091968}, 'timestamp': '2025-09-10 02:36:06.339631', 'step': 8651, 'epoch': 2} {'type': 'loss', 'content': 0.06085103377699852, 'timestamp': '2025-09-10 02:36:06.362889', 'step': 8652, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 3322488817984}, 'timestamp': '2025-09-10 02:36:06.392920', 'step': 8652, 'epoch': 2} {'type': 'loss', 'content': 0.1350780576467514, 'timestamp': '2025-09-10 02:36:06.395179', 'step': 8653, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 3322488817984}, 'timestamp': '2025-09-10 02:36:06.424818', 'step': 8653, 'epoch': 2} {'type': 'loss', 'content': 0.14877720177173615, 'timestamp': '2025-09-10 02:36:06.427792', 'step': 8654, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 3322488817984}, 'timestamp': '2025-09-10 02:36:06.460163', 'step': 8654, 'epoch': 2} {'type': 'loss', 'content': 0.1334150731563568, 'timestamp': '2025-09-10 02:36:06.462651', 'step': 8655, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 3322488817984}, 'timestamp': '2025-09-10 02:36:06.492684', 'step': 8655, 'epoch': 2} {'type': 'loss', 'content': 0.11190163344144821, 'timestamp': '2025-09-10 02:36:06.516408', 'step': 8656, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 3322488817984}, 'timestamp': '2025-09-10 02:36:06.547226', 'step': 8656, 'epoch': 2} {'type': 'loss', 'content': 0.1170351579785347, 'timestamp': '2025-09-10 02:36:06.549907', 'step': 8657, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 3322488817984}, 'timestamp': '2025-09-10 02:36:06.580015', 'step': 8657, 'epoch': 2} {'type': 'loss', 'content': 0.12204251438379288, 'timestamp': '2025-09-10 02:36:06.582218', 'step': 8658, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 3322488817984}, 'timestamp': '2025-09-10 02:36:06.611601', 'step': 8658, 'epoch': 2} {'type': 'loss', 'content': 0.07370603829622269, 'timestamp': '2025-09-10 02:36:06.615127', 'step': 8659, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 2847885091968}, 'timestamp': '2025-09-10 02:36:06.644966', 'step': 8659, 'epoch': 2} {'type': 'loss', 'content': 0.10566624999046326, 'timestamp': '2025-09-10 02:36:06.671023', 'step': 8660, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 3322488817984}, 'timestamp': '2025-09-10 02:36:06.711931', 'step': 8660, 'epoch': 2} {'type': 'loss', 'content': 0.033297691494226456, 'timestamp': '2025-09-10 02:36:06.718819', 'step': 8661, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 128], 'flops': 3797092544000}, 'timestamp': '2025-09-10 02:36:06.754293', 'step': 8661, 'epoch': 2} {'type': 'loss', 'content': 0.07235035300254822, 'timestamp': '2025-09-10 02:36:06.756754', 'step': 8662, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 128], 'flops': 3797092544000}, 'timestamp': '2025-09-10 02:36:06.787866', 'step': 8662, 'epoch': 2} {'type': 'loss', 'content': 0.1077723577618599, 'timestamp': '2025-09-10 02:36:06.790144', 'step': 8663, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 2847885091968}, 'timestamp': '2025-09-10 02:36:06.819661', 'step': 8663, 'epoch': 2} {'type': 'loss', 'content': 0.09160102158784866, 'timestamp': '2025-09-10 02:36:06.843295', 'step': 8664, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 3322488817984}, 'timestamp': '2025-09-10 02:36:06.874073', 'step': 8664, 'epoch': 2} {'type': 'loss', 'content': 0.1027560830116272, 'timestamp': '2025-09-10 02:36:06.876470', 'step': 8665, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 80], 'flops': 2373281365952}, 'timestamp': '2025-09-10 02:36:06.906294', 'step': 8665, 'epoch': 2} {'type': 'loss', 'content': 0.195607990026474, 'timestamp': '2025-09-10 02:36:06.908302', 'step': 8666, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 3322488817984}, 'timestamp': '2025-09-10 02:36:06.937605', 'step': 8666, 'epoch': 2} {'type': 'loss', 'content': 0.09284742921590805, 'timestamp': '2025-09-10 02:36:06.940117', 'step': 8667, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 3322488817984}, 'timestamp': '2025-09-10 02:36:06.970481', 'step': 8667, 'epoch': 2} {'type': 'loss', 'content': 0.076424241065979, 'timestamp': '2025-09-10 02:36:06.993784', 'step': 8668, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 128], 'flops': 3797092544000}, 'timestamp': '2025-09-10 02:36:07.024185', 'step': 8668, 'epoch': 2} {'type': 'loss', 'content': 0.07914336770772934, 'timestamp': '2025-09-10 02:36:07.026477', 'step': 8669, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 144], 'flops': 4271696270016}, 'timestamp': '2025-09-10 02:36:07.057599', 'step': 8669, 'epoch': 2} {'type': 'loss', 'content': 0.18994669616222382, 'timestamp': '2025-09-10 02:36:07.060129', 'step': 8670, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 3322488817984}, 'timestamp': '2025-09-10 02:36:07.091751', 'step': 8670, 'epoch': 2} {'type': 'loss', 'content': 0.07954566925764084, 'timestamp': '2025-09-10 02:36:07.094124', 'step': 8671, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 208], 'flops': 6170111174080}, 'timestamp': '2025-09-10 02:36:07.124820', 'step': 8671, 'epoch': 2} {'type': 'loss', 'content': 0.1594202220439911, 'timestamp': '2025-09-10 02:36:07.152395', 'step': 8672, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 128], 'flops': 3797092544000}, 'timestamp': '2025-09-10 02:36:07.182773', 'step': 8672, 'epoch': 2} {'type': 'loss', 'content': 0.0838254913687706, 'timestamp': '2025-09-10 02:36:07.186397', 'step': 8673, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 2847885091968}, 'timestamp': '2025-09-10 02:36:07.217092', 'step': 8673, 'epoch': 2} {'type': 'loss', 'content': 0.11414431780576706, 'timestamp': '2025-09-10 02:36:07.219511', 'step': 8674, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 3322488817984}, 'timestamp': '2025-09-10 02:36:07.251770', 'step': 8674, 'epoch': 2} {'type': 'loss', 'content': 0.1387053281068802, 'timestamp': '2025-09-10 02:36:07.254234', 'step': 8675, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 3322488817984}, 'timestamp': '2025-09-10 02:36:07.284902', 'step': 8675, 'epoch': 2} {'type': 'loss', 'content': 0.10785014182329178, 'timestamp': '2025-09-10 02:36:07.308646', 'step': 8676, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 3322488817984}, 'timestamp': '2025-09-10 02:36:07.338913', 'step': 8676, 'epoch': 2} {'type': 'loss', 'content': 0.12090680748224258, 'timestamp': '2025-09-10 02:36:07.341113', 'step': 8677, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 3322488817984}, 'timestamp': '2025-09-10 02:36:07.371507', 'step': 8677, 'epoch': 2} {'type': 'loss', 'content': 0.09619560837745667, 'timestamp': '2025-09-10 02:36:07.373912', 'step': 8678, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 128], 'flops': 3797092544000}, 'timestamp': '2025-09-10 02:36:07.405134', 'step': 8678, 'epoch': 2} {'type': 'loss', 'content': 0.11021357029676437, 'timestamp': '2025-09-10 02:36:07.407695', 'step': 8679, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 144], 'flops': 4271696270016}, 'timestamp': '2025-09-10 02:36:07.441920', 'step': 8679, 'epoch': 2} {'type': 'loss', 'content': 0.09806119650602341, 'timestamp': '2025-09-10 02:36:07.465685', 'step': 8680, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 2847885091968}, 'timestamp': '2025-09-10 02:36:07.496567', 'step': 8680, 'epoch': 2} {'type': 'loss', 'content': 0.16269920766353607, 'timestamp': '2025-09-10 02:36:07.498871', 'step': 8681, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 2847885091968}, 'timestamp': '2025-09-10 02:36:07.528967', 'step': 8681, 'epoch': 2} {'type': 'loss', 'content': 0.084002286195755, 'timestamp': '2025-09-10 02:36:07.531652', 'step': 8682, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 3322488817984}, 'timestamp': '2025-09-10 02:36:07.562999', 'step': 8682, 'epoch': 2} {'type': 'loss', 'content': 0.10296543687582016, 'timestamp': '2025-09-10 02:36:07.565500', 'step': 8683, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 128], 'flops': 3797092544000}, 'timestamp': '2025-09-10 02:36:07.596065', 'step': 8683, 'epoch': 2} {'type': 'loss', 'content': 0.07830265909433365, 'timestamp': '2025-09-10 02:36:07.619566', 'step': 8684, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 3322488817984}, 'timestamp': '2025-09-10 02:36:07.653293', 'step': 8684, 'epoch': 2} {'type': 'loss', 'content': 0.10934757441282272, 'timestamp': '2025-09-10 02:36:07.655858', 'step': 8685, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 3322488817984}, 'timestamp': '2025-09-10 02:36:07.686508', 'step': 8685, 'epoch': 2} {'type': 'loss', 'content': 0.244172602891922, 'timestamp': '2025-09-10 02:36:07.689409', 'step': 8686, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 2847885091968}, 'timestamp': '2025-09-10 02:36:07.719997', 'step': 8686, 'epoch': 2} {'type': 'loss', 'content': 0.14956873655319214, 'timestamp': '2025-09-10 02:36:07.722462', 'step': 8687, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 128], 'flops': 3797092544000}, 'timestamp': '2025-09-10 02:36:07.754523', 'step': 8687, 'epoch': 2} {'type': 'loss', 'content': 0.14630462229251862, 'timestamp': '2025-09-10 02:36:07.778099', 'step': 8688, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 3322488817984}, 'timestamp': '2025-09-10 02:36:07.808604', 'step': 8688, 'epoch': 2} {'type': 'loss', 'content': 0.13208287954330444, 'timestamp': '2025-09-10 02:36:07.811006', 'step': 8689, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 2847885091968}, 'timestamp': '2025-09-10 02:36:07.841197', 'step': 8689, 'epoch': 2} {'type': 'loss', 'content': 0.1433228999376297, 'timestamp': '2025-09-10 02:36:07.843856', 'step': 8690, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 144], 'flops': 4271696270016}, 'timestamp': '2025-09-10 02:36:07.875091', 'step': 8690, 'epoch': 2} {'type': 'loss', 'content': 0.21032223105430603, 'timestamp': '2025-09-10 02:36:07.877985', 'step': 8691, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 128], 'flops': 3797092544000}, 'timestamp': '2025-09-10 02:36:07.908601', 'step': 8691, 'epoch': 2} {'type': 'loss', 'content': 0.14005376398563385, 'timestamp': '2025-09-10 02:36:07.932728', 'step': 8692, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 3322488817984}, 'timestamp': '2025-09-10 02:36:07.966600', 'step': 8692, 'epoch': 2} {'type': 'loss', 'content': 0.13704366981983185, 'timestamp': '2025-09-10 02:36:07.969484', 'step': 8693, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 128], 'flops': 3797092544000}, 'timestamp': '2025-09-10 02:36:08.001968', 'step': 8693, 'epoch': 2} {'type': 'loss', 'content': 0.12288051098585129, 'timestamp': '2025-09-10 02:36:08.004876', 'step': 8694, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 3322488817984}, 'timestamp': '2025-09-10 02:36:08.036190', 'step': 8694, 'epoch': 2} {'type': 'loss', 'content': 0.06659248471260071, 'timestamp': '2025-09-10 02:36:08.039081', 'step': 8695, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 3322488817984}, 'timestamp': '2025-09-10 02:36:08.071386', 'step': 8695, 'epoch': 2} {'type': 'loss', 'content': 0.09958615154027939, 'timestamp': '2025-09-10 02:36:08.095014', 'step': 8696, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 128], 'flops': 3797092544000}, 'timestamp': '2025-09-10 02:36:08.125681', 'step': 8696, 'epoch': 2} {'type': 'loss', 'content': 0.1735149770975113, 'timestamp': '2025-09-10 02:36:08.128557', 'step': 8697, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 272], 'flops': 8068526078144}, 'timestamp': '2025-09-10 02:36:08.162983', 'step': 8697, 'epoch': 2} {'type': 'loss', 'content': 0.21461473405361176, 'timestamp': '2025-09-10 02:36:08.173358', 'step': 8698, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 2847885091968}, 'timestamp': '2025-09-10 02:36:08.206622', 'step': 8698, 'epoch': 2} {'type': 'loss', 'content': 0.10311432927846909, 'timestamp': '2025-09-10 02:36:08.209083', 'step': 8699, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 3322488817984}, 'timestamp': '2025-09-10 02:36:08.239571', 'step': 8699, 'epoch': 2} {'type': 'loss', 'content': 0.16727504134178162, 'timestamp': '2025-09-10 02:36:08.263397', 'step': 8700, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 2847885091968}, 'timestamp': '2025-09-10 02:36:08.294719', 'step': 8700, 'epoch': 2} {'type': 'loss', 'content': 0.20449338853359222, 'timestamp': '2025-09-10 02:36:08.297430', 'step': 8701, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 2847885091968}, 'timestamp': '2025-09-10 02:36:08.328854', 'step': 8701, 'epoch': 2} {'type': 'loss', 'content': 0.11186444759368896, 'timestamp': '2025-09-10 02:36:08.331226', 'step': 8702, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 2847885091968}, 'timestamp': '2025-09-10 02:36:08.362552', 'step': 8702, 'epoch': 2} {'type': 'loss', 'content': 0.10211600363254547, 'timestamp': '2025-09-10 02:36:08.365036', 'step': 8703, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 3322488817984}, 'timestamp': '2025-09-10 02:36:08.395985', 'step': 8703, 'epoch': 2} {'type': 'loss', 'content': 0.10440681874752045, 'timestamp': '2025-09-10 02:36:08.419543', 'step': 8704, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 3322488817984}, 'timestamp': '2025-09-10 02:36:08.449638', 'step': 8704, 'epoch': 2} {'type': 'loss', 'content': 0.11766184121370316, 'timestamp': '2025-09-10 02:36:08.451868', 'step': 8705, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 144], 'flops': 4271696270016}, 'timestamp': '2025-09-10 02:36:08.482168', 'step': 8705, 'epoch': 2} {'type': 'loss', 'content': 0.0853797197341919, 'timestamp': '2025-09-10 02:36:08.484639', 'step': 8706, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 2847885091968}, 'timestamp': '2025-09-10 02:36:08.515189', 'step': 8706, 'epoch': 2} {'type': 'loss', 'content': 0.10365775227546692, 'timestamp': '2025-09-10 02:36:08.517538', 'step': 8707, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 3322488817984}, 'timestamp': '2025-09-10 02:36:08.548243', 'step': 8707, 'epoch': 2} {'type': 'loss', 'content': 0.08940877765417099, 'timestamp': '2025-09-10 02:36:08.571631', 'step': 8708, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 3322488817984}, 'timestamp': '2025-09-10 02:36:08.601585', 'step': 8708, 'epoch': 2} {'type': 'loss', 'content': 0.13151580095291138, 'timestamp': '2025-09-10 02:36:08.603848', 'step': 8709, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 3322488817984}, 'timestamp': '2025-09-10 02:36:08.633461', 'step': 8709, 'epoch': 2} {'type': 'loss', 'content': 0.10273481905460358, 'timestamp': '2025-09-10 02:36:08.635758', 'step': 8710, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 128], 'flops': 3797092544000}, 'timestamp': '2025-09-10 02:36:08.666519', 'step': 8710, 'epoch': 2} {'type': 'loss', 'content': 0.09398321807384491, 'timestamp': '2025-09-10 02:36:08.668719', 'step': 8711, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 128], 'flops': 3797092544000}, 'timestamp': '2025-09-10 02:36:08.698579', 'step': 8711, 'epoch': 2} {'type': 'loss', 'content': 0.09857926517724991, 'timestamp': '2025-09-10 02:36:08.722326', 'step': 8712, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 3322488817984}, 'timestamp': '2025-09-10 02:36:08.753678', 'step': 8712, 'epoch': 2} {'type': 'loss', 'content': 0.12331738322973251, 'timestamp': '2025-09-10 02:36:08.756478', 'step': 8713, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 3322488817984}, 'timestamp': '2025-09-10 02:36:08.786911', 'step': 8713, 'epoch': 2} {'type': 'loss', 'content': 0.10589732229709625, 'timestamp': '2025-09-10 02:36:08.789610', 'step': 8714, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 3322488817984}, 'timestamp': '2025-09-10 02:36:08.819965', 'step': 8714, 'epoch': 2} {'type': 'loss', 'content': 0.12745855748653412, 'timestamp': '2025-09-10 02:36:08.823625', 'step': 8715, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 128], 'flops': 3797092544000}, 'timestamp': '2025-09-10 02:36:08.857381', 'step': 8715, 'epoch': 2} {'type': 'loss', 'content': 0.13999982178211212, 'timestamp': '2025-09-10 02:36:08.880978', 'step': 8716, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 3322488817984}, 'timestamp': '2025-09-10 02:36:08.911586', 'step': 8716, 'epoch': 2} {'type': 'loss', 'content': 0.09346932917833328, 'timestamp': '2025-09-10 02:36:08.913815', 'step': 8717, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 3322488817984}, 'timestamp': '2025-09-10 02:36:08.943707', 'step': 8717, 'epoch': 2} {'type': 'loss', 'content': 0.16367468237876892, 'timestamp': '2025-09-10 02:36:08.946751', 'step': 8718, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 3322488817984}, 'timestamp': '2025-09-10 02:36:08.977910', 'step': 8718, 'epoch': 2} {'type': 'loss', 'content': 0.16852335631847382, 'timestamp': '2025-09-10 02:36:08.980110', 'step': 8719, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 3322488817984}, 'timestamp': '2025-09-10 02:36:09.009585', 'step': 8719, 'epoch': 2} {'type': 'loss', 'content': 0.15095970034599304, 'timestamp': '2025-09-10 02:36:09.033163', 'step': 8720, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 144], 'flops': 4271696270016}, 'timestamp': '2025-09-10 02:36:09.063022', 'step': 8720, 'epoch': 2} {'type': 'loss', 'content': 0.11940625309944153, 'timestamp': '2025-09-10 02:36:09.065586', 'step': 8721, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 128], 'flops': 3797092544000}, 'timestamp': '2025-09-10 02:36:09.095399', 'step': 8721, 'epoch': 2} {'type': 'loss', 'content': 0.20334801077842712, 'timestamp': '2025-09-10 02:36:09.098065', 'step': 8722, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 3322488817984}, 'timestamp': '2025-09-10 02:36:09.127519', 'step': 8722, 'epoch': 2} {'type': 'loss', 'content': 0.10163267701864243, 'timestamp': '2025-09-10 02:36:09.131629', 'step': 8723, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 3322488817984}, 'timestamp': '2025-09-10 02:36:09.163131', 'step': 8723, 'epoch': 2} {'type': 'loss', 'content': 0.06837490946054459, 'timestamp': '2025-09-10 02:36:09.187049', 'step': 8724, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 3322488817984}, 'timestamp': '2025-09-10 02:36:09.217165', 'step': 8724, 'epoch': 2} {'type': 'loss', 'content': 0.08394148200750351, 'timestamp': '2025-09-10 02:36:09.219027', 'step': 8725, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 2847885091968}, 'timestamp': '2025-09-10 02:36:09.248326', 'step': 8725, 'epoch': 2} {'type': 'loss', 'content': 0.09517620503902435, 'timestamp': '2025-09-10 02:36:09.250384', 'step': 8726, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 3322488817984}, 'timestamp': '2025-09-10 02:36:09.281203', 'step': 8726, 'epoch': 2} {'type': 'loss', 'content': 0.1879623830318451, 'timestamp': '2025-09-10 02:36:09.283372', 'step': 8727, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 2847885091968}, 'timestamp': '2025-09-10 02:36:09.313810', 'step': 8727, 'epoch': 2} {'type': 'loss', 'content': 0.037528567016124725, 'timestamp': '2025-09-10 02:36:09.337754', 'step': 8728, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 2847885091968}, 'timestamp': '2025-09-10 02:36:09.368182', 'step': 8728, 'epoch': 2} {'type': 'loss', 'content': 0.1091727614402771, 'timestamp': '2025-09-10 02:36:09.371449', 'step': 8729, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 128], 'flops': 3797092544000}, 'timestamp': '2025-09-10 02:36:09.401193', 'step': 8729, 'epoch': 2} {'type': 'loss', 'content': 0.12712515890598297, 'timestamp': '2025-09-10 02:36:09.403438', 'step': 8730, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 3322488817984}, 'timestamp': '2025-09-10 02:36:09.433947', 'step': 8730, 'epoch': 2} {'type': 'loss', 'content': 0.13953442871570587, 'timestamp': '2025-09-10 02:36:09.436693', 'step': 8731, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 144], 'flops': 4271696270016}, 'timestamp': '2025-09-10 02:36:09.467693', 'step': 8731, 'epoch': 2} {'type': 'loss', 'content': 0.08443401753902435, 'timestamp': '2025-09-10 02:36:09.491218', 'step': 8732, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 3322488817984}, 'timestamp': '2025-09-10 02:36:09.522868', 'step': 8732, 'epoch': 2} {'type': 'loss', 'content': 0.13512440025806427, 'timestamp': '2025-09-10 02:36:09.525609', 'step': 8733, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 2847885091968}, 'timestamp': '2025-09-10 02:36:09.557215', 'step': 8733, 'epoch': 2} {'type': 'loss', 'content': 0.09994354844093323, 'timestamp': '2025-09-10 02:36:09.559501', 'step': 8734, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 2847885091968}, 'timestamp': '2025-09-10 02:36:09.589046', 'step': 8734, 'epoch': 2} {'type': 'loss', 'content': 0.04768287390470505, 'timestamp': '2025-09-10 02:36:09.591797', 'step': 8735, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 3322488817984}, 'timestamp': '2025-09-10 02:36:09.622149', 'step': 8735, 'epoch': 2} {'type': 'loss', 'content': 0.14235088229179382, 'timestamp': '2025-09-10 02:36:09.645823', 'step': 8736, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 2847885091968}, 'timestamp': '2025-09-10 02:36:09.677772', 'step': 8736, 'epoch': 2} {'type': 'loss', 'content': 0.15550608932971954, 'timestamp': '2025-09-10 02:36:09.680098', 'step': 8737, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 3322488817984}, 'timestamp': '2025-09-10 02:36:09.711497', 'step': 8737, 'epoch': 2} {'type': 'loss', 'content': 0.1407778561115265, 'timestamp': '2025-09-10 02:36:09.713795', 'step': 8738, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 128], 'flops': 3797092544000}, 'timestamp': '2025-09-10 02:36:09.743576', 'step': 8738, 'epoch': 2} {'type': 'loss', 'content': 0.10489104688167572, 'timestamp': '2025-09-10 02:36:09.746038', 'step': 8739, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 3322488817984}, 'timestamp': '2025-09-10 02:36:09.777021', 'step': 8739, 'epoch': 2} {'type': 'loss', 'content': 0.13323712348937988, 'timestamp': '2025-09-10 02:36:09.800911', 'step': 8740, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 128], 'flops': 3797092544000}, 'timestamp': '2025-09-10 02:36:09.830962', 'step': 8740, 'epoch': 2} {'type': 'loss', 'content': 0.10744865983724594, 'timestamp': '2025-09-10 02:36:09.833892', 'step': 8741, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 2847885091968}, 'timestamp': '2025-09-10 02:36:09.863426', 'step': 8741, 'epoch': 2} {'type': 'loss', 'content': 0.09607095271348953, 'timestamp': '2025-09-10 02:36:09.865852', 'step': 8742, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 128], 'flops': 3797092544000}, 'timestamp': '2025-09-10 02:36:09.896416', 'step': 8742, 'epoch': 2} {'type': 'loss', 'content': 0.12691619992256165, 'timestamp': '2025-09-10 02:36:09.898928', 'step': 8743, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 2847885091968}, 'timestamp': '2025-09-10 02:36:09.929348', 'step': 8743, 'epoch': 2} {'type': 'loss', 'content': 0.12568742036819458, 'timestamp': '2025-09-10 02:36:09.953246', 'step': 8744, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 144], 'flops': 4271696270016}, 'timestamp': '2025-09-10 02:36:09.985325', 'step': 8744, 'epoch': 2} {'type': 'loss', 'content': 0.08204807341098785, 'timestamp': '2025-09-10 02:36:09.987703', 'step': 8745, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 2847885091968}, 'timestamp': '2025-09-10 02:36:10.017581', 'step': 8745, 'epoch': 2} {'type': 'loss', 'content': 0.16750220954418182, 'timestamp': '2025-09-10 02:36:10.020255', 'step': 8746, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 2847885091968}, 'timestamp': '2025-09-10 02:36:10.050557', 'step': 8746, 'epoch': 2} {'type': 'loss', 'content': 0.20133008062839508, 'timestamp': '2025-09-10 02:36:10.052981', 'step': 8747, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 2847885091968}, 'timestamp': '2025-09-10 02:36:10.084728', 'step': 8747, 'epoch': 2} {'type': 'loss', 'content': 0.10032553225755692, 'timestamp': '2025-09-10 02:36:10.108498', 'step': 8748, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 2847885091968}, 'timestamp': '2025-09-10 02:36:10.139319', 'step': 8748, 'epoch': 2} {'type': 'loss', 'content': 0.13238193094730377, 'timestamp': '2025-09-10 02:36:10.143357', 'step': 8749, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 128], 'flops': 3797092544000}, 'timestamp': '2025-09-10 02:36:10.182636', 'step': 8749, 'epoch': 2} {'type': 'loss', 'content': 0.11791637539863586, 'timestamp': '2025-09-10 02:36:10.185086', 'step': 8750, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 2847885091968}, 'timestamp': '2025-09-10 02:36:10.215169', 'step': 8750, 'epoch': 2} {'type': 'loss', 'content': 0.14700129628181458, 'timestamp': '2025-09-10 02:36:10.217529', 'step': 8751, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 2847885091968}, 'timestamp': '2025-09-10 02:36:10.247249', 'step': 8751, 'epoch': 2} {'type': 'loss', 'content': 0.07033383846282959, 'timestamp': '2025-09-10 02:36:10.270844', 'step': 8752, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 3322488817984}, 'timestamp': '2025-09-10 02:36:10.301014', 'step': 8752, 'epoch': 2} {'type': 'loss', 'content': 0.07838401198387146, 'timestamp': '2025-09-10 02:36:10.303256', 'step': 8753, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 2847885091968}, 'timestamp': '2025-09-10 02:36:10.333545', 'step': 8753, 'epoch': 2} {'type': 'loss', 'content': 0.10159723460674286, 'timestamp': '2025-09-10 02:36:10.337168', 'step': 8754, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 80], 'flops': 2373281365952}, 'timestamp': '2025-09-10 02:36:10.369421', 'step': 8754, 'epoch': 2} {'type': 'loss', 'content': 0.11282243579626083, 'timestamp': '2025-09-10 02:36:10.371699', 'step': 8755, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 3322488817984}, 'timestamp': '2025-09-10 02:36:10.402659', 'step': 8755, 'epoch': 2} {'type': 'loss', 'content': 0.15580663084983826, 'timestamp': '2025-09-10 02:36:10.426227', 'step': 8756, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 3322488817984}, 'timestamp': '2025-09-10 02:36:10.457149', 'step': 8756, 'epoch': 2} {'type': 'loss', 'content': 0.06923165917396545, 'timestamp': '2025-09-10 02:36:10.459772', 'step': 8757, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 3322488817984}, 'timestamp': '2025-09-10 02:36:10.490968', 'step': 8757, 'epoch': 2} {'type': 'loss', 'content': 0.11390663683414459, 'timestamp': '2025-09-10 02:36:10.493269', 'step': 8758, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 3322488817984}, 'timestamp': '2025-09-10 02:36:10.523489', 'step': 8758, 'epoch': 2} {'type': 'loss', 'content': 0.09974142909049988, 'timestamp': '2025-09-10 02:36:10.525910', 'step': 8759, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 128], 'flops': 3797092544000}, 'timestamp': '2025-09-10 02:36:10.556345', 'step': 8759, 'epoch': 2} {'type': 'loss', 'content': 0.07531172782182693, 'timestamp': '2025-09-10 02:36:10.579999', 'step': 8760, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 3322488817984}, 'timestamp': '2025-09-10 02:36:10.611127', 'step': 8760, 'epoch': 2} {'type': 'loss', 'content': 0.21724525094032288, 'timestamp': '2025-09-10 02:36:10.613569', 'step': 8761, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 3322488817984}, 'timestamp': '2025-09-10 02:36:10.644377', 'step': 8761, 'epoch': 2} {'type': 'loss', 'content': 0.1372848004102707, 'timestamp': '2025-09-10 02:36:10.646714', 'step': 8762, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 2847885091968}, 'timestamp': '2025-09-10 02:36:10.676955', 'step': 8762, 'epoch': 2} {'type': 'loss', 'content': 0.18406379222869873, 'timestamp': '2025-09-10 02:36:10.679516', 'step': 8763, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 2847885091968}, 'timestamp': '2025-09-10 02:36:10.711135', 'step': 8763, 'epoch': 2} {'type': 'loss', 'content': 0.11716172844171524, 'timestamp': '2025-09-10 02:36:10.734576', 'step': 8764, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 128], 'flops': 3797092544000}, 'timestamp': '2025-09-10 02:36:10.764908', 'step': 8764, 'epoch': 2} {'type': 'loss', 'content': 0.11781472712755203, 'timestamp': '2025-09-10 02:36:10.770747', 'step': 8765, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 128], 'flops': 3797092544000}, 'timestamp': '2025-09-10 02:36:10.800204', 'step': 8765, 'epoch': 2} {'type': 'loss', 'content': 0.22514702379703522, 'timestamp': '2025-09-10 02:36:10.802471', 'step': 8766, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 3322488817984}, 'timestamp': '2025-09-10 02:36:10.832363', 'step': 8766, 'epoch': 2} {'type': 'loss', 'content': 0.12616761028766632, 'timestamp': '2025-09-10 02:36:10.835076', 'step': 8767, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 128], 'flops': 3797092544000}, 'timestamp': '2025-09-10 02:36:10.866442', 'step': 8767, 'epoch': 2} {'type': 'loss', 'content': 0.11031797528266907, 'timestamp': '2025-09-10 02:36:10.891756', 'step': 8768, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 3322488817984}, 'timestamp': '2025-09-10 02:36:10.921412', 'step': 8768, 'epoch': 2} {'type': 'loss', 'content': 0.06458669155836105, 'timestamp': '2025-09-10 02:36:10.923584', 'step': 8769, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 128], 'flops': 3797092544000}, 'timestamp': '2025-09-10 02:36:10.953340', 'step': 8769, 'epoch': 2} {'type': 'loss', 'content': 0.08155769109725952, 'timestamp': '2025-09-10 02:36:10.955622', 'step': 8770, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 2847885091968}, 'timestamp': '2025-09-10 02:36:10.985705', 'step': 8770, 'epoch': 2} {'type': 'loss', 'content': 0.11590763926506042, 'timestamp': '2025-09-10 02:36:10.988070', 'step': 8771, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 160], 'flops': 4746299996032}, 'timestamp': '2025-09-10 02:36:11.021857', 'step': 8771, 'epoch': 2} {'type': 'loss', 'content': 0.09244081377983093, 'timestamp': '2025-09-10 02:36:11.045568', 'step': 8772, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 3322488817984}, 'timestamp': '2025-09-10 02:36:11.076401', 'step': 8772, 'epoch': 2} {'type': 'loss', 'content': 0.10419254004955292, 'timestamp': '2025-09-10 02:36:11.078748', 'step': 8773, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 144], 'flops': 4271696270016}, 'timestamp': '2025-09-10 02:36:11.108708', 'step': 8773, 'epoch': 2} {'type': 'loss', 'content': 0.10637495666742325, 'timestamp': '2025-09-10 02:36:11.111067', 'step': 8774, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 144], 'flops': 4271696270016}, 'timestamp': '2025-09-10 02:36:11.141298', 'step': 8774, 'epoch': 2} {'type': 'loss', 'content': 0.09577680379152298, 'timestamp': '2025-09-10 02:36:11.143686', 'step': 8775, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 3322488817984}, 'timestamp': '2025-09-10 02:36:11.184888', 'step': 8775, 'epoch': 2} {'type': 'loss', 'content': 0.1097571924328804, 'timestamp': '2025-09-10 02:36:11.208804', 'step': 8776, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 3322488817984}, 'timestamp': '2025-09-10 02:36:11.239011', 'step': 8776, 'epoch': 2} {'type': 'loss', 'content': 0.07421448826789856, 'timestamp': '2025-09-10 02:36:11.241332', 'step': 8777, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 3322488817984}, 'timestamp': '2025-09-10 02:36:11.271628', 'step': 8777, 'epoch': 2} {'type': 'loss', 'content': 0.09937809407711029, 'timestamp': '2025-09-10 02:36:11.275588', 'step': 8778, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 144], 'flops': 4271696270016}, 'timestamp': '2025-09-10 02:36:11.305920', 'step': 8778, 'epoch': 2} {'type': 'loss', 'content': 0.13115471601486206, 'timestamp': '2025-09-10 02:36:11.308199', 'step': 8779, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 3322488817984}, 'timestamp': '2025-09-10 02:36:11.338639', 'step': 8779, 'epoch': 2} {'type': 'loss', 'content': 0.04577566310763359, 'timestamp': '2025-09-10 02:36:11.362006', 'step': 8780, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 128], 'flops': 3797092544000}, 'timestamp': '2025-09-10 02:36:11.391613', 'step': 8780, 'epoch': 2} {'type': 'loss', 'content': 0.1527886986732483, 'timestamp': '2025-09-10 02:36:11.393888', 'step': 8781, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 176], 'flops': 5220903722048}, 'timestamp': '2025-09-10 02:36:11.423819', 'step': 8781, 'epoch': 2} {'type': 'loss', 'content': 0.08202745765447617, 'timestamp': '2025-09-10 02:36:11.428138', 'step': 8782, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 3322488817984}, 'timestamp': '2025-09-10 02:36:11.458211', 'step': 8782, 'epoch': 2} {'type': 'loss', 'content': 0.08584440499544144, 'timestamp': '2025-09-10 02:36:11.460607', 'step': 8783, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 144], 'flops': 4271696270016}, 'timestamp': '2025-09-10 02:36:11.491447', 'step': 8783, 'epoch': 2} {'type': 'loss', 'content': 0.10720159113407135, 'timestamp': '2025-09-10 02:36:11.515450', 'step': 8784, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 128], 'flops': 3797092544000}, 'timestamp': '2025-09-10 02:36:11.545261', 'step': 8784, 'epoch': 2} {'type': 'loss', 'content': 0.16300398111343384, 'timestamp': '2025-09-10 02:36:11.547566', 'step': 8785, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 2847885091968}, 'timestamp': '2025-09-10 02:36:11.577825', 'step': 8785, 'epoch': 2} {'type': 'loss', 'content': 0.07472904771566391, 'timestamp': '2025-09-10 02:36:11.580198', 'step': 8786, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 3322488817984}, 'timestamp': '2025-09-10 02:36:11.609899', 'step': 8786, 'epoch': 2} {'type': 'loss', 'content': 0.11420433223247528, 'timestamp': '2025-09-10 02:36:11.612410', 'step': 8787, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 2847885091968}, 'timestamp': '2025-09-10 02:36:11.642396', 'step': 8787, 'epoch': 2} {'type': 'loss', 'content': 0.10213396698236465, 'timestamp': '2025-09-10 02:36:11.665977', 'step': 8788, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 2847885091968}, 'timestamp': '2025-09-10 02:36:11.696350', 'step': 8788, 'epoch': 2} {'type': 'loss', 'content': 0.10191040486097336, 'timestamp': '2025-09-10 02:36:11.698539', 'step': 8789, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 128], 'flops': 3797092544000}, 'timestamp': '2025-09-10 02:36:11.728637', 'step': 8789, 'epoch': 2} {'type': 'loss', 'content': 0.059478506445884705, 'timestamp': '2025-09-10 02:36:11.731144', 'step': 8790, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 2847885091968}, 'timestamp': '2025-09-10 02:36:11.761707', 'step': 8790, 'epoch': 2} {'type': 'loss', 'content': 0.08447586745023727, 'timestamp': '2025-09-10 02:36:11.763901', 'step': 8791, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 128], 'flops': 3797092544000}, 'timestamp': '2025-09-10 02:36:11.793614', 'step': 8791, 'epoch': 2} {'type': 'loss', 'content': 0.2081722915172577, 'timestamp': '2025-09-10 02:36:11.817115', 'step': 8792, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 3322488817984}, 'timestamp': '2025-09-10 02:36:11.848022', 'step': 8792, 'epoch': 2} {'type': 'loss', 'content': 0.14505545794963837, 'timestamp': '2025-09-10 02:36:11.850548', 'step': 8793, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 80], 'flops': 2373281365952}, 'timestamp': '2025-09-10 02:36:11.881849', 'step': 8793, 'epoch': 2} {'type': 'loss', 'content': 0.12004975974559784, 'timestamp': '2025-09-10 02:36:11.884337', 'step': 8794, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 2847885091968}, 'timestamp': '2025-09-10 02:36:11.913972', 'step': 8794, 'epoch': 2} {'type': 'loss', 'content': 0.14136286079883575, 'timestamp': '2025-09-10 02:36:11.916233', 'step': 8795, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 3322488817984}, 'timestamp': '2025-09-10 02:36:11.946504', 'step': 8795, 'epoch': 2} {'type': 'loss', 'content': 0.06299494951963425, 'timestamp': '2025-09-10 02:36:11.969953', 'step': 8796, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 144], 'flops': 4271696270016}, 'timestamp': '2025-09-10 02:36:12.000280', 'step': 8796, 'epoch': 2} {'type': 'loss', 'content': 0.06145394966006279, 'timestamp': '2025-09-10 02:36:12.002542', 'step': 8797, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 2847885091968}, 'timestamp': '2025-09-10 02:36:12.032359', 'step': 8797, 'epoch': 2} {'type': 'loss', 'content': 0.05177450180053711, 'timestamp': '2025-09-10 02:36:12.034658', 'step': 8798, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 128], 'flops': 3797092544000}, 'timestamp': '2025-09-10 02:36:12.066933', 'step': 8798, 'epoch': 2} {'type': 'loss', 'content': 0.11373201012611389, 'timestamp': '2025-09-10 02:36:12.069097', 'step': 8799, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 2847885091968}, 'timestamp': '2025-09-10 02:36:12.099704', 'step': 8799, 'epoch': 2} {'type': 'loss', 'content': 0.05309145525097847, 'timestamp': '2025-09-10 02:36:12.123181', 'step': 8800, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 3322488817984}, 'timestamp': '2025-09-10 02:36:12.153859', 'step': 8800, 'epoch': 2} {'type': 'loss', 'content': 0.1440158486366272, 'timestamp': '2025-09-10 02:36:12.156433', 'step': 8801, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 3322488817984}, 'timestamp': '2025-09-10 02:36:12.187545', 'step': 8801, 'epoch': 2} {'type': 'loss', 'content': 0.09405605494976044, 'timestamp': '2025-09-10 02:36:12.189892', 'step': 8802, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 128], 'flops': 3797092544000}, 'timestamp': '2025-09-10 02:36:12.219956', 'step': 8802, 'epoch': 2} {'type': 'loss', 'content': 0.05634104460477829, 'timestamp': '2025-09-10 02:36:12.222198', 'step': 8803, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 2847885091968}, 'timestamp': '2025-09-10 02:36:12.252237', 'step': 8803, 'epoch': 2} {'type': 'loss', 'content': 0.0796741396188736, 'timestamp': '2025-09-10 02:36:12.276255', 'step': 8804, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 2847885091968}, 'timestamp': '2025-09-10 02:36:12.306187', 'step': 8804, 'epoch': 2} {'type': 'loss', 'content': 0.1620401293039322, 'timestamp': '2025-09-10 02:36:12.308386', 'step': 8805, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 3322488817984}, 'timestamp': '2025-09-10 02:36:12.338091', 'step': 8805, 'epoch': 2} {'type': 'loss', 'content': 0.09614522755146027, 'timestamp': '2025-09-10 02:36:12.341607', 'step': 8806, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 128], 'flops': 3797092544000}, 'timestamp': '2025-09-10 02:36:12.371406', 'step': 8806, 'epoch': 2} {'type': 'loss', 'content': 0.10635463148355484, 'timestamp': '2025-09-10 02:36:12.373801', 'step': 8807, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 3322488817984}, 'timestamp': '2025-09-10 02:36:12.403892', 'step': 8807, 'epoch': 2} {'type': 'loss', 'content': 0.18878088891506195, 'timestamp': '2025-09-10 02:36:12.427506', 'step': 8808, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 3322488817984}, 'timestamp': '2025-09-10 02:36:12.458531', 'step': 8808, 'epoch': 2} {'type': 'loss', 'content': 0.18718864023685455, 'timestamp': '2025-09-10 02:36:12.461337', 'step': 8809, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 144], 'flops': 4271696270016}, 'timestamp': '2025-09-10 02:36:12.491718', 'step': 8809, 'epoch': 2} {'type': 'loss', 'content': 0.08925072103738785, 'timestamp': '2025-09-10 02:36:12.494113', 'step': 8810, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 3322488817984}, 'timestamp': '2025-09-10 02:36:12.523916', 'step': 8810, 'epoch': 2} {'type': 'loss', 'content': 0.11959125846624374, 'timestamp': '2025-09-10 02:36:12.526250', 'step': 8811, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 128], 'flops': 3797092544000}, 'timestamp': '2025-09-10 02:36:12.555883', 'step': 8811, 'epoch': 2} {'type': 'loss', 'content': 0.07149282097816467, 'timestamp': '2025-09-10 02:36:12.580473', 'step': 8812, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 3322488817984}, 'timestamp': '2025-09-10 02:36:12.609945', 'step': 8812, 'epoch': 2} {'type': 'loss', 'content': 0.12241045385599136, 'timestamp': '2025-09-10 02:36:12.612412', 'step': 8813, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 3322488817984}, 'timestamp': '2025-09-10 02:36:12.642540', 'step': 8813, 'epoch': 2} {'type': 'loss', 'content': 0.10494765639305115, 'timestamp': '2025-09-10 02:36:12.644702', 'step': 8814, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 3322488817984}, 'timestamp': '2025-09-10 02:36:12.674824', 'step': 8814, 'epoch': 2} {'type': 'loss', 'content': 0.17235781252384186, 'timestamp': '2025-09-10 02:36:12.677143', 'step': 8815, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 2847885091968}, 'timestamp': '2025-09-10 02:36:12.706827', 'step': 8815, 'epoch': 2} {'type': 'loss', 'content': 0.08745107799768448, 'timestamp': '2025-09-10 02:36:12.731108', 'step': 8816, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 144], 'flops': 4271696270016}, 'timestamp': '2025-09-10 02:36:12.762481', 'step': 8816, 'epoch': 2} {'type': 'loss', 'content': 0.14017970860004425, 'timestamp': '2025-09-10 02:36:12.764868', 'step': 8817, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 3322488817984}, 'timestamp': '2025-09-10 02:36:12.794520', 'step': 8817, 'epoch': 2} {'type': 'loss', 'content': 0.19154766201972961, 'timestamp': '2025-09-10 02:36:12.796962', 'step': 8818, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 144], 'flops': 4271696270016}, 'timestamp': '2025-09-10 02:36:12.827212', 'step': 8818, 'epoch': 2} {'type': 'loss', 'content': 0.18243412673473358, 'timestamp': '2025-09-10 02:36:12.829514', 'step': 8819, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 2847885091968}, 'timestamp': '2025-09-10 02:36:12.858822', 'step': 8819, 'epoch': 2} {'type': 'loss', 'content': 0.18312253057956696, 'timestamp': '2025-09-10 02:36:12.882476', 'step': 8820, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 3322488817984}, 'timestamp': '2025-09-10 02:36:12.912475', 'step': 8820, 'epoch': 2} {'type': 'loss', 'content': 0.09718627482652664, 'timestamp': '2025-09-10 02:36:12.914650', 'step': 8821, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 2847885091968}, 'timestamp': '2025-09-10 02:36:12.944267', 'step': 8821, 'epoch': 2} {'type': 'loss', 'content': 0.056700825691223145, 'timestamp': '2025-09-10 02:36:12.946411', 'step': 8822, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 160], 'flops': 4746299996032}, 'timestamp': '2025-09-10 02:36:12.977113', 'step': 8822, 'epoch': 2} {'type': 'loss', 'content': 0.09979084879159927, 'timestamp': '2025-09-10 02:36:12.979713', 'step': 8823, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 3322488817984}, 'timestamp': '2025-09-10 02:36:13.009460', 'step': 8823, 'epoch': 2} {'type': 'loss', 'content': 0.1583493947982788, 'timestamp': '2025-09-10 02:36:13.032975', 'step': 8824, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 2847885091968}, 'timestamp': '2025-09-10 02:36:13.063460', 'step': 8824, 'epoch': 2} {'type': 'loss', 'content': 0.07732091844081879, 'timestamp': '2025-09-10 02:36:13.065907', 'step': 8825, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 160], 'flops': 4746299996032}, 'timestamp': '2025-09-10 02:36:13.096877', 'step': 8825, 'epoch': 2} {'type': 'loss', 'content': 0.0963774025440216, 'timestamp': '2025-09-10 02:36:13.099597', 'step': 8826, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 3322488817984}, 'timestamp': '2025-09-10 02:36:13.129420', 'step': 8826, 'epoch': 2} {'type': 'loss', 'content': 0.13451752066612244, 'timestamp': '2025-09-10 02:36:13.132072', 'step': 8827, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 2847885091968}, 'timestamp': '2025-09-10 02:36:13.162881', 'step': 8827, 'epoch': 2} {'type': 'loss', 'content': 0.05161795765161514, 'timestamp': '2025-09-10 02:36:13.190065', 'step': 8828, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 3322488817984}, 'timestamp': '2025-09-10 02:36:13.223804', 'step': 8828, 'epoch': 2} {'type': 'loss', 'content': 0.11186086386442184, 'timestamp': '2025-09-10 02:36:13.226974', 'step': 8829, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 128], 'flops': 3797092544000}, 'timestamp': '2025-09-10 02:36:13.258207', 'step': 8829, 'epoch': 2} {'type': 'loss', 'content': 0.1332085281610489, 'timestamp': '2025-09-10 02:36:13.260993', 'step': 8830, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 2847885091968}, 'timestamp': '2025-09-10 02:36:13.292305', 'step': 8830, 'epoch': 2} {'type': 'loss', 'content': 0.153560072183609, 'timestamp': '2025-09-10 02:36:13.295478', 'step': 8831, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 128], 'flops': 3797092544000}, 'timestamp': '2025-09-10 02:36:13.329341', 'step': 8831, 'epoch': 2} {'type': 'loss', 'content': 0.09501499682664871, 'timestamp': '2025-09-10 02:36:13.353134', 'step': 8832, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 3322488817984}, 'timestamp': '2025-09-10 02:36:13.384359', 'step': 8832, 'epoch': 2} {'type': 'loss', 'content': 0.02638448029756546, 'timestamp': '2025-09-10 02:36:13.386532', 'step': 8833, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 2847885091968}, 'timestamp': '2025-09-10 02:36:13.417028', 'step': 8833, 'epoch': 2} {'type': 'loss', 'content': 0.15998217463493347, 'timestamp': '2025-09-10 02:36:13.419358', 'step': 8834, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 160], 'flops': 4746299996032}, 'timestamp': '2025-09-10 02:36:13.449249', 'step': 8834, 'epoch': 2} {'type': 'loss', 'content': 0.10257875174283981, 'timestamp': '2025-09-10 02:36:13.452059', 'step': 8835, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 3322488817984}, 'timestamp': '2025-09-10 02:36:13.482129', 'step': 8835, 'epoch': 2} {'type': 'loss', 'content': 0.18314269185066223, 'timestamp': '2025-09-10 02:36:13.505561', 'step': 8836, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 160], 'flops': 4746299996032}, 'timestamp': '2025-09-10 02:36:13.536211', 'step': 8836, 'epoch': 2} {'type': 'loss', 'content': 0.10029958188533783, 'timestamp': '2025-09-10 02:36:13.538700', 'step': 8837, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 3322488817984}, 'timestamp': '2025-09-10 02:36:13.569080', 'step': 8837, 'epoch': 2} {'type': 'loss', 'content': 0.23113983869552612, 'timestamp': '2025-09-10 02:36:13.571915', 'step': 8838, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 128], 'flops': 3797092544000}, 'timestamp': '2025-09-10 02:36:13.603483', 'step': 8838, 'epoch': 2} {'type': 'loss', 'content': 0.12300239503383636, 'timestamp': '2025-09-10 02:36:13.605699', 'step': 8839, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 3322488817984}, 'timestamp': '2025-09-10 02:36:13.635051', 'step': 8839, 'epoch': 2} {'type': 'loss', 'content': 0.09355562925338745, 'timestamp': '2025-09-10 02:36:13.658587', 'step': 8840, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 144], 'flops': 4271696270016}, 'timestamp': '2025-09-10 02:36:13.688991', 'step': 8840, 'epoch': 2} {'type': 'loss', 'content': 0.09903311729431152, 'timestamp': '2025-09-10 02:36:13.691545', 'step': 8841, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 128], 'flops': 3797092544000}, 'timestamp': '2025-09-10 02:36:13.724276', 'step': 8841, 'epoch': 2} {'type': 'loss', 'content': 0.07917256653308868, 'timestamp': '2025-09-10 02:36:13.726664', 'step': 8842, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 3322488817984}, 'timestamp': '2025-09-10 02:36:13.757224', 'step': 8842, 'epoch': 2} {'type': 'loss', 'content': 0.11416774243116379, 'timestamp': '2025-09-10 02:36:13.759393', 'step': 8843, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 3322488817984}, 'timestamp': '2025-09-10 02:36:13.790122', 'step': 8843, 'epoch': 2} {'type': 'loss', 'content': 0.04734240099787712, 'timestamp': '2025-09-10 02:36:13.813877', 'step': 8844, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 2847885091968}, 'timestamp': '2025-09-10 02:36:13.844133', 'step': 8844, 'epoch': 2} {'type': 'loss', 'content': 0.13801923394203186, 'timestamp': '2025-09-10 02:36:13.846160', 'step': 8845, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 144], 'flops': 4271696270016}, 'timestamp': '2025-09-10 02:36:13.876482', 'step': 8845, 'epoch': 2} {'type': 'loss', 'content': 0.13832509517669678, 'timestamp': '2025-09-10 02:36:13.879397', 'step': 8846, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 128], 'flops': 3797092544000}, 'timestamp': '2025-09-10 02:36:13.910298', 'step': 8846, 'epoch': 2} {'type': 'loss', 'content': 0.10080473870038986, 'timestamp': '2025-09-10 02:36:13.912428', 'step': 8847, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 2847885091968}, 'timestamp': '2025-09-10 02:36:13.941883', 'step': 8847, 'epoch': 2} {'type': 'loss', 'content': 0.08728041499853134, 'timestamp': '2025-09-10 02:36:13.965489', 'step': 8848, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 3322488817984}, 'timestamp': '2025-09-10 02:36:13.995652', 'step': 8848, 'epoch': 2} {'type': 'loss', 'content': 0.17620138823986053, 'timestamp': '2025-09-10 02:36:13.997812', 'step': 8849, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 3322488817984}, 'timestamp': '2025-09-10 02:36:14.027537', 'step': 8849, 'epoch': 2} {'type': 'loss', 'content': 0.11811994761228561, 'timestamp': '2025-09-10 02:36:14.029693', 'step': 8850, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 144], 'flops': 4271696270016}, 'timestamp': '2025-09-10 02:36:14.059677', 'step': 8850, 'epoch': 2} {'type': 'loss', 'content': 0.13247057795524597, 'timestamp': '2025-09-10 02:36:14.061939', 'step': 8851, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 80], 'flops': 2373281365952}, 'timestamp': '2025-09-10 02:36:14.092024', 'step': 8851, 'epoch': 2} {'type': 'loss', 'content': 0.038909912109375, 'timestamp': '2025-09-10 02:36:14.116095', 'step': 8852, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 3322488817984}, 'timestamp': '2025-09-10 02:36:14.146279', 'step': 8852, 'epoch': 2} {'type': 'loss', 'content': 0.11408351361751556, 'timestamp': '2025-09-10 02:36:14.148487', 'step': 8853, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 3322488817984}, 'timestamp': '2025-09-10 02:36:14.178880', 'step': 8853, 'epoch': 2} {'type': 'loss', 'content': 0.12189001590013504, 'timestamp': '2025-09-10 02:36:14.181426', 'step': 8854, 'epoch': 2} {'type': 'flops', 'content': [{'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1582003754624}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1898404492032}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1898404492032}, {'type': 'perplexity', 'in_batch_dim': [8, 144], 'batch_size': 8, 'flops': 2847606704256}, {'type': 'perplexity', 'in_batch_dim': [8, 64], 'batch_size': 8, 'flops': 1265603017216}, {'type': 'perplexity', 'in_batch_dim': [8, 112], 'batch_size': 8, 'flops': 2214805229440}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1898404492032}, {'type': 'perplexity', 'in_batch_dim': [8, 112], 'batch_size': 8, 'flops': 2214805229440}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1898404492032}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1898404492032}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1898404492032}, {'type': 'perplexity', 'in_batch_dim': [8, 112], 'batch_size': 8, 'flops': 2214805229440}, {'type': 'perplexity', 'in_batch_dim': [8, 112], 'batch_size': 8, 'flops': 2214805229440}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1582003754624}, {'type': 'perplexity', 'in_batch_dim': [8, 144], 'batch_size': 8, 'flops': 2847606704256}, {'type': 'perplexity', 'in_batch_dim': [8, 112], 'batch_size': 8, 'flops': 2214805229440}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1582003754624}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1898404492032}, {'type': 'perplexity', 'in_batch_dim': [8, 64], 'batch_size': 8, 'flops': 1265603017216}, {'type': 'perplexity', 'in_batch_dim': [8, 64], 'batch_size': 8, 'flops': 1265603017216}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1898404492032}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1582003754624}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1582003754624}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1582003754624}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1582003754624}, {'type': 'perplexity', 'in_batch_dim': [8, 64], 'batch_size': 8, 'flops': 1265603017216}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1898404492032}, {'type': 'perplexity', 'in_batch_dim': [8, 64], 'batch_size': 8, 'flops': 1265603017216}, {'type': 'perplexity', 'in_batch_dim': [8, 64], 'batch_size': 8, 'flops': 1265603017216}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1582003754624}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1582003754624}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1582003754624}, {'type': 'perplexity', 'in_batch_dim': [8, 112], 'batch_size': 8, 'flops': 2214805229440}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1582003754624}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1582003754624}, {'type': 'perplexity', 'in_batch_dim': [8, 160], 'batch_size': 8, 'flops': 3164007441664}, {'type': 'perplexity', 'in_batch_dim': [8, 64], 'batch_size': 8, 'flops': 1265603017216}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1898404492032}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1898404492032}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1898404492032}, {'type': 'perplexity', 'in_batch_dim': [8, 64], 'batch_size': 8, 'flops': 1265603017216}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1582003754624}, {'type': 'perplexity', 'in_batch_dim': [8, 64], 'batch_size': 8, 'flops': 1265603017216}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1582003754624}, {'type': 'perplexity', 'in_batch_dim': [8, 64], 'batch_size': 8, 'flops': 1265603017216}, {'type': 'perplexity', 'in_batch_dim': [8, 112], 'batch_size': 8, 'flops': 2214805229440}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1582003754624}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1898404492032}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1898404492032}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1898404492032}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1898404492032}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1898404492032}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1898404492032}, {'type': 'perplexity', 'in_batch_dim': [8, 64], 'batch_size': 8, 'flops': 1265603017216}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1898404492032}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1582003754624}, {'type': 'perplexity', 'in_batch_dim': [8, 64], 'batch_size': 8, 'flops': 1265603017216}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1898404492032}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1898404492032}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1898404492032}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1898404492032}, {'type': 'perplexity', 'in_batch_dim': [8, 128], 'batch_size': 8, 'flops': 2531205966848}, {'type': 'perplexity', 'in_batch_dim': [8, 112], 'batch_size': 8, 'flops': 2214805229440}, {'type': 'perplexity', 'in_batch_dim': [8, 64], 'batch_size': 8, 'flops': 1265603017216}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1582003754624}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1898404492032}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1582003754624}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1898404492032}, {'type': 'perplexity', 'in_batch_dim': [8, 64], 'batch_size': 8, 'flops': 1265603017216}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1898404492032}, {'type': 'perplexity', 'in_batch_dim': [8, 112], 'batch_size': 8, 'flops': 2214805229440}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1898404492032}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1582003754624}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1582003754624}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1582003754624}, {'type': 'perplexity', 'in_batch_dim': [8, 64], 'batch_size': 8, 'flops': 1265603017216}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1582003754624}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1898404492032}, {'type': 'perplexity', 'in_batch_dim': [8, 64], 'batch_size': 8, 'flops': 1265603017216}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1582003754624}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1898404492032}, {'type': 'perplexity', 'in_batch_dim': [8, 64], 'batch_size': 8, 'flops': 1265603017216}, {'type': 'perplexity', 'in_batch_dim': [8, 112], 'batch_size': 8, 'flops': 2214805229440}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1582003754624}, {'type': 'perplexity', 'in_batch_dim': [8, 144], 'batch_size': 8, 'flops': 2847606704256}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1582003754624}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1582003754624}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1582003754624}, {'type': 'perplexity', 'in_batch_dim': [8, 64], 'batch_size': 8, 'flops': 1265603017216}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1898404492032}, {'type': 'perplexity', 'in_batch_dim': [8, 112], 'batch_size': 8, 'flops': 2214805229440}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1898404492032}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1582003754624}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1582003754624}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1582003754624}, {'type': 'perplexity', 'in_batch_dim': [8, 112], 'batch_size': 8, 'flops': 2214805229440}, {'type': 'perplexity', 'in_batch_dim': [8, 64], 'batch_size': 8, 'flops': 1265603017216}, {'type': 'perplexity', 'in_batch_dim': [8, 112], 'batch_size': 8, 'flops': 2214805229440}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1582003754624}, {'type': 'perplexity', 'in_batch_dim': [8, 64], 'batch_size': 8, 'flops': 1265603017216}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1582003754624}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1582003754624}, {'type': 'perplexity', 'in_batch_dim': [8, 64], 'batch_size': 8, 'flops': 1265603017216}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1582003754624}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1582003754624}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1582003754624}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1898404492032}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1582003754624}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1582003754624}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1898404492032}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1898404492032}, {'type': 'perplexity', 'in_batch_dim': [8, 112], 'batch_size': 8, 'flops': 2214805229440}, {'type': 'perplexity', 'in_batch_dim': [8, 64], 'batch_size': 8, 'flops': 1265603017216}, {'type': 'perplexity', 'in_batch_dim': [8, 112], 'batch_size': 8, 'flops': 2214805229440}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1898404492032}, {'type': 'perplexity', 'in_batch_dim': [8, 112], 'batch_size': 8, 'flops': 2214805229440}, {'type': 'perplexity', 'in_batch_dim': [8, 128], 'batch_size': 8, 'flops': 2531205966848}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1582003754624}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1898404492032}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1898404492032}, {'type': 'perplexity', 'in_batch_dim': [8, 192], 'batch_size': 8, 'flops': 3796808916480}, {'type': 'perplexity', 'in_batch_dim': [8, 64], 'batch_size': 8, 'flops': 1265603017216}, {'type': 'perplexity', 'in_batch_dim': [8, 144], 'batch_size': 8, 'flops': 2847606704256}, {'type': 'perplexity', 'in_batch_dim': [8, 64], 'batch_size': 8, 'flops': 1265603017216}, {'type': 'perplexity', 'in_batch_dim': [8, 144], 'batch_size': 8, 'flops': 2847606704256}, {'type': 'perplexity', 'in_batch_dim': [8, 64], 'batch_size': 8, 'flops': 1265603017216}, {'type': 'perplexity', 'in_batch_dim': [8, 64], 'batch_size': 8, 'flops': 1265603017216}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1898404492032}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1898404492032}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1582003754624}, {'type': 'perplexity', 'in_batch_dim': [8, 128], 'batch_size': 8, 'flops': 2531205966848}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1898404492032}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1898404492032}, {'type': 'perplexity', 'in_batch_dim': [8, 112], 'batch_size': 8, 'flops': 2214805229440}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1582003754624}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1582003754624}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1898404492032}, {'type': 'perplexity', 'in_batch_dim': [8, 64], 'batch_size': 8, 'flops': 1265603017216}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1898404492032}, {'type': 'perplexity', 'in_batch_dim': [8, 112], 'batch_size': 8, 'flops': 2214805229440}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1898404492032}, {'type': 'perplexity', 'in_batch_dim': [8, 64], 'batch_size': 8, 'flops': 1265603017216}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1582003754624}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1898404492032}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1898404492032}, {'type': 'perplexity', 'in_batch_dim': [8, 64], 'batch_size': 8, 'flops': 1265603017216}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1898404492032}, {'type': 'perplexity', 'in_batch_dim': [8, 64], 'batch_size': 8, 'flops': 1265603017216}, {'type': 'perplexity', 'in_batch_dim': [8, 112], 'batch_size': 8, 'flops': 2214805229440}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1582003754624}, {'type': 'perplexity', 'in_batch_dim': [8, 128], 'batch_size': 8, 'flops': 2531205966848}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1582003754624}, {'type': 'perplexity', 'in_batch_dim': [8, 112], 'batch_size': 8, 'flops': 2214805229440}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1898404492032}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1582003754624}, {'type': 'perplexity', 'in_batch_dim': [8, 112], 'batch_size': 8, 'flops': 2214805229440}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1582003754624}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1898404492032}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1898404492032}, {'type': 'perplexity', 'in_batch_dim': [8, 128], 'batch_size': 8, 'flops': 2531205966848}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1898404492032}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1898404492032}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1582003754624}, {'type': 'perplexity', 'in_batch_dim': [8, 112], 'batch_size': 8, 'flops': 2214805229440}, {'type': 'perplexity', 'in_batch_dim': [8, 128], 'batch_size': 8, 'flops': 2531205966848}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1898404492032}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1582003754624}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1582003754624}, {'type': 'perplexity', 'in_batch_dim': [8, 128], 'batch_size': 8, 'flops': 2531205966848}, {'type': 'perplexity', 'in_batch_dim': [8, 112], 'batch_size': 8, 'flops': 2214805229440}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1582003754624}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1582003754624}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1898404492032}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1582003754624}, {'type': 'perplexity', 'in_batch_dim': [8, 64], 'batch_size': 8, 'flops': 1265603017216}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1582003754624}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1582003754624}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1582003754624}, {'type': 'perplexity', 'in_batch_dim': [8, 112], 'batch_size': 8, 'flops': 2214805229440}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1582003754624}, {'type': 'perplexity', 'in_batch_dim': [8, 64], 'batch_size': 8, 'flops': 1265603017216}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1582003754624}, {'type': 'perplexity', 'in_batch_dim': [8, 128], 'batch_size': 8, 'flops': 2531205966848}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1898404492032}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1898404492032}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1582003754624}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1898404492032}, {'type': 'perplexity', 'in_batch_dim': [8, 64], 'batch_size': 8, 'flops': 1265603017216}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1898404492032}, {'type': 'perplexity', 'in_batch_dim': [8, 112], 'batch_size': 8, 'flops': 2214805229440}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1898404492032}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1582003754624}, {'type': 'perplexity', 'in_batch_dim': [8, 112], 'batch_size': 8, 'flops': 2214805229440}, {'type': 'perplexity', 'in_batch_dim': [8, 112], 'batch_size': 8, 'flops': 2214805229440}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1898404492032}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1898404492032}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1898404492032}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1898404492032}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1582003754624}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1582003754624}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1582003754624}, {'type': 'perplexity', 'in_batch_dim': [8, 112], 'batch_size': 8, 'flops': 2214805229440}, {'type': 'perplexity', 'in_batch_dim': [8, 64], 'batch_size': 8, 'flops': 1265603017216}, {'type': 'perplexity', 'in_batch_dim': [8, 144], 'batch_size': 8, 'flops': 2847606704256}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1898404492032}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1898404492032}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1898404492032}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1898404492032}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1898404492032}, {'type': 'perplexity', 'in_batch_dim': [8, 112], 'batch_size': 8, 'flops': 2214805229440}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1582003754624}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1582003754624}, {'type': 'perplexity', 'in_batch_dim': [8, 64], 'batch_size': 8, 'flops': 1265603017216}, {'type': 'perplexity', 'in_batch_dim': [8, 112], 'batch_size': 8, 'flops': 2214805229440}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1582003754624}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1898404492032}, {'type': 'perplexity', 'in_batch_dim': [8, 64], 'batch_size': 8, 'flops': 1265603017216}, {'type': 'perplexity', 'in_batch_dim': [8, 112], 'batch_size': 8, 'flops': 2214805229440}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1898404492032}, {'type': 'perplexity', 'in_batch_dim': [8, 64], 'batch_size': 8, 'flops': 1265603017216}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1582003754624}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1582003754624}, {'type': 'perplexity', 'in_batch_dim': [8, 64], 'batch_size': 8, 'flops': 1265603017216}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1898404492032}, {'type': 'perplexity', 'in_batch_dim': [8, 64], 'batch_size': 8, 'flops': 1265603017216}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1582003754624}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1898404492032}, {'type': 'perplexity', 'in_batch_dim': [8, 112], 'batch_size': 8, 'flops': 2214805229440}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1582003754624}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1898404492032}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1582003754624}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1898404492032}, {'type': 'perplexity', 'in_batch_dim': [8, 112], 'batch_size': 8, 'flops': 2214805229440}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1898404492032}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1898404492032}, {'type': 'perplexity', 'in_batch_dim': [8, 64], 'batch_size': 8, 'flops': 1265603017216}, {'type': 'perplexity', 'in_batch_dim': [8, 64], 'batch_size': 8, 'flops': 1265603017216}, {'type': 'perplexity', 'in_batch_dim': [8, 128], 'batch_size': 8, 'flops': 2531205966848}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1582003754624}, {'type': 'perplexity', 'in_batch_dim': [8, 64], 'batch_size': 8, 'flops': 1265603017216}, {'type': 'perplexity', 'in_batch_dim': [8, 112], 'batch_size': 8, 'flops': 2214805229440}, {'type': 'perplexity', 'in_batch_dim': [8, 112], 'batch_size': 8, 'flops': 2214805229440}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1898404492032}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1582003754624}, {'type': 'perplexity', 'in_batch_dim': [8, 128], 'batch_size': 8, 'flops': 2531205966848}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1898404492032}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1582003754624}, {'type': 'perplexity', 'in_batch_dim': [8, 112], 'batch_size': 8, 'flops': 2214805229440}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1898404492032}, {'type': 'perplexity', 'in_batch_dim': [8, 64], 'batch_size': 8, 'flops': 1265603017216}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1582003754624}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1898404492032}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1898404492032}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1582003754624}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1582003754624}, {'type': 'perplexity', 'in_batch_dim': [8, 64], 'batch_size': 8, 'flops': 1265603017216}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1898404492032}, {'type': 'perplexity', 'in_batch_dim': [8, 112], 'batch_size': 8, 'flops': 2214805229440}, {'type': 'perplexity', 'in_batch_dim': [8, 64], 'batch_size': 8, 'flops': 1265603017216}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1898404492032}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1582003754624}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1582003754624}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1898404492032}, {'type': 'perplexity', 'in_batch_dim': [8, 112], 'batch_size': 8, 'flops': 2214805229440}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1898404492032}, {'type': 'perplexity', 'in_batch_dim': [8, 112], 'batch_size': 8, 'flops': 2214805229440}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1582003754624}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1582003754624}, {'type': 'perplexity', 'in_batch_dim': [8, 64], 'batch_size': 8, 'flops': 1265603017216}, {'type': 'perplexity', 'in_batch_dim': [8, 112], 'batch_size': 8, 'flops': 2214805229440}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1582003754624}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1898404492032}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1582003754624}, {'type': 'perplexity', 'in_batch_dim': [8, 112], 'batch_size': 8, 'flops': 2214805229440}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1898404492032}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1898404492032}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1582003754624}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1898404492032}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1582003754624}, {'type': 'perplexity', 'in_batch_dim': [8, 112], 'batch_size': 8, 'flops': 2214805229440}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1582003754624}, {'type': 'perplexity', 'in_batch_dim': [8, 64], 'batch_size': 8, 'flops': 1265603017216}, {'type': 'perplexity', 'in_batch_dim': [8, 64], 'batch_size': 8, 'flops': 1265603017216}, {'type': 'perplexity', 'in_batch_dim': [8, 48], 'batch_size': 8, 'flops': 949202279808}, {'type': 'perplexity', 'in_batch_dim': [8, 112], 'batch_size': 8, 'flops': 2214805229440}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1582003754624}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1898404492032}, {'type': 'perplexity', 'in_batch_dim': [8, 128], 'batch_size': 8, 'flops': 2531205966848}, {'type': 'perplexity', 'in_batch_dim': [8, 112], 'batch_size': 8, 'flops': 2214805229440}, {'type': 'perplexity', 'in_batch_dim': [8, 128], 'batch_size': 8, 'flops': 2531205966848}, {'type': 'perplexity', 'in_batch_dim': [8, 112], 'batch_size': 8, 'flops': 2214805229440}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1582003754624}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1898404492032}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1582003754624}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1582003754624}, {'type': 'perplexity', 'in_batch_dim': [8, 112], 'batch_size': 8, 'flops': 2214805229440}, {'type': 'perplexity', 'in_batch_dim': [8, 112], 'batch_size': 8, 'flops': 2214805229440}, {'type': 'perplexity', 'in_batch_dim': [8, 64], 'batch_size': 8, 'flops': 1265603017216}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1898404492032}, {'type': 'perplexity', 'in_batch_dim': [8, 112], 'batch_size': 8, 'flops': 2214805229440}, {'type': 'perplexity', 'in_batch_dim': [8, 64], 'batch_size': 8, 'flops': 1265603017216}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1898404492032}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1582003754624}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1898404492032}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1898404492032}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1898404492032}, {'type': 'perplexity', 'in_batch_dim': [8, 48], 'batch_size': 8, 'flops': 949202279808}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1898404492032}, {'type': 'perplexity', 'in_batch_dim': [8, 112], 'batch_size': 8, 'flops': 2214805229440}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1582003754624}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1898404492032}, {'type': 'perplexity', 'in_batch_dim': [8, 64], 'batch_size': 8, 'flops': 1265603017216}, {'type': 'perplexity', 'in_batch_dim': [8, 112], 'batch_size': 8, 'flops': 2214805229440}, {'type': 'perplexity', 'in_batch_dim': [8, 64], 'batch_size': 8, 'flops': 1265603017216}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1582003754624}, {'type': 'perplexity', 'in_batch_dim': [8, 64], 'batch_size': 8, 'flops': 1265603017216}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1582003754624}, {'type': 'perplexity', 'in_batch_dim': [8, 112], 'batch_size': 8, 'flops': 2214805229440}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1582003754624}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1582003754624}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1582003754624}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1582003754624}, {'type': 'perplexity', 'in_batch_dim': [8, 112], 'batch_size': 8, 'flops': 2214805229440}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1582003754624}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1898404492032}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1898404492032}, {'type': 'perplexity', 'in_batch_dim': [8, 112], 'batch_size': 8, 'flops': 2214805229440}, {'type': 'perplexity', 'in_batch_dim': [8, 144], 'batch_size': 8, 'flops': 2847606704256}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1582003754624}, {'type': 'perplexity', 'in_batch_dim': [8, 64], 'batch_size': 8, 'flops': 1265603017216}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1898404492032}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1898404492032}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1582003754624}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1898404492032}, {'type': 'perplexity', 'in_batch_dim': [8, 128], 'batch_size': 8, 'flops': 2531205966848}, {'type': 'perplexity', 'in_batch_dim': [8, 112], 'batch_size': 8, 'flops': 2214805229440}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1898404492032}, {'type': 'perplexity', 'in_batch_dim': [8, 64], 'batch_size': 8, 'flops': 1265603017216}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1898404492032}, {'type': 'perplexity', 'in_batch_dim': [8, 160], 'batch_size': 8, 'flops': 3164007441664}, {'type': 'perplexity', 'in_batch_dim': [8, 64], 'batch_size': 8, 'flops': 1265603017216}, {'type': 'perplexity', 'in_batch_dim': [8, 112], 'batch_size': 8, 'flops': 2214805229440}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1898404492032}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1898404492032}, {'type': 'perplexity', 'in_batch_dim': [8, 144], 'batch_size': 8, 'flops': 2847606704256}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1582003754624}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1898404492032}, {'type': 'perplexity', 'in_batch_dim': [8, 64], 'batch_size': 8, 'flops': 1265603017216}, {'type': 'perplexity', 'in_batch_dim': [8, 64], 'batch_size': 8, 'flops': 1265603017216}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1898404492032}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1898404492032}, {'type': 'perplexity', 'in_batch_dim': [8, 112], 'batch_size': 8, 'flops': 2214805229440}, {'type': 'perplexity', 'in_batch_dim': [8, 112], 'batch_size': 8, 'flops': 2214805229440}, {'type': 'perplexity', 'in_batch_dim': [8, 128], 'batch_size': 8, 'flops': 2531205966848}, {'type': 'perplexity', 'in_batch_dim': [8, 112], 'batch_size': 8, 'flops': 2214805229440}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1582003754624}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1582003754624}, {'type': 'perplexity', 'in_batch_dim': [8, 112], 'batch_size': 8, 'flops': 2214805229440}, {'type': 'perplexity', 'in_batch_dim': [8, 128], 'batch_size': 8, 'flops': 2531205966848}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1582003754624}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1582003754624}, {'type': 'perplexity', 'in_batch_dim': [8, 64], 'batch_size': 8, 'flops': 1265603017216}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1898404492032}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1582003754624}, {'type': 'perplexity', 'in_batch_dim': [8, 112], 'batch_size': 8, 'flops': 2214805229440}, {'type': 'perplexity', 'in_batch_dim': [8, 64], 'batch_size': 8, 'flops': 1265603017216}, {'type': 'perplexity', 'in_batch_dim': [8, 64], 'batch_size': 8, 'flops': 1265603017216}, {'type': 'perplexity', 'in_batch_dim': [8, 128], 'batch_size': 8, 'flops': 2531205966848}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1898404492032}, {'type': 'perplexity', 'in_batch_dim': [8, 112], 'batch_size': 8, 'flops': 2214805229440}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1582003754624}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1582003754624}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1582003754624}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1582003754624}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1582003754624}, {'type': 'perplexity', 'in_batch_dim': [8, 112], 'batch_size': 8, 'flops': 2214805229440}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1898404492032}, {'type': 'perplexity', 'in_batch_dim': [8, 112], 'batch_size': 8, 'flops': 2214805229440}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1898404492032}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1582003754624}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1582003754624}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1898404492032}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1898404492032}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1582003754624}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1582003754624}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1582003754624}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1582003754624}, {'type': 'perplexity', 'in_batch_dim': [8, 48], 'batch_size': 8, 'flops': 949202279808}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1582003754624}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1898404492032}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1582003754624}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1898404492032}, {'type': 'perplexity', 'in_batch_dim': [8, 112], 'batch_size': 8, 'flops': 2214805229440}, {'type': 'perplexity', 'in_batch_dim': [8, 128], 'batch_size': 8, 'flops': 2531205966848}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1898404492032}, {'type': 'perplexity', 'in_batch_dim': [8, 64], 'batch_size': 8, 'flops': 1265603017216}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1898404492032}, {'type': 'perplexity', 'in_batch_dim': [8, 64], 'batch_size': 8, 'flops': 1265603017216}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1582003754624}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1582003754624}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1582003754624}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1582003754624}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1898404492032}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1582003754624}, {'type': 'perplexity', 'in_batch_dim': [8, 112], 'batch_size': 8, 'flops': 2214805229440}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1898404492032}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1582003754624}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1898404492032}, {'type': 'perplexity', 'in_batch_dim': [8, 64], 'batch_size': 8, 'flops': 1265603017216}, {'type': 'perplexity', 'in_batch_dim': [8, 64], 'batch_size': 8, 'flops': 1265603017216}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1898404492032}, {'type': 'perplexity', 'in_batch_dim': [8, 64], 'batch_size': 8, 'flops': 1265603017216}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1582003754624}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1582003754624}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1582003754624}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1582003754624}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1582003754624}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1582003754624}, {'type': 'perplexity', 'in_batch_dim': [8, 64], 'batch_size': 8, 'flops': 1265603017216}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1582003754624}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1582003754624}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1898404492032}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1898404492032}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1898404492032}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1898404492032}, {'type': 'perplexity', 'in_batch_dim': [8, 128], 'batch_size': 8, 'flops': 2531205966848}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1582003754624}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1898404492032}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1898404492032}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1898404492032}, {'type': 'perplexity', 'in_batch_dim': [8, 64], 'batch_size': 8, 'flops': 1265603017216}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1898404492032}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1898404492032}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1582003754624}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1898404492032}, {'type': 'perplexity', 'in_batch_dim': [8, 112], 'batch_size': 8, 'flops': 2214805229440}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1898404492032}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1582003754624}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1898404492032}, {'type': 'perplexity', 'in_batch_dim': [8, 64], 'batch_size': 8, 'flops': 1265603017216}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1898404492032}, {'type': 'perplexity', 'in_batch_dim': [8, 112], 'batch_size': 8, 'flops': 2214805229440}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1898404492032}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1898404492032}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1582003754624}, {'type': 'perplexity', 'in_batch_dim': [8, 64], 'batch_size': 8, 'flops': 1265603017216}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1582003754624}, {'type': 'perplexity', 'in_batch_dim': [8, 128], 'batch_size': 8, 'flops': 2531205966848}, {'type': 'perplexity', 'in_batch_dim': [8, 64], 'batch_size': 8, 'flops': 1265603017216}, {'type': 'perplexity', 'in_batch_dim': [8, 128], 'batch_size': 8, 'flops': 2531205966848}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1898404492032}, {'type': 'perplexity', 'in_batch_dim': [8, 112], 'batch_size': 8, 'flops': 2214805229440}, {'type': 'perplexity', 'in_batch_dim': [8, 112], 'batch_size': 8, 'flops': 2214805229440}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1898404492032}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1898404492032}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1582003754624}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1582003754624}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1582003754624}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1898404492032}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1582003754624}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1582003754624}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1898404492032}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1582003754624}, {'type': 'perplexity', 'in_batch_dim': [8, 64], 'batch_size': 8, 'flops': 1265603017216}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1898404492032}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1898404492032}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1898404492032}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1898404492032}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1582003754624}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1898404492032}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1582003754624}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1582003754624}, {'type': 'perplexity', 'in_batch_dim': [8, 112], 'batch_size': 8, 'flops': 2214805229440}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1582003754624}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1898404492032}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1582003754624}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1582003754624}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1582003754624}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1898404492032}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1582003754624}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1898404492032}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1582003754624}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1582003754624}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1898404492032}, {'type': 'perplexity', 'in_batch_dim': [8, 112], 'batch_size': 8, 'flops': 2214805229440}, {'type': 'perplexity', 'in_batch_dim': [8, 128], 'batch_size': 8, 'flops': 2531205966848}, {'type': 'perplexity', 'in_batch_dim': [8, 112], 'batch_size': 8, 'flops': 2214805229440}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1582003754624}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1898404492032}, {'type': 'perplexity', 'in_batch_dim': [8, 64], 'batch_size': 8, 'flops': 1265603017216}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1898404492032}, {'type': 'perplexity', 'in_batch_dim': [8, 64], 'batch_size': 8, 'flops': 1265603017216}, {'type': 'perplexity', 'in_batch_dim': [8, 144], 'batch_size': 8, 'flops': 2847606704256}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1582003754624}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1898404492032}, {'type': 'perplexity', 'in_batch_dim': [8, 112], 'batch_size': 8, 'flops': 2214805229440}, {'type': 'perplexity', 'in_batch_dim': [8, 144], 'batch_size': 8, 'flops': 2847606704256}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1582003754624}, {'type': 'perplexity', 'in_batch_dim': [8, 112], 'batch_size': 8, 'flops': 2214805229440}, {'type': 'perplexity', 'in_batch_dim': [8, 64], 'batch_size': 8, 'flops': 1265603017216}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1582003754624}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1898404492032}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1898404492032}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1582003754624}, {'type': 'perplexity', 'in_batch_dim': [8, 112], 'batch_size': 8, 'flops': 2214805229440}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1582003754624}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1582003754624}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1582003754624}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1582003754624}, {'type': 'perplexity', 'in_batch_dim': [8, 112], 'batch_size': 8, 'flops': 2214805229440}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1898404492032}, {'type': 'perplexity', 'in_batch_dim': [8, 128], 'batch_size': 8, 'flops': 2531205966848}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1582003754624}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1582003754624}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1898404492032}, {'type': 'perplexity', 'in_batch_dim': [8, 64], 'batch_size': 8, 'flops': 1265603017216}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1898404492032}, {'type': 'perplexity', 'in_batch_dim': [8, 64], 'batch_size': 8, 'flops': 1265603017216}, {'type': 'perplexity', 'in_batch_dim': [8, 112], 'batch_size': 8, 'flops': 2214805229440}, {'type': 'perplexity', 'in_batch_dim': [8, 112], 'batch_size': 8, 'flops': 2214805229440}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1582003754624}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1898404492032}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1582003754624}, {'type': 'perplexity', 'in_batch_dim': [8, 64], 'batch_size': 8, 'flops': 1265603017216}, {'type': 'perplexity', 'in_batch_dim': [8, 64], 'batch_size': 8, 'flops': 1265603017216}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1898404492032}, {'type': 'perplexity', 'in_batch_dim': [8, 64], 'batch_size': 8, 'flops': 1265603017216}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1898404492032}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1898404492032}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1582003754624}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1898404492032}, {'type': 'perplexity', 'in_batch_dim': [8, 144], 'batch_size': 8, 'flops': 2847606704256}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1898404492032}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1582003754624}, {'type': 'perplexity', 'in_batch_dim': [8, 128], 'batch_size': 8, 'flops': 2531205966848}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1898404492032}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1898404492032}, {'type': 'perplexity', 'in_batch_dim': [8, 112], 'batch_size': 8, 'flops': 2214805229440}, {'type': 'perplexity', 'in_batch_dim': [8, 112], 'batch_size': 8, 'flops': 2214805229440}, {'type': 'perplexity', 'in_batch_dim': [8, 64], 'batch_size': 8, 'flops': 1265603017216}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1582003754624}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1898404492032}, {'type': 'perplexity', 'in_batch_dim': [8, 128], 'batch_size': 8, 'flops': 2531205966848}, {'type': 'perplexity', 'in_batch_dim': [8, 144], 'batch_size': 8, 'flops': 2847606704256}, {'type': 'perplexity', 'in_batch_dim': [8, 112], 'batch_size': 8, 'flops': 2214805229440}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1582003754624}, {'type': 'perplexity', 'in_batch_dim': [8, 64], 'batch_size': 8, 'flops': 1265603017216}, {'type': 'perplexity', 'in_batch_dim': [8, 64], 'batch_size': 8, 'flops': 1265603017216}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1898404492032}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1582003754624}, {'type': 'perplexity', 'in_batch_dim': [8, 64], 'batch_size': 8, 'flops': 1265603017216}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1898404492032}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1582003754624}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1898404492032}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1898404492032}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1582003754624}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1582003754624}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1582003754624}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1582003754624}, {'type': 'perplexity', 'in_batch_dim': [3, 48], 'batch_size': 8, 'flops': 949202279808}], 'timestamp': '2025-09-10 02:36:22.258935', 'step': 8854, 'epoch': 2} {'type': 'pplx', 'content': 12018.59215497532, 'timestamp': '2025-09-10 02:36:22.262501', 'step': 8854, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 3322488817984}, 'timestamp': '2025-09-10 02:36:22.291975', 'step': 8854, 'epoch': 2} {'type': 'loss', 'content': 0.10520120710134506, 'timestamp': '2025-09-10 02:36:22.295660', 'step': 8855, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 2847885091968}, 'timestamp': '2025-09-10 02:36:22.326610', 'step': 8855, 'epoch': 2} {'type': 'loss', 'content': 0.23587624728679657, 'timestamp': '2025-09-10 02:36:22.350143', 'step': 8856, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 128], 'flops': 3797092544000}, 'timestamp': '2025-09-10 02:36:22.380017', 'step': 8856, 'epoch': 2} {'type': 'loss', 'content': 0.12931305170059204, 'timestamp': '2025-09-10 02:36:22.382532', 'step': 8857, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 2847885091968}, 'timestamp': '2025-09-10 02:36:22.412711', 'step': 8857, 'epoch': 2} {'type': 'loss', 'content': 0.06921928375959396, 'timestamp': '2025-09-10 02:36:22.414911', 'step': 8858, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 3322488817984}, 'timestamp': '2025-09-10 02:36:22.445870', 'step': 8858, 'epoch': 2} {'type': 'loss', 'content': 0.14086036384105682, 'timestamp': '2025-09-10 02:36:22.448097', 'step': 8859, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 128], 'flops': 3797092544000}, 'timestamp': '2025-09-10 02:36:22.478018', 'step': 8859, 'epoch': 2} {'type': 'loss', 'content': 0.09794103354215622, 'timestamp': '2025-09-10 02:36:22.501360', 'step': 8860, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 160], 'flops': 4746299996032}, 'timestamp': '2025-09-10 02:36:22.531636', 'step': 8860, 'epoch': 2} {'type': 'loss', 'content': 0.1363031566143036, 'timestamp': '2025-09-10 02:36:22.533696', 'step': 8861, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 128], 'flops': 3797092544000}, 'timestamp': '2025-09-10 02:36:22.563389', 'step': 8861, 'epoch': 2} {'type': 'loss', 'content': 0.10788880288600922, 'timestamp': '2025-09-10 02:36:22.565765', 'step': 8862, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 80], 'flops': 2373281365952}, 'timestamp': '2025-09-10 02:36:22.595398', 'step': 8862, 'epoch': 2} {'type': 'loss', 'content': 0.05900163948535919, 'timestamp': '2025-09-10 02:36:22.597933', 'step': 8863, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 3322488817984}, 'timestamp': '2025-09-10 02:36:22.628214', 'step': 8863, 'epoch': 2} {'type': 'loss', 'content': 0.11147424578666687, 'timestamp': '2025-09-10 02:36:22.651879', 'step': 8864, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 128], 'flops': 3797092544000}, 'timestamp': '2025-09-10 02:36:22.681609', 'step': 8864, 'epoch': 2} {'type': 'loss', 'content': 0.11080201715230942, 'timestamp': '2025-09-10 02:36:22.683916', 'step': 8865, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 160], 'flops': 4746299996032}, 'timestamp': '2025-09-10 02:36:22.714335', 'step': 8865, 'epoch': 2} {'type': 'loss', 'content': 0.08553929626941681, 'timestamp': '2025-09-10 02:36:22.716778', 'step': 8866, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 144], 'flops': 4271696270016}, 'timestamp': '2025-09-10 02:36:22.748552', 'step': 8866, 'epoch': 2} {'type': 'loss', 'content': 0.12618966400623322, 'timestamp': '2025-09-10 02:36:22.750881', 'step': 8867, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 128], 'flops': 3797092544000}, 'timestamp': '2025-09-10 02:36:22.781606', 'step': 8867, 'epoch': 2} {'type': 'loss', 'content': 0.18891221284866333, 'timestamp': '2025-09-10 02:36:22.805639', 'step': 8868, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 3322488817984}, 'timestamp': '2025-09-10 02:36:22.836262', 'step': 8868, 'epoch': 2} {'type': 'loss', 'content': 0.07875823974609375, 'timestamp': '2025-09-10 02:36:22.839103', 'step': 8869, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 3322488817984}, 'timestamp': '2025-09-10 02:36:22.869867', 'step': 8869, 'epoch': 2} {'type': 'loss', 'content': 0.12374768406152725, 'timestamp': '2025-09-10 02:36:22.872668', 'step': 8870, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 3322488817984}, 'timestamp': '2025-09-10 02:36:22.903481', 'step': 8870, 'epoch': 2} {'type': 'loss', 'content': 0.11773988604545593, 'timestamp': '2025-09-10 02:36:22.905740', 'step': 8871, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 3322488817984}, 'timestamp': '2025-09-10 02:36:22.937232', 'step': 8871, 'epoch': 2} {'type': 'loss', 'content': 0.0662740021944046, 'timestamp': '2025-09-10 02:36:22.960764', 'step': 8872, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 128], 'flops': 3797092544000}, 'timestamp': '2025-09-10 02:36:22.992336', 'step': 8872, 'epoch': 2} {'type': 'loss', 'content': 0.15263009071350098, 'timestamp': '2025-09-10 02:36:22.994416', 'step': 8873, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 144], 'flops': 4271696270016}, 'timestamp': '2025-09-10 02:36:23.024582', 'step': 8873, 'epoch': 2} {'type': 'loss', 'content': 0.09231405705213547, 'timestamp': '2025-09-10 02:36:23.026910', 'step': 8874, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 128], 'flops': 3797092544000}, 'timestamp': '2025-09-10 02:36:23.056281', 'step': 8874, 'epoch': 2} {'type': 'loss', 'content': 0.13408124446868896, 'timestamp': '2025-09-10 02:36:23.059134', 'step': 8875, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 128], 'flops': 3797092544000}, 'timestamp': '2025-09-10 02:36:23.089788', 'step': 8875, 'epoch': 2} {'type': 'loss', 'content': 0.09731854498386383, 'timestamp': '2025-09-10 02:36:23.113183', 'step': 8876, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 3322488817984}, 'timestamp': '2025-09-10 02:36:23.142321', 'step': 8876, 'epoch': 2} {'type': 'loss', 'content': 0.2228565663099289, 'timestamp': '2025-09-10 02:36:23.144635', 'step': 8877, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 160], 'flops': 4746299996032}, 'timestamp': '2025-09-10 02:36:23.174523', 'step': 8877, 'epoch': 2} {'type': 'loss', 'content': 0.1411297470331192, 'timestamp': '2025-09-10 02:36:23.177334', 'step': 8878, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 2847885091968}, 'timestamp': '2025-09-10 02:36:23.207144', 'step': 8878, 'epoch': 2} {'type': 'loss', 'content': 0.13927586376667023, 'timestamp': '2025-09-10 02:36:23.211478', 'step': 8879, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 80], 'flops': 2373281365952}, 'timestamp': '2025-09-10 02:36:23.242633', 'step': 8879, 'epoch': 2} {'type': 'loss', 'content': 0.2094689905643463, 'timestamp': '2025-09-10 02:36:23.266130', 'step': 8880, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 128], 'flops': 3797092544000}, 'timestamp': '2025-09-10 02:36:23.297243', 'step': 8880, 'epoch': 2} {'type': 'loss', 'content': 0.07167114317417145, 'timestamp': '2025-09-10 02:36:23.299489', 'step': 8881, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 160], 'flops': 4746299996032}, 'timestamp': '2025-09-10 02:36:23.331022', 'step': 8881, 'epoch': 2} {'type': 'loss', 'content': 0.16025300323963165, 'timestamp': '2025-09-10 02:36:23.333703', 'step': 8882, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 3322488817984}, 'timestamp': '2025-09-10 02:36:23.364051', 'step': 8882, 'epoch': 2} {'type': 'loss', 'content': 0.09458541870117188, 'timestamp': '2025-09-10 02:36:23.366178', 'step': 8883, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 3322488817984}, 'timestamp': '2025-09-10 02:36:23.396183', 'step': 8883, 'epoch': 2} {'type': 'loss', 'content': 0.14165239036083221, 'timestamp': '2025-09-10 02:36:23.419629', 'step': 8884, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 3322488817984}, 'timestamp': '2025-09-10 02:36:23.449310', 'step': 8884, 'epoch': 2} {'type': 'loss', 'content': 0.11184588074684143, 'timestamp': '2025-09-10 02:36:23.451604', 'step': 8885, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 128], 'flops': 3797092544000}, 'timestamp': '2025-09-10 02:36:23.481568', 'step': 8885, 'epoch': 2} {'type': 'loss', 'content': 0.2081790715456009, 'timestamp': '2025-09-10 02:36:23.484754', 'step': 8886, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 160], 'flops': 4746299996032}, 'timestamp': '2025-09-10 02:36:23.516270', 'step': 8886, 'epoch': 2} {'type': 'loss', 'content': 0.10113393515348434, 'timestamp': '2025-09-10 02:36:23.518622', 'step': 8887, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 3322488817984}, 'timestamp': '2025-09-10 02:36:23.548623', 'step': 8887, 'epoch': 2} {'type': 'loss', 'content': 0.09602136164903641, 'timestamp': '2025-09-10 02:36:23.571942', 'step': 8888, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 2847885091968}, 'timestamp': '2025-09-10 02:36:23.601813', 'step': 8888, 'epoch': 2} {'type': 'loss', 'content': 0.055881090462207794, 'timestamp': '2025-09-10 02:36:23.604150', 'step': 8889, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 128], 'flops': 3797092544000}, 'timestamp': '2025-09-10 02:36:23.634377', 'step': 8889, 'epoch': 2} {'type': 'loss', 'content': 0.1474170684814453, 'timestamp': '2025-09-10 02:36:23.636404', 'step': 8890, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 2847885091968}, 'timestamp': '2025-09-10 02:36:23.665857', 'step': 8890, 'epoch': 2} {'type': 'loss', 'content': 0.10407256335020065, 'timestamp': '2025-09-10 02:36:23.668244', 'step': 8891, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 80], 'flops': 2373281365952}, 'timestamp': '2025-09-10 02:36:23.697873', 'step': 8891, 'epoch': 2} {'type': 'loss', 'content': 0.14370283484458923, 'timestamp': '2025-09-10 02:36:23.721508', 'step': 8892, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 3322488817984}, 'timestamp': '2025-09-10 02:36:23.751685', 'step': 8892, 'epoch': 2} {'type': 'loss', 'content': 0.07419338822364807, 'timestamp': '2025-09-10 02:36:23.753887', 'step': 8893, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 144], 'flops': 4271696270016}, 'timestamp': '2025-09-10 02:36:23.783784', 'step': 8893, 'epoch': 2} {'type': 'loss', 'content': 0.15892758965492249, 'timestamp': '2025-09-10 02:36:23.786084', 'step': 8894, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 2847885091968}, 'timestamp': '2025-09-10 02:36:23.816156', 'step': 8894, 'epoch': 2} {'type': 'loss', 'content': 0.0631435289978981, 'timestamp': '2025-09-10 02:36:23.818620', 'step': 8895, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 2847885091968}, 'timestamp': '2025-09-10 02:36:23.849410', 'step': 8895, 'epoch': 2} {'type': 'loss', 'content': 0.07401431351900101, 'timestamp': '2025-09-10 02:36:23.872725', 'step': 8896, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 3322488817984}, 'timestamp': '2025-09-10 02:36:23.902405', 'step': 8896, 'epoch': 2} {'type': 'loss', 'content': 0.13129332661628723, 'timestamp': '2025-09-10 02:36:23.904621', 'step': 8897, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 3322488817984}, 'timestamp': '2025-09-10 02:36:23.935028', 'step': 8897, 'epoch': 2} {'type': 'loss', 'content': 0.11712349951267242, 'timestamp': '2025-09-10 02:36:23.937655', 'step': 8898, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 160], 'flops': 4746299996032}, 'timestamp': '2025-09-10 02:36:23.968487', 'step': 8898, 'epoch': 2} {'type': 'loss', 'content': 0.09149827063083649, 'timestamp': '2025-09-10 02:36:23.971217', 'step': 8899, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 128], 'flops': 3797092544000}, 'timestamp': '2025-09-10 02:36:24.000932', 'step': 8899, 'epoch': 2} {'type': 'loss', 'content': 0.1296975165605545, 'timestamp': '2025-09-10 02:36:24.024338', 'step': 8900, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 2847885091968}, 'timestamp': '2025-09-10 02:36:24.055131', 'step': 8900, 'epoch': 2} {'type': 'loss', 'content': 0.09477340430021286, 'timestamp': '2025-09-10 02:36:24.057428', 'step': 8901, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 3322488817984}, 'timestamp': '2025-09-10 02:36:24.087436', 'step': 8901, 'epoch': 2} {'type': 'loss', 'content': 0.1749519556760788, 'timestamp': '2025-09-10 02:36:24.089857', 'step': 8902, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 2847885091968}, 'timestamp': '2025-09-10 02:36:24.120282', 'step': 8902, 'epoch': 2} {'type': 'loss', 'content': 0.134664386510849, 'timestamp': '2025-09-10 02:36:24.122447', 'step': 8903, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 3322488817984}, 'timestamp': '2025-09-10 02:36:24.151969', 'step': 8903, 'epoch': 2} {'type': 'loss', 'content': 0.1636953055858612, 'timestamp': '2025-09-10 02:36:24.175172', 'step': 8904, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 2847885091968}, 'timestamp': '2025-09-10 02:36:24.205216', 'step': 8904, 'epoch': 2} {'type': 'loss', 'content': 0.2535037100315094, 'timestamp': '2025-09-10 02:36:24.209239', 'step': 8905, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 128], 'flops': 3797092544000}, 'timestamp': '2025-09-10 02:36:24.240296', 'step': 8905, 'epoch': 2} {'type': 'loss', 'content': 0.08180621266365051, 'timestamp': '2025-09-10 02:36:24.242592', 'step': 8906, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 128], 'flops': 3797092544000}, 'timestamp': '2025-09-10 02:36:24.272978', 'step': 8906, 'epoch': 2} {'type': 'loss', 'content': 0.11576850712299347, 'timestamp': '2025-09-10 02:36:24.275322', 'step': 8907, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 3322488817984}, 'timestamp': '2025-09-10 02:36:24.304960', 'step': 8907, 'epoch': 2} {'type': 'loss', 'content': 0.1743248999118805, 'timestamp': '2025-09-10 02:36:24.328431', 'step': 8908, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 3322488817984}, 'timestamp': '2025-09-10 02:36:24.358622', 'step': 8908, 'epoch': 2} {'type': 'loss', 'content': 0.08891543745994568, 'timestamp': '2025-09-10 02:36:24.361414', 'step': 8909, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 128], 'flops': 3797092544000}, 'timestamp': '2025-09-10 02:36:24.393417', 'step': 8909, 'epoch': 2} {'type': 'loss', 'content': 0.1251639723777771, 'timestamp': '2025-09-10 02:36:24.395981', 'step': 8910, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 3322488817984}, 'timestamp': '2025-09-10 02:36:24.426771', 'step': 8910, 'epoch': 2} {'type': 'loss', 'content': 0.1606098860502243, 'timestamp': '2025-09-10 02:36:24.429318', 'step': 8911, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 3322488817984}, 'timestamp': '2025-09-10 02:36:24.461206', 'step': 8911, 'epoch': 2} {'type': 'loss', 'content': 0.08749064058065414, 'timestamp': '2025-09-10 02:36:24.485569', 'step': 8912, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 3322488817984}, 'timestamp': '2025-09-10 02:36:24.519741', 'step': 8912, 'epoch': 2} {'type': 'loss', 'content': 0.0923878625035286, 'timestamp': '2025-09-10 02:36:24.522213', 'step': 8913, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 2847885091968}, 'timestamp': '2025-09-10 02:36:24.552090', 'step': 8913, 'epoch': 2} {'type': 'loss', 'content': 0.06296291947364807, 'timestamp': '2025-09-10 02:36:24.554440', 'step': 8914, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 3322488817984}, 'timestamp': '2025-09-10 02:36:24.585058', 'step': 8914, 'epoch': 2} {'type': 'loss', 'content': 0.18055906891822815, 'timestamp': '2025-09-10 02:36:24.588229', 'step': 8915, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 2847885091968}, 'timestamp': '2025-09-10 02:36:24.620967', 'step': 8915, 'epoch': 2} {'type': 'loss', 'content': 0.04163677990436554, 'timestamp': '2025-09-10 02:36:24.644802', 'step': 8916, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 2847885091968}, 'timestamp': '2025-09-10 02:36:24.675038', 'step': 8916, 'epoch': 2} {'type': 'loss', 'content': 0.043396685272455215, 'timestamp': '2025-09-10 02:36:24.677156', 'step': 8917, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 2847885091968}, 'timestamp': '2025-09-10 02:36:24.707127', 'step': 8917, 'epoch': 2} {'type': 'loss', 'content': 0.06405839323997498, 'timestamp': '2025-09-10 02:36:24.709696', 'step': 8918, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 2847885091968}, 'timestamp': '2025-09-10 02:36:24.739031', 'step': 8918, 'epoch': 2} {'type': 'loss', 'content': 0.09615153074264526, 'timestamp': '2025-09-10 02:36:24.741100', 'step': 8919, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 3322488817984}, 'timestamp': '2025-09-10 02:36:24.771235', 'step': 8919, 'epoch': 2} {'type': 'loss', 'content': 0.12020225822925568, 'timestamp': '2025-09-10 02:36:24.794504', 'step': 8920, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 128], 'flops': 3797092544000}, 'timestamp': '2025-09-10 02:36:24.826586', 'step': 8920, 'epoch': 2} {'type': 'loss', 'content': 0.09610392898321152, 'timestamp': '2025-09-10 02:36:24.828594', 'step': 8921, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 3322488817984}, 'timestamp': '2025-09-10 02:36:24.859181', 'step': 8921, 'epoch': 2} {'type': 'loss', 'content': 0.12984666228294373, 'timestamp': '2025-09-10 02:36:24.861379', 'step': 8922, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 2847885091968}, 'timestamp': '2025-09-10 02:36:24.891392', 'step': 8922, 'epoch': 2} {'type': 'loss', 'content': 0.19285914301872253, 'timestamp': '2025-09-10 02:36:24.893635', 'step': 8923, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 3322488817984}, 'timestamp': '2025-09-10 02:36:24.924258', 'step': 8923, 'epoch': 2} {'type': 'loss', 'content': 0.1422872394323349, 'timestamp': '2025-09-10 02:36:24.948041', 'step': 8924, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 128], 'flops': 3797092544000}, 'timestamp': '2025-09-10 02:36:24.978607', 'step': 8924, 'epoch': 2} {'type': 'loss', 'content': 0.10680213570594788, 'timestamp': '2025-09-10 02:36:24.980809', 'step': 8925, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 3322488817984}, 'timestamp': '2025-09-10 02:36:25.010698', 'step': 8925, 'epoch': 2} {'type': 'loss', 'content': 0.08117064833641052, 'timestamp': '2025-09-10 02:36:25.013237', 'step': 8926, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 2847885091968}, 'timestamp': '2025-09-10 02:36:25.043707', 'step': 8926, 'epoch': 2} {'type': 'loss', 'content': 0.07117539644241333, 'timestamp': '2025-09-10 02:36:25.046123', 'step': 8927, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 3322488817984}, 'timestamp': '2025-09-10 02:36:25.077364', 'step': 8927, 'epoch': 2} {'type': 'loss', 'content': 0.2173279970884323, 'timestamp': '2025-09-10 02:36:25.100849', 'step': 8928, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 2847885091968}, 'timestamp': '2025-09-10 02:36:25.131721', 'step': 8928, 'epoch': 2} {'type': 'loss', 'content': 0.1925022155046463, 'timestamp': '2025-09-10 02:36:25.134098', 'step': 8929, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 2847885091968}, 'timestamp': '2025-09-10 02:36:25.163762', 'step': 8929, 'epoch': 2} {'type': 'loss', 'content': 0.16818080842494965, 'timestamp': '2025-09-10 02:36:25.166088', 'step': 8930, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 128], 'flops': 3797092544000}, 'timestamp': '2025-09-10 02:36:25.196608', 'step': 8930, 'epoch': 2} {'type': 'loss', 'content': 0.08042783290147781, 'timestamp': '2025-09-10 02:36:25.198812', 'step': 8931, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 3322488817984}, 'timestamp': '2025-09-10 02:36:25.228928', 'step': 8931, 'epoch': 2} {'type': 'loss', 'content': 0.07243619114160538, 'timestamp': '2025-09-10 02:36:25.252601', 'step': 8932, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 2847885091968}, 'timestamp': '2025-09-10 02:36:25.286374', 'step': 8932, 'epoch': 2} {'type': 'loss', 'content': 0.04656672477722168, 'timestamp': '2025-09-10 02:36:25.288525', 'step': 8933, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 128], 'flops': 3797092544000}, 'timestamp': '2025-09-10 02:36:25.318453', 'step': 8933, 'epoch': 2} {'type': 'loss', 'content': 0.12376163899898529, 'timestamp': '2025-09-10 02:36:25.321086', 'step': 8934, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 128], 'flops': 3797092544000}, 'timestamp': '2025-09-10 02:36:25.350451', 'step': 8934, 'epoch': 2} {'type': 'loss', 'content': 0.09747497737407684, 'timestamp': '2025-09-10 02:36:25.354077', 'step': 8935, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 2847885091968}, 'timestamp': '2025-09-10 02:36:25.383951', 'step': 8935, 'epoch': 2} {'type': 'loss', 'content': 0.04835633188486099, 'timestamp': '2025-09-10 02:36:25.407373', 'step': 8936, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 3322488817984}, 'timestamp': '2025-09-10 02:36:25.438320', 'step': 8936, 'epoch': 2} {'type': 'loss', 'content': 0.1738952249288559, 'timestamp': '2025-09-10 02:36:25.441519', 'step': 8937, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 128], 'flops': 3797092544000}, 'timestamp': '2025-09-10 02:36:25.472779', 'step': 8937, 'epoch': 2} {'type': 'loss', 'content': 0.03807901591062546, 'timestamp': '2025-09-10 02:36:25.475094', 'step': 8938, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 2847885091968}, 'timestamp': '2025-09-10 02:36:25.505091', 'step': 8938, 'epoch': 2} {'type': 'loss', 'content': 0.13844099640846252, 'timestamp': '2025-09-10 02:36:25.507305', 'step': 8939, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 3322488817984}, 'timestamp': '2025-09-10 02:36:25.536955', 'step': 8939, 'epoch': 2} {'type': 'loss', 'content': 0.12202388793230057, 'timestamp': '2025-09-10 02:36:25.560372', 'step': 8940, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 128], 'flops': 3797092544000}, 'timestamp': '2025-09-10 02:36:25.590783', 'step': 8940, 'epoch': 2} {'type': 'loss', 'content': 0.09047745913267136, 'timestamp': '2025-09-10 02:36:25.593037', 'step': 8941, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 3322488817984}, 'timestamp': '2025-09-10 02:36:25.625025', 'step': 8941, 'epoch': 2} {'type': 'loss', 'content': 0.10110977292060852, 'timestamp': '2025-09-10 02:36:25.627767', 'step': 8942, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 128], 'flops': 3797092544000}, 'timestamp': '2025-09-10 02:36:25.658248', 'step': 8942, 'epoch': 2} {'type': 'loss', 'content': 0.09152586758136749, 'timestamp': '2025-09-10 02:36:25.661353', 'step': 8943, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 3322488817984}, 'timestamp': '2025-09-10 02:36:25.691681', 'step': 8943, 'epoch': 2} {'type': 'loss', 'content': 0.08257564157247543, 'timestamp': '2025-09-10 02:36:25.715498', 'step': 8944, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 3322488817984}, 'timestamp': '2025-09-10 02:36:25.746292', 'step': 8944, 'epoch': 2} {'type': 'loss', 'content': 0.141976460814476, 'timestamp': '2025-09-10 02:36:25.748723', 'step': 8945, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 3322488817984}, 'timestamp': '2025-09-10 02:36:25.778949', 'step': 8945, 'epoch': 2} {'type': 'loss', 'content': 0.08908786624670029, 'timestamp': '2025-09-10 02:36:25.781405', 'step': 8946, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 128], 'flops': 3797092544000}, 'timestamp': '2025-09-10 02:36:25.812134', 'step': 8946, 'epoch': 2} {'type': 'loss', 'content': 0.07704219967126846, 'timestamp': '2025-09-10 02:36:25.814716', 'step': 8947, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 128], 'flops': 3797092544000}, 'timestamp': '2025-09-10 02:36:25.846088', 'step': 8947, 'epoch': 2} {'type': 'loss', 'content': 0.1298092007637024, 'timestamp': '2025-09-10 02:36:25.870497', 'step': 8948, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 160], 'flops': 4746299996032}, 'timestamp': '2025-09-10 02:36:25.901778', 'step': 8948, 'epoch': 2} {'type': 'loss', 'content': 0.1255015879869461, 'timestamp': '2025-09-10 02:36:25.904091', 'step': 8949, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 128], 'flops': 3797092544000}, 'timestamp': '2025-09-10 02:36:25.934238', 'step': 8949, 'epoch': 2} {'type': 'loss', 'content': 0.1683724969625473, 'timestamp': '2025-09-10 02:36:25.936166', 'step': 8950, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 128], 'flops': 3797092544000}, 'timestamp': '2025-09-10 02:36:25.966907', 'step': 8950, 'epoch': 2} {'type': 'loss', 'content': 0.09395317733287811, 'timestamp': '2025-09-10 02:36:25.969192', 'step': 8951, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 2847885091968}, 'timestamp': '2025-09-10 02:36:26.000996', 'step': 8951, 'epoch': 2} {'type': 'loss', 'content': 0.07714907079935074, 'timestamp': '2025-09-10 02:36:26.024527', 'step': 8952, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 2847885091968}, 'timestamp': '2025-09-10 02:36:26.054615', 'step': 8952, 'epoch': 2} {'type': 'loss', 'content': 0.12170238047838211, 'timestamp': '2025-09-10 02:36:26.056807', 'step': 8953, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 3322488817984}, 'timestamp': '2025-09-10 02:36:26.087495', 'step': 8953, 'epoch': 2} {'type': 'loss', 'content': 0.0716838389635086, 'timestamp': '2025-09-10 02:36:26.089573', 'step': 8954, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 128], 'flops': 3797092544000}, 'timestamp': '2025-09-10 02:36:26.120504', 'step': 8954, 'epoch': 2} {'type': 'loss', 'content': 0.11812766641378403, 'timestamp': '2025-09-10 02:36:26.122943', 'step': 8955, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 3322488817984}, 'timestamp': '2025-09-10 02:36:26.153302', 'step': 8955, 'epoch': 2} {'type': 'loss', 'content': 0.24793684482574463, 'timestamp': '2025-09-10 02:36:26.176891', 'step': 8956, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 2847885091968}, 'timestamp': '2025-09-10 02:36:26.206961', 'step': 8956, 'epoch': 2} {'type': 'loss', 'content': 0.12866896390914917, 'timestamp': '2025-09-10 02:36:26.209105', 'step': 8957, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 144], 'flops': 4271696270016}, 'timestamp': '2025-09-10 02:36:26.244370', 'step': 8957, 'epoch': 2} {'type': 'loss', 'content': 0.09582977741956711, 'timestamp': '2025-09-10 02:36:26.246816', 'step': 8958, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 80], 'flops': 2373281365952}, 'timestamp': '2025-09-10 02:36:26.276933', 'step': 8958, 'epoch': 2} {'type': 'loss', 'content': 0.061061739921569824, 'timestamp': '2025-09-10 02:36:26.279080', 'step': 8959, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 3322488817984}, 'timestamp': '2025-09-10 02:36:26.309233', 'step': 8959, 'epoch': 2} {'type': 'loss', 'content': 0.10200797766447067, 'timestamp': '2025-09-10 02:36:26.332859', 'step': 8960, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 3322488817984}, 'timestamp': '2025-09-10 02:36:26.362962', 'step': 8960, 'epoch': 2} {'type': 'loss', 'content': 0.15584403276443481, 'timestamp': '2025-09-10 02:36:26.366595', 'step': 8961, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 3322488817984}, 'timestamp': '2025-09-10 02:36:26.397740', 'step': 8961, 'epoch': 2} {'type': 'loss', 'content': 0.1481986939907074, 'timestamp': '2025-09-10 02:36:26.399973', 'step': 8962, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 3322488817984}, 'timestamp': '2025-09-10 02:36:26.430280', 'step': 8962, 'epoch': 2} {'type': 'loss', 'content': 0.11477188766002655, 'timestamp': '2025-09-10 02:36:26.432656', 'step': 8963, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 2847885091968}, 'timestamp': '2025-09-10 02:36:26.462666', 'step': 8963, 'epoch': 2} {'type': 'loss', 'content': 0.0878649652004242, 'timestamp': '2025-09-10 02:36:26.486240', 'step': 8964, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 2847885091968}, 'timestamp': '2025-09-10 02:36:26.517487', 'step': 8964, 'epoch': 2} {'type': 'loss', 'content': 0.13209018111228943, 'timestamp': '2025-09-10 02:36:26.520200', 'step': 8965, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 3322488817984}, 'timestamp': '2025-09-10 02:36:26.551287', 'step': 8965, 'epoch': 2} {'type': 'loss', 'content': 0.12327994406223297, 'timestamp': '2025-09-10 02:36:26.553757', 'step': 8966, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 2847885091968}, 'timestamp': '2025-09-10 02:36:26.583915', 'step': 8966, 'epoch': 2} {'type': 'loss', 'content': 0.059032902121543884, 'timestamp': '2025-09-10 02:36:26.586212', 'step': 8967, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 3322488817984}, 'timestamp': '2025-09-10 02:36:26.615824', 'step': 8967, 'epoch': 2} {'type': 'loss', 'content': 0.11782405525445938, 'timestamp': '2025-09-10 02:36:26.639702', 'step': 8968, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 128], 'flops': 3797092544000}, 'timestamp': '2025-09-10 02:36:26.670204', 'step': 8968, 'epoch': 2} {'type': 'loss', 'content': 0.16248883306980133, 'timestamp': '2025-09-10 02:36:26.672933', 'step': 8969, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 3322488817984}, 'timestamp': '2025-09-10 02:36:26.704014', 'step': 8969, 'epoch': 2} {'type': 'loss', 'content': 0.09739547967910767, 'timestamp': '2025-09-10 02:36:26.706446', 'step': 8970, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 176], 'flops': 5220903722048}, 'timestamp': '2025-09-10 02:36:26.738317', 'step': 8970, 'epoch': 2} {'type': 'loss', 'content': 0.07755469530820847, 'timestamp': '2025-09-10 02:36:26.742650', 'step': 8971, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 2847885091968}, 'timestamp': '2025-09-10 02:36:26.774131', 'step': 8971, 'epoch': 2} {'type': 'loss', 'content': 0.10882747173309326, 'timestamp': '2025-09-10 02:36:26.797661', 'step': 8972, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 2847885091968}, 'timestamp': '2025-09-10 02:36:26.827489', 'step': 8972, 'epoch': 2} {'type': 'loss', 'content': 0.10168888419866562, 'timestamp': '2025-09-10 02:36:26.830344', 'step': 8973, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 2847885091968}, 'timestamp': '2025-09-10 02:36:26.859558', 'step': 8973, 'epoch': 2} {'type': 'loss', 'content': 0.21188384294509888, 'timestamp': '2025-09-10 02:36:26.862300', 'step': 8974, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 80], 'flops': 2373281365952}, 'timestamp': '2025-09-10 02:36:26.892003', 'step': 8974, 'epoch': 2} {'type': 'loss', 'content': 0.06390053778886795, 'timestamp': '2025-09-10 02:36:26.894461', 'step': 8975, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 2847885091968}, 'timestamp': '2025-09-10 02:36:26.925458', 'step': 8975, 'epoch': 2} {'type': 'loss', 'content': 0.16714832186698914, 'timestamp': '2025-09-10 02:36:26.949018', 'step': 8976, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 2847885091968}, 'timestamp': '2025-09-10 02:36:26.980030', 'step': 8976, 'epoch': 2} {'type': 'loss', 'content': 0.06933111697435379, 'timestamp': '2025-09-10 02:36:26.982993', 'step': 8977, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 2847885091968}, 'timestamp': '2025-09-10 02:36:27.014136', 'step': 8977, 'epoch': 2} {'type': 'loss', 'content': 0.12716586887836456, 'timestamp': '2025-09-10 02:36:27.016678', 'step': 8978, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 144], 'flops': 4271696270016}, 'timestamp': '2025-09-10 02:36:27.047635', 'step': 8978, 'epoch': 2} {'type': 'loss', 'content': 0.23074117302894592, 'timestamp': '2025-09-10 02:36:27.049843', 'step': 8979, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 2847885091968}, 'timestamp': '2025-09-10 02:36:27.080395', 'step': 8979, 'epoch': 2} {'type': 'loss', 'content': 0.060344718396663666, 'timestamp': '2025-09-10 02:36:27.104040', 'step': 8980, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 2847885091968}, 'timestamp': '2025-09-10 02:36:27.135182', 'step': 8980, 'epoch': 2} {'type': 'loss', 'content': 0.027762077748775482, 'timestamp': '2025-09-10 02:36:27.137834', 'step': 8981, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 128], 'flops': 3797092544000}, 'timestamp': '2025-09-10 02:36:27.167978', 'step': 8981, 'epoch': 2} {'type': 'loss', 'content': 0.10159123688936234, 'timestamp': '2025-09-10 02:36:27.170283', 'step': 8982, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 160], 'flops': 4746299996032}, 'timestamp': '2025-09-10 02:36:27.201807', 'step': 8982, 'epoch': 2} {'type': 'loss', 'content': 0.10010021179914474, 'timestamp': '2025-09-10 02:36:27.204752', 'step': 8983, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 144], 'flops': 4271696270016}, 'timestamp': '2025-09-10 02:36:27.234888', 'step': 8983, 'epoch': 2} {'type': 'loss', 'content': 0.1201949194073677, 'timestamp': '2025-09-10 02:36:27.258398', 'step': 8984, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 3322488817984}, 'timestamp': '2025-09-10 02:36:27.290665', 'step': 8984, 'epoch': 2} {'type': 'loss', 'content': 0.15934903919696808, 'timestamp': '2025-09-10 02:36:27.293557', 'step': 8985, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 128], 'flops': 3797092544000}, 'timestamp': '2025-09-10 02:36:27.324167', 'step': 8985, 'epoch': 2} {'type': 'loss', 'content': 0.14466890692710876, 'timestamp': '2025-09-10 02:36:27.326429', 'step': 8986, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 2847885091968}, 'timestamp': '2025-09-10 02:36:27.356770', 'step': 8986, 'epoch': 2} {'type': 'loss', 'content': 0.12020126730203629, 'timestamp': '2025-09-10 02:36:27.358913', 'step': 8987, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 144], 'flops': 4271696270016}, 'timestamp': '2025-09-10 02:36:27.388480', 'step': 8987, 'epoch': 2} {'type': 'loss', 'content': 0.09025239199399948, 'timestamp': '2025-09-10 02:36:27.411643', 'step': 8988, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 2847885091968}, 'timestamp': '2025-09-10 02:36:27.441578', 'step': 8988, 'epoch': 2} {'type': 'loss', 'content': 0.1138446182012558, 'timestamp': '2025-09-10 02:36:27.443760', 'step': 8989, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 3322488817984}, 'timestamp': '2025-09-10 02:36:27.474018', 'step': 8989, 'epoch': 2} {'type': 'loss', 'content': 0.07857276499271393, 'timestamp': '2025-09-10 02:36:27.476343', 'step': 8990, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 3322488817984}, 'timestamp': '2025-09-10 02:36:27.507600', 'step': 8990, 'epoch': 2} {'type': 'loss', 'content': 0.1074017882347107, 'timestamp': '2025-09-10 02:36:27.509807', 'step': 8991, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 3322488817984}, 'timestamp': '2025-09-10 02:36:27.539108', 'step': 8991, 'epoch': 2} {'type': 'loss', 'content': 0.0992017537355423, 'timestamp': '2025-09-10 02:36:27.562461', 'step': 8992, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 128], 'flops': 3797092544000}, 'timestamp': '2025-09-10 02:36:27.593293', 'step': 8992, 'epoch': 2} {'type': 'loss', 'content': 0.10099725425243378, 'timestamp': '2025-09-10 02:36:27.595528', 'step': 8993, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 3322488817984}, 'timestamp': '2025-09-10 02:36:27.626447', 'step': 8993, 'epoch': 2} {'type': 'loss', 'content': 0.061618201434612274, 'timestamp': '2025-09-10 02:36:27.628429', 'step': 8994, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 2847885091968}, 'timestamp': '2025-09-10 02:36:27.658188', 'step': 8994, 'epoch': 2} {'type': 'loss', 'content': 0.14800997078418732, 'timestamp': '2025-09-10 02:36:27.660405', 'step': 8995, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 144], 'flops': 4271696270016}, 'timestamp': '2025-09-10 02:36:27.691243', 'step': 8995, 'epoch': 2} {'type': 'loss', 'content': 0.04972373694181442, 'timestamp': '2025-09-10 02:36:27.714712', 'step': 8996, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 2847885091968}, 'timestamp': '2025-09-10 02:36:27.744648', 'step': 8996, 'epoch': 2} {'type': 'loss', 'content': 0.06092807278037071, 'timestamp': '2025-09-10 02:36:27.746693', 'step': 8997, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 128], 'flops': 3797092544000}, 'timestamp': '2025-09-10 02:36:27.776758', 'step': 8997, 'epoch': 2} {'type': 'loss', 'content': 0.2250845730304718, 'timestamp': '2025-09-10 02:36:27.779037', 'step': 8998, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 3322488817984}, 'timestamp': '2025-09-10 02:36:27.809983', 'step': 8998, 'epoch': 2} {'type': 'loss', 'content': 0.10493310540914536, 'timestamp': '2025-09-10 02:36:27.812262', 'step': 8999, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 128], 'flops': 3797092544000}, 'timestamp': '2025-09-10 02:36:27.842124', 'step': 8999, 'epoch': 2} {'type': 'loss', 'content': 0.10862916707992554, 'timestamp': '2025-09-10 02:36:27.865701', 'step': 9000, 'epoch': 2} {'type': 'info', 'content': 'Checkpoint saved at step 9000', 'timestamp': '2025-09-10 02:36:34.453891', 'step': 9000, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 3322488817984}, 'timestamp': '2025-09-10 02:36:34.520463', 'step': 9000, 'epoch': 2} {'type': 'loss', 'content': 0.1783941090106964, 'timestamp': '2025-09-10 02:36:34.535719', 'step': 9001, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 128], 'flops': 3797092544000}, 'timestamp': '2025-09-10 02:36:34.598412', 'step': 9001, 'epoch': 2} {'type': 'loss', 'content': 0.11285953968763351, 'timestamp': '2025-09-10 02:36:34.619687', 'step': 9002, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 3322488817984}, 'timestamp': '2025-09-10 02:36:34.690704', 'step': 9002, 'epoch': 2} {'type': 'loss', 'content': 0.06873968988656998, 'timestamp': '2025-09-10 02:36:34.696861', 'step': 9003, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 3322488817984}, 'timestamp': '2025-09-10 02:36:34.728965', 'step': 9003, 'epoch': 2} {'type': 'loss', 'content': 0.09396054595708847, 'timestamp': '2025-09-10 02:36:34.754637', 'step': 9004, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 3322488817984}, 'timestamp': '2025-09-10 02:36:34.794170', 'step': 9004, 'epoch': 2} {'type': 'loss', 'content': 0.20126530528068542, 'timestamp': '2025-09-10 02:36:34.805136', 'step': 9005, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 3322488817984}, 'timestamp': '2025-09-10 02:36:34.850793', 'step': 9005, 'epoch': 2} {'type': 'loss', 'content': 0.047453515231609344, 'timestamp': '2025-09-10 02:36:34.853543', 'step': 9006, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 2847885091968}, 'timestamp': '2025-09-10 02:36:34.897043', 'step': 9006, 'epoch': 2} {'type': 'loss', 'content': 0.07798610627651215, 'timestamp': '2025-09-10 02:36:34.904849', 'step': 9007, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 128], 'flops': 3797092544000}, 'timestamp': '2025-09-10 02:36:34.951618', 'step': 9007, 'epoch': 2} {'type': 'loss', 'content': 0.08998748660087585, 'timestamp': '2025-09-10 02:36:34.975743', 'step': 9008, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 2847885091968}, 'timestamp': '2025-09-10 02:36:35.018928', 'step': 9008, 'epoch': 2} {'type': 'loss', 'content': 0.1369698941707611, 'timestamp': '2025-09-10 02:36:35.021817', 'step': 9009, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 2847885091968}, 'timestamp': '2025-09-10 02:36:35.057295', 'step': 9009, 'epoch': 2} {'type': 'loss', 'content': 0.08366616815328598, 'timestamp': '2025-09-10 02:36:35.062455', 'step': 9010, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 144], 'flops': 4271696270016}, 'timestamp': '2025-09-10 02:36:35.103750', 'step': 9010, 'epoch': 2} {'type': 'loss', 'content': 0.09434832632541656, 'timestamp': '2025-09-10 02:36:35.106239', 'step': 9011, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 3322488817984}, 'timestamp': '2025-09-10 02:36:35.136792', 'step': 9011, 'epoch': 2} {'type': 'loss', 'content': 0.032425034791231155, 'timestamp': '2025-09-10 02:36:35.161582', 'step': 9012, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 3322488817984}, 'timestamp': '2025-09-10 02:36:35.218947', 'step': 9012, 'epoch': 2} {'type': 'loss', 'content': 0.1401793211698532, 'timestamp': '2025-09-10 02:36:35.231995', 'step': 9013, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 2847885091968}, 'timestamp': '2025-09-10 02:36:35.300795', 'step': 9013, 'epoch': 2} {'type': 'loss', 'content': 0.13491061329841614, 'timestamp': '2025-09-10 02:36:35.317333', 'step': 9014, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 3322488817984}, 'timestamp': '2025-09-10 02:36:35.391499', 'step': 9014, 'epoch': 2} {'type': 'loss', 'content': 0.16878387331962585, 'timestamp': '2025-09-10 02:36:35.408574', 'step': 9015, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 2847885091968}, 'timestamp': '2025-09-10 02:36:35.482030', 'step': 9015, 'epoch': 2} {'type': 'loss', 'content': 0.13605353236198425, 'timestamp': '2025-09-10 02:36:35.518840', 'step': 9016, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 3322488817984}, 'timestamp': '2025-09-10 02:36:35.580877', 'step': 9016, 'epoch': 2} {'type': 'loss', 'content': 0.09820897877216339, 'timestamp': '2025-09-10 02:36:35.586005', 'step': 9017, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 3322488817984}, 'timestamp': '2025-09-10 02:36:35.628496', 'step': 9017, 'epoch': 2} {'type': 'loss', 'content': 0.062186844646930695, 'timestamp': '2025-09-10 02:36:35.634681', 'step': 9018, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 144], 'flops': 4271696270016}, 'timestamp': '2025-09-10 02:36:35.684768', 'step': 9018, 'epoch': 2} {'type': 'loss', 'content': 0.1122359037399292, 'timestamp': '2025-09-10 02:36:35.691292', 'step': 9019, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 3322488817984}, 'timestamp': '2025-09-10 02:36:35.734010', 'step': 9019, 'epoch': 2} {'type': 'loss', 'content': 0.12899115681648254, 'timestamp': '2025-09-10 02:36:35.761785', 'step': 9020, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 3322488817984}, 'timestamp': '2025-09-10 02:36:35.806408', 'step': 9020, 'epoch': 2} {'type': 'loss', 'content': 0.08491252362728119, 'timestamp': '2025-09-10 02:36:35.812574', 'step': 9021, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 2847885091968}, 'timestamp': '2025-09-10 02:36:35.857033', 'step': 9021, 'epoch': 2} {'type': 'loss', 'content': 0.054722025990486145, 'timestamp': '2025-09-10 02:36:35.859409', 'step': 9022, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 144], 'flops': 4271696270016}, 'timestamp': '2025-09-10 02:36:35.889808', 'step': 9022, 'epoch': 2} {'type': 'loss', 'content': 0.10426454246044159, 'timestamp': '2025-09-10 02:36:35.896034', 'step': 9023, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 3322488817984}, 'timestamp': '2025-09-10 02:36:35.940513', 'step': 9023, 'epoch': 2} {'type': 'loss', 'content': 0.058116886764764786, 'timestamp': '2025-09-10 02:36:35.975165', 'step': 9024, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 2847885091968}, 'timestamp': '2025-09-10 02:36:36.008508', 'step': 9024, 'epoch': 2} {'type': 'loss', 'content': 0.09116803854703903, 'timestamp': '2025-09-10 02:36:36.012847', 'step': 9025, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 128], 'flops': 3797092544000}, 'timestamp': '2025-09-10 02:36:36.056889', 'step': 9025, 'epoch': 2} {'type': 'loss', 'content': 0.14922259747982025, 'timestamp': '2025-09-10 02:36:36.060600', 'step': 9026, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 128], 'flops': 3797092544000}, 'timestamp': '2025-09-10 02:36:36.098368', 'step': 9026, 'epoch': 2} {'type': 'loss', 'content': 0.05793854966759682, 'timestamp': '2025-09-10 02:36:36.100765', 'step': 9027, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 3322488817984}, 'timestamp': '2025-09-10 02:36:36.131267', 'step': 9027, 'epoch': 2} {'type': 'loss', 'content': 0.10510382056236267, 'timestamp': '2025-09-10 02:36:36.155012', 'step': 9028, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 128], 'flops': 3797092544000}, 'timestamp': '2025-09-10 02:36:36.186232', 'step': 9028, 'epoch': 2} {'type': 'loss', 'content': 0.1352880895137787, 'timestamp': '2025-09-10 02:36:36.189311', 'step': 9029, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 2847885091968}, 'timestamp': '2025-09-10 02:36:36.225418', 'step': 9029, 'epoch': 2} {'type': 'loss', 'content': 0.14627140760421753, 'timestamp': '2025-09-10 02:36:36.238224', 'step': 9030, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 128], 'flops': 3797092544000}, 'timestamp': '2025-09-10 02:36:36.305294', 'step': 9030, 'epoch': 2} {'type': 'loss', 'content': 0.10293286293745041, 'timestamp': '2025-09-10 02:36:36.318132', 'step': 9031, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 2847885091968}, 'timestamp': '2025-09-10 02:36:36.376793', 'step': 9031, 'epoch': 2} {'type': 'loss', 'content': 0.22972825169563293, 'timestamp': '2025-09-10 02:36:36.404880', 'step': 9032, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 160], 'flops': 4746299996032}, 'timestamp': '2025-09-10 02:36:36.447181', 'step': 9032, 'epoch': 2} {'type': 'loss', 'content': 0.09317179024219513, 'timestamp': '2025-09-10 02:36:36.451279', 'step': 9033, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 176], 'flops': 5220903722048}, 'timestamp': '2025-09-10 02:36:36.486424', 'step': 9033, 'epoch': 2} {'type': 'loss', 'content': 0.07583257555961609, 'timestamp': '2025-09-10 02:36:36.491235', 'step': 9034, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 3322488817984}, 'timestamp': '2025-09-10 02:36:36.527696', 'step': 9034, 'epoch': 2} {'type': 'loss', 'content': 0.12258712947368622, 'timestamp': '2025-09-10 02:36:36.532241', 'step': 9035, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 2847885091968}, 'timestamp': '2025-09-10 02:36:36.568956', 'step': 9035, 'epoch': 2} {'type': 'loss', 'content': 0.1447390764951706, 'timestamp': '2025-09-10 02:36:36.595199', 'step': 9036, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 3322488817984}, 'timestamp': '2025-09-10 02:36:36.633203', 'step': 9036, 'epoch': 2} {'type': 'loss', 'content': 0.1502833217382431, 'timestamp': '2025-09-10 02:36:36.642996', 'step': 9037, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 3322488817984}, 'timestamp': '2025-09-10 02:36:36.697184', 'step': 9037, 'epoch': 2} {'type': 'loss', 'content': 0.11931837350130081, 'timestamp': '2025-09-10 02:36:36.706254', 'step': 9038, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 3322488817984}, 'timestamp': '2025-09-10 02:36:36.754544', 'step': 9038, 'epoch': 2} {'type': 'loss', 'content': 0.10619232058525085, 'timestamp': '2025-09-10 02:36:36.762581', 'step': 9039, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 3322488817984}, 'timestamp': '2025-09-10 02:36:36.826423', 'step': 9039, 'epoch': 2} {'type': 'loss', 'content': 0.0955212265253067, 'timestamp': '2025-09-10 02:36:36.861877', 'step': 9040, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 80], 'flops': 2373281365952}, 'timestamp': '2025-09-10 02:36:36.921900', 'step': 9040, 'epoch': 2} {'type': 'loss', 'content': 0.08581959456205368, 'timestamp': '2025-09-10 02:36:36.932913', 'step': 9041, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 128], 'flops': 3797092544000}, 'timestamp': '2025-09-10 02:36:36.996239', 'step': 9041, 'epoch': 2} {'type': 'loss', 'content': 0.1844097524881363, 'timestamp': '2025-09-10 02:36:37.009924', 'step': 9042, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 128], 'flops': 3797092544000}, 'timestamp': '2025-09-10 02:36:37.069407', 'step': 9042, 'epoch': 2} {'type': 'loss', 'content': 0.1672353595495224, 'timestamp': '2025-09-10 02:36:37.079420', 'step': 9043, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 128], 'flops': 3797092544000}, 'timestamp': '2025-09-10 02:36:37.135138', 'step': 9043, 'epoch': 2} {'type': 'loss', 'content': 0.07304684817790985, 'timestamp': '2025-09-10 02:36:37.165684', 'step': 9044, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 2847885091968}, 'timestamp': '2025-09-10 02:36:37.214923', 'step': 9044, 'epoch': 2} {'type': 'loss', 'content': 0.12626786530017853, 'timestamp': '2025-09-10 02:36:37.222595', 'step': 9045, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 2847885091968}, 'timestamp': '2025-09-10 02:36:37.270351', 'step': 9045, 'epoch': 2} {'type': 'loss', 'content': 0.1223444789648056, 'timestamp': '2025-09-10 02:36:37.276431', 'step': 9046, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 3322488817984}, 'timestamp': '2025-09-10 02:36:37.316998', 'step': 9046, 'epoch': 2} {'type': 'loss', 'content': 0.1562066674232483, 'timestamp': '2025-09-10 02:36:37.326314', 'step': 9047, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 128], 'flops': 3797092544000}, 'timestamp': '2025-09-10 02:36:37.370944', 'step': 9047, 'epoch': 2} {'type': 'loss', 'content': 0.08506309241056442, 'timestamp': '2025-09-10 02:36:37.398220', 'step': 9048, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 3322488817984}, 'timestamp': '2025-09-10 02:36:37.447372', 'step': 9048, 'epoch': 2} {'type': 'loss', 'content': 0.09027638286352158, 'timestamp': '2025-09-10 02:36:37.456261', 'step': 9049, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 128], 'flops': 3797092544000}, 'timestamp': '2025-09-10 02:36:37.508790', 'step': 9049, 'epoch': 2} {'type': 'loss', 'content': 0.16541679203510284, 'timestamp': '2025-09-10 02:36:37.515886', 'step': 9050, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 128], 'flops': 3797092544000}, 'timestamp': '2025-09-10 02:36:37.567867', 'step': 9050, 'epoch': 2} {'type': 'loss', 'content': 0.12451556324958801, 'timestamp': '2025-09-10 02:36:37.571075', 'step': 9051, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 3322488817984}, 'timestamp': '2025-09-10 02:36:37.604792', 'step': 9051, 'epoch': 2} {'type': 'loss', 'content': 0.17574675381183624, 'timestamp': '2025-09-10 02:36:37.629664', 'step': 9052, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 2847885091968}, 'timestamp': '2025-09-10 02:36:37.672469', 'step': 9052, 'epoch': 2} {'type': 'loss', 'content': 0.10204249620437622, 'timestamp': '2025-09-10 02:36:37.687361', 'step': 9053, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 2847885091968}, 'timestamp': '2025-09-10 02:36:37.761810', 'step': 9053, 'epoch': 2} {'type': 'loss', 'content': 0.18344195187091827, 'timestamp': '2025-09-10 02:36:37.779328', 'step': 9054, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 128], 'flops': 3797092544000}, 'timestamp': '2025-09-10 02:36:37.864639', 'step': 9054, 'epoch': 2} {'type': 'loss', 'content': 0.07631652057170868, 'timestamp': '2025-09-10 02:36:37.885268', 'step': 9055, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 3322488817984}, 'timestamp': '2025-09-10 02:36:37.953714', 'step': 9055, 'epoch': 2} {'type': 'loss', 'content': 0.12714341282844543, 'timestamp': '2025-09-10 02:36:37.983930', 'step': 9056, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 2847885091968}, 'timestamp': '2025-09-10 02:36:38.024031', 'step': 9056, 'epoch': 2} {'type': 'loss', 'content': 0.1495719999074936, 'timestamp': '2025-09-10 02:36:38.027401', 'step': 9057, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 2847885091968}, 'timestamp': '2025-09-10 02:36:38.068887', 'step': 9057, 'epoch': 2} {'type': 'loss', 'content': 0.08432367444038391, 'timestamp': '2025-09-10 02:36:38.075504', 'step': 9058, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 2847885091968}, 'timestamp': '2025-09-10 02:36:38.109713', 'step': 9058, 'epoch': 2} {'type': 'loss', 'content': 0.15711452066898346, 'timestamp': '2025-09-10 02:36:38.115509', 'step': 9059, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 3322488817984}, 'timestamp': '2025-09-10 02:36:38.152462', 'step': 9059, 'epoch': 2} {'type': 'loss', 'content': 0.14937882125377655, 'timestamp': '2025-09-10 02:36:38.177602', 'step': 9060, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 128], 'flops': 3797092544000}, 'timestamp': '2025-09-10 02:36:38.215486', 'step': 9060, 'epoch': 2} {'type': 'loss', 'content': 0.09240841865539551, 'timestamp': '2025-09-10 02:36:38.219081', 'step': 9061, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 128], 'flops': 3797092544000}, 'timestamp': '2025-09-10 02:36:38.252808', 'step': 9061, 'epoch': 2} {'type': 'loss', 'content': 0.12861138582229614, 'timestamp': '2025-09-10 02:36:38.256878', 'step': 9062, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 176], 'flops': 5220903722048}, 'timestamp': '2025-09-10 02:36:38.288067', 'step': 9062, 'epoch': 2} {'type': 'loss', 'content': 0.09511895477771759, 'timestamp': '2025-09-10 02:36:38.292455', 'step': 9063, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 128], 'flops': 3797092544000}, 'timestamp': '2025-09-10 02:36:38.323518', 'step': 9063, 'epoch': 2} {'type': 'loss', 'content': 0.1203121468424797, 'timestamp': '2025-09-10 02:36:38.347390', 'step': 9064, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 3322488817984}, 'timestamp': '2025-09-10 02:36:38.377893', 'step': 9064, 'epoch': 2} {'type': 'loss', 'content': 0.08076813071966171, 'timestamp': '2025-09-10 02:36:38.380518', 'step': 9065, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 2847885091968}, 'timestamp': '2025-09-10 02:36:38.411389', 'step': 9065, 'epoch': 2} {'type': 'loss', 'content': 0.1063050702214241, 'timestamp': '2025-09-10 02:36:38.413622', 'step': 9066, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 144], 'flops': 4271696270016}, 'timestamp': '2025-09-10 02:36:38.444194', 'step': 9066, 'epoch': 2} {'type': 'loss', 'content': 0.05757710337638855, 'timestamp': '2025-09-10 02:36:38.449230', 'step': 9067, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 128], 'flops': 3797092544000}, 'timestamp': '2025-09-10 02:36:38.480680', 'step': 9067, 'epoch': 2} {'type': 'loss', 'content': 0.1549721658229828, 'timestamp': '2025-09-10 02:36:38.505288', 'step': 9068, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 128], 'flops': 3797092544000}, 'timestamp': '2025-09-10 02:36:38.536010', 'step': 9068, 'epoch': 2} {'type': 'loss', 'content': 0.11537129431962967, 'timestamp': '2025-09-10 02:36:38.538840', 'step': 9069, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 2847885091968}, 'timestamp': '2025-09-10 02:36:38.571447', 'step': 9069, 'epoch': 2} {'type': 'loss', 'content': 0.09809904545545578, 'timestamp': '2025-09-10 02:36:38.573694', 'step': 9070, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 2847885091968}, 'timestamp': '2025-09-10 02:36:38.603476', 'step': 9070, 'epoch': 2} {'type': 'loss', 'content': 0.1615365892648697, 'timestamp': '2025-09-10 02:36:38.606055', 'step': 9071, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 128], 'flops': 3797092544000}, 'timestamp': '2025-09-10 02:36:38.635792', 'step': 9071, 'epoch': 2} {'type': 'loss', 'content': 0.10402913391590118, 'timestamp': '2025-09-10 02:36:38.659509', 'step': 9072, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 3322488817984}, 'timestamp': '2025-09-10 02:36:38.690218', 'step': 9072, 'epoch': 2} {'type': 'loss', 'content': 0.10560312122106552, 'timestamp': '2025-09-10 02:36:38.692918', 'step': 9073, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 128], 'flops': 3797092544000}, 'timestamp': '2025-09-10 02:36:38.729327', 'step': 9073, 'epoch': 2} {'type': 'loss', 'content': 0.09013460576534271, 'timestamp': '2025-09-10 02:36:38.733359', 'step': 9074, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 3322488817984}, 'timestamp': '2025-09-10 02:36:38.768519', 'step': 9074, 'epoch': 2} {'type': 'loss', 'content': 0.2513890564441681, 'timestamp': '2025-09-10 02:36:38.771530', 'step': 9075, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 3322488817984}, 'timestamp': '2025-09-10 02:36:38.804264', 'step': 9075, 'epoch': 2} {'type': 'loss', 'content': 0.09681516140699387, 'timestamp': '2025-09-10 02:36:38.827876', 'step': 9076, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 3322488817984}, 'timestamp': '2025-09-10 02:36:38.860935', 'step': 9076, 'epoch': 2} {'type': 'loss', 'content': 0.1331818550825119, 'timestamp': '2025-09-10 02:36:38.872662', 'step': 9077, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 3322488817984}, 'timestamp': '2025-09-10 02:36:38.933413', 'step': 9077, 'epoch': 2} {'type': 'loss', 'content': 0.12469075620174408, 'timestamp': '2025-09-10 02:36:38.946884', 'step': 9078, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 2847885091968}, 'timestamp': '2025-09-10 02:36:38.988904', 'step': 9078, 'epoch': 2} {'type': 'loss', 'content': 0.08022188395261765, 'timestamp': '2025-09-10 02:36:38.999669', 'step': 9079, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 128], 'flops': 3797092544000}, 'timestamp': '2025-09-10 02:36:39.058208', 'step': 9079, 'epoch': 2} {'type': 'loss', 'content': 0.09404603391885757, 'timestamp': '2025-09-10 02:36:39.082651', 'step': 9080, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 2847885091968}, 'timestamp': '2025-09-10 02:36:39.126562', 'step': 9080, 'epoch': 2} {'type': 'loss', 'content': 0.15926460921764374, 'timestamp': '2025-09-10 02:36:39.131508', 'step': 9081, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 2847885091968}, 'timestamp': '2025-09-10 02:36:39.173180', 'step': 9081, 'epoch': 2} {'type': 'loss', 'content': 0.11353466659784317, 'timestamp': '2025-09-10 02:36:39.177450', 'step': 9082, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 160], 'flops': 4746299996032}, 'timestamp': '2025-09-10 02:36:39.212397', 'step': 9082, 'epoch': 2} {'type': 'loss', 'content': 0.13709786534309387, 'timestamp': '2025-09-10 02:36:39.216573', 'step': 9083, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 3322488817984}, 'timestamp': '2025-09-10 02:36:39.252318', 'step': 9083, 'epoch': 2} {'type': 'loss', 'content': 0.15983770787715912, 'timestamp': '2025-09-10 02:36:39.278380', 'step': 9084, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 3322488817984}, 'timestamp': '2025-09-10 02:36:39.333691', 'step': 9084, 'epoch': 2} {'type': 'loss', 'content': 0.0697481706738472, 'timestamp': '2025-09-10 02:36:39.338920', 'step': 9085, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 3322488817984}, 'timestamp': '2025-09-10 02:36:39.381525', 'step': 9085, 'epoch': 2} {'type': 'loss', 'content': 0.09638695418834686, 'timestamp': '2025-09-10 02:36:39.385748', 'step': 9086, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 144], 'flops': 4271696270016}, 'timestamp': '2025-09-10 02:36:39.419546', 'step': 9086, 'epoch': 2} {'type': 'loss', 'content': 0.1134919598698616, 'timestamp': '2025-09-10 02:36:39.423569', 'step': 9087, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 3322488817984}, 'timestamp': '2025-09-10 02:36:39.455227', 'step': 9087, 'epoch': 2} {'type': 'loss', 'content': 0.08397932350635529, 'timestamp': '2025-09-10 02:36:39.478449', 'step': 9088, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 3322488817984}, 'timestamp': '2025-09-10 02:36:39.514886', 'step': 9088, 'epoch': 2} {'type': 'loss', 'content': 0.09365825355052948, 'timestamp': '2025-09-10 02:36:39.521598', 'step': 9089, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 80], 'flops': 2373281365952}, 'timestamp': '2025-09-10 02:36:39.569170', 'step': 9089, 'epoch': 2} {'type': 'loss', 'content': 0.16581493616104126, 'timestamp': '2025-09-10 02:36:39.575201', 'step': 9090, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 2847885091968}, 'timestamp': '2025-09-10 02:36:39.614242', 'step': 9090, 'epoch': 2} {'type': 'loss', 'content': 0.05000414699316025, 'timestamp': '2025-09-10 02:36:39.616495', 'step': 9091, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 144], 'flops': 4271696270016}, 'timestamp': '2025-09-10 02:36:39.646566', 'step': 9091, 'epoch': 2} {'type': 'loss', 'content': 0.10877608507871628, 'timestamp': '2025-09-10 02:36:39.670659', 'step': 9092, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 144], 'flops': 4271696270016}, 'timestamp': '2025-09-10 02:36:39.704715', 'step': 9092, 'epoch': 2} {'type': 'loss', 'content': 0.08938884735107422, 'timestamp': '2025-09-10 02:36:39.715621', 'step': 9093, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 3322488817984}, 'timestamp': '2025-09-10 02:36:39.781736', 'step': 9093, 'epoch': 2} {'type': 'loss', 'content': 0.1448574662208557, 'timestamp': '2025-09-10 02:36:39.798426', 'step': 9094, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 3322488817984}, 'timestamp': '2025-09-10 02:36:39.836726', 'step': 9094, 'epoch': 2} {'type': 'loss', 'content': 0.10812689363956451, 'timestamp': '2025-09-10 02:36:39.840221', 'step': 9095, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 2847885091968}, 'timestamp': '2025-09-10 02:36:39.873571', 'step': 9095, 'epoch': 2} {'type': 'loss', 'content': 0.1082051545381546, 'timestamp': '2025-09-10 02:36:39.898414', 'step': 9096, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 2847885091968}, 'timestamp': '2025-09-10 02:36:39.933133', 'step': 9096, 'epoch': 2} {'type': 'loss', 'content': 0.16236437857151031, 'timestamp': '2025-09-10 02:36:39.936202', 'step': 9097, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 2847885091968}, 'timestamp': '2025-09-10 02:36:39.969840', 'step': 9097, 'epoch': 2} {'type': 'loss', 'content': 0.23026703298091888, 'timestamp': '2025-09-10 02:36:39.975854', 'step': 9098, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 3322488817984}, 'timestamp': '2025-09-10 02:36:40.021044', 'step': 9098, 'epoch': 2} {'type': 'loss', 'content': 0.19870492815971375, 'timestamp': '2025-09-10 02:36:40.031427', 'step': 9099, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 128], 'flops': 3797092544000}, 'timestamp': '2025-09-10 02:36:40.074210', 'step': 9099, 'epoch': 2} {'type': 'loss', 'content': 0.11184033751487732, 'timestamp': '2025-09-10 02:36:40.101168', 'step': 9100, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 3322488817984}, 'timestamp': '2025-09-10 02:36:40.143076', 'step': 9100, 'epoch': 2} {'type': 'loss', 'content': 0.196707084774971, 'timestamp': '2025-09-10 02:36:40.154578', 'step': 9101, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 128], 'flops': 3797092544000}, 'timestamp': '2025-09-10 02:36:40.205686', 'step': 9101, 'epoch': 2} {'type': 'loss', 'content': 0.08916955441236496, 'timestamp': '2025-09-10 02:36:40.222581', 'step': 9102, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 2847885091968}, 'timestamp': '2025-09-10 02:36:40.284250', 'step': 9102, 'epoch': 2} {'type': 'loss', 'content': 0.11006501317024231, 'timestamp': '2025-09-10 02:36:40.301288', 'step': 9103, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 128], 'flops': 3797092544000}, 'timestamp': '2025-09-10 02:36:40.372072', 'step': 9103, 'epoch': 2} {'type': 'loss', 'content': 0.18941950798034668, 'timestamp': '2025-09-10 02:36:40.398507', 'step': 9104, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 128], 'flops': 3797092544000}, 'timestamp': '2025-09-10 02:36:40.438801', 'step': 9104, 'epoch': 2} {'type': 'loss', 'content': 0.09539035707712173, 'timestamp': '2025-09-10 02:36:40.442077', 'step': 9105, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 2847885091968}, 'timestamp': '2025-09-10 02:36:40.474461', 'step': 9105, 'epoch': 2} {'type': 'loss', 'content': 0.049297742545604706, 'timestamp': '2025-09-10 02:36:40.481404', 'step': 9106, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 2847885091968}, 'timestamp': '2025-09-10 02:36:40.521000', 'step': 9106, 'epoch': 2} {'type': 'loss', 'content': 0.06605300307273865, 'timestamp': '2025-09-10 02:36:40.527255', 'step': 9107, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 160], 'flops': 4746299996032}, 'timestamp': '2025-09-10 02:36:40.568858', 'step': 9107, 'epoch': 2} {'type': 'loss', 'content': 0.10969076305627823, 'timestamp': '2025-09-10 02:36:40.592446', 'step': 9108, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 2847885091968}, 'timestamp': '2025-09-10 02:36:40.623255', 'step': 9108, 'epoch': 2} {'type': 'loss', 'content': 0.11896654218435287, 'timestamp': '2025-09-10 02:36:40.625532', 'step': 9109, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 2847885091968}, 'timestamp': '2025-09-10 02:36:40.655489', 'step': 9109, 'epoch': 2} {'type': 'loss', 'content': 0.14534714818000793, 'timestamp': '2025-09-10 02:36:40.657692', 'step': 9110, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 2847885091968}, 'timestamp': '2025-09-10 02:36:40.687372', 'step': 9110, 'epoch': 2} {'type': 'loss', 'content': 0.13290657103061676, 'timestamp': '2025-09-10 02:36:40.689535', 'step': 9111, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 2847885091968}, 'timestamp': '2025-09-10 02:36:40.721389', 'step': 9111, 'epoch': 2} {'type': 'loss', 'content': 0.059916649013757706, 'timestamp': '2025-09-10 02:36:40.745034', 'step': 9112, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 128], 'flops': 3797092544000}, 'timestamp': '2025-09-10 02:36:40.774884', 'step': 9112, 'epoch': 2} {'type': 'loss', 'content': 0.15637293457984924, 'timestamp': '2025-09-10 02:36:40.777110', 'step': 9113, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 2847885091968}, 'timestamp': '2025-09-10 02:36:40.806970', 'step': 9113, 'epoch': 2} {'type': 'loss', 'content': 0.06951624155044556, 'timestamp': '2025-09-10 02:36:40.809568', 'step': 9114, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 2847885091968}, 'timestamp': '2025-09-10 02:36:40.841186', 'step': 9114, 'epoch': 2} {'type': 'loss', 'content': 0.15860576927661896, 'timestamp': '2025-09-10 02:36:40.843422', 'step': 9115, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 128], 'flops': 3797092544000}, 'timestamp': '2025-09-10 02:36:40.874765', 'step': 9115, 'epoch': 2} {'type': 'loss', 'content': 0.11558939516544342, 'timestamp': '2025-09-10 02:36:40.900684', 'step': 9116, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 3322488817984}, 'timestamp': '2025-09-10 02:36:40.930510', 'step': 9116, 'epoch': 2} {'type': 'loss', 'content': 0.07543586939573288, 'timestamp': '2025-09-10 02:36:40.933348', 'step': 9117, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 128], 'flops': 3797092544000}, 'timestamp': '2025-09-10 02:36:40.965179', 'step': 9117, 'epoch': 2} {'type': 'loss', 'content': 0.08227606862783432, 'timestamp': '2025-09-10 02:36:40.969279', 'step': 9118, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 2847885091968}, 'timestamp': '2025-09-10 02:36:40.999594', 'step': 9118, 'epoch': 2} {'type': 'loss', 'content': 0.11242401599884033, 'timestamp': '2025-09-10 02:36:41.002228', 'step': 9119, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 2847885091968}, 'timestamp': '2025-09-10 02:36:41.033386', 'step': 9119, 'epoch': 2} {'type': 'loss', 'content': 0.16376842558383942, 'timestamp': '2025-09-10 02:36:41.057010', 'step': 9120, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 80], 'flops': 2373281365952}, 'timestamp': '2025-09-10 02:36:41.088780', 'step': 9120, 'epoch': 2} {'type': 'loss', 'content': 0.05547216907143593, 'timestamp': '2025-09-10 02:36:41.090964', 'step': 9121, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 144], 'flops': 4271696270016}, 'timestamp': '2025-09-10 02:36:41.120928', 'step': 9121, 'epoch': 2} {'type': 'loss', 'content': 0.131265789270401, 'timestamp': '2025-09-10 02:36:41.123243', 'step': 9122, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 2847885091968}, 'timestamp': '2025-09-10 02:36:41.153213', 'step': 9122, 'epoch': 2} {'type': 'loss', 'content': 0.07794239372015, 'timestamp': '2025-09-10 02:36:41.155809', 'step': 9123, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 160], 'flops': 4746299996032}, 'timestamp': '2025-09-10 02:36:41.186718', 'step': 9123, 'epoch': 2} {'type': 'loss', 'content': 0.09904265403747559, 'timestamp': '2025-09-10 02:36:41.210497', 'step': 9124, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 128], 'flops': 3797092544000}, 'timestamp': '2025-09-10 02:36:41.243717', 'step': 9124, 'epoch': 2} {'type': 'loss', 'content': 0.09250012785196304, 'timestamp': '2025-09-10 02:36:41.245970', 'step': 9125, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 2847885091968}, 'timestamp': '2025-09-10 02:36:41.276432', 'step': 9125, 'epoch': 2} {'type': 'loss', 'content': 0.09469614177942276, 'timestamp': '2025-09-10 02:36:41.280851', 'step': 9126, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 3322488817984}, 'timestamp': '2025-09-10 02:36:41.311417', 'step': 9126, 'epoch': 2} {'type': 'loss', 'content': 0.16531330347061157, 'timestamp': '2025-09-10 02:36:41.313563', 'step': 9127, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 2847885091968}, 'timestamp': '2025-09-10 02:36:41.344920', 'step': 9127, 'epoch': 2} {'type': 'loss', 'content': 0.14149296283721924, 'timestamp': '2025-09-10 02:36:41.368483', 'step': 9128, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 128], 'flops': 3797092544000}, 'timestamp': '2025-09-10 02:36:41.399043', 'step': 9128, 'epoch': 2} {'type': 'loss', 'content': 0.043301910161972046, 'timestamp': '2025-09-10 02:36:41.401453', 'step': 9129, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 128], 'flops': 3797092544000}, 'timestamp': '2025-09-10 02:36:41.431350', 'step': 9129, 'epoch': 2} {'type': 'loss', 'content': 0.10318364948034286, 'timestamp': '2025-09-10 02:36:41.433725', 'step': 9130, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 3322488817984}, 'timestamp': '2025-09-10 02:36:41.465252', 'step': 9130, 'epoch': 2} {'type': 'loss', 'content': 0.13643096387386322, 'timestamp': '2025-09-10 02:36:41.467628', 'step': 9131, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 144], 'flops': 4271696270016}, 'timestamp': '2025-09-10 02:36:41.498744', 'step': 9131, 'epoch': 2} {'type': 'loss', 'content': 0.13033096492290497, 'timestamp': '2025-09-10 02:36:41.522292', 'step': 9132, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 160], 'flops': 4746299996032}, 'timestamp': '2025-09-10 02:36:41.552632', 'step': 9132, 'epoch': 2} {'type': 'loss', 'content': 0.09229741245508194, 'timestamp': '2025-09-10 02:36:41.555140', 'step': 9133, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 128], 'flops': 3797092544000}, 'timestamp': '2025-09-10 02:36:41.585721', 'step': 9133, 'epoch': 2} {'type': 'loss', 'content': 0.09833836555480957, 'timestamp': '2025-09-10 02:36:41.588420', 'step': 9134, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 2847885091968}, 'timestamp': '2025-09-10 02:36:41.620153', 'step': 9134, 'epoch': 2} {'type': 'loss', 'content': 0.16784264147281647, 'timestamp': '2025-09-10 02:36:41.623980', 'step': 9135, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 3322488817984}, 'timestamp': '2025-09-10 02:36:41.655662', 'step': 9135, 'epoch': 2} {'type': 'loss', 'content': 0.10944148898124695, 'timestamp': '2025-09-10 02:36:41.678988', 'step': 9136, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 3322488817984}, 'timestamp': '2025-09-10 02:36:41.708909', 'step': 9136, 'epoch': 2} {'type': 'loss', 'content': 0.07307914644479752, 'timestamp': '2025-09-10 02:36:41.711370', 'step': 9137, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 3322488817984}, 'timestamp': '2025-09-10 02:36:41.741227', 'step': 9137, 'epoch': 2} {'type': 'loss', 'content': 0.11256805807352066, 'timestamp': '2025-09-10 02:36:41.743613', 'step': 9138, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 208], 'flops': 6170111174080}, 'timestamp': '2025-09-10 02:36:41.774267', 'step': 9138, 'epoch': 2} {'type': 'loss', 'content': 0.09677985310554504, 'timestamp': '2025-09-10 02:36:41.781437', 'step': 9139, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 3322488817984}, 'timestamp': '2025-09-10 02:36:41.810997', 'step': 9139, 'epoch': 2} {'type': 'loss', 'content': 0.14884509146213531, 'timestamp': '2025-09-10 02:36:41.834747', 'step': 9140, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 128], 'flops': 3797092544000}, 'timestamp': '2025-09-10 02:36:41.865858', 'step': 9140, 'epoch': 2} {'type': 'loss', 'content': 0.09647420048713684, 'timestamp': '2025-09-10 02:36:41.869400', 'step': 9141, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 128], 'flops': 3797092544000}, 'timestamp': '2025-09-10 02:36:41.902402', 'step': 9141, 'epoch': 2} {'type': 'loss', 'content': 0.062475986778736115, 'timestamp': '2025-09-10 02:36:41.904592', 'step': 9142, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 80], 'flops': 2373281365952}, 'timestamp': '2025-09-10 02:36:41.935209', 'step': 9142, 'epoch': 2} {'type': 'loss', 'content': 0.08777176588773727, 'timestamp': '2025-09-10 02:36:41.937583', 'step': 9143, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 3322488817984}, 'timestamp': '2025-09-10 02:36:41.968188', 'step': 9143, 'epoch': 2} {'type': 'loss', 'content': 0.12402687966823578, 'timestamp': '2025-09-10 02:36:41.991962', 'step': 9144, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 3322488817984}, 'timestamp': '2025-09-10 02:36:42.022053', 'step': 9144, 'epoch': 2} {'type': 'loss', 'content': 0.19698718190193176, 'timestamp': '2025-09-10 02:36:42.024922', 'step': 9145, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 3322488817984}, 'timestamp': '2025-09-10 02:36:42.057243', 'step': 9145, 'epoch': 2} {'type': 'loss', 'content': 0.06147770211100578, 'timestamp': '2025-09-10 02:36:42.060943', 'step': 9146, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 2847885091968}, 'timestamp': '2025-09-10 02:36:42.092895', 'step': 9146, 'epoch': 2} {'type': 'loss', 'content': 0.20216386020183563, 'timestamp': '2025-09-10 02:36:42.097529', 'step': 9147, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 2847885091968}, 'timestamp': '2025-09-10 02:36:42.136041', 'step': 9147, 'epoch': 2} {'type': 'loss', 'content': 0.044197771698236465, 'timestamp': '2025-09-10 02:36:42.160937', 'step': 9148, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 3322488817984}, 'timestamp': '2025-09-10 02:36:42.191155', 'step': 9148, 'epoch': 2} {'type': 'loss', 'content': 0.06189211830496788, 'timestamp': '2025-09-10 02:36:42.193531', 'step': 9149, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 128], 'flops': 3797092544000}, 'timestamp': '2025-09-10 02:36:42.223572', 'step': 9149, 'epoch': 2} {'type': 'loss', 'content': 0.14867964386940002, 'timestamp': '2025-09-10 02:36:42.226841', 'step': 9150, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 80], 'flops': 2373281365952}, 'timestamp': '2025-09-10 02:36:42.258579', 'step': 9150, 'epoch': 2} {'type': 'loss', 'content': 0.19364164769649506, 'timestamp': '2025-09-10 02:36:42.261088', 'step': 9151, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 2847885091968}, 'timestamp': '2025-09-10 02:36:42.290803', 'step': 9151, 'epoch': 2} {'type': 'loss', 'content': 0.09683743864297867, 'timestamp': '2025-09-10 02:36:42.317461', 'step': 9152, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 144], 'flops': 4271696270016}, 'timestamp': '2025-09-10 02:36:42.347855', 'step': 9152, 'epoch': 2} {'type': 'loss', 'content': 0.1761121153831482, 'timestamp': '2025-09-10 02:36:42.350490', 'step': 9153, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 128], 'flops': 3797092544000}, 'timestamp': '2025-09-10 02:36:42.382444', 'step': 9153, 'epoch': 2} {'type': 'loss', 'content': 0.09345682710409164, 'timestamp': '2025-09-10 02:36:42.384820', 'step': 9154, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 160], 'flops': 4746299996032}, 'timestamp': '2025-09-10 02:36:42.416253', 'step': 9154, 'epoch': 2} {'type': 'loss', 'content': 0.11394873261451721, 'timestamp': '2025-09-10 02:36:42.418995', 'step': 9155, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 128], 'flops': 3797092544000}, 'timestamp': '2025-09-10 02:36:42.448893', 'step': 9155, 'epoch': 2} {'type': 'loss', 'content': 0.07597815990447998, 'timestamp': '2025-09-10 02:36:42.472172', 'step': 9156, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 144], 'flops': 4271696270016}, 'timestamp': '2025-09-10 02:36:42.503221', 'step': 9156, 'epoch': 2} {'type': 'loss', 'content': 0.10835560411214828, 'timestamp': '2025-09-10 02:36:42.505702', 'step': 9157, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 2847885091968}, 'timestamp': '2025-09-10 02:36:42.536297', 'step': 9157, 'epoch': 2} {'type': 'loss', 'content': 0.0822434052824974, 'timestamp': '2025-09-10 02:36:42.538651', 'step': 9158, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 144], 'flops': 4271696270016}, 'timestamp': '2025-09-10 02:36:42.569736', 'step': 9158, 'epoch': 2} {'type': 'loss', 'content': 0.05802398920059204, 'timestamp': '2025-09-10 02:36:42.572123', 'step': 9159, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 144], 'flops': 4271696270016}, 'timestamp': '2025-09-10 02:36:42.602267', 'step': 9159, 'epoch': 2} {'type': 'loss', 'content': 0.1816273033618927, 'timestamp': '2025-09-10 02:36:42.625616', 'step': 9160, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 80], 'flops': 2373281365952}, 'timestamp': '2025-09-10 02:36:42.655747', 'step': 9160, 'epoch': 2} {'type': 'loss', 'content': 0.050170909613370895, 'timestamp': '2025-09-10 02:36:42.658223', 'step': 9161, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 128], 'flops': 3797092544000}, 'timestamp': '2025-09-10 02:36:42.688125', 'step': 9161, 'epoch': 2} {'type': 'loss', 'content': 0.09943225234746933, 'timestamp': '2025-09-10 02:36:42.690613', 'step': 9162, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 2847885091968}, 'timestamp': '2025-09-10 02:36:42.720485', 'step': 9162, 'epoch': 2} {'type': 'loss', 'content': 0.0640462338924408, 'timestamp': '2025-09-10 02:36:42.722801', 'step': 9163, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 3322488817984}, 'timestamp': '2025-09-10 02:36:42.752454', 'step': 9163, 'epoch': 2} {'type': 'loss', 'content': 0.0670652911067009, 'timestamp': '2025-09-10 02:36:42.776613', 'step': 9164, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 3322488817984}, 'timestamp': '2025-09-10 02:36:42.807233', 'step': 9164, 'epoch': 2} {'type': 'loss', 'content': 0.2179655134677887, 'timestamp': '2025-09-10 02:36:42.810253', 'step': 9165, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 3322488817984}, 'timestamp': '2025-09-10 02:36:42.840224', 'step': 9165, 'epoch': 2} {'type': 'loss', 'content': 0.13263940811157227, 'timestamp': '2025-09-10 02:36:42.842870', 'step': 9166, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 160], 'flops': 4746299996032}, 'timestamp': '2025-09-10 02:36:42.873572', 'step': 9166, 'epoch': 2} {'type': 'loss', 'content': 0.08187182992696762, 'timestamp': '2025-09-10 02:36:42.876315', 'step': 9167, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 128], 'flops': 3797092544000}, 'timestamp': '2025-09-10 02:36:42.906666', 'step': 9167, 'epoch': 2} {'type': 'loss', 'content': 0.09445496648550034, 'timestamp': '2025-09-10 02:36:42.930279', 'step': 9168, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 144], 'flops': 4271696270016}, 'timestamp': '2025-09-10 02:36:42.961056', 'step': 9168, 'epoch': 2} {'type': 'loss', 'content': 0.10388058423995972, 'timestamp': '2025-09-10 02:36:42.963311', 'step': 9169, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 128], 'flops': 3797092544000}, 'timestamp': '2025-09-10 02:36:42.992947', 'step': 9169, 'epoch': 2} {'type': 'loss', 'content': 0.10823845118284225, 'timestamp': '2025-09-10 02:36:42.995267', 'step': 9170, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 176], 'flops': 5220903722048}, 'timestamp': '2025-09-10 02:36:43.026597', 'step': 9170, 'epoch': 2} {'type': 'loss', 'content': 0.09393221884965897, 'timestamp': '2025-09-10 02:36:43.030970', 'step': 9171, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 2847885091968}, 'timestamp': '2025-09-10 02:36:43.061450', 'step': 9171, 'epoch': 2} {'type': 'loss', 'content': 0.1264001727104187, 'timestamp': '2025-09-10 02:36:43.085187', 'step': 9172, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 2847885091968}, 'timestamp': '2025-09-10 02:36:43.115666', 'step': 9172, 'epoch': 2} {'type': 'loss', 'content': 0.04475034028291702, 'timestamp': '2025-09-10 02:36:43.117867', 'step': 9173, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 3322488817984}, 'timestamp': '2025-09-10 02:36:43.147809', 'step': 9173, 'epoch': 2} {'type': 'loss', 'content': 0.210571750998497, 'timestamp': '2025-09-10 02:36:43.150101', 'step': 9174, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 128], 'flops': 3797092544000}, 'timestamp': '2025-09-10 02:36:43.180324', 'step': 9174, 'epoch': 2} {'type': 'loss', 'content': 0.24244220554828644, 'timestamp': '2025-09-10 02:36:43.182587', 'step': 9175, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 3322488817984}, 'timestamp': '2025-09-10 02:36:43.212920', 'step': 9175, 'epoch': 2} {'type': 'loss', 'content': 0.06327902525663376, 'timestamp': '2025-09-10 02:36:43.236858', 'step': 9176, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 2847885091968}, 'timestamp': '2025-09-10 02:36:43.269084', 'step': 9176, 'epoch': 2} {'type': 'loss', 'content': 0.06803338974714279, 'timestamp': '2025-09-10 02:36:43.271362', 'step': 9177, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 3322488817984}, 'timestamp': '2025-09-10 02:36:43.302326', 'step': 9177, 'epoch': 2} {'type': 'loss', 'content': 0.11553004384040833, 'timestamp': '2025-09-10 02:36:43.304609', 'step': 9178, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 128], 'flops': 3797092544000}, 'timestamp': '2025-09-10 02:36:43.334978', 'step': 9178, 'epoch': 2} {'type': 'loss', 'content': 0.26611924171447754, 'timestamp': '2025-09-10 02:36:43.337368', 'step': 9179, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 2847885091968}, 'timestamp': '2025-09-10 02:36:43.367634', 'step': 9179, 'epoch': 2} {'type': 'loss', 'content': 0.07980990409851074, 'timestamp': '2025-09-10 02:36:43.391130', 'step': 9180, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 128], 'flops': 3797092544000}, 'timestamp': '2025-09-10 02:36:43.421120', 'step': 9180, 'epoch': 2} {'type': 'loss', 'content': 0.06121399253606796, 'timestamp': '2025-09-10 02:36:43.423446', 'step': 9181, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 2847885091968}, 'timestamp': '2025-09-10 02:36:43.453358', 'step': 9181, 'epoch': 2} {'type': 'loss', 'content': 0.08668877929449081, 'timestamp': '2025-09-10 02:36:43.456465', 'step': 9182, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 144], 'flops': 4271696270016}, 'timestamp': '2025-09-10 02:36:43.487277', 'step': 9182, 'epoch': 2} {'type': 'loss', 'content': 0.14723379909992218, 'timestamp': '2025-09-10 02:36:43.489776', 'step': 9183, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 128], 'flops': 3797092544000}, 'timestamp': '2025-09-10 02:36:43.524413', 'step': 9183, 'epoch': 2} {'type': 'loss', 'content': 0.1139722615480423, 'timestamp': '2025-09-10 02:36:43.549210', 'step': 9184, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 144], 'flops': 4271696270016}, 'timestamp': '2025-09-10 02:36:43.582080', 'step': 9184, 'epoch': 2} {'type': 'loss', 'content': 0.06877799332141876, 'timestamp': '2025-09-10 02:36:43.584250', 'step': 9185, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 2847885091968}, 'timestamp': '2025-09-10 02:36:43.613483', 'step': 9185, 'epoch': 2} {'type': 'loss', 'content': 0.08892464637756348, 'timestamp': '2025-09-10 02:36:43.615714', 'step': 9186, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 128], 'flops': 3797092544000}, 'timestamp': '2025-09-10 02:36:43.646113', 'step': 9186, 'epoch': 2} {'type': 'loss', 'content': 0.12443842738866806, 'timestamp': '2025-09-10 02:36:43.650537', 'step': 9187, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 128], 'flops': 3797092544000}, 'timestamp': '2025-09-10 02:36:43.681909', 'step': 9187, 'epoch': 2} {'type': 'loss', 'content': 0.09017516672611237, 'timestamp': '2025-09-10 02:36:43.705742', 'step': 9188, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 128], 'flops': 3797092544000}, 'timestamp': '2025-09-10 02:36:43.737191', 'step': 9188, 'epoch': 2} {'type': 'loss', 'content': 0.17113125324249268, 'timestamp': '2025-09-10 02:36:43.739852', 'step': 9189, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 3322488817984}, 'timestamp': '2025-09-10 02:36:43.770135', 'step': 9189, 'epoch': 2} {'type': 'loss', 'content': 0.09872625023126602, 'timestamp': '2025-09-10 02:36:43.772517', 'step': 9190, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 3322488817984}, 'timestamp': '2025-09-10 02:36:43.802892', 'step': 9190, 'epoch': 2} {'type': 'loss', 'content': 0.056196313351392746, 'timestamp': '2025-09-10 02:36:43.806706', 'step': 9191, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 3322488817984}, 'timestamp': '2025-09-10 02:36:43.838345', 'step': 9191, 'epoch': 2} {'type': 'loss', 'content': 0.12780524790287018, 'timestamp': '2025-09-10 02:36:43.861887', 'step': 9192, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 80], 'flops': 2373281365952}, 'timestamp': '2025-09-10 02:36:43.895584', 'step': 9192, 'epoch': 2} {'type': 'loss', 'content': 0.19293871521949768, 'timestamp': '2025-09-10 02:36:43.897990', 'step': 9193, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 2847885091968}, 'timestamp': '2025-09-10 02:36:43.927963', 'step': 9193, 'epoch': 2} {'type': 'loss', 'content': 0.14737965166568756, 'timestamp': '2025-09-10 02:36:43.930366', 'step': 9194, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 3322488817984}, 'timestamp': '2025-09-10 02:36:43.960391', 'step': 9194, 'epoch': 2} {'type': 'loss', 'content': 0.1310293972492218, 'timestamp': '2025-09-10 02:36:43.962497', 'step': 9195, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 3322488817984}, 'timestamp': '2025-09-10 02:36:43.992139', 'step': 9195, 'epoch': 2} {'type': 'loss', 'content': 0.11484331637620926, 'timestamp': '2025-09-10 02:36:44.015686', 'step': 9196, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 128], 'flops': 3797092544000}, 'timestamp': '2025-09-10 02:36:44.045625', 'step': 9196, 'epoch': 2} {'type': 'loss', 'content': 0.07542378455400467, 'timestamp': '2025-09-10 02:36:44.047820', 'step': 9197, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 144], 'flops': 4271696270016}, 'timestamp': '2025-09-10 02:36:44.079701', 'step': 9197, 'epoch': 2} {'type': 'loss', 'content': 0.11550845950841904, 'timestamp': '2025-09-10 02:36:44.081977', 'step': 9198, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 3322488817984}, 'timestamp': '2025-09-10 02:36:44.114321', 'step': 9198, 'epoch': 2} {'type': 'loss', 'content': 0.14046630263328552, 'timestamp': '2025-09-10 02:36:44.117841', 'step': 9199, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 3322488817984}, 'timestamp': '2025-09-10 02:36:44.150474', 'step': 9199, 'epoch': 2} {'type': 'loss', 'content': 0.175352543592453, 'timestamp': '2025-09-10 02:36:44.174077', 'step': 9200, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 2847885091968}, 'timestamp': '2025-09-10 02:36:44.203846', 'step': 9200, 'epoch': 2} {'type': 'loss', 'content': 0.06716056913137436, 'timestamp': '2025-09-10 02:36:44.206096', 'step': 9201, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 128], 'flops': 3797092544000}, 'timestamp': '2025-09-10 02:36:44.235875', 'step': 9201, 'epoch': 2} {'type': 'loss', 'content': 0.09999937564134598, 'timestamp': '2025-09-10 02:36:44.238137', 'step': 9202, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 2847885091968}, 'timestamp': '2025-09-10 02:36:44.268043', 'step': 9202, 'epoch': 2} {'type': 'loss', 'content': 0.09630157798528671, 'timestamp': '2025-09-10 02:36:44.270047', 'step': 9203, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 2847885091968}, 'timestamp': '2025-09-10 02:36:44.299861', 'step': 9203, 'epoch': 2} {'type': 'loss', 'content': 0.057141032069921494, 'timestamp': '2025-09-10 02:36:44.323977', 'step': 9204, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 128], 'flops': 3797092544000}, 'timestamp': '2025-09-10 02:36:44.354335', 'step': 9204, 'epoch': 2} {'type': 'loss', 'content': 0.16337358951568604, 'timestamp': '2025-09-10 02:36:44.356490', 'step': 9205, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 128], 'flops': 3797092544000}, 'timestamp': '2025-09-10 02:36:44.387957', 'step': 9205, 'epoch': 2} {'type': 'loss', 'content': 0.11872398108243942, 'timestamp': '2025-09-10 02:36:44.390124', 'step': 9206, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 2847885091968}, 'timestamp': '2025-09-10 02:36:44.419813', 'step': 9206, 'epoch': 2} {'type': 'loss', 'content': 0.08261599391698837, 'timestamp': '2025-09-10 02:36:44.422358', 'step': 9207, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 2847885091968}, 'timestamp': '2025-09-10 02:36:44.451771', 'step': 9207, 'epoch': 2} {'type': 'loss', 'content': 0.09866128861904144, 'timestamp': '2025-09-10 02:36:44.475227', 'step': 9208, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 2847885091968}, 'timestamp': '2025-09-10 02:36:44.504757', 'step': 9208, 'epoch': 2} {'type': 'loss', 'content': 0.08892251551151276, 'timestamp': '2025-09-10 02:36:44.507076', 'step': 9209, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 3322488817984}, 'timestamp': '2025-09-10 02:36:44.537023', 'step': 9209, 'epoch': 2} {'type': 'loss', 'content': 0.12603428959846497, 'timestamp': '2025-09-10 02:36:44.540577', 'step': 9210, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 2847885091968}, 'timestamp': '2025-09-10 02:36:44.572720', 'step': 9210, 'epoch': 2} {'type': 'loss', 'content': 0.14168253540992737, 'timestamp': '2025-09-10 02:36:44.575022', 'step': 9211, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 2847885091968}, 'timestamp': '2025-09-10 02:36:44.604560', 'step': 9211, 'epoch': 2} {'type': 'loss', 'content': 0.14110124111175537, 'timestamp': '2025-09-10 02:36:44.627842', 'step': 9212, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 3322488817984}, 'timestamp': '2025-09-10 02:36:44.658746', 'step': 9212, 'epoch': 2} {'type': 'loss', 'content': 0.1321842521429062, 'timestamp': '2025-09-10 02:36:44.662433', 'step': 9213, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 3322488817984}, 'timestamp': '2025-09-10 02:36:44.692321', 'step': 9213, 'epoch': 2} {'type': 'loss', 'content': 0.13442738354206085, 'timestamp': '2025-09-10 02:36:44.694857', 'step': 9214, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 2847885091968}, 'timestamp': '2025-09-10 02:36:44.724827', 'step': 9214, 'epoch': 2} {'type': 'loss', 'content': 0.05823338031768799, 'timestamp': '2025-09-10 02:36:44.727113', 'step': 9215, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 3322488817984}, 'timestamp': '2025-09-10 02:36:44.756633', 'step': 9215, 'epoch': 2} {'type': 'loss', 'content': 0.06470923870801926, 'timestamp': '2025-09-10 02:36:44.780952', 'step': 9216, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 128], 'flops': 3797092544000}, 'timestamp': '2025-09-10 02:36:44.811542', 'step': 9216, 'epoch': 2} {'type': 'loss', 'content': 0.09638023376464844, 'timestamp': '2025-09-10 02:36:44.814445', 'step': 9217, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 3322488817984}, 'timestamp': '2025-09-10 02:36:44.844891', 'step': 9217, 'epoch': 2} {'type': 'loss', 'content': 0.09511921554803848, 'timestamp': '2025-09-10 02:36:44.847475', 'step': 9218, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 2847885091968}, 'timestamp': '2025-09-10 02:36:44.878053', 'step': 9218, 'epoch': 2} {'type': 'loss', 'content': 0.07946188002824783, 'timestamp': '2025-09-10 02:36:44.880439', 'step': 9219, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 160], 'flops': 4746299996032}, 'timestamp': '2025-09-10 02:36:44.911192', 'step': 9219, 'epoch': 2} {'type': 'loss', 'content': 0.10213498026132584, 'timestamp': '2025-09-10 02:36:44.934809', 'step': 9220, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 2847885091968}, 'timestamp': '2025-09-10 02:36:44.965165', 'step': 9220, 'epoch': 2} {'type': 'loss', 'content': 0.07968392968177795, 'timestamp': '2025-09-10 02:36:44.967470', 'step': 9221, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 2847885091968}, 'timestamp': '2025-09-10 02:36:44.997099', 'step': 9221, 'epoch': 2} {'type': 'loss', 'content': 0.10688178241252899, 'timestamp': '2025-09-10 02:36:44.999434', 'step': 9222, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 3322488817984}, 'timestamp': '2025-09-10 02:36:45.029135', 'step': 9222, 'epoch': 2} {'type': 'loss', 'content': 0.10467325150966644, 'timestamp': '2025-09-10 02:36:45.031401', 'step': 9223, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 3322488817984}, 'timestamp': '2025-09-10 02:36:45.061603', 'step': 9223, 'epoch': 2} {'type': 'loss', 'content': 0.1713647097349167, 'timestamp': '2025-09-10 02:36:45.085740', 'step': 9224, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 128], 'flops': 3797092544000}, 'timestamp': '2025-09-10 02:36:45.117120', 'step': 9224, 'epoch': 2} {'type': 'loss', 'content': 0.17557671666145325, 'timestamp': '2025-09-10 02:36:45.119226', 'step': 9225, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 80], 'flops': 2373281365952}, 'timestamp': '2025-09-10 02:36:45.149385', 'step': 9225, 'epoch': 2} {'type': 'loss', 'content': 0.10211062431335449, 'timestamp': '2025-09-10 02:36:45.151805', 'step': 9226, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 2847885091968}, 'timestamp': '2025-09-10 02:36:45.181175', 'step': 9226, 'epoch': 2} {'type': 'loss', 'content': 0.09531471878290176, 'timestamp': '2025-09-10 02:36:45.183400', 'step': 9227, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 128], 'flops': 3797092544000}, 'timestamp': '2025-09-10 02:36:45.213633', 'step': 9227, 'epoch': 2} {'type': 'loss', 'content': 0.10889694094657898, 'timestamp': '2025-09-10 02:36:45.237429', 'step': 9228, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 2847885091968}, 'timestamp': '2025-09-10 02:36:45.269141', 'step': 9228, 'epoch': 2} {'type': 'loss', 'content': 0.09594143182039261, 'timestamp': '2025-09-10 02:36:45.272204', 'step': 9229, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 3322488817984}, 'timestamp': '2025-09-10 02:36:45.301518', 'step': 9229, 'epoch': 2} {'type': 'loss', 'content': 0.10825596749782562, 'timestamp': '2025-09-10 02:36:45.304099', 'step': 9230, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 128], 'flops': 3797092544000}, 'timestamp': '2025-09-10 02:36:45.334389', 'step': 9230, 'epoch': 2} {'type': 'loss', 'content': 0.09280958026647568, 'timestamp': '2025-09-10 02:36:45.337625', 'step': 9231, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 3322488817984}, 'timestamp': '2025-09-10 02:36:45.371778', 'step': 9231, 'epoch': 2} {'type': 'loss', 'content': 0.06988120079040527, 'timestamp': '2025-09-10 02:36:45.397649', 'step': 9232, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 2847885091968}, 'timestamp': '2025-09-10 02:36:45.430131', 'step': 9232, 'epoch': 2} {'type': 'loss', 'content': 0.13151761889457703, 'timestamp': '2025-09-10 02:36:45.433278', 'step': 9233, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 144], 'flops': 4271696270016}, 'timestamp': '2025-09-10 02:36:45.465958', 'step': 9233, 'epoch': 2} {'type': 'loss', 'content': 0.14784358441829681, 'timestamp': '2025-09-10 02:36:45.468203', 'step': 9234, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 128], 'flops': 3797092544000}, 'timestamp': '2025-09-10 02:36:45.497869', 'step': 9234, 'epoch': 2} {'type': 'loss', 'content': 0.15423330664634705, 'timestamp': '2025-09-10 02:36:45.501931', 'step': 9235, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 2847885091968}, 'timestamp': '2025-09-10 02:36:45.532429', 'step': 9235, 'epoch': 2} {'type': 'loss', 'content': 0.14746367931365967, 'timestamp': '2025-09-10 02:36:45.555943', 'step': 9236, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 2847885091968}, 'timestamp': '2025-09-10 02:36:45.587916', 'step': 9236, 'epoch': 2} {'type': 'loss', 'content': 0.13475461304187775, 'timestamp': '2025-09-10 02:36:45.589987', 'step': 9237, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 2847885091968}, 'timestamp': '2025-09-10 02:36:45.619908', 'step': 9237, 'epoch': 2} {'type': 'loss', 'content': 0.1238333210349083, 'timestamp': '2025-09-10 02:36:45.622343', 'step': 9238, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 2847885091968}, 'timestamp': '2025-09-10 02:36:45.651970', 'step': 9238, 'epoch': 2} {'type': 'loss', 'content': 0.13941381871700287, 'timestamp': '2025-09-10 02:36:45.654422', 'step': 9239, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 144], 'flops': 4271696270016}, 'timestamp': '2025-09-10 02:36:45.689457', 'step': 9239, 'epoch': 2} {'type': 'loss', 'content': 0.10472545027732849, 'timestamp': '2025-09-10 02:36:45.714521', 'step': 9240, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 3322488817984}, 'timestamp': '2025-09-10 02:36:45.752284', 'step': 9240, 'epoch': 2} {'type': 'loss', 'content': 0.15991714596748352, 'timestamp': '2025-09-10 02:36:45.754652', 'step': 9241, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 2847885091968}, 'timestamp': '2025-09-10 02:36:45.786893', 'step': 9241, 'epoch': 2} {'type': 'loss', 'content': 0.11843614280223846, 'timestamp': '2025-09-10 02:36:45.789189', 'step': 9242, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 3322488817984}, 'timestamp': '2025-09-10 02:36:45.819124', 'step': 9242, 'epoch': 2} {'type': 'loss', 'content': 0.0935794860124588, 'timestamp': '2025-09-10 02:36:45.821339', 'step': 9243, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 2847885091968}, 'timestamp': '2025-09-10 02:36:45.852495', 'step': 9243, 'epoch': 2} {'type': 'loss', 'content': 0.1605231612920761, 'timestamp': '2025-09-10 02:36:45.876496', 'step': 9244, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 144], 'flops': 4271696270016}, 'timestamp': '2025-09-10 02:36:45.908527', 'step': 9244, 'epoch': 2} {'type': 'loss', 'content': 0.10687081515789032, 'timestamp': '2025-09-10 02:36:45.912510', 'step': 9245, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 3322488817984}, 'timestamp': '2025-09-10 02:36:45.946317', 'step': 9245, 'epoch': 2} {'type': 'loss', 'content': 0.04105684533715248, 'timestamp': '2025-09-10 02:36:45.948643', 'step': 9246, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 3322488817984}, 'timestamp': '2025-09-10 02:36:45.979551', 'step': 9246, 'epoch': 2} {'type': 'loss', 'content': 0.09034127742052078, 'timestamp': '2025-09-10 02:36:45.981912', 'step': 9247, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 128], 'flops': 3797092544000}, 'timestamp': '2025-09-10 02:36:46.018441', 'step': 9247, 'epoch': 2} {'type': 'loss', 'content': 0.10002154856920242, 'timestamp': '2025-09-10 02:36:46.043901', 'step': 9248, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 3322488817984}, 'timestamp': '2025-09-10 02:36:46.079155', 'step': 9248, 'epoch': 2} {'type': 'loss', 'content': 0.09430883079767227, 'timestamp': '2025-09-10 02:36:46.085563', 'step': 9249, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 128], 'flops': 3797092544000}, 'timestamp': '2025-09-10 02:36:46.123374', 'step': 9249, 'epoch': 2} {'type': 'loss', 'content': 0.1477116197347641, 'timestamp': '2025-09-10 02:36:46.126340', 'step': 9250, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 128], 'flops': 3797092544000}, 'timestamp': '2025-09-10 02:36:46.161886', 'step': 9250, 'epoch': 2} {'type': 'loss', 'content': 0.10004495084285736, 'timestamp': '2025-09-10 02:36:46.164279', 'step': 9251, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 3322488817984}, 'timestamp': '2025-09-10 02:36:46.193808', 'step': 9251, 'epoch': 2} {'type': 'loss', 'content': 0.10745395720005035, 'timestamp': '2025-09-10 02:36:46.217305', 'step': 9252, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 3322488817984}, 'timestamp': '2025-09-10 02:36:46.249240', 'step': 9252, 'epoch': 2} {'type': 'loss', 'content': 0.08612269163131714, 'timestamp': '2025-09-10 02:36:46.251395', 'step': 9253, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 3322488817984}, 'timestamp': '2025-09-10 02:36:46.282781', 'step': 9253, 'epoch': 2} {'type': 'loss', 'content': 0.1402297466993332, 'timestamp': '2025-09-10 02:36:46.287748', 'step': 9254, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 2847885091968}, 'timestamp': '2025-09-10 02:36:46.317648', 'step': 9254, 'epoch': 2} {'type': 'loss', 'content': 0.06788838654756546, 'timestamp': '2025-09-10 02:36:46.320270', 'step': 9255, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 3322488817984}, 'timestamp': '2025-09-10 02:36:46.350334', 'step': 9255, 'epoch': 2} {'type': 'loss', 'content': 0.1894090324640274, 'timestamp': '2025-09-10 02:36:46.373910', 'step': 9256, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 3322488817984}, 'timestamp': '2025-09-10 02:36:46.404800', 'step': 9256, 'epoch': 2} {'type': 'loss', 'content': 0.24951602518558502, 'timestamp': '2025-09-10 02:36:46.408677', 'step': 9257, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 80], 'flops': 2373281365952}, 'timestamp': '2025-09-10 02:36:46.442022', 'step': 9257, 'epoch': 2} {'type': 'loss', 'content': 0.21011370420455933, 'timestamp': '2025-09-10 02:36:46.444283', 'step': 9258, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 3322488817984}, 'timestamp': '2025-09-10 02:36:46.474928', 'step': 9258, 'epoch': 2} {'type': 'loss', 'content': 0.14161208271980286, 'timestamp': '2025-09-10 02:36:46.477382', 'step': 9259, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 128], 'flops': 3797092544000}, 'timestamp': '2025-09-10 02:36:46.507033', 'step': 9259, 'epoch': 2} {'type': 'loss', 'content': 0.08605988323688507, 'timestamp': '2025-09-10 02:36:46.531938', 'step': 9260, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 2847885091968}, 'timestamp': '2025-09-10 02:36:46.564400', 'step': 9260, 'epoch': 2} {'type': 'loss', 'content': 0.11711448431015015, 'timestamp': '2025-09-10 02:36:46.566808', 'step': 9261, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 128], 'flops': 3797092544000}, 'timestamp': '2025-09-10 02:36:46.599286', 'step': 9261, 'epoch': 2} {'type': 'loss', 'content': 0.06971258670091629, 'timestamp': '2025-09-10 02:36:46.601904', 'step': 9262, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 3322488817984}, 'timestamp': '2025-09-10 02:36:46.634026', 'step': 9262, 'epoch': 2} {'type': 'loss', 'content': 0.09360484033823013, 'timestamp': '2025-09-10 02:36:46.639452', 'step': 9263, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 2847885091968}, 'timestamp': '2025-09-10 02:36:46.671450', 'step': 9263, 'epoch': 2} {'type': 'loss', 'content': 0.2160010039806366, 'timestamp': '2025-09-10 02:36:46.696773', 'step': 9264, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 144], 'flops': 4271696270016}, 'timestamp': '2025-09-10 02:36:46.729506', 'step': 9264, 'epoch': 2} {'type': 'loss', 'content': 0.16451582312583923, 'timestamp': '2025-09-10 02:36:46.733081', 'step': 9265, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 128], 'flops': 3797092544000}, 'timestamp': '2025-09-10 02:36:46.766493', 'step': 9265, 'epoch': 2} {'type': 'loss', 'content': 0.18385687470436096, 'timestamp': '2025-09-10 02:36:46.768673', 'step': 9266, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 2847885091968}, 'timestamp': '2025-09-10 02:36:46.799327', 'step': 9266, 'epoch': 2} {'type': 'loss', 'content': 0.13218411803245544, 'timestamp': '2025-09-10 02:36:46.801481', 'step': 9267, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 160], 'flops': 4746299996032}, 'timestamp': '2025-09-10 02:36:46.832605', 'step': 9267, 'epoch': 2} {'type': 'loss', 'content': 0.10665369033813477, 'timestamp': '2025-09-10 02:36:46.856252', 'step': 9268, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 2847885091968}, 'timestamp': '2025-09-10 02:36:46.887197', 'step': 9268, 'epoch': 2} {'type': 'loss', 'content': 0.1157766580581665, 'timestamp': '2025-09-10 02:36:46.889270', 'step': 9269, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 192], 'flops': 5695507448064}, 'timestamp': '2025-09-10 02:36:46.920156', 'step': 9269, 'epoch': 2} {'type': 'loss', 'content': 0.13250769674777985, 'timestamp': '2025-09-10 02:36:46.924637', 'step': 9270, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 128], 'flops': 3797092544000}, 'timestamp': '2025-09-10 02:36:46.957124', 'step': 9270, 'epoch': 2} {'type': 'loss', 'content': 0.07226894795894623, 'timestamp': '2025-09-10 02:36:46.959899', 'step': 9271, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 3322488817984}, 'timestamp': '2025-09-10 02:36:46.990414', 'step': 9271, 'epoch': 2} {'type': 'loss', 'content': 0.11671116948127747, 'timestamp': '2025-09-10 02:36:47.015538', 'step': 9272, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 128], 'flops': 3797092544000}, 'timestamp': '2025-09-10 02:36:47.048886', 'step': 9272, 'epoch': 2} {'type': 'loss', 'content': 0.08657602965831757, 'timestamp': '2025-09-10 02:36:47.050921', 'step': 9273, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 192], 'flops': 5695507448064}, 'timestamp': '2025-09-10 02:36:47.080445', 'step': 9273, 'epoch': 2} {'type': 'loss', 'content': 0.07565218955278397, 'timestamp': '2025-09-10 02:36:47.085158', 'step': 9274, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 2847885091968}, 'timestamp': '2025-09-10 02:36:47.115695', 'step': 9274, 'epoch': 2} {'type': 'loss', 'content': 0.07291168719530106, 'timestamp': '2025-09-10 02:36:47.118095', 'step': 9275, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 3322488817984}, 'timestamp': '2025-09-10 02:36:47.148422', 'step': 9275, 'epoch': 2} {'type': 'loss', 'content': 0.12675684690475464, 'timestamp': '2025-09-10 02:36:47.171970', 'step': 9276, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 2847885091968}, 'timestamp': '2025-09-10 02:36:47.202187', 'step': 9276, 'epoch': 2} {'type': 'loss', 'content': 0.12353522330522537, 'timestamp': '2025-09-10 02:36:47.204461', 'step': 9277, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 3322488817984}, 'timestamp': '2025-09-10 02:36:47.233582', 'step': 9277, 'epoch': 2} {'type': 'loss', 'content': 0.06108906865119934, 'timestamp': '2025-09-10 02:36:47.235792', 'step': 9278, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 2847885091968}, 'timestamp': '2025-09-10 02:36:47.265840', 'step': 9278, 'epoch': 2} {'type': 'loss', 'content': 0.14990709722042084, 'timestamp': '2025-09-10 02:36:47.268212', 'step': 9279, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 3322488817984}, 'timestamp': '2025-09-10 02:36:47.298468', 'step': 9279, 'epoch': 2} {'type': 'loss', 'content': 0.13545922935009003, 'timestamp': '2025-09-10 02:36:47.326677', 'step': 9280, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 3322488817984}, 'timestamp': '2025-09-10 02:36:47.356571', 'step': 9280, 'epoch': 2} {'type': 'loss', 'content': 0.07369254529476166, 'timestamp': '2025-09-10 02:36:47.359244', 'step': 9281, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 3322488817984}, 'timestamp': '2025-09-10 02:36:47.390590', 'step': 9281, 'epoch': 2} {'type': 'loss', 'content': 0.12410739064216614, 'timestamp': '2025-09-10 02:36:47.392995', 'step': 9282, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 128], 'flops': 3797092544000}, 'timestamp': '2025-09-10 02:36:47.423045', 'step': 9282, 'epoch': 2} {'type': 'loss', 'content': 0.10283917188644409, 'timestamp': '2025-09-10 02:36:47.425444', 'step': 9283, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 128], 'flops': 3797092544000}, 'timestamp': '2025-09-10 02:36:47.455473', 'step': 9283, 'epoch': 2} {'type': 'loss', 'content': 0.09940950572490692, 'timestamp': '2025-09-10 02:36:47.478971', 'step': 9284, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 160], 'flops': 4746299996032}, 'timestamp': '2025-09-10 02:36:47.509174', 'step': 9284, 'epoch': 2} {'type': 'loss', 'content': 0.17953889071941376, 'timestamp': '2025-09-10 02:36:47.511490', 'step': 9285, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 3322488817984}, 'timestamp': '2025-09-10 02:36:47.541758', 'step': 9285, 'epoch': 2} {'type': 'loss', 'content': 0.11772933602333069, 'timestamp': '2025-09-10 02:36:47.544002', 'step': 9286, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 2847885091968}, 'timestamp': '2025-09-10 02:36:47.574378', 'step': 9286, 'epoch': 2} {'type': 'loss', 'content': 0.15051239728927612, 'timestamp': '2025-09-10 02:36:47.576604', 'step': 9287, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 2847885091968}, 'timestamp': '2025-09-10 02:36:47.606746', 'step': 9287, 'epoch': 2} {'type': 'loss', 'content': 0.0609806589782238, 'timestamp': '2025-09-10 02:36:47.630366', 'step': 9288, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 2847885091968}, 'timestamp': '2025-09-10 02:36:47.660531', 'step': 9288, 'epoch': 2} {'type': 'loss', 'content': 0.08494218438863754, 'timestamp': '2025-09-10 02:36:47.662784', 'step': 9289, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 2847885091968}, 'timestamp': '2025-09-10 02:36:47.693379', 'step': 9289, 'epoch': 2} {'type': 'loss', 'content': 0.04399996995925903, 'timestamp': '2025-09-10 02:36:47.695623', 'step': 9290, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 2847885091968}, 'timestamp': '2025-09-10 02:36:47.726847', 'step': 9290, 'epoch': 2} {'type': 'loss', 'content': 0.14185282588005066, 'timestamp': '2025-09-10 02:36:47.729857', 'step': 9291, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 3322488817984}, 'timestamp': '2025-09-10 02:36:47.760191', 'step': 9291, 'epoch': 2} {'type': 'loss', 'content': 0.12923456728458405, 'timestamp': '2025-09-10 02:36:47.783624', 'step': 9292, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 144], 'flops': 4271696270016}, 'timestamp': '2025-09-10 02:36:47.815335', 'step': 9292, 'epoch': 2} {'type': 'loss', 'content': 0.17112870514392853, 'timestamp': '2025-09-10 02:36:47.819466', 'step': 9293, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 3322488817984}, 'timestamp': '2025-09-10 02:36:47.849413', 'step': 9293, 'epoch': 2} {'type': 'loss', 'content': 0.16253562271595, 'timestamp': '2025-09-10 02:36:47.851730', 'step': 9294, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 3322488817984}, 'timestamp': '2025-09-10 02:36:47.881634', 'step': 9294, 'epoch': 2} {'type': 'loss', 'content': 0.07027839869260788, 'timestamp': '2025-09-10 02:36:47.883933', 'step': 9295, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 128], 'flops': 3797092544000}, 'timestamp': '2025-09-10 02:36:47.913366', 'step': 9295, 'epoch': 2} {'type': 'loss', 'content': 0.16240759193897247, 'timestamp': '2025-09-10 02:36:47.936891', 'step': 9296, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 3322488817984}, 'timestamp': '2025-09-10 02:36:47.967162', 'step': 9296, 'epoch': 2} {'type': 'loss', 'content': 0.04376694932579994, 'timestamp': '2025-09-10 02:36:47.969441', 'step': 9297, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 80], 'flops': 2373281365952}, 'timestamp': '2025-09-10 02:36:48.000590', 'step': 9297, 'epoch': 2} {'type': 'loss', 'content': 0.1610921025276184, 'timestamp': '2025-09-10 02:36:48.003349', 'step': 9298, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 128], 'flops': 3797092544000}, 'timestamp': '2025-09-10 02:36:48.035753', 'step': 9298, 'epoch': 2} {'type': 'loss', 'content': 0.15946584939956665, 'timestamp': '2025-09-10 02:36:48.038124', 'step': 9299, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 128], 'flops': 3797092544000}, 'timestamp': '2025-09-10 02:36:48.068214', 'step': 9299, 'epoch': 2} {'type': 'loss', 'content': 0.18636159598827362, 'timestamp': '2025-09-10 02:36:48.091822', 'step': 9300, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 2847885091968}, 'timestamp': '2025-09-10 02:36:48.122409', 'step': 9300, 'epoch': 2} {'type': 'loss', 'content': 0.0628698468208313, 'timestamp': '2025-09-10 02:36:48.125676', 'step': 9301, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 128], 'flops': 3797092544000}, 'timestamp': '2025-09-10 02:36:48.155637', 'step': 9301, 'epoch': 2} {'type': 'loss', 'content': 0.11373692750930786, 'timestamp': '2025-09-10 02:36:48.157913', 'step': 9302, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 3322488817984}, 'timestamp': '2025-09-10 02:36:48.189049', 'step': 9302, 'epoch': 2} {'type': 'loss', 'content': 0.09276963770389557, 'timestamp': '2025-09-10 02:36:48.191539', 'step': 9303, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 2847885091968}, 'timestamp': '2025-09-10 02:36:48.221248', 'step': 9303, 'epoch': 2} {'type': 'loss', 'content': 0.07423611730337143, 'timestamp': '2025-09-10 02:36:48.245227', 'step': 9304, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 128], 'flops': 3797092544000}, 'timestamp': '2025-09-10 02:36:48.275472', 'step': 9304, 'epoch': 2} {'type': 'loss', 'content': 0.1133122444152832, 'timestamp': '2025-09-10 02:36:48.278001', 'step': 9305, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 2847885091968}, 'timestamp': '2025-09-10 02:36:48.308138', 'step': 9305, 'epoch': 2} {'type': 'loss', 'content': 0.10696984082460403, 'timestamp': '2025-09-10 02:36:48.314075', 'step': 9306, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 144], 'flops': 4271696270016}, 'timestamp': '2025-09-10 02:36:48.348130', 'step': 9306, 'epoch': 2} {'type': 'loss', 'content': 0.09815292805433273, 'timestamp': '2025-09-10 02:36:48.352411', 'step': 9307, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 128], 'flops': 3797092544000}, 'timestamp': '2025-09-10 02:36:48.387081', 'step': 9307, 'epoch': 2} {'type': 'loss', 'content': 0.13095122575759888, 'timestamp': '2025-09-10 02:36:48.411642', 'step': 9308, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 128], 'flops': 3797092544000}, 'timestamp': '2025-09-10 02:36:48.445218', 'step': 9308, 'epoch': 2} {'type': 'loss', 'content': 0.11679887771606445, 'timestamp': '2025-09-10 02:36:48.448679', 'step': 9309, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 2847885091968}, 'timestamp': '2025-09-10 02:36:48.482429', 'step': 9309, 'epoch': 2} {'type': 'loss', 'content': 0.09023842960596085, 'timestamp': '2025-09-10 02:36:48.486094', 'step': 9310, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 2847885091968}, 'timestamp': '2025-09-10 02:36:48.519244', 'step': 9310, 'epoch': 2} {'type': 'loss', 'content': 0.06876487284898758, 'timestamp': '2025-09-10 02:36:48.524492', 'step': 9311, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 128], 'flops': 3797092544000}, 'timestamp': '2025-09-10 02:36:48.558289', 'step': 9311, 'epoch': 2} {'type': 'loss', 'content': 0.15830329060554504, 'timestamp': '2025-09-10 02:36:48.582157', 'step': 9312, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 2847885091968}, 'timestamp': '2025-09-10 02:36:48.612311', 'step': 9312, 'epoch': 2} {'type': 'loss', 'content': 0.1284511834383011, 'timestamp': '2025-09-10 02:36:48.615649', 'step': 9313, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 128], 'flops': 3797092544000}, 'timestamp': '2025-09-10 02:36:48.647063', 'step': 9313, 'epoch': 2} {'type': 'loss', 'content': 0.07311047613620758, 'timestamp': '2025-09-10 02:36:48.649417', 'step': 9314, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 2847885091968}, 'timestamp': '2025-09-10 02:36:48.679344', 'step': 9314, 'epoch': 2} {'type': 'loss', 'content': 0.08726008236408234, 'timestamp': '2025-09-10 02:36:48.681635', 'step': 9315, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 2847885091968}, 'timestamp': '2025-09-10 02:36:48.711250', 'step': 9315, 'epoch': 2} {'type': 'loss', 'content': 0.0879138931632042, 'timestamp': '2025-09-10 02:36:48.735840', 'step': 9316, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 3322488817984}, 'timestamp': '2025-09-10 02:36:48.765803', 'step': 9316, 'epoch': 2} {'type': 'loss', 'content': 0.1058339923620224, 'timestamp': '2025-09-10 02:36:48.768902', 'step': 9317, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 2847885091968}, 'timestamp': '2025-09-10 02:36:48.800025', 'step': 9317, 'epoch': 2} {'type': 'loss', 'content': 0.059444550424814224, 'timestamp': '2025-09-10 02:36:48.802318', 'step': 9318, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 3322488817984}, 'timestamp': '2025-09-10 02:36:48.831741', 'step': 9318, 'epoch': 2} {'type': 'loss', 'content': 0.06413081288337708, 'timestamp': '2025-09-10 02:36:48.834162', 'step': 9319, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 3322488817984}, 'timestamp': '2025-09-10 02:36:48.863824', 'step': 9319, 'epoch': 2} {'type': 'loss', 'content': 0.09073426574468613, 'timestamp': '2025-09-10 02:36:48.887296', 'step': 9320, 'epoch': 2} {'type': 'flops', 'content': [{'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1582003754624}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1898404492032}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1898404492032}, {'type': 'perplexity', 'in_batch_dim': [8, 144], 'batch_size': 8, 'flops': 2847606704256}, {'type': 'perplexity', 'in_batch_dim': [8, 64], 'batch_size': 8, 'flops': 1265603017216}, {'type': 'perplexity', 'in_batch_dim': [8, 112], 'batch_size': 8, 'flops': 2214805229440}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1898404492032}, {'type': 'perplexity', 'in_batch_dim': [8, 112], 'batch_size': 8, 'flops': 2214805229440}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1898404492032}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1898404492032}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1898404492032}, {'type': 'perplexity', 'in_batch_dim': [8, 112], 'batch_size': 8, 'flops': 2214805229440}, {'type': 'perplexity', 'in_batch_dim': [8, 112], 'batch_size': 8, 'flops': 2214805229440}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1582003754624}, {'type': 'perplexity', 'in_batch_dim': [8, 144], 'batch_size': 8, 'flops': 2847606704256}, {'type': 'perplexity', 'in_batch_dim': [8, 112], 'batch_size': 8, 'flops': 2214805229440}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1582003754624}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1898404492032}, {'type': 'perplexity', 'in_batch_dim': [8, 64], 'batch_size': 8, 'flops': 1265603017216}, {'type': 'perplexity', 'in_batch_dim': [8, 64], 'batch_size': 8, 'flops': 1265603017216}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1898404492032}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1582003754624}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1582003754624}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1582003754624}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1582003754624}, {'type': 'perplexity', 'in_batch_dim': [8, 64], 'batch_size': 8, 'flops': 1265603017216}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1898404492032}, {'type': 'perplexity', 'in_batch_dim': [8, 64], 'batch_size': 8, 'flops': 1265603017216}, {'type': 'perplexity', 'in_batch_dim': [8, 64], 'batch_size': 8, 'flops': 1265603017216}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1582003754624}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1582003754624}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1582003754624}, {'type': 'perplexity', 'in_batch_dim': [8, 112], 'batch_size': 8, 'flops': 2214805229440}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1582003754624}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1582003754624}, {'type': 'perplexity', 'in_batch_dim': [8, 160], 'batch_size': 8, 'flops': 3164007441664}, {'type': 'perplexity', 'in_batch_dim': [8, 64], 'batch_size': 8, 'flops': 1265603017216}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1898404492032}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1898404492032}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1898404492032}, {'type': 'perplexity', 'in_batch_dim': [8, 64], 'batch_size': 8, 'flops': 1265603017216}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1582003754624}, {'type': 'perplexity', 'in_batch_dim': [8, 64], 'batch_size': 8, 'flops': 1265603017216}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1582003754624}, {'type': 'perplexity', 'in_batch_dim': [8, 64], 'batch_size': 8, 'flops': 1265603017216}, {'type': 'perplexity', 'in_batch_dim': [8, 112], 'batch_size': 8, 'flops': 2214805229440}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1582003754624}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1898404492032}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1898404492032}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1898404492032}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1898404492032}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1898404492032}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1898404492032}, {'type': 'perplexity', 'in_batch_dim': [8, 64], 'batch_size': 8, 'flops': 1265603017216}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1898404492032}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1582003754624}, {'type': 'perplexity', 'in_batch_dim': [8, 64], 'batch_size': 8, 'flops': 1265603017216}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1898404492032}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1898404492032}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1898404492032}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1898404492032}, {'type': 'perplexity', 'in_batch_dim': [8, 128], 'batch_size': 8, 'flops': 2531205966848}, {'type': 'perplexity', 'in_batch_dim': [8, 112], 'batch_size': 8, 'flops': 2214805229440}, {'type': 'perplexity', 'in_batch_dim': [8, 64], 'batch_size': 8, 'flops': 1265603017216}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1582003754624}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1898404492032}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1582003754624}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1898404492032}, {'type': 'perplexity', 'in_batch_dim': [8, 64], 'batch_size': 8, 'flops': 1265603017216}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1898404492032}, {'type': 'perplexity', 'in_batch_dim': [8, 112], 'batch_size': 8, 'flops': 2214805229440}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1898404492032}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1582003754624}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1582003754624}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1582003754624}, {'type': 'perplexity', 'in_batch_dim': [8, 64], 'batch_size': 8, 'flops': 1265603017216}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1582003754624}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1898404492032}, {'type': 'perplexity', 'in_batch_dim': [8, 64], 'batch_size': 8, 'flops': 1265603017216}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1582003754624}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1898404492032}, {'type': 'perplexity', 'in_batch_dim': [8, 64], 'batch_size': 8, 'flops': 1265603017216}, {'type': 'perplexity', 'in_batch_dim': [8, 112], 'batch_size': 8, 'flops': 2214805229440}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1582003754624}, {'type': 'perplexity', 'in_batch_dim': [8, 144], 'batch_size': 8, 'flops': 2847606704256}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1582003754624}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1582003754624}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1582003754624}, {'type': 'perplexity', 'in_batch_dim': [8, 64], 'batch_size': 8, 'flops': 1265603017216}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1898404492032}, {'type': 'perplexity', 'in_batch_dim': [8, 112], 'batch_size': 8, 'flops': 2214805229440}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1898404492032}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1582003754624}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1582003754624}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1582003754624}, {'type': 'perplexity', 'in_batch_dim': [8, 112], 'batch_size': 8, 'flops': 2214805229440}, {'type': 'perplexity', 'in_batch_dim': [8, 64], 'batch_size': 8, 'flops': 1265603017216}, {'type': 'perplexity', 'in_batch_dim': [8, 112], 'batch_size': 8, 'flops': 2214805229440}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1582003754624}, {'type': 'perplexity', 'in_batch_dim': [8, 64], 'batch_size': 8, 'flops': 1265603017216}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1582003754624}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1582003754624}, {'type': 'perplexity', 'in_batch_dim': [8, 64], 'batch_size': 8, 'flops': 1265603017216}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1582003754624}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1582003754624}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1582003754624}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1898404492032}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1582003754624}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1582003754624}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1898404492032}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1898404492032}, {'type': 'perplexity', 'in_batch_dim': [8, 112], 'batch_size': 8, 'flops': 2214805229440}, {'type': 'perplexity', 'in_batch_dim': [8, 64], 'batch_size': 8, 'flops': 1265603017216}, {'type': 'perplexity', 'in_batch_dim': [8, 112], 'batch_size': 8, 'flops': 2214805229440}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1898404492032}, {'type': 'perplexity', 'in_batch_dim': [8, 112], 'batch_size': 8, 'flops': 2214805229440}, {'type': 'perplexity', 'in_batch_dim': [8, 128], 'batch_size': 8, 'flops': 2531205966848}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1582003754624}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1898404492032}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1898404492032}, {'type': 'perplexity', 'in_batch_dim': [8, 192], 'batch_size': 8, 'flops': 3796808916480}, {'type': 'perplexity', 'in_batch_dim': [8, 64], 'batch_size': 8, 'flops': 1265603017216}, {'type': 'perplexity', 'in_batch_dim': [8, 144], 'batch_size': 8, 'flops': 2847606704256}, {'type': 'perplexity', 'in_batch_dim': [8, 64], 'batch_size': 8, 'flops': 1265603017216}, {'type': 'perplexity', 'in_batch_dim': [8, 144], 'batch_size': 8, 'flops': 2847606704256}, {'type': 'perplexity', 'in_batch_dim': [8, 64], 'batch_size': 8, 'flops': 1265603017216}, {'type': 'perplexity', 'in_batch_dim': [8, 64], 'batch_size': 8, 'flops': 1265603017216}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1898404492032}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1898404492032}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1582003754624}, {'type': 'perplexity', 'in_batch_dim': [8, 128], 'batch_size': 8, 'flops': 2531205966848}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1898404492032}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1898404492032}, {'type': 'perplexity', 'in_batch_dim': [8, 112], 'batch_size': 8, 'flops': 2214805229440}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1582003754624}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1582003754624}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1898404492032}, {'type': 'perplexity', 'in_batch_dim': [8, 64], 'batch_size': 8, 'flops': 1265603017216}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1898404492032}, {'type': 'perplexity', 'in_batch_dim': [8, 112], 'batch_size': 8, 'flops': 2214805229440}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1898404492032}, {'type': 'perplexity', 'in_batch_dim': [8, 64], 'batch_size': 8, 'flops': 1265603017216}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1582003754624}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1898404492032}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1898404492032}, {'type': 'perplexity', 'in_batch_dim': [8, 64], 'batch_size': 8, 'flops': 1265603017216}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1898404492032}, {'type': 'perplexity', 'in_batch_dim': [8, 64], 'batch_size': 8, 'flops': 1265603017216}, {'type': 'perplexity', 'in_batch_dim': [8, 112], 'batch_size': 8, 'flops': 2214805229440}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1582003754624}, {'type': 'perplexity', 'in_batch_dim': [8, 128], 'batch_size': 8, 'flops': 2531205966848}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1582003754624}, {'type': 'perplexity', 'in_batch_dim': [8, 112], 'batch_size': 8, 'flops': 2214805229440}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1898404492032}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1582003754624}, {'type': 'perplexity', 'in_batch_dim': [8, 112], 'batch_size': 8, 'flops': 2214805229440}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1582003754624}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1898404492032}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1898404492032}, {'type': 'perplexity', 'in_batch_dim': [8, 128], 'batch_size': 8, 'flops': 2531205966848}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1898404492032}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1898404492032}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1582003754624}, {'type': 'perplexity', 'in_batch_dim': [8, 112], 'batch_size': 8, 'flops': 2214805229440}, {'type': 'perplexity', 'in_batch_dim': [8, 128], 'batch_size': 8, 'flops': 2531205966848}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1898404492032}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1582003754624}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1582003754624}, {'type': 'perplexity', 'in_batch_dim': [8, 128], 'batch_size': 8, 'flops': 2531205966848}, {'type': 'perplexity', 'in_batch_dim': [8, 112], 'batch_size': 8, 'flops': 2214805229440}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1582003754624}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1582003754624}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1898404492032}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1582003754624}, {'type': 'perplexity', 'in_batch_dim': [8, 64], 'batch_size': 8, 'flops': 1265603017216}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1582003754624}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1582003754624}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1582003754624}, {'type': 'perplexity', 'in_batch_dim': [8, 112], 'batch_size': 8, 'flops': 2214805229440}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1582003754624}, {'type': 'perplexity', 'in_batch_dim': [8, 64], 'batch_size': 8, 'flops': 1265603017216}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1582003754624}, {'type': 'perplexity', 'in_batch_dim': [8, 128], 'batch_size': 8, 'flops': 2531205966848}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1898404492032}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1898404492032}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1582003754624}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1898404492032}, {'type': 'perplexity', 'in_batch_dim': [8, 64], 'batch_size': 8, 'flops': 1265603017216}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1898404492032}, {'type': 'perplexity', 'in_batch_dim': [8, 112], 'batch_size': 8, 'flops': 2214805229440}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1898404492032}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1582003754624}, {'type': 'perplexity', 'in_batch_dim': [8, 112], 'batch_size': 8, 'flops': 2214805229440}, {'type': 'perplexity', 'in_batch_dim': [8, 112], 'batch_size': 8, 'flops': 2214805229440}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1898404492032}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1898404492032}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1898404492032}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1898404492032}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1582003754624}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1582003754624}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1582003754624}, {'type': 'perplexity', 'in_batch_dim': [8, 112], 'batch_size': 8, 'flops': 2214805229440}, {'type': 'perplexity', 'in_batch_dim': [8, 64], 'batch_size': 8, 'flops': 1265603017216}, {'type': 'perplexity', 'in_batch_dim': [8, 144], 'batch_size': 8, 'flops': 2847606704256}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1898404492032}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1898404492032}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1898404492032}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1898404492032}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1898404492032}, {'type': 'perplexity', 'in_batch_dim': [8, 112], 'batch_size': 8, 'flops': 2214805229440}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1582003754624}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1582003754624}, {'type': 'perplexity', 'in_batch_dim': [8, 64], 'batch_size': 8, 'flops': 1265603017216}, {'type': 'perplexity', 'in_batch_dim': [8, 112], 'batch_size': 8, 'flops': 2214805229440}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1582003754624}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1898404492032}, {'type': 'perplexity', 'in_batch_dim': [8, 64], 'batch_size': 8, 'flops': 1265603017216}, {'type': 'perplexity', 'in_batch_dim': [8, 112], 'batch_size': 8, 'flops': 2214805229440}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1898404492032}, {'type': 'perplexity', 'in_batch_dim': [8, 64], 'batch_size': 8, 'flops': 1265603017216}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1582003754624}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1582003754624}, {'type': 'perplexity', 'in_batch_dim': [8, 64], 'batch_size': 8, 'flops': 1265603017216}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1898404492032}, {'type': 'perplexity', 'in_batch_dim': [8, 64], 'batch_size': 8, 'flops': 1265603017216}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1582003754624}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1898404492032}, {'type': 'perplexity', 'in_batch_dim': [8, 112], 'batch_size': 8, 'flops': 2214805229440}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1582003754624}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1898404492032}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1582003754624}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1898404492032}, {'type': 'perplexity', 'in_batch_dim': [8, 112], 'batch_size': 8, 'flops': 2214805229440}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1898404492032}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1898404492032}, {'type': 'perplexity', 'in_batch_dim': [8, 64], 'batch_size': 8, 'flops': 1265603017216}, {'type': 'perplexity', 'in_batch_dim': [8, 64], 'batch_size': 8, 'flops': 1265603017216}, {'type': 'perplexity', 'in_batch_dim': [8, 128], 'batch_size': 8, 'flops': 2531205966848}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1582003754624}, {'type': 'perplexity', 'in_batch_dim': [8, 64], 'batch_size': 8, 'flops': 1265603017216}, {'type': 'perplexity', 'in_batch_dim': [8, 112], 'batch_size': 8, 'flops': 2214805229440}, {'type': 'perplexity', 'in_batch_dim': [8, 112], 'batch_size': 8, 'flops': 2214805229440}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1898404492032}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1582003754624}, {'type': 'perplexity', 'in_batch_dim': [8, 128], 'batch_size': 8, 'flops': 2531205966848}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1898404492032}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1582003754624}, {'type': 'perplexity', 'in_batch_dim': [8, 112], 'batch_size': 8, 'flops': 2214805229440}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1898404492032}, {'type': 'perplexity', 'in_batch_dim': [8, 64], 'batch_size': 8, 'flops': 1265603017216}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1582003754624}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1898404492032}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1898404492032}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1582003754624}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1582003754624}, {'type': 'perplexity', 'in_batch_dim': [8, 64], 'batch_size': 8, 'flops': 1265603017216}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1898404492032}, {'type': 'perplexity', 'in_batch_dim': [8, 112], 'batch_size': 8, 'flops': 2214805229440}, {'type': 'perplexity', 'in_batch_dim': [8, 64], 'batch_size': 8, 'flops': 1265603017216}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1898404492032}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1582003754624}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1582003754624}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1898404492032}, {'type': 'perplexity', 'in_batch_dim': [8, 112], 'batch_size': 8, 'flops': 2214805229440}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1898404492032}, {'type': 'perplexity', 'in_batch_dim': [8, 112], 'batch_size': 8, 'flops': 2214805229440}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1582003754624}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1582003754624}, {'type': 'perplexity', 'in_batch_dim': [8, 64], 'batch_size': 8, 'flops': 1265603017216}, {'type': 'perplexity', 'in_batch_dim': [8, 112], 'batch_size': 8, 'flops': 2214805229440}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1582003754624}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1898404492032}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1582003754624}, {'type': 'perplexity', 'in_batch_dim': [8, 112], 'batch_size': 8, 'flops': 2214805229440}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1898404492032}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1898404492032}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1582003754624}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1898404492032}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1582003754624}, {'type': 'perplexity', 'in_batch_dim': [8, 112], 'batch_size': 8, 'flops': 2214805229440}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1582003754624}, {'type': 'perplexity', 'in_batch_dim': [8, 64], 'batch_size': 8, 'flops': 1265603017216}, {'type': 'perplexity', 'in_batch_dim': [8, 64], 'batch_size': 8, 'flops': 1265603017216}, {'type': 'perplexity', 'in_batch_dim': [8, 48], 'batch_size': 8, 'flops': 949202279808}, {'type': 'perplexity', 'in_batch_dim': [8, 112], 'batch_size': 8, 'flops': 2214805229440}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1582003754624}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1898404492032}, {'type': 'perplexity', 'in_batch_dim': [8, 128], 'batch_size': 8, 'flops': 2531205966848}, {'type': 'perplexity', 'in_batch_dim': [8, 112], 'batch_size': 8, 'flops': 2214805229440}, {'type': 'perplexity', 'in_batch_dim': [8, 128], 'batch_size': 8, 'flops': 2531205966848}, {'type': 'perplexity', 'in_batch_dim': [8, 112], 'batch_size': 8, 'flops': 2214805229440}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1582003754624}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1898404492032}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1582003754624}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1582003754624}, {'type': 'perplexity', 'in_batch_dim': [8, 112], 'batch_size': 8, 'flops': 2214805229440}, {'type': 'perplexity', 'in_batch_dim': [8, 112], 'batch_size': 8, 'flops': 2214805229440}, {'type': 'perplexity', 'in_batch_dim': [8, 64], 'batch_size': 8, 'flops': 1265603017216}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1898404492032}, {'type': 'perplexity', 'in_batch_dim': [8, 112], 'batch_size': 8, 'flops': 2214805229440}, {'type': 'perplexity', 'in_batch_dim': [8, 64], 'batch_size': 8, 'flops': 1265603017216}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1898404492032}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1582003754624}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1898404492032}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1898404492032}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1898404492032}, {'type': 'perplexity', 'in_batch_dim': [8, 48], 'batch_size': 8, 'flops': 949202279808}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1898404492032}, {'type': 'perplexity', 'in_batch_dim': [8, 112], 'batch_size': 8, 'flops': 2214805229440}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1582003754624}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1898404492032}, {'type': 'perplexity', 'in_batch_dim': [8, 64], 'batch_size': 8, 'flops': 1265603017216}, {'type': 'perplexity', 'in_batch_dim': [8, 112], 'batch_size': 8, 'flops': 2214805229440}, {'type': 'perplexity', 'in_batch_dim': [8, 64], 'batch_size': 8, 'flops': 1265603017216}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1582003754624}, {'type': 'perplexity', 'in_batch_dim': [8, 64], 'batch_size': 8, 'flops': 1265603017216}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1582003754624}, {'type': 'perplexity', 'in_batch_dim': [8, 112], 'batch_size': 8, 'flops': 2214805229440}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1582003754624}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1582003754624}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1582003754624}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1582003754624}, {'type': 'perplexity', 'in_batch_dim': [8, 112], 'batch_size': 8, 'flops': 2214805229440}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1582003754624}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1898404492032}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1898404492032}, {'type': 'perplexity', 'in_batch_dim': [8, 112], 'batch_size': 8, 'flops': 2214805229440}, {'type': 'perplexity', 'in_batch_dim': [8, 144], 'batch_size': 8, 'flops': 2847606704256}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1582003754624}, {'type': 'perplexity', 'in_batch_dim': [8, 64], 'batch_size': 8, 'flops': 1265603017216}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1898404492032}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1898404492032}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1582003754624}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1898404492032}, {'type': 'perplexity', 'in_batch_dim': [8, 128], 'batch_size': 8, 'flops': 2531205966848}, {'type': 'perplexity', 'in_batch_dim': [8, 112], 'batch_size': 8, 'flops': 2214805229440}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1898404492032}, {'type': 'perplexity', 'in_batch_dim': [8, 64], 'batch_size': 8, 'flops': 1265603017216}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1898404492032}, {'type': 'perplexity', 'in_batch_dim': [8, 160], 'batch_size': 8, 'flops': 3164007441664}, {'type': 'perplexity', 'in_batch_dim': [8, 64], 'batch_size': 8, 'flops': 1265603017216}, {'type': 'perplexity', 'in_batch_dim': [8, 112], 'batch_size': 8, 'flops': 2214805229440}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1898404492032}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1898404492032}, {'type': 'perplexity', 'in_batch_dim': [8, 144], 'batch_size': 8, 'flops': 2847606704256}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1582003754624}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1898404492032}, {'type': 'perplexity', 'in_batch_dim': [8, 64], 'batch_size': 8, 'flops': 1265603017216}, {'type': 'perplexity', 'in_batch_dim': [8, 64], 'batch_size': 8, 'flops': 1265603017216}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1898404492032}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1898404492032}, {'type': 'perplexity', 'in_batch_dim': [8, 112], 'batch_size': 8, 'flops': 2214805229440}, {'type': 'perplexity', 'in_batch_dim': [8, 112], 'batch_size': 8, 'flops': 2214805229440}, {'type': 'perplexity', 'in_batch_dim': [8, 128], 'batch_size': 8, 'flops': 2531205966848}, {'type': 'perplexity', 'in_batch_dim': [8, 112], 'batch_size': 8, 'flops': 2214805229440}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1582003754624}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1582003754624}, {'type': 'perplexity', 'in_batch_dim': [8, 112], 'batch_size': 8, 'flops': 2214805229440}, {'type': 'perplexity', 'in_batch_dim': [8, 128], 'batch_size': 8, 'flops': 2531205966848}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1582003754624}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1582003754624}, {'type': 'perplexity', 'in_batch_dim': [8, 64], 'batch_size': 8, 'flops': 1265603017216}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1898404492032}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1582003754624}, {'type': 'perplexity', 'in_batch_dim': [8, 112], 'batch_size': 8, 'flops': 2214805229440}, {'type': 'perplexity', 'in_batch_dim': [8, 64], 'batch_size': 8, 'flops': 1265603017216}, {'type': 'perplexity', 'in_batch_dim': [8, 64], 'batch_size': 8, 'flops': 1265603017216}, {'type': 'perplexity', 'in_batch_dim': [8, 128], 'batch_size': 8, 'flops': 2531205966848}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1898404492032}, {'type': 'perplexity', 'in_batch_dim': [8, 112], 'batch_size': 8, 'flops': 2214805229440}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1582003754624}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1582003754624}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1582003754624}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1582003754624}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1582003754624}, {'type': 'perplexity', 'in_batch_dim': [8, 112], 'batch_size': 8, 'flops': 2214805229440}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1898404492032}, {'type': 'perplexity', 'in_batch_dim': [8, 112], 'batch_size': 8, 'flops': 2214805229440}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1898404492032}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1582003754624}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1582003754624}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1898404492032}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1898404492032}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1582003754624}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1582003754624}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1582003754624}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1582003754624}, {'type': 'perplexity', 'in_batch_dim': [8, 48], 'batch_size': 8, 'flops': 949202279808}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1582003754624}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1898404492032}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1582003754624}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1898404492032}, {'type': 'perplexity', 'in_batch_dim': [8, 112], 'batch_size': 8, 'flops': 2214805229440}, {'type': 'perplexity', 'in_batch_dim': [8, 128], 'batch_size': 8, 'flops': 2531205966848}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1898404492032}, {'type': 'perplexity', 'in_batch_dim': [8, 64], 'batch_size': 8, 'flops': 1265603017216}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1898404492032}, {'type': 'perplexity', 'in_batch_dim': [8, 64], 'batch_size': 8, 'flops': 1265603017216}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1582003754624}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1582003754624}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1582003754624}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1582003754624}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1898404492032}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1582003754624}, {'type': 'perplexity', 'in_batch_dim': [8, 112], 'batch_size': 8, 'flops': 2214805229440}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1898404492032}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1582003754624}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1898404492032}, {'type': 'perplexity', 'in_batch_dim': [8, 64], 'batch_size': 8, 'flops': 1265603017216}, {'type': 'perplexity', 'in_batch_dim': [8, 64], 'batch_size': 8, 'flops': 1265603017216}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1898404492032}, {'type': 'perplexity', 'in_batch_dim': [8, 64], 'batch_size': 8, 'flops': 1265603017216}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1582003754624}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1582003754624}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1582003754624}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1582003754624}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1582003754624}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1582003754624}, {'type': 'perplexity', 'in_batch_dim': [8, 64], 'batch_size': 8, 'flops': 1265603017216}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1582003754624}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1582003754624}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1898404492032}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1898404492032}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1898404492032}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1898404492032}, {'type': 'perplexity', 'in_batch_dim': [8, 128], 'batch_size': 8, 'flops': 2531205966848}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1582003754624}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1898404492032}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1898404492032}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1898404492032}, {'type': 'perplexity', 'in_batch_dim': [8, 64], 'batch_size': 8, 'flops': 1265603017216}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1898404492032}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1898404492032}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1582003754624}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1898404492032}, {'type': 'perplexity', 'in_batch_dim': [8, 112], 'batch_size': 8, 'flops': 2214805229440}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1898404492032}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1582003754624}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1898404492032}, {'type': 'perplexity', 'in_batch_dim': [8, 64], 'batch_size': 8, 'flops': 1265603017216}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1898404492032}, {'type': 'perplexity', 'in_batch_dim': [8, 112], 'batch_size': 8, 'flops': 2214805229440}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1898404492032}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1898404492032}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1582003754624}, {'type': 'perplexity', 'in_batch_dim': [8, 64], 'batch_size': 8, 'flops': 1265603017216}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1582003754624}, {'type': 'perplexity', 'in_batch_dim': [8, 128], 'batch_size': 8, 'flops': 2531205966848}, {'type': 'perplexity', 'in_batch_dim': [8, 64], 'batch_size': 8, 'flops': 1265603017216}, {'type': 'perplexity', 'in_batch_dim': [8, 128], 'batch_size': 8, 'flops': 2531205966848}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1898404492032}, {'type': 'perplexity', 'in_batch_dim': [8, 112], 'batch_size': 8, 'flops': 2214805229440}, {'type': 'perplexity', 'in_batch_dim': [8, 112], 'batch_size': 8, 'flops': 2214805229440}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1898404492032}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1898404492032}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1582003754624}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1582003754624}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1582003754624}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1898404492032}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1582003754624}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1582003754624}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1898404492032}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1582003754624}, {'type': 'perplexity', 'in_batch_dim': [8, 64], 'batch_size': 8, 'flops': 1265603017216}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1898404492032}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1898404492032}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1898404492032}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1898404492032}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1582003754624}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1898404492032}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1582003754624}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1582003754624}, {'type': 'perplexity', 'in_batch_dim': [8, 112], 'batch_size': 8, 'flops': 2214805229440}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1582003754624}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1898404492032}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1582003754624}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1582003754624}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1582003754624}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1898404492032}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1582003754624}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1898404492032}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1582003754624}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1582003754624}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1898404492032}, {'type': 'perplexity', 'in_batch_dim': [8, 112], 'batch_size': 8, 'flops': 2214805229440}, {'type': 'perplexity', 'in_batch_dim': [8, 128], 'batch_size': 8, 'flops': 2531205966848}, {'type': 'perplexity', 'in_batch_dim': [8, 112], 'batch_size': 8, 'flops': 2214805229440}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1582003754624}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1898404492032}, {'type': 'perplexity', 'in_batch_dim': [8, 64], 'batch_size': 8, 'flops': 1265603017216}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1898404492032}, {'type': 'perplexity', 'in_batch_dim': [8, 64], 'batch_size': 8, 'flops': 1265603017216}, {'type': 'perplexity', 'in_batch_dim': [8, 144], 'batch_size': 8, 'flops': 2847606704256}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1582003754624}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1898404492032}, {'type': 'perplexity', 'in_batch_dim': [8, 112], 'batch_size': 8, 'flops': 2214805229440}, {'type': 'perplexity', 'in_batch_dim': [8, 144], 'batch_size': 8, 'flops': 2847606704256}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1582003754624}, {'type': 'perplexity', 'in_batch_dim': [8, 112], 'batch_size': 8, 'flops': 2214805229440}, {'type': 'perplexity', 'in_batch_dim': [8, 64], 'batch_size': 8, 'flops': 1265603017216}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1582003754624}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1898404492032}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1898404492032}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1582003754624}, {'type': 'perplexity', 'in_batch_dim': [8, 112], 'batch_size': 8, 'flops': 2214805229440}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1582003754624}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1582003754624}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1582003754624}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1582003754624}, {'type': 'perplexity', 'in_batch_dim': [8, 112], 'batch_size': 8, 'flops': 2214805229440}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1898404492032}, {'type': 'perplexity', 'in_batch_dim': [8, 128], 'batch_size': 8, 'flops': 2531205966848}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1582003754624}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1582003754624}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1898404492032}, {'type': 'perplexity', 'in_batch_dim': [8, 64], 'batch_size': 8, 'flops': 1265603017216}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1898404492032}, {'type': 'perplexity', 'in_batch_dim': [8, 64], 'batch_size': 8, 'flops': 1265603017216}, {'type': 'perplexity', 'in_batch_dim': [8, 112], 'batch_size': 8, 'flops': 2214805229440}, {'type': 'perplexity', 'in_batch_dim': [8, 112], 'batch_size': 8, 'flops': 2214805229440}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1582003754624}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1898404492032}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1582003754624}, {'type': 'perplexity', 'in_batch_dim': [8, 64], 'batch_size': 8, 'flops': 1265603017216}, {'type': 'perplexity', 'in_batch_dim': [8, 64], 'batch_size': 8, 'flops': 1265603017216}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1898404492032}, {'type': 'perplexity', 'in_batch_dim': [8, 64], 'batch_size': 8, 'flops': 1265603017216}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1898404492032}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1898404492032}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1582003754624}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1898404492032}, {'type': 'perplexity', 'in_batch_dim': [8, 144], 'batch_size': 8, 'flops': 2847606704256}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1898404492032}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1582003754624}, {'type': 'perplexity', 'in_batch_dim': [8, 128], 'batch_size': 8, 'flops': 2531205966848}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1898404492032}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1898404492032}, {'type': 'perplexity', 'in_batch_dim': [8, 112], 'batch_size': 8, 'flops': 2214805229440}, {'type': 'perplexity', 'in_batch_dim': [8, 112], 'batch_size': 8, 'flops': 2214805229440}, {'type': 'perplexity', 'in_batch_dim': [8, 64], 'batch_size': 8, 'flops': 1265603017216}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1582003754624}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1898404492032}, {'type': 'perplexity', 'in_batch_dim': [8, 128], 'batch_size': 8, 'flops': 2531205966848}, {'type': 'perplexity', 'in_batch_dim': [8, 144], 'batch_size': 8, 'flops': 2847606704256}, {'type': 'perplexity', 'in_batch_dim': [8, 112], 'batch_size': 8, 'flops': 2214805229440}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1582003754624}, {'type': 'perplexity', 'in_batch_dim': [8, 64], 'batch_size': 8, 'flops': 1265603017216}, {'type': 'perplexity', 'in_batch_dim': [8, 64], 'batch_size': 8, 'flops': 1265603017216}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1898404492032}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1582003754624}, {'type': 'perplexity', 'in_batch_dim': [8, 64], 'batch_size': 8, 'flops': 1265603017216}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1898404492032}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1582003754624}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1898404492032}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1898404492032}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1582003754624}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1582003754624}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1582003754624}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1582003754624}, {'type': 'perplexity', 'in_batch_dim': [3, 48], 'batch_size': 8, 'flops': 949202279808}], 'timestamp': '2025-09-10 02:36:56.527147', 'step': 9320, 'epoch': 2} {'type': 'pplx', 'content': 11511.215755875703, 'timestamp': '2025-09-10 02:36:56.529997', 'step': 9320, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 128], 'flops': 3797092544000}, 'timestamp': '2025-09-10 02:36:56.558969', 'step': 9320, 'epoch': 2} {'type': 'loss', 'content': 0.09601574391126633, 'timestamp': '2025-09-10 02:36:56.561256', 'step': 9321, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 3322488817984}, 'timestamp': '2025-09-10 02:36:56.590805', 'step': 9321, 'epoch': 2} {'type': 'loss', 'content': 0.11497072130441666, 'timestamp': '2025-09-10 02:36:56.593325', 'step': 9322, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 144], 'flops': 4271696270016}, 'timestamp': '2025-09-10 02:36:56.624442', 'step': 9322, 'epoch': 2} {'type': 'loss', 'content': 0.12779900431632996, 'timestamp': '2025-09-10 02:36:56.626696', 'step': 9323, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 3322488817984}, 'timestamp': '2025-09-10 02:36:56.656334', 'step': 9323, 'epoch': 2} {'type': 'loss', 'content': 0.20289145410060883, 'timestamp': '2025-09-10 02:36:56.681045', 'step': 9324, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 2847885091968}, 'timestamp': '2025-09-10 02:36:56.712518', 'step': 9324, 'epoch': 2} {'type': 'loss', 'content': 0.11634732782840729, 'timestamp': '2025-09-10 02:36:56.714925', 'step': 9325, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 3322488817984}, 'timestamp': '2025-09-10 02:36:56.746387', 'step': 9325, 'epoch': 2} {'type': 'loss', 'content': 0.09603086858987808, 'timestamp': '2025-09-10 02:36:56.750098', 'step': 9326, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 2847885091968}, 'timestamp': '2025-09-10 02:36:56.779695', 'step': 9326, 'epoch': 2} {'type': 'loss', 'content': 0.07173942029476166, 'timestamp': '2025-09-10 02:36:56.849564', 'step': 9327, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 3322488817984}, 'timestamp': '2025-09-10 02:36:56.947526', 'step': 9327, 'epoch': 2} {'type': 'loss', 'content': 0.17628620564937592, 'timestamp': '2025-09-10 02:36:56.971724', 'step': 9328, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 2847885091968}, 'timestamp': '2025-09-10 02:36:57.009099', 'step': 9328, 'epoch': 2} {'type': 'loss', 'content': 0.10689153522253036, 'timestamp': '2025-09-10 02:36:57.011422', 'step': 9329, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 2847885091968}, 'timestamp': '2025-09-10 02:36:57.041006', 'step': 9329, 'epoch': 2} {'type': 'loss', 'content': 0.10065140575170517, 'timestamp': '2025-09-10 02:36:57.044546', 'step': 9330, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 2847885091968}, 'timestamp': '2025-09-10 02:36:57.075199', 'step': 9330, 'epoch': 2} {'type': 'loss', 'content': 0.14247384667396545, 'timestamp': '2025-09-10 02:36:57.077179', 'step': 9331, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 2847885091968}, 'timestamp': '2025-09-10 02:36:57.106571', 'step': 9331, 'epoch': 2} {'type': 'loss', 'content': 0.12345711886882782, 'timestamp': '2025-09-10 02:36:57.130303', 'step': 9332, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 3322488817984}, 'timestamp': '2025-09-10 02:36:57.160499', 'step': 9332, 'epoch': 2} {'type': 'loss', 'content': 0.12671725451946259, 'timestamp': '2025-09-10 02:36:57.162653', 'step': 9333, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 128], 'flops': 3797092544000}, 'timestamp': '2025-09-10 02:36:57.194610', 'step': 9333, 'epoch': 2} {'type': 'loss', 'content': 0.05808534473180771, 'timestamp': '2025-09-10 02:36:57.196943', 'step': 9334, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 3322488817984}, 'timestamp': '2025-09-10 02:36:57.226653', 'step': 9334, 'epoch': 2} {'type': 'loss', 'content': 0.08563876152038574, 'timestamp': '2025-09-10 02:36:57.228904', 'step': 9335, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 2847885091968}, 'timestamp': '2025-09-10 02:36:57.259208', 'step': 9335, 'epoch': 2} {'type': 'loss', 'content': 0.06544682383537292, 'timestamp': '2025-09-10 02:36:57.282754', 'step': 9336, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 144], 'flops': 4271696270016}, 'timestamp': '2025-09-10 02:36:57.313268', 'step': 9336, 'epoch': 2} {'type': 'loss', 'content': 0.12198340147733688, 'timestamp': '2025-09-10 02:36:57.315595', 'step': 9337, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 2847885091968}, 'timestamp': '2025-09-10 02:36:57.346812', 'step': 9337, 'epoch': 2} {'type': 'loss', 'content': 0.06800787150859833, 'timestamp': '2025-09-10 02:36:57.349505', 'step': 9338, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 2847885091968}, 'timestamp': '2025-09-10 02:36:57.380015', 'step': 9338, 'epoch': 2} {'type': 'loss', 'content': 0.0727231353521347, 'timestamp': '2025-09-10 02:36:57.382409', 'step': 9339, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 2847885091968}, 'timestamp': '2025-09-10 02:36:57.413696', 'step': 9339, 'epoch': 2} {'type': 'loss', 'content': 0.06993895024061203, 'timestamp': '2025-09-10 02:36:57.437030', 'step': 9340, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 3322488817984}, 'timestamp': '2025-09-10 02:36:57.467232', 'step': 9340, 'epoch': 2} {'type': 'loss', 'content': 0.10717135667800903, 'timestamp': '2025-09-10 02:36:57.469255', 'step': 9341, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 3322488817984}, 'timestamp': '2025-09-10 02:36:57.498054', 'step': 9341, 'epoch': 2} {'type': 'loss', 'content': 0.04770222306251526, 'timestamp': '2025-09-10 02:36:57.500963', 'step': 9342, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 3322488817984}, 'timestamp': '2025-09-10 02:36:57.531054', 'step': 9342, 'epoch': 2} {'type': 'loss', 'content': 0.12193059921264648, 'timestamp': '2025-09-10 02:36:57.533423', 'step': 9343, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 3322488817984}, 'timestamp': '2025-09-10 02:36:57.563025', 'step': 9343, 'epoch': 2} {'type': 'loss', 'content': 0.07365099340677261, 'timestamp': '2025-09-10 02:36:57.586796', 'step': 9344, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 3322488817984}, 'timestamp': '2025-09-10 02:36:57.618260', 'step': 9344, 'epoch': 2} {'type': 'loss', 'content': 0.09255509823560715, 'timestamp': '2025-09-10 02:36:57.620320', 'step': 9345, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 2847885091968}, 'timestamp': '2025-09-10 02:36:57.650341', 'step': 9345, 'epoch': 2} {'type': 'loss', 'content': 0.06698647141456604, 'timestamp': '2025-09-10 02:36:57.652563', 'step': 9346, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 144], 'flops': 4271696270016}, 'timestamp': '2025-09-10 02:36:57.682481', 'step': 9346, 'epoch': 2} {'type': 'loss', 'content': 0.15437200665473938, 'timestamp': '2025-09-10 02:36:57.684797', 'step': 9347, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 144], 'flops': 4271696270016}, 'timestamp': '2025-09-10 02:36:57.715134', 'step': 9347, 'epoch': 2} {'type': 'loss', 'content': 0.12539103627204895, 'timestamp': '2025-09-10 02:36:57.738611', 'step': 9348, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 128], 'flops': 3797092544000}, 'timestamp': '2025-09-10 02:36:57.769360', 'step': 9348, 'epoch': 2} {'type': 'loss', 'content': 0.06500361114740372, 'timestamp': '2025-09-10 02:36:57.771576', 'step': 9349, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 3322488817984}, 'timestamp': '2025-09-10 02:36:57.802485', 'step': 9349, 'epoch': 2} {'type': 'loss', 'content': 0.1428026556968689, 'timestamp': '2025-09-10 02:36:57.804966', 'step': 9350, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 160], 'flops': 4746299996032}, 'timestamp': '2025-09-10 02:36:57.835313', 'step': 9350, 'epoch': 2} {'type': 'loss', 'content': 0.08945668488740921, 'timestamp': '2025-09-10 02:36:57.837979', 'step': 9351, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 3322488817984}, 'timestamp': '2025-09-10 02:36:57.867350', 'step': 9351, 'epoch': 2} {'type': 'loss', 'content': 0.08400420099496841, 'timestamp': '2025-09-10 02:36:57.890763', 'step': 9352, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 3322488817984}, 'timestamp': '2025-09-10 02:36:57.921599', 'step': 9352, 'epoch': 2} {'type': 'loss', 'content': 0.06456322968006134, 'timestamp': '2025-09-10 02:36:57.923925', 'step': 9353, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 128], 'flops': 3797092544000}, 'timestamp': '2025-09-10 02:36:57.953778', 'step': 9353, 'epoch': 2} {'type': 'loss', 'content': 0.09991082549095154, 'timestamp': '2025-09-10 02:36:57.955969', 'step': 9354, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 144], 'flops': 4271696270016}, 'timestamp': '2025-09-10 02:36:57.985864', 'step': 9354, 'epoch': 2} {'type': 'loss', 'content': 0.0943845584988594, 'timestamp': '2025-09-10 02:36:57.988376', 'step': 9355, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 2847885091968}, 'timestamp': '2025-09-10 02:36:58.018505', 'step': 9355, 'epoch': 2} {'type': 'loss', 'content': 0.13562697172164917, 'timestamp': '2025-09-10 02:36:58.042010', 'step': 9356, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 128], 'flops': 3797092544000}, 'timestamp': '2025-09-10 02:36:58.072526', 'step': 9356, 'epoch': 2} {'type': 'loss', 'content': 0.05185786634683609, 'timestamp': '2025-09-10 02:36:58.074950', 'step': 9357, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 128], 'flops': 3797092544000}, 'timestamp': '2025-09-10 02:36:58.104733', 'step': 9357, 'epoch': 2} {'type': 'loss', 'content': 0.06239280477166176, 'timestamp': '2025-09-10 02:36:58.107355', 'step': 9358, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 3322488817984}, 'timestamp': '2025-09-10 02:36:58.138818', 'step': 9358, 'epoch': 2} {'type': 'loss', 'content': 0.10654540359973907, 'timestamp': '2025-09-10 02:36:58.140991', 'step': 9359, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 128], 'flops': 3797092544000}, 'timestamp': '2025-09-10 02:36:58.170674', 'step': 9359, 'epoch': 2} {'type': 'loss', 'content': 0.10368245840072632, 'timestamp': '2025-09-10 02:36:58.194454', 'step': 9360, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 2847885091968}, 'timestamp': '2025-09-10 02:36:58.224581', 'step': 9360, 'epoch': 2} {'type': 'loss', 'content': 0.16357897222042084, 'timestamp': '2025-09-10 02:36:58.226894', 'step': 9361, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 2847885091968}, 'timestamp': '2025-09-10 02:36:58.256120', 'step': 9361, 'epoch': 2} {'type': 'loss', 'content': 0.18934334814548492, 'timestamp': '2025-09-10 02:36:58.258321', 'step': 9362, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 3322488817984}, 'timestamp': '2025-09-10 02:36:58.287946', 'step': 9362, 'epoch': 2} {'type': 'loss', 'content': 0.21651612222194672, 'timestamp': '2025-09-10 02:36:58.290246', 'step': 9363, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 128], 'flops': 3797092544000}, 'timestamp': '2025-09-10 02:36:58.319754', 'step': 9363, 'epoch': 2} {'type': 'loss', 'content': 0.20172414183616638, 'timestamp': '2025-09-10 02:36:58.343172', 'step': 9364, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 2847885091968}, 'timestamp': '2025-09-10 02:36:58.379398', 'step': 9364, 'epoch': 2} {'type': 'loss', 'content': 0.09250685572624207, 'timestamp': '2025-09-10 02:36:58.381843', 'step': 9365, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 2847885091968}, 'timestamp': '2025-09-10 02:36:58.411414', 'step': 9365, 'epoch': 2} {'type': 'loss', 'content': 0.18786409497261047, 'timestamp': '2025-09-10 02:36:58.413858', 'step': 9366, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 3322488817984}, 'timestamp': '2025-09-10 02:36:58.444558', 'step': 9366, 'epoch': 2} {'type': 'loss', 'content': 0.15723015367984772, 'timestamp': '2025-09-10 02:36:58.447028', 'step': 9367, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 3322488817984}, 'timestamp': '2025-09-10 02:36:58.476013', 'step': 9367, 'epoch': 2} {'type': 'loss', 'content': 0.13927319645881653, 'timestamp': '2025-09-10 02:36:58.499542', 'step': 9368, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 3322488817984}, 'timestamp': '2025-09-10 02:36:58.529918', 'step': 9368, 'epoch': 2} {'type': 'loss', 'content': 0.16977600753307343, 'timestamp': '2025-09-10 02:36:58.532517', 'step': 9369, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 144], 'flops': 4271696270016}, 'timestamp': '2025-09-10 02:36:58.563077', 'step': 9369, 'epoch': 2} {'type': 'loss', 'content': 0.1304820477962494, 'timestamp': '2025-09-10 02:36:58.565436', 'step': 9370, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 2847885091968}, 'timestamp': '2025-09-10 02:36:58.595263', 'step': 9370, 'epoch': 2} {'type': 'loss', 'content': 0.09162324666976929, 'timestamp': '2025-09-10 02:36:58.597824', 'step': 9371, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 2847885091968}, 'timestamp': '2025-09-10 02:36:58.628283', 'step': 9371, 'epoch': 2} {'type': 'loss', 'content': 0.11621560156345367, 'timestamp': '2025-09-10 02:36:58.651765', 'step': 9372, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 2847885091968}, 'timestamp': '2025-09-10 02:36:58.681665', 'step': 9372, 'epoch': 2} {'type': 'loss', 'content': 0.0826602503657341, 'timestamp': '2025-09-10 02:36:58.683718', 'step': 9373, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 2847885091968}, 'timestamp': '2025-09-10 02:36:58.713337', 'step': 9373, 'epoch': 2} {'type': 'loss', 'content': 0.1420012265443802, 'timestamp': '2025-09-10 02:36:58.715746', 'step': 9374, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 80], 'flops': 2373281365952}, 'timestamp': '2025-09-10 02:36:58.745538', 'step': 9374, 'epoch': 2} {'type': 'loss', 'content': 0.1607254594564438, 'timestamp': '2025-09-10 02:36:58.748528', 'step': 9375, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 144], 'flops': 4271696270016}, 'timestamp': '2025-09-10 02:36:58.778590', 'step': 9375, 'epoch': 2} {'type': 'loss', 'content': 0.12955330312252045, 'timestamp': '2025-09-10 02:36:58.802081', 'step': 9376, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 3322488817984}, 'timestamp': '2025-09-10 02:36:58.832169', 'step': 9376, 'epoch': 2} {'type': 'loss', 'content': 0.12311487644910812, 'timestamp': '2025-09-10 02:36:58.834456', 'step': 9377, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 144], 'flops': 4271696270016}, 'timestamp': '2025-09-10 02:36:58.864312', 'step': 9377, 'epoch': 2} {'type': 'loss', 'content': 0.15354642271995544, 'timestamp': '2025-09-10 02:36:58.866800', 'step': 9378, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 3322488817984}, 'timestamp': '2025-09-10 02:36:58.897228', 'step': 9378, 'epoch': 2} {'type': 'loss', 'content': 0.11179067194461823, 'timestamp': '2025-09-10 02:36:58.899422', 'step': 9379, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 2847885091968}, 'timestamp': '2025-09-10 02:36:58.929094', 'step': 9379, 'epoch': 2} {'type': 'loss', 'content': 0.08127658069133759, 'timestamp': '2025-09-10 02:36:58.952378', 'step': 9380, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 2847885091968}, 'timestamp': '2025-09-10 02:36:58.982521', 'step': 9380, 'epoch': 2} {'type': 'loss', 'content': 0.15408995747566223, 'timestamp': '2025-09-10 02:36:58.984706', 'step': 9381, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 2847885091968}, 'timestamp': '2025-09-10 02:36:59.015109', 'step': 9381, 'epoch': 2} {'type': 'loss', 'content': 0.09054604172706604, 'timestamp': '2025-09-10 02:36:59.017025', 'step': 9382, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 2847885091968}, 'timestamp': '2025-09-10 02:36:59.047131', 'step': 9382, 'epoch': 2} {'type': 'loss', 'content': 0.08153887838125229, 'timestamp': '2025-09-10 02:36:59.050749', 'step': 9383, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 128], 'flops': 3797092544000}, 'timestamp': '2025-09-10 02:36:59.080634', 'step': 9383, 'epoch': 2} {'type': 'loss', 'content': 0.1744406521320343, 'timestamp': '2025-09-10 02:36:59.104431', 'step': 9384, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 3322488817984}, 'timestamp': '2025-09-10 02:36:59.134954', 'step': 9384, 'epoch': 2} {'type': 'loss', 'content': 0.15025314688682556, 'timestamp': '2025-09-10 02:36:59.137528', 'step': 9385, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 144], 'flops': 4271696270016}, 'timestamp': '2025-09-10 02:36:59.168817', 'step': 9385, 'epoch': 2} {'type': 'loss', 'content': 0.19806458055973053, 'timestamp': '2025-09-10 02:36:59.171286', 'step': 9386, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 160], 'flops': 4746299996032}, 'timestamp': '2025-09-10 02:36:59.201046', 'step': 9386, 'epoch': 2} {'type': 'loss', 'content': 0.055334772914648056, 'timestamp': '2025-09-10 02:36:59.203824', 'step': 9387, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 3322488817984}, 'timestamp': '2025-09-10 02:36:59.233659', 'step': 9387, 'epoch': 2} {'type': 'loss', 'content': 0.17110316455364227, 'timestamp': '2025-09-10 02:36:59.257914', 'step': 9388, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 3322488817984}, 'timestamp': '2025-09-10 02:36:59.287597', 'step': 9388, 'epoch': 2} {'type': 'loss', 'content': 0.13100498914718628, 'timestamp': '2025-09-10 02:36:59.289946', 'step': 9389, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 3322488817984}, 'timestamp': '2025-09-10 02:36:59.320854', 'step': 9389, 'epoch': 2} {'type': 'loss', 'content': 0.13016952574253082, 'timestamp': '2025-09-10 02:36:59.323272', 'step': 9390, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 80], 'flops': 2373281365952}, 'timestamp': '2025-09-10 02:36:59.354592', 'step': 9390, 'epoch': 2} {'type': 'loss', 'content': 0.1584382951259613, 'timestamp': '2025-09-10 02:36:59.356785', 'step': 9391, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 3322488817984}, 'timestamp': '2025-09-10 02:36:59.387145', 'step': 9391, 'epoch': 2} {'type': 'loss', 'content': 0.11605724692344666, 'timestamp': '2025-09-10 02:36:59.410813', 'step': 9392, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 3322488817984}, 'timestamp': '2025-09-10 02:36:59.441397', 'step': 9392, 'epoch': 2} {'type': 'loss', 'content': 0.16100266575813293, 'timestamp': '2025-09-10 02:36:59.443787', 'step': 9393, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 128], 'flops': 3797092544000}, 'timestamp': '2025-09-10 02:36:59.474195', 'step': 9393, 'epoch': 2} {'type': 'loss', 'content': 0.10330457240343094, 'timestamp': '2025-09-10 02:36:59.476705', 'step': 9394, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 2847885091968}, 'timestamp': '2025-09-10 02:36:59.506481', 'step': 9394, 'epoch': 2} {'type': 'loss', 'content': 0.10387246310710907, 'timestamp': '2025-09-10 02:36:59.508664', 'step': 9395, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 128], 'flops': 3797092544000}, 'timestamp': '2025-09-10 02:36:59.538774', 'step': 9395, 'epoch': 2} {'type': 'loss', 'content': 0.1594475507736206, 'timestamp': '2025-09-10 02:36:59.563967', 'step': 9396, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 144], 'flops': 4271696270016}, 'timestamp': '2025-09-10 02:36:59.593887', 'step': 9396, 'epoch': 2} {'type': 'loss', 'content': 0.08244030177593231, 'timestamp': '2025-09-10 02:36:59.596298', 'step': 9397, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 128], 'flops': 3797092544000}, 'timestamp': '2025-09-10 02:36:59.626267', 'step': 9397, 'epoch': 2} {'type': 'loss', 'content': 0.16292141377925873, 'timestamp': '2025-09-10 02:36:59.628941', 'step': 9398, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 128], 'flops': 3797092544000}, 'timestamp': '2025-09-10 02:36:59.659677', 'step': 9398, 'epoch': 2} {'type': 'loss', 'content': 0.1636807769536972, 'timestamp': '2025-09-10 02:36:59.661801', 'step': 9399, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 3322488817984}, 'timestamp': '2025-09-10 02:36:59.691185', 'step': 9399, 'epoch': 2} {'type': 'loss', 'content': 0.14855563640594482, 'timestamp': '2025-09-10 02:36:59.714916', 'step': 9400, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 3322488817984}, 'timestamp': '2025-09-10 02:36:59.745175', 'step': 9400, 'epoch': 2} {'type': 'loss', 'content': 0.18662887811660767, 'timestamp': '2025-09-10 02:36:59.747582', 'step': 9401, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 2847885091968}, 'timestamp': '2025-09-10 02:36:59.778729', 'step': 9401, 'epoch': 2} {'type': 'loss', 'content': 0.13392382860183716, 'timestamp': '2025-09-10 02:36:59.781282', 'step': 9402, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 3322488817984}, 'timestamp': '2025-09-10 02:36:59.811421', 'step': 9402, 'epoch': 2} {'type': 'loss', 'content': 0.10204528272151947, 'timestamp': '2025-09-10 02:36:59.813766', 'step': 9403, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 128], 'flops': 3797092544000}, 'timestamp': '2025-09-10 02:36:59.843531', 'step': 9403, 'epoch': 2} {'type': 'loss', 'content': 0.18624795973300934, 'timestamp': '2025-09-10 02:36:59.867196', 'step': 9404, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 128], 'flops': 3797092544000}, 'timestamp': '2025-09-10 02:36:59.898188', 'step': 9404, 'epoch': 2} {'type': 'loss', 'content': 0.19189958274364471, 'timestamp': '2025-09-10 02:36:59.901272', 'step': 9405, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 3322488817984}, 'timestamp': '2025-09-10 02:36:59.932622', 'step': 9405, 'epoch': 2} {'type': 'loss', 'content': 0.11154269427061081, 'timestamp': '2025-09-10 02:36:59.935490', 'step': 9406, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 128], 'flops': 3797092544000}, 'timestamp': '2025-09-10 02:36:59.969269', 'step': 9406, 'epoch': 2} {'type': 'loss', 'content': 0.13296319544315338, 'timestamp': '2025-09-10 02:36:59.971657', 'step': 9407, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 3322488817984}, 'timestamp': '2025-09-10 02:37:00.002024', 'step': 9407, 'epoch': 2} {'type': 'loss', 'content': 0.1284380406141281, 'timestamp': '2025-09-10 02:37:00.025838', 'step': 9408, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 2847885091968}, 'timestamp': '2025-09-10 02:37:00.069354', 'step': 9408, 'epoch': 2} {'type': 'loss', 'content': 0.2494145780801773, 'timestamp': '2025-09-10 02:37:00.071311', 'step': 9409, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 2847885091968}, 'timestamp': '2025-09-10 02:37:00.100573', 'step': 9409, 'epoch': 2} {'type': 'loss', 'content': 0.1245153397321701, 'timestamp': '2025-09-10 02:37:00.102828', 'step': 9410, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 128], 'flops': 3797092544000}, 'timestamp': '2025-09-10 02:37:00.132981', 'step': 9410, 'epoch': 2} {'type': 'loss', 'content': 0.24019686877727509, 'timestamp': '2025-09-10 02:37:00.135236', 'step': 9411, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 2847885091968}, 'timestamp': '2025-09-10 02:37:00.166892', 'step': 9411, 'epoch': 2} {'type': 'loss', 'content': 0.08151514828205109, 'timestamp': '2025-09-10 02:37:00.190485', 'step': 9412, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 160], 'flops': 4746299996032}, 'timestamp': '2025-09-10 02:37:00.223593', 'step': 9412, 'epoch': 2} {'type': 'loss', 'content': 0.12097876518964767, 'timestamp': '2025-09-10 02:37:00.225916', 'step': 9413, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 2847885091968}, 'timestamp': '2025-09-10 02:37:00.256025', 'step': 9413, 'epoch': 2} {'type': 'loss', 'content': 0.105180524289608, 'timestamp': '2025-09-10 02:37:00.259758', 'step': 9414, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 128], 'flops': 3797092544000}, 'timestamp': '2025-09-10 02:37:00.291452', 'step': 9414, 'epoch': 2} {'type': 'loss', 'content': 0.1068434789776802, 'timestamp': '2025-09-10 02:37:00.293906', 'step': 9415, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 3322488817984}, 'timestamp': '2025-09-10 02:37:00.325026', 'step': 9415, 'epoch': 2} {'type': 'loss', 'content': 0.09720095247030258, 'timestamp': '2025-09-10 02:37:00.348525', 'step': 9416, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 2847885091968}, 'timestamp': '2025-09-10 02:37:00.384330', 'step': 9416, 'epoch': 2} {'type': 'loss', 'content': 0.13267846405506134, 'timestamp': '2025-09-10 02:37:00.388766', 'step': 9417, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 2847885091968}, 'timestamp': '2025-09-10 02:37:00.420771', 'step': 9417, 'epoch': 2} {'type': 'loss', 'content': 0.0883428230881691, 'timestamp': '2025-09-10 02:37:00.422757', 'step': 9418, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 3322488817984}, 'timestamp': '2025-09-10 02:37:00.452147', 'step': 9418, 'epoch': 2} {'type': 'loss', 'content': 0.12423137575387955, 'timestamp': '2025-09-10 02:37:00.454915', 'step': 9419, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 3322488817984}, 'timestamp': '2025-09-10 02:37:00.484223', 'step': 9419, 'epoch': 2} {'type': 'loss', 'content': 0.11391724646091461, 'timestamp': '2025-09-10 02:37:00.507461', 'step': 9420, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 2847885091968}, 'timestamp': '2025-09-10 02:37:00.539330', 'step': 9420, 'epoch': 2} {'type': 'loss', 'content': 0.09984122961759567, 'timestamp': '2025-09-10 02:37:00.541277', 'step': 9421, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 144], 'flops': 4271696270016}, 'timestamp': '2025-09-10 02:37:00.575911', 'step': 9421, 'epoch': 2} {'type': 'loss', 'content': 0.10200054943561554, 'timestamp': '2025-09-10 02:37:00.578126', 'step': 9422, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 2847885091968}, 'timestamp': '2025-09-10 02:37:00.607042', 'step': 9422, 'epoch': 2} {'type': 'loss', 'content': 0.07897139340639114, 'timestamp': '2025-09-10 02:37:00.609071', 'step': 9423, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 3322488817984}, 'timestamp': '2025-09-10 02:37:00.641915', 'step': 9423, 'epoch': 2} {'type': 'loss', 'content': 0.08288604766130447, 'timestamp': '2025-09-10 02:37:00.665515', 'step': 9424, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 128], 'flops': 3797092544000}, 'timestamp': '2025-09-10 02:37:00.696072', 'step': 9424, 'epoch': 2} {'type': 'loss', 'content': 0.07008063048124313, 'timestamp': '2025-09-10 02:37:00.700558', 'step': 9425, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 128], 'flops': 3797092544000}, 'timestamp': '2025-09-10 02:37:00.730362', 'step': 9425, 'epoch': 2} {'type': 'loss', 'content': 0.1028839647769928, 'timestamp': '2025-09-10 02:37:00.736296', 'step': 9426, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 128], 'flops': 3797092544000}, 'timestamp': '2025-09-10 02:37:00.768025', 'step': 9426, 'epoch': 2} {'type': 'loss', 'content': 0.22361250221729279, 'timestamp': '2025-09-10 02:37:00.770530', 'step': 9427, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 128], 'flops': 3797092544000}, 'timestamp': '2025-09-10 02:37:00.803708', 'step': 9427, 'epoch': 2} {'type': 'loss', 'content': 0.11196034401655197, 'timestamp': '2025-09-10 02:37:00.827179', 'step': 9428, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 160], 'flops': 4746299996032}, 'timestamp': '2025-09-10 02:37:00.864219', 'step': 9428, 'epoch': 2} {'type': 'loss', 'content': 0.08097543567419052, 'timestamp': '2025-09-10 02:37:00.866128', 'step': 9429, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 2847885091968}, 'timestamp': '2025-09-10 02:37:00.896129', 'step': 9429, 'epoch': 2} {'type': 'loss', 'content': 0.12301618605852127, 'timestamp': '2025-09-10 02:37:00.897977', 'step': 9430, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 2847885091968}, 'timestamp': '2025-09-10 02:37:00.927021', 'step': 9430, 'epoch': 2} {'type': 'loss', 'content': 0.13382330536842346, 'timestamp': '2025-09-10 02:37:00.929141', 'step': 9431, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 3322488817984}, 'timestamp': '2025-09-10 02:37:00.959197', 'step': 9431, 'epoch': 2} {'type': 'loss', 'content': 0.08841197192668915, 'timestamp': '2025-09-10 02:37:00.982962', 'step': 9432, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 3322488817984}, 'timestamp': '2025-09-10 02:37:01.018515', 'step': 9432, 'epoch': 2} {'type': 'loss', 'content': 0.0939580425620079, 'timestamp': '2025-09-10 02:37:01.020541', 'step': 9433, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 2847885091968}, 'timestamp': '2025-09-10 02:37:01.049714', 'step': 9433, 'epoch': 2} {'type': 'loss', 'content': 0.11802049726247787, 'timestamp': '2025-09-10 02:37:01.053155', 'step': 9434, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 128], 'flops': 3797092544000}, 'timestamp': '2025-09-10 02:37:01.085519', 'step': 9434, 'epoch': 2} {'type': 'loss', 'content': 0.09111183881759644, 'timestamp': '2025-09-10 02:37:01.087223', 'step': 9435, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 80], 'flops': 2373281365952}, 'timestamp': '2025-09-10 02:37:01.117414', 'step': 9435, 'epoch': 2} {'type': 'loss', 'content': 0.12180065363645554, 'timestamp': '2025-09-10 02:37:01.144684', 'step': 9436, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 128], 'flops': 3797092544000}, 'timestamp': '2025-09-10 02:37:01.191933', 'step': 9436, 'epoch': 2} {'type': 'loss', 'content': 0.10659556835889816, 'timestamp': '2025-09-10 02:37:01.198839', 'step': 9437, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 3322488817984}, 'timestamp': '2025-09-10 02:37:01.235969', 'step': 9437, 'epoch': 2} {'type': 'loss', 'content': 0.1087951809167862, 'timestamp': '2025-09-10 02:37:01.242149', 'step': 9438, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 2847885091968}, 'timestamp': '2025-09-10 02:37:01.273224', 'step': 9438, 'epoch': 2} {'type': 'loss', 'content': 0.169151172041893, 'timestamp': '2025-09-10 02:37:01.275913', 'step': 9439, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 3322488817984}, 'timestamp': '2025-09-10 02:37:01.308157', 'step': 9439, 'epoch': 2} {'type': 'loss', 'content': 0.1046789214015007, 'timestamp': '2025-09-10 02:37:01.332087', 'step': 9440, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 3322488817984}, 'timestamp': '2025-09-10 02:37:01.375452', 'step': 9440, 'epoch': 2} {'type': 'loss', 'content': 0.10677754133939743, 'timestamp': '2025-09-10 02:37:01.376961', 'step': 9441, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 144], 'flops': 4271696270016}, 'timestamp': '2025-09-10 02:37:01.407456', 'step': 9441, 'epoch': 2} {'type': 'loss', 'content': 0.1881067156791687, 'timestamp': '2025-09-10 02:37:01.409696', 'step': 9442, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 2847885091968}, 'timestamp': '2025-09-10 02:37:01.439595', 'step': 9442, 'epoch': 2} {'type': 'loss', 'content': 0.11392559111118317, 'timestamp': '2025-09-10 02:37:01.442684', 'step': 9443, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 128], 'flops': 3797092544000}, 'timestamp': '2025-09-10 02:37:01.472259', 'step': 9443, 'epoch': 2} {'type': 'loss', 'content': 0.12713344395160675, 'timestamp': '2025-09-10 02:37:01.495944', 'step': 9444, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 3322488817984}, 'timestamp': '2025-09-10 02:37:01.525339', 'step': 9444, 'epoch': 2} {'type': 'loss', 'content': 0.06884092837572098, 'timestamp': '2025-09-10 02:37:01.527312', 'step': 9445, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 128], 'flops': 3797092544000}, 'timestamp': '2025-09-10 02:37:01.558967', 'step': 9445, 'epoch': 2} {'type': 'loss', 'content': 0.08010996133089066, 'timestamp': '2025-09-10 02:37:01.561521', 'step': 9446, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 3322488817984}, 'timestamp': '2025-09-10 02:37:01.591432', 'step': 9446, 'epoch': 2} {'type': 'loss', 'content': 0.09897594898939133, 'timestamp': '2025-09-10 02:37:01.593147', 'step': 9447, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 3322488817984}, 'timestamp': '2025-09-10 02:37:01.622756', 'step': 9447, 'epoch': 2} {'type': 'loss', 'content': 0.10479758679866791, 'timestamp': '2025-09-10 02:37:01.646344', 'step': 9448, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 2847885091968}, 'timestamp': '2025-09-10 02:37:01.679174', 'step': 9448, 'epoch': 2} {'type': 'loss', 'content': 0.17420649528503418, 'timestamp': '2025-09-10 02:37:01.684196', 'step': 9449, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 128], 'flops': 3797092544000}, 'timestamp': '2025-09-10 02:37:01.720427', 'step': 9449, 'epoch': 2} {'type': 'loss', 'content': 0.058199524879455566, 'timestamp': '2025-09-10 02:37:01.722609', 'step': 9450, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 160], 'flops': 4746299996032}, 'timestamp': '2025-09-10 02:37:01.753375', 'step': 9450, 'epoch': 2} {'type': 'loss', 'content': 0.11441006511449814, 'timestamp': '2025-09-10 02:37:01.759654', 'step': 9451, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 3322488817984}, 'timestamp': '2025-09-10 02:37:01.789755', 'step': 9451, 'epoch': 2} {'type': 'loss', 'content': 0.08905420452356339, 'timestamp': '2025-09-10 02:37:01.813483', 'step': 9452, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 3322488817984}, 'timestamp': '2025-09-10 02:37:01.844691', 'step': 9452, 'epoch': 2} {'type': 'loss', 'content': 0.0750933587551117, 'timestamp': '2025-09-10 02:37:01.846725', 'step': 9453, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 128], 'flops': 3797092544000}, 'timestamp': '2025-09-10 02:37:01.876276', 'step': 9453, 'epoch': 2} {'type': 'loss', 'content': 0.13372580707073212, 'timestamp': '2025-09-10 02:37:01.878433', 'step': 9454, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 3322488817984}, 'timestamp': '2025-09-10 02:37:01.907962', 'step': 9454, 'epoch': 2} {'type': 'loss', 'content': 0.16414691507816315, 'timestamp': '2025-09-10 02:37:01.911918', 'step': 9455, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 3322488817984}, 'timestamp': '2025-09-10 02:37:01.945216', 'step': 9455, 'epoch': 2} {'type': 'loss', 'content': 0.11851084232330322, 'timestamp': '2025-09-10 02:37:01.968403', 'step': 9456, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 80], 'flops': 2373281365952}, 'timestamp': '2025-09-10 02:37:01.998907', 'step': 9456, 'epoch': 2} {'type': 'loss', 'content': 0.1727754771709442, 'timestamp': '2025-09-10 02:37:02.001465', 'step': 9457, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 128], 'flops': 3797092544000}, 'timestamp': '2025-09-10 02:37:02.029960', 'step': 9457, 'epoch': 2} {'type': 'loss', 'content': 0.09785930812358856, 'timestamp': '2025-09-10 02:37:02.032591', 'step': 9458, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 2847885091968}, 'timestamp': '2025-09-10 02:37:02.061768', 'step': 9458, 'epoch': 2} {'type': 'loss', 'content': 0.08355476707220078, 'timestamp': '2025-09-10 02:37:02.064524', 'step': 9459, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 128], 'flops': 3797092544000}, 'timestamp': '2025-09-10 02:37:02.093945', 'step': 9459, 'epoch': 2} {'type': 'loss', 'content': 0.08942459523677826, 'timestamp': '2025-09-10 02:37:02.118198', 'step': 9460, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 2847885091968}, 'timestamp': '2025-09-10 02:37:02.150488', 'step': 9460, 'epoch': 2} {'type': 'loss', 'content': 0.051517304033041, 'timestamp': '2025-09-10 02:37:02.152801', 'step': 9461, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 128], 'flops': 3797092544000}, 'timestamp': '2025-09-10 02:37:02.184026', 'step': 9461, 'epoch': 2} {'type': 'loss', 'content': 0.14008377492427826, 'timestamp': '2025-09-10 02:37:02.186086', 'step': 9462, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 2847885091968}, 'timestamp': '2025-09-10 02:37:02.217513', 'step': 9462, 'epoch': 2} {'type': 'loss', 'content': 0.14397665858268738, 'timestamp': '2025-09-10 02:37:02.219906', 'step': 9463, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 80], 'flops': 2373281365952}, 'timestamp': '2025-09-10 02:37:02.249491', 'step': 9463, 'epoch': 2} {'type': 'loss', 'content': 0.15482117235660553, 'timestamp': '2025-09-10 02:37:02.272270', 'step': 9464, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 3322488817984}, 'timestamp': '2025-09-10 02:37:02.305335', 'step': 9464, 'epoch': 2} {'type': 'loss', 'content': 0.05635422095656395, 'timestamp': '2025-09-10 02:37:02.309656', 'step': 9465, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 2847885091968}, 'timestamp': '2025-09-10 02:37:02.338592', 'step': 9465, 'epoch': 2} {'type': 'loss', 'content': 0.08084119856357574, 'timestamp': '2025-09-10 02:37:02.341220', 'step': 9466, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 80], 'flops': 2373281365952}, 'timestamp': '2025-09-10 02:37:02.380546', 'step': 9466, 'epoch': 2} {'type': 'loss', 'content': 0.18927255272865295, 'timestamp': '2025-09-10 02:37:02.386112', 'step': 9467, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 144], 'flops': 4271696270016}, 'timestamp': '2025-09-10 02:37:02.427559', 'step': 9467, 'epoch': 2} {'type': 'loss', 'content': 0.16537323594093323, 'timestamp': '2025-09-10 02:37:02.451378', 'step': 9468, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 2847885091968}, 'timestamp': '2025-09-10 02:37:02.483347', 'step': 9468, 'epoch': 2} {'type': 'loss', 'content': 0.08717498183250427, 'timestamp': '2025-09-10 02:37:02.488162', 'step': 9469, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 2847885091968}, 'timestamp': '2025-09-10 02:37:02.530389', 'step': 9469, 'epoch': 2} {'type': 'loss', 'content': 0.16154521703720093, 'timestamp': '2025-09-10 02:37:02.532172', 'step': 9470, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 3322488817984}, 'timestamp': '2025-09-10 02:37:02.565967', 'step': 9470, 'epoch': 2} {'type': 'loss', 'content': 0.06454220414161682, 'timestamp': '2025-09-10 02:37:02.569709', 'step': 9471, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 2847885091968}, 'timestamp': '2025-09-10 02:37:02.600009', 'step': 9471, 'epoch': 2} {'type': 'loss', 'content': 0.10258255898952484, 'timestamp': '2025-09-10 02:37:02.627035', 'step': 9472, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 160], 'flops': 4746299996032}, 'timestamp': '2025-09-10 02:37:02.658913', 'step': 9472, 'epoch': 2} {'type': 'loss', 'content': 0.19882865250110626, 'timestamp': '2025-09-10 02:37:02.662774', 'step': 9473, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 128], 'flops': 3797092544000}, 'timestamp': '2025-09-10 02:37:02.698640', 'step': 9473, 'epoch': 2} {'type': 'loss', 'content': 0.10907182842493057, 'timestamp': '2025-09-10 02:37:02.701418', 'step': 9474, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 2847885091968}, 'timestamp': '2025-09-10 02:37:02.735938', 'step': 9474, 'epoch': 2} {'type': 'loss', 'content': 0.1274009346961975, 'timestamp': '2025-09-10 02:37:02.738204', 'step': 9475, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 128], 'flops': 3797092544000}, 'timestamp': '2025-09-10 02:37:02.768268', 'step': 9475, 'epoch': 2} {'type': 'loss', 'content': 0.05345238745212555, 'timestamp': '2025-09-10 02:37:02.791964', 'step': 9476, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 128], 'flops': 3797092544000}, 'timestamp': '2025-09-10 02:37:02.822730', 'step': 9476, 'epoch': 2} {'type': 'loss', 'content': 0.07251247018575668, 'timestamp': '2025-09-10 02:37:02.824617', 'step': 9477, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 80], 'flops': 2373281365952}, 'timestamp': '2025-09-10 02:37:02.853242', 'step': 9477, 'epoch': 2} {'type': 'loss', 'content': 0.1450755000114441, 'timestamp': '2025-09-10 02:37:02.855341', 'step': 9478, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 3322488817984}, 'timestamp': '2025-09-10 02:37:02.885925', 'step': 9478, 'epoch': 2} {'type': 'loss', 'content': 0.08879838138818741, 'timestamp': '2025-09-10 02:37:02.889448', 'step': 9479, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 3322488817984}, 'timestamp': '2025-09-10 02:37:02.920957', 'step': 9479, 'epoch': 2} {'type': 'loss', 'content': 0.1989833116531372, 'timestamp': '2025-09-10 02:37:02.944672', 'step': 9480, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 3322488817984}, 'timestamp': '2025-09-10 02:37:02.974075', 'step': 9480, 'epoch': 2} {'type': 'loss', 'content': 0.09080784022808075, 'timestamp': '2025-09-10 02:37:02.976238', 'step': 9481, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 3322488817984}, 'timestamp': '2025-09-10 02:37:03.008222', 'step': 9481, 'epoch': 2} {'type': 'loss', 'content': 0.13494367897510529, 'timestamp': '2025-09-10 02:37:03.010465', 'step': 9482, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 3322488817984}, 'timestamp': '2025-09-10 02:37:03.040017', 'step': 9482, 'epoch': 2} {'type': 'loss', 'content': 0.14451102912425995, 'timestamp': '2025-09-10 02:37:03.042506', 'step': 9483, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 2847885091968}, 'timestamp': '2025-09-10 02:37:03.076744', 'step': 9483, 'epoch': 2} {'type': 'loss', 'content': 0.11209256947040558, 'timestamp': '2025-09-10 02:37:03.103599', 'step': 9484, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 160], 'flops': 4746299996032}, 'timestamp': '2025-09-10 02:37:03.134799', 'step': 9484, 'epoch': 2} {'type': 'loss', 'content': 0.15110881626605988, 'timestamp': '2025-09-10 02:37:03.145818', 'step': 9485, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 80], 'flops': 2373281365952}, 'timestamp': '2025-09-10 02:37:03.190170', 'step': 9485, 'epoch': 2} {'type': 'loss', 'content': 0.09473618865013123, 'timestamp': '2025-09-10 02:37:03.192237', 'step': 9486, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 3322488817984}, 'timestamp': '2025-09-10 02:37:03.222626', 'step': 9486, 'epoch': 2} {'type': 'loss', 'content': 0.10183797031641006, 'timestamp': '2025-09-10 02:37:03.226334', 'step': 9487, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 3322488817984}, 'timestamp': '2025-09-10 02:37:03.258691', 'step': 9487, 'epoch': 2} {'type': 'loss', 'content': 0.14208626747131348, 'timestamp': '2025-09-10 02:37:03.284924', 'step': 9488, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 3322488817984}, 'timestamp': '2025-09-10 02:37:03.315487', 'step': 9488, 'epoch': 2} {'type': 'loss', 'content': 0.1346179097890854, 'timestamp': '2025-09-10 02:37:03.317463', 'step': 9489, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 3322488817984}, 'timestamp': '2025-09-10 02:37:03.347337', 'step': 9489, 'epoch': 2} {'type': 'loss', 'content': 0.10280485451221466, 'timestamp': '2025-09-10 02:37:03.349511', 'step': 9490, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 128], 'flops': 3797092544000}, 'timestamp': '2025-09-10 02:37:03.390097', 'step': 9490, 'epoch': 2} {'type': 'loss', 'content': 0.053293146193027496, 'timestamp': '2025-09-10 02:37:03.392309', 'step': 9491, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 128], 'flops': 3797092544000}, 'timestamp': '2025-09-10 02:37:03.422415', 'step': 9491, 'epoch': 2} {'type': 'loss', 'content': 0.09007476270198822, 'timestamp': '2025-09-10 02:37:03.445691', 'step': 9492, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 3322488817984}, 'timestamp': '2025-09-10 02:37:03.475421', 'step': 9492, 'epoch': 2} {'type': 'loss', 'content': 0.24457521736621857, 'timestamp': '2025-09-10 02:37:03.477357', 'step': 9493, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 128], 'flops': 3797092544000}, 'timestamp': '2025-09-10 02:37:03.506509', 'step': 9493, 'epoch': 2} {'type': 'loss', 'content': 0.10476367175579071, 'timestamp': '2025-09-10 02:37:03.509279', 'step': 9494, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 2847885091968}, 'timestamp': '2025-09-10 02:37:03.539059', 'step': 9494, 'epoch': 2} {'type': 'loss', 'content': 0.06456424295902252, 'timestamp': '2025-09-10 02:37:03.550097', 'step': 9495, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 2847885091968}, 'timestamp': '2025-09-10 02:37:03.591453', 'step': 9495, 'epoch': 2} {'type': 'loss', 'content': 0.15214821696281433, 'timestamp': '2025-09-10 02:37:03.615223', 'step': 9496, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 3322488817984}, 'timestamp': '2025-09-10 02:37:03.646345', 'step': 9496, 'epoch': 2} {'type': 'loss', 'content': 0.10000535100698471, 'timestamp': '2025-09-10 02:37:03.648948', 'step': 9497, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 3322488817984}, 'timestamp': '2025-09-10 02:37:03.679000', 'step': 9497, 'epoch': 2} {'type': 'loss', 'content': 0.15409649908542633, 'timestamp': '2025-09-10 02:37:03.680910', 'step': 9498, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 3322488817984}, 'timestamp': '2025-09-10 02:37:03.710553', 'step': 9498, 'epoch': 2} {'type': 'loss', 'content': 0.1828390508890152, 'timestamp': '2025-09-10 02:37:03.712950', 'step': 9499, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 3322488817984}, 'timestamp': '2025-09-10 02:37:03.744364', 'step': 9499, 'epoch': 2} {'type': 'loss', 'content': 0.09508654475212097, 'timestamp': '2025-09-10 02:37:03.769293', 'step': 9500, 'epoch': 2} {'type': 'info', 'content': 'Checkpoint saved at step 9500', 'timestamp': '2025-09-10 02:37:10.776168', 'step': 9500, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 128], 'flops': 3797092544000}, 'timestamp': '2025-09-10 02:37:10.807914', 'step': 9500, 'epoch': 2} {'type': 'loss', 'content': 0.19122759997844696, 'timestamp': '2025-09-10 02:37:10.811016', 'step': 9501, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 128], 'flops': 3797092544000}, 'timestamp': '2025-09-10 02:37:10.841654', 'step': 9501, 'epoch': 2} {'type': 'loss', 'content': 0.0750342383980751, 'timestamp': '2025-09-10 02:37:10.844152', 'step': 9502, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 128], 'flops': 3797092544000}, 'timestamp': '2025-09-10 02:37:10.873952', 'step': 9502, 'epoch': 2} {'type': 'loss', 'content': 0.11390223354101181, 'timestamp': '2025-09-10 02:37:10.876782', 'step': 9503, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 144], 'flops': 4271696270016}, 'timestamp': '2025-09-10 02:37:10.906377', 'step': 9503, 'epoch': 2} {'type': 'loss', 'content': 0.06958692520856857, 'timestamp': '2025-09-10 02:37:10.930000', 'step': 9504, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 144], 'flops': 4271696270016}, 'timestamp': '2025-09-10 02:37:10.960367', 'step': 9504, 'epoch': 2} {'type': 'loss', 'content': 0.11301572620868683, 'timestamp': '2025-09-10 02:37:10.962679', 'step': 9505, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 128], 'flops': 3797092544000}, 'timestamp': '2025-09-10 02:37:10.992118', 'step': 9505, 'epoch': 2} {'type': 'loss', 'content': 0.03939777612686157, 'timestamp': '2025-09-10 02:37:10.994239', 'step': 9506, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 128], 'flops': 3797092544000}, 'timestamp': '2025-09-10 02:37:11.023633', 'step': 9506, 'epoch': 2} {'type': 'loss', 'content': 0.12282045930624008, 'timestamp': '2025-09-10 02:37:11.025383', 'step': 9507, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 3322488817984}, 'timestamp': '2025-09-10 02:37:11.054359', 'step': 9507, 'epoch': 2} {'type': 'loss', 'content': 0.15670643746852875, 'timestamp': '2025-09-10 02:37:11.078013', 'step': 9508, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 128], 'flops': 3797092544000}, 'timestamp': '2025-09-10 02:37:11.107783', 'step': 9508, 'epoch': 2} {'type': 'loss', 'content': 0.07461532205343246, 'timestamp': '2025-09-10 02:37:11.109922', 'step': 9509, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 3322488817984}, 'timestamp': '2025-09-10 02:37:11.141339', 'step': 9509, 'epoch': 2} {'type': 'loss', 'content': 0.12672802805900574, 'timestamp': '2025-09-10 02:37:11.143759', 'step': 9510, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 3322488817984}, 'timestamp': '2025-09-10 02:37:11.174247', 'step': 9510, 'epoch': 2} {'type': 'loss', 'content': 0.12908899784088135, 'timestamp': '2025-09-10 02:37:11.177167', 'step': 9511, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 144], 'flops': 4271696270016}, 'timestamp': '2025-09-10 02:37:11.206337', 'step': 9511, 'epoch': 2} {'type': 'loss', 'content': 0.11444535106420517, 'timestamp': '2025-09-10 02:37:11.229785', 'step': 9512, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 3322488817984}, 'timestamp': '2025-09-10 02:37:11.260042', 'step': 9512, 'epoch': 2} {'type': 'loss', 'content': 0.244956374168396, 'timestamp': '2025-09-10 02:37:11.262146', 'step': 9513, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 144], 'flops': 4271696270016}, 'timestamp': '2025-09-10 02:37:11.292059', 'step': 9513, 'epoch': 2} {'type': 'loss', 'content': 0.10123264789581299, 'timestamp': '2025-09-10 02:37:11.294979', 'step': 9514, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 128], 'flops': 3797092544000}, 'timestamp': '2025-09-10 02:37:11.324486', 'step': 9514, 'epoch': 2} {'type': 'loss', 'content': 0.08206239342689514, 'timestamp': '2025-09-10 02:37:11.326772', 'step': 9515, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 2847885091968}, 'timestamp': '2025-09-10 02:37:11.355908', 'step': 9515, 'epoch': 2} {'type': 'loss', 'content': 0.0725528672337532, 'timestamp': '2025-09-10 02:37:11.379915', 'step': 9516, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 128], 'flops': 3797092544000}, 'timestamp': '2025-09-10 02:37:11.417980', 'step': 9516, 'epoch': 2} {'type': 'loss', 'content': 0.11340411752462387, 'timestamp': '2025-09-10 02:37:11.420664', 'step': 9517, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 144], 'flops': 4271696270016}, 'timestamp': '2025-09-10 02:37:11.451187', 'step': 9517, 'epoch': 2} {'type': 'loss', 'content': 0.17464569211006165, 'timestamp': '2025-09-10 02:37:11.453447', 'step': 9518, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 2847885091968}, 'timestamp': '2025-09-10 02:37:11.482693', 'step': 9518, 'epoch': 2} {'type': 'loss', 'content': 0.13584503531455994, 'timestamp': '2025-09-10 02:37:11.484870', 'step': 9519, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 2847885091968}, 'timestamp': '2025-09-10 02:37:11.514025', 'step': 9519, 'epoch': 2} {'type': 'loss', 'content': 0.10799584537744522, 'timestamp': '2025-09-10 02:37:11.537736', 'step': 9520, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 3322488817984}, 'timestamp': '2025-09-10 02:37:11.568035', 'step': 9520, 'epoch': 2} {'type': 'loss', 'content': 0.09765370190143585, 'timestamp': '2025-09-10 02:37:11.570587', 'step': 9521, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 3322488817984}, 'timestamp': '2025-09-10 02:37:11.601425', 'step': 9521, 'epoch': 2} {'type': 'loss', 'content': 0.09551654756069183, 'timestamp': '2025-09-10 02:37:11.603657', 'step': 9522, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 3322488817984}, 'timestamp': '2025-09-10 02:37:11.634028', 'step': 9522, 'epoch': 2} {'type': 'loss', 'content': 0.12116444110870361, 'timestamp': '2025-09-10 02:37:11.636254', 'step': 9523, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 2847885091968}, 'timestamp': '2025-09-10 02:37:11.665876', 'step': 9523, 'epoch': 2} {'type': 'loss', 'content': 0.1913909614086151, 'timestamp': '2025-09-10 02:37:11.689438', 'step': 9524, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 160], 'flops': 4746299996032}, 'timestamp': '2025-09-10 02:37:11.719195', 'step': 9524, 'epoch': 2} {'type': 'loss', 'content': 0.13338834047317505, 'timestamp': '2025-09-10 02:37:11.723519', 'step': 9525, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 144], 'flops': 4271696270016}, 'timestamp': '2025-09-10 02:37:11.755290', 'step': 9525, 'epoch': 2} {'type': 'loss', 'content': 0.09233199059963226, 'timestamp': '2025-09-10 02:37:11.757532', 'step': 9526, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 128], 'flops': 3797092544000}, 'timestamp': '2025-09-10 02:37:11.787173', 'step': 9526, 'epoch': 2} {'type': 'loss', 'content': 0.14307992160320282, 'timestamp': '2025-09-10 02:37:11.788957', 'step': 9527, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 2847885091968}, 'timestamp': '2025-09-10 02:37:11.818860', 'step': 9527, 'epoch': 2} {'type': 'loss', 'content': 0.12257709354162216, 'timestamp': '2025-09-10 02:37:11.842485', 'step': 9528, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 2847885091968}, 'timestamp': '2025-09-10 02:37:11.871910', 'step': 9528, 'epoch': 2} {'type': 'loss', 'content': 0.11993051320314407, 'timestamp': '2025-09-10 02:37:11.874616', 'step': 9529, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 3322488817984}, 'timestamp': '2025-09-10 02:37:11.905291', 'step': 9529, 'epoch': 2} {'type': 'loss', 'content': 0.0968485176563263, 'timestamp': '2025-09-10 02:37:11.907683', 'step': 9530, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 3322488817984}, 'timestamp': '2025-09-10 02:37:11.936947', 'step': 9530, 'epoch': 2} {'type': 'loss', 'content': 0.16021543741226196, 'timestamp': '2025-09-10 02:37:11.939369', 'step': 9531, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 128], 'flops': 3797092544000}, 'timestamp': '2025-09-10 02:37:11.968953', 'step': 9531, 'epoch': 2} {'type': 'loss', 'content': 0.14853321015834808, 'timestamp': '2025-09-10 02:37:11.992567', 'step': 9532, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 128], 'flops': 3797092544000}, 'timestamp': '2025-09-10 02:37:12.022192', 'step': 9532, 'epoch': 2} {'type': 'loss', 'content': 0.10508840531110764, 'timestamp': '2025-09-10 02:37:12.025623', 'step': 9533, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 144], 'flops': 4271696270016}, 'timestamp': '2025-09-10 02:37:12.056084', 'step': 9533, 'epoch': 2} {'type': 'loss', 'content': 0.08731702715158463, 'timestamp': '2025-09-10 02:37:12.058380', 'step': 9534, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 128], 'flops': 3797092544000}, 'timestamp': '2025-09-10 02:37:12.088369', 'step': 9534, 'epoch': 2} {'type': 'loss', 'content': 0.08061045408248901, 'timestamp': '2025-09-10 02:37:12.090589', 'step': 9535, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 2847885091968}, 'timestamp': '2025-09-10 02:37:12.121339', 'step': 9535, 'epoch': 2} {'type': 'loss', 'content': 0.07893364876508713, 'timestamp': '2025-09-10 02:37:12.144764', 'step': 9536, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 3322488817984}, 'timestamp': '2025-09-10 02:37:12.175413', 'step': 9536, 'epoch': 2} {'type': 'loss', 'content': 0.1924637258052826, 'timestamp': '2025-09-10 02:37:12.177972', 'step': 9537, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 128], 'flops': 3797092544000}, 'timestamp': '2025-09-10 02:37:12.208338', 'step': 9537, 'epoch': 2} {'type': 'loss', 'content': 0.1496247500181198, 'timestamp': '2025-09-10 02:37:12.210834', 'step': 9538, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 144], 'flops': 4271696270016}, 'timestamp': '2025-09-10 02:37:12.240562', 'step': 9538, 'epoch': 2} {'type': 'loss', 'content': 0.10240470618009567, 'timestamp': '2025-09-10 02:37:12.243053', 'step': 9539, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 3322488817984}, 'timestamp': '2025-09-10 02:37:12.272028', 'step': 9539, 'epoch': 2} {'type': 'loss', 'content': 0.12397550046443939, 'timestamp': '2025-09-10 02:37:12.295368', 'step': 9540, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 3322488817984}, 'timestamp': '2025-09-10 02:37:12.325679', 'step': 9540, 'epoch': 2} {'type': 'loss', 'content': 0.0957253947854042, 'timestamp': '2025-09-10 02:37:12.327719', 'step': 9541, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 3322488817984}, 'timestamp': '2025-09-10 02:37:12.357948', 'step': 9541, 'epoch': 2} {'type': 'loss', 'content': 0.10544903576374054, 'timestamp': '2025-09-10 02:37:12.360025', 'step': 9542, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 128], 'flops': 3797092544000}, 'timestamp': '2025-09-10 02:37:12.389418', 'step': 9542, 'epoch': 2} {'type': 'loss', 'content': 0.16063827276229858, 'timestamp': '2025-09-10 02:37:12.394750', 'step': 9543, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 128], 'flops': 3797092544000}, 'timestamp': '2025-09-10 02:37:12.425337', 'step': 9543, 'epoch': 2} {'type': 'loss', 'content': 0.09183179587125778, 'timestamp': '2025-09-10 02:37:12.448620', 'step': 9544, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 2847885091968}, 'timestamp': '2025-09-10 02:37:12.478612', 'step': 9544, 'epoch': 2} {'type': 'loss', 'content': 0.06541433930397034, 'timestamp': '2025-09-10 02:37:12.482318', 'step': 9545, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 3322488817984}, 'timestamp': '2025-09-10 02:37:12.512023', 'step': 9545, 'epoch': 2} {'type': 'loss', 'content': 0.05787293240427971, 'timestamp': '2025-09-10 02:37:12.514224', 'step': 9546, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 128], 'flops': 3797092544000}, 'timestamp': '2025-09-10 02:37:12.544131', 'step': 9546, 'epoch': 2} {'type': 'loss', 'content': 0.1316610425710678, 'timestamp': '2025-09-10 02:37:12.546409', 'step': 9547, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 3322488817984}, 'timestamp': '2025-09-10 02:37:12.575199', 'step': 9547, 'epoch': 2} {'type': 'loss', 'content': 0.0732843279838562, 'timestamp': '2025-09-10 02:37:12.598773', 'step': 9548, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 128], 'flops': 3797092544000}, 'timestamp': '2025-09-10 02:37:12.629950', 'step': 9548, 'epoch': 2} {'type': 'loss', 'content': 0.09386448562145233, 'timestamp': '2025-09-10 02:37:12.633402', 'step': 9549, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 128], 'flops': 3797092544000}, 'timestamp': '2025-09-10 02:37:12.662913', 'step': 9549, 'epoch': 2} {'type': 'loss', 'content': 0.038718629628419876, 'timestamp': '2025-09-10 02:37:12.665155', 'step': 9550, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 128], 'flops': 3797092544000}, 'timestamp': '2025-09-10 02:37:12.694941', 'step': 9550, 'epoch': 2} {'type': 'loss', 'content': 0.13716518878936768, 'timestamp': '2025-09-10 02:37:12.697081', 'step': 9551, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 128], 'flops': 3797092544000}, 'timestamp': '2025-09-10 02:37:12.726286', 'step': 9551, 'epoch': 2} {'type': 'loss', 'content': 0.15388044714927673, 'timestamp': '2025-09-10 02:37:12.749696', 'step': 9552, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 3322488817984}, 'timestamp': '2025-09-10 02:37:12.779650', 'step': 9552, 'epoch': 2} {'type': 'loss', 'content': 0.13376760482788086, 'timestamp': '2025-09-10 02:37:12.781602', 'step': 9553, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 128], 'flops': 3797092544000}, 'timestamp': '2025-09-10 02:37:12.810319', 'step': 9553, 'epoch': 2} {'type': 'loss', 'content': 0.16599249839782715, 'timestamp': '2025-09-10 02:37:12.813629', 'step': 9554, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 128], 'flops': 3797092544000}, 'timestamp': '2025-09-10 02:37:12.843492', 'step': 9554, 'epoch': 2} {'type': 'loss', 'content': 0.07567718625068665, 'timestamp': '2025-09-10 02:37:12.846022', 'step': 9555, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 2847885091968}, 'timestamp': '2025-09-10 02:37:12.875268', 'step': 9555, 'epoch': 2} {'type': 'loss', 'content': 0.09089341014623642, 'timestamp': '2025-09-10 02:37:12.899031', 'step': 9556, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 128], 'flops': 3797092544000}, 'timestamp': '2025-09-10 02:37:12.929718', 'step': 9556, 'epoch': 2} {'type': 'loss', 'content': 0.16293442249298096, 'timestamp': '2025-09-10 02:37:12.932246', 'step': 9557, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 128], 'flops': 3797092544000}, 'timestamp': '2025-09-10 02:37:12.963220', 'step': 9557, 'epoch': 2} {'type': 'loss', 'content': 0.193278968334198, 'timestamp': '2025-09-10 02:37:12.965380', 'step': 9558, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 2847885091968}, 'timestamp': '2025-09-10 02:37:12.994633', 'step': 9558, 'epoch': 2} {'type': 'loss', 'content': 0.10084929317235947, 'timestamp': '2025-09-10 02:37:12.996843', 'step': 9559, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 3322488817984}, 'timestamp': '2025-09-10 02:37:13.026661', 'step': 9559, 'epoch': 2} {'type': 'loss', 'content': 0.13155338168144226, 'timestamp': '2025-09-10 02:37:13.050546', 'step': 9560, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 3322488817984}, 'timestamp': '2025-09-10 02:37:13.080292', 'step': 9560, 'epoch': 2} {'type': 'loss', 'content': 0.1273788958787918, 'timestamp': '2025-09-10 02:37:13.082388', 'step': 9561, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 3322488817984}, 'timestamp': '2025-09-10 02:37:13.111135', 'step': 9561, 'epoch': 2} {'type': 'loss', 'content': 0.14936506748199463, 'timestamp': '2025-09-10 02:37:13.113256', 'step': 9562, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 144], 'flops': 4271696270016}, 'timestamp': '2025-09-10 02:37:13.142486', 'step': 9562, 'epoch': 2} {'type': 'loss', 'content': 0.12358970195055008, 'timestamp': '2025-09-10 02:37:13.144822', 'step': 9563, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 144], 'flops': 4271696270016}, 'timestamp': '2025-09-10 02:37:13.174659', 'step': 9563, 'epoch': 2} {'type': 'loss', 'content': 0.1109779104590416, 'timestamp': '2025-09-10 02:37:13.198098', 'step': 9564, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 2847885091968}, 'timestamp': '2025-09-10 02:37:13.228129', 'step': 9564, 'epoch': 2} {'type': 'loss', 'content': 0.139333114027977, 'timestamp': '2025-09-10 02:37:13.230575', 'step': 9565, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 2847885091968}, 'timestamp': '2025-09-10 02:37:13.259985', 'step': 9565, 'epoch': 2} {'type': 'loss', 'content': 0.185000479221344, 'timestamp': '2025-09-10 02:37:13.261978', 'step': 9566, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 3322488817984}, 'timestamp': '2025-09-10 02:37:13.292469', 'step': 9566, 'epoch': 2} {'type': 'loss', 'content': 0.1157471090555191, 'timestamp': '2025-09-10 02:37:13.294789', 'step': 9567, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 80], 'flops': 2373281365952}, 'timestamp': '2025-09-10 02:37:13.324477', 'step': 9567, 'epoch': 2} {'type': 'loss', 'content': 0.11836832016706467, 'timestamp': '2025-09-10 02:37:13.347637', 'step': 9568, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 128], 'flops': 3797092544000}, 'timestamp': '2025-09-10 02:37:13.377923', 'step': 9568, 'epoch': 2} {'type': 'loss', 'content': 0.09404280036687851, 'timestamp': '2025-09-10 02:37:13.380091', 'step': 9569, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 2847885091968}, 'timestamp': '2025-09-10 02:37:13.412811', 'step': 9569, 'epoch': 2} {'type': 'loss', 'content': 0.08368020504713058, 'timestamp': '2025-09-10 02:37:13.415181', 'step': 9570, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 3322488817984}, 'timestamp': '2025-09-10 02:37:13.445174', 'step': 9570, 'epoch': 2} {'type': 'loss', 'content': 0.15828724205493927, 'timestamp': '2025-09-10 02:37:13.447546', 'step': 9571, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 128], 'flops': 3797092544000}, 'timestamp': '2025-09-10 02:37:13.476851', 'step': 9571, 'epoch': 2} {'type': 'loss', 'content': 0.126095250248909, 'timestamp': '2025-09-10 02:37:13.500683', 'step': 9572, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 2847885091968}, 'timestamp': '2025-09-10 02:37:13.530961', 'step': 9572, 'epoch': 2} {'type': 'loss', 'content': 0.1258629411458969, 'timestamp': '2025-09-10 02:37:13.533225', 'step': 9573, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 2847885091968}, 'timestamp': '2025-09-10 02:37:13.563180', 'step': 9573, 'epoch': 2} {'type': 'loss', 'content': 0.13228480517864227, 'timestamp': '2025-09-10 02:37:13.565599', 'step': 9574, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 160], 'flops': 4746299996032}, 'timestamp': '2025-09-10 02:37:13.595176', 'step': 9574, 'epoch': 2} {'type': 'loss', 'content': 0.21164698898792267, 'timestamp': '2025-09-10 02:37:13.597835', 'step': 9575, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 128], 'flops': 3797092544000}, 'timestamp': '2025-09-10 02:37:13.627053', 'step': 9575, 'epoch': 2} {'type': 'loss', 'content': 0.13924317061901093, 'timestamp': '2025-09-10 02:37:13.650567', 'step': 9576, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 144], 'flops': 4271696270016}, 'timestamp': '2025-09-10 02:37:13.680786', 'step': 9576, 'epoch': 2} {'type': 'loss', 'content': 0.1197076216340065, 'timestamp': '2025-09-10 02:37:13.683244', 'step': 9577, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 3322488817984}, 'timestamp': '2025-09-10 02:37:13.712684', 'step': 9577, 'epoch': 2} {'type': 'loss', 'content': 0.09226523339748383, 'timestamp': '2025-09-10 02:37:13.714923', 'step': 9578, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 2847885091968}, 'timestamp': '2025-09-10 02:37:13.744583', 'step': 9578, 'epoch': 2} {'type': 'loss', 'content': 0.07017466425895691, 'timestamp': '2025-09-10 02:37:13.748270', 'step': 9579, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 2847885091968}, 'timestamp': '2025-09-10 02:37:13.777590', 'step': 9579, 'epoch': 2} {'type': 'loss', 'content': 0.10915456712245941, 'timestamp': '2025-09-10 02:37:13.800937', 'step': 9580, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 2847885091968}, 'timestamp': '2025-09-10 02:37:13.831670', 'step': 9580, 'epoch': 2} {'type': 'loss', 'content': 0.11569618433713913, 'timestamp': '2025-09-10 02:37:13.833825', 'step': 9581, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 80], 'flops': 2373281365952}, 'timestamp': '2025-09-10 02:37:13.863035', 'step': 9581, 'epoch': 2} {'type': 'loss', 'content': 0.12001779675483704, 'timestamp': '2025-09-10 02:37:13.865170', 'step': 9582, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 2847885091968}, 'timestamp': '2025-09-10 02:37:13.894314', 'step': 9582, 'epoch': 2} {'type': 'loss', 'content': 0.10720159113407135, 'timestamp': '2025-09-10 02:37:13.896466', 'step': 9583, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 128], 'flops': 3797092544000}, 'timestamp': '2025-09-10 02:37:13.925656', 'step': 9583, 'epoch': 2} {'type': 'loss', 'content': 0.09450943768024445, 'timestamp': '2025-09-10 02:37:13.948992', 'step': 9584, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 128], 'flops': 3797092544000}, 'timestamp': '2025-09-10 02:37:13.978459', 'step': 9584, 'epoch': 2} {'type': 'loss', 'content': 0.1047142893075943, 'timestamp': '2025-09-10 02:37:13.980595', 'step': 9585, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 3322488817984}, 'timestamp': '2025-09-10 02:37:14.009712', 'step': 9585, 'epoch': 2} {'type': 'loss', 'content': 0.12077745050191879, 'timestamp': '2025-09-10 02:37:14.011871', 'step': 9586, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 3322488817984}, 'timestamp': '2025-09-10 02:37:14.041439', 'step': 9586, 'epoch': 2} {'type': 'loss', 'content': 0.08852943032979965, 'timestamp': '2025-09-10 02:37:14.043325', 'step': 9587, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 2847885091968}, 'timestamp': '2025-09-10 02:37:14.072394', 'step': 9587, 'epoch': 2} {'type': 'loss', 'content': 0.2143823802471161, 'timestamp': '2025-09-10 02:37:14.095971', 'step': 9588, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 144], 'flops': 4271696270016}, 'timestamp': '2025-09-10 02:37:14.125310', 'step': 9588, 'epoch': 2} {'type': 'loss', 'content': 0.11208661645650864, 'timestamp': '2025-09-10 02:37:14.127665', 'step': 9589, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 2847885091968}, 'timestamp': '2025-09-10 02:37:14.157688', 'step': 9589, 'epoch': 2} {'type': 'loss', 'content': 0.07806427776813507, 'timestamp': '2025-09-10 02:37:14.159782', 'step': 9590, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 3322488817984}, 'timestamp': '2025-09-10 02:37:14.189222', 'step': 9590, 'epoch': 2} {'type': 'loss', 'content': 0.08786879479885101, 'timestamp': '2025-09-10 02:37:14.191444', 'step': 9591, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 3322488817984}, 'timestamp': '2025-09-10 02:37:14.220095', 'step': 9591, 'epoch': 2} {'type': 'loss', 'content': 0.18808071315288544, 'timestamp': '2025-09-10 02:37:14.243861', 'step': 9592, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 144], 'flops': 4271696270016}, 'timestamp': '2025-09-10 02:37:14.274335', 'step': 9592, 'epoch': 2} {'type': 'loss', 'content': 0.07617783546447754, 'timestamp': '2025-09-10 02:37:14.276559', 'step': 9593, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 3322488817984}, 'timestamp': '2025-09-10 02:37:14.306657', 'step': 9593, 'epoch': 2} {'type': 'loss', 'content': 0.05235700681805611, 'timestamp': '2025-09-10 02:37:14.308944', 'step': 9594, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 144], 'flops': 4271696270016}, 'timestamp': '2025-09-10 02:37:14.339583', 'step': 9594, 'epoch': 2} {'type': 'loss', 'content': 0.24516090750694275, 'timestamp': '2025-09-10 02:37:14.342105', 'step': 9595, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 2847885091968}, 'timestamp': '2025-09-10 02:37:14.372125', 'step': 9595, 'epoch': 2} {'type': 'loss', 'content': 0.043916188180446625, 'timestamp': '2025-09-10 02:37:14.395638', 'step': 9596, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 3322488817984}, 'timestamp': '2025-09-10 02:37:14.427373', 'step': 9596, 'epoch': 2} {'type': 'loss', 'content': 0.12730424106121063, 'timestamp': '2025-09-10 02:37:14.429782', 'step': 9597, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 128], 'flops': 3797092544000}, 'timestamp': '2025-09-10 02:37:14.459780', 'step': 9597, 'epoch': 2} {'type': 'loss', 'content': 0.1810249239206314, 'timestamp': '2025-09-10 02:37:14.462086', 'step': 9598, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 128], 'flops': 3797092544000}, 'timestamp': '2025-09-10 02:37:14.491630', 'step': 9598, 'epoch': 2} {'type': 'loss', 'content': 0.06833181530237198, 'timestamp': '2025-09-10 02:37:14.493689', 'step': 9599, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 3322488817984}, 'timestamp': '2025-09-10 02:37:14.523583', 'step': 9599, 'epoch': 2} {'type': 'loss', 'content': 0.0632205680012703, 'timestamp': '2025-09-10 02:37:14.547185', 'step': 9600, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 2847885091968}, 'timestamp': '2025-09-10 02:37:14.576331', 'step': 9600, 'epoch': 2} {'type': 'loss', 'content': 0.15788400173187256, 'timestamp': '2025-09-10 02:37:14.578546', 'step': 9601, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 3322488817984}, 'timestamp': '2025-09-10 02:37:14.608801', 'step': 9601, 'epoch': 2} {'type': 'loss', 'content': 0.08344203978776932, 'timestamp': '2025-09-10 02:37:14.611342', 'step': 9602, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 144], 'flops': 4271696270016}, 'timestamp': '2025-09-10 02:37:14.641136', 'step': 9602, 'epoch': 2} {'type': 'loss', 'content': 0.1073804572224617, 'timestamp': '2025-09-10 02:37:14.643598', 'step': 9603, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 3322488817984}, 'timestamp': '2025-09-10 02:37:14.674374', 'step': 9603, 'epoch': 2} {'type': 'loss', 'content': 0.11786404997110367, 'timestamp': '2025-09-10 02:37:14.698006', 'step': 9604, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 128], 'flops': 3797092544000}, 'timestamp': '2025-09-10 02:37:14.727768', 'step': 9604, 'epoch': 2} {'type': 'loss', 'content': 0.09235788136720657, 'timestamp': '2025-09-10 02:37:14.730365', 'step': 9605, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 2847885091968}, 'timestamp': '2025-09-10 02:37:14.759285', 'step': 9605, 'epoch': 2} {'type': 'loss', 'content': 0.10546368360519409, 'timestamp': '2025-09-10 02:37:14.761961', 'step': 9606, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 3322488817984}, 'timestamp': '2025-09-10 02:37:14.791342', 'step': 9606, 'epoch': 2} {'type': 'loss', 'content': 0.11225079745054245, 'timestamp': '2025-09-10 02:37:14.793388', 'step': 9607, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 2847885091968}, 'timestamp': '2025-09-10 02:37:14.823130', 'step': 9607, 'epoch': 2} {'type': 'loss', 'content': 0.08631953597068787, 'timestamp': '2025-09-10 02:37:14.846540', 'step': 9608, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 2847885091968}, 'timestamp': '2025-09-10 02:37:14.876949', 'step': 9608, 'epoch': 2} {'type': 'loss', 'content': 0.09890369325876236, 'timestamp': '2025-09-10 02:37:14.879135', 'step': 9609, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 2847885091968}, 'timestamp': '2025-09-10 02:37:14.907694', 'step': 9609, 'epoch': 2} {'type': 'loss', 'content': 0.07302765548229218, 'timestamp': '2025-09-10 02:37:14.911990', 'step': 9610, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 2847885091968}, 'timestamp': '2025-09-10 02:37:14.940856', 'step': 9610, 'epoch': 2} {'type': 'loss', 'content': 0.15094947814941406, 'timestamp': '2025-09-10 02:37:14.943243', 'step': 9611, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 128], 'flops': 3797092544000}, 'timestamp': '2025-09-10 02:37:14.973080', 'step': 9611, 'epoch': 2} {'type': 'loss', 'content': 0.11407168209552765, 'timestamp': '2025-09-10 02:37:14.996549', 'step': 9612, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 160], 'flops': 4746299996032}, 'timestamp': '2025-09-10 02:37:15.026497', 'step': 9612, 'epoch': 2} {'type': 'loss', 'content': 0.060216572135686874, 'timestamp': '2025-09-10 02:37:15.028764', 'step': 9613, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 128], 'flops': 3797092544000}, 'timestamp': '2025-09-10 02:37:15.058044', 'step': 9613, 'epoch': 2} {'type': 'loss', 'content': 0.07654188573360443, 'timestamp': '2025-09-10 02:37:15.060066', 'step': 9614, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 128], 'flops': 3797092544000}, 'timestamp': '2025-09-10 02:37:15.091124', 'step': 9614, 'epoch': 2} {'type': 'loss', 'content': 0.13078056275844574, 'timestamp': '2025-09-10 02:37:15.094981', 'step': 9615, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 3322488817984}, 'timestamp': '2025-09-10 02:37:15.124119', 'step': 9615, 'epoch': 2} {'type': 'loss', 'content': 0.1564978063106537, 'timestamp': '2025-09-10 02:37:15.148108', 'step': 9616, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 3322488817984}, 'timestamp': '2025-09-10 02:37:15.177974', 'step': 9616, 'epoch': 2} {'type': 'loss', 'content': 0.11787109822034836, 'timestamp': '2025-09-10 02:37:15.180506', 'step': 9617, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 144], 'flops': 4271696270016}, 'timestamp': '2025-09-10 02:37:15.212153', 'step': 9617, 'epoch': 2} {'type': 'loss', 'content': 0.05240049958229065, 'timestamp': '2025-09-10 02:37:15.214662', 'step': 9618, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 3322488817984}, 'timestamp': '2025-09-10 02:37:15.244557', 'step': 9618, 'epoch': 2} {'type': 'loss', 'content': 0.15124259889125824, 'timestamp': '2025-09-10 02:37:15.247437', 'step': 9619, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 2847885091968}, 'timestamp': '2025-09-10 02:37:15.277047', 'step': 9619, 'epoch': 2} {'type': 'loss', 'content': 0.11516056954860687, 'timestamp': '2025-09-10 02:37:15.301111', 'step': 9620, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 3322488817984}, 'timestamp': '2025-09-10 02:37:15.334245', 'step': 9620, 'epoch': 2} {'type': 'loss', 'content': 0.0971694141626358, 'timestamp': '2025-09-10 02:37:15.336908', 'step': 9621, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 2847885091968}, 'timestamp': '2025-09-10 02:37:15.367879', 'step': 9621, 'epoch': 2} {'type': 'loss', 'content': 0.1908295750617981, 'timestamp': '2025-09-10 02:37:15.374793', 'step': 9622, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 128], 'flops': 3797092544000}, 'timestamp': '2025-09-10 02:37:15.430255', 'step': 9622, 'epoch': 2} {'type': 'loss', 'content': 0.14680559933185577, 'timestamp': '2025-09-10 02:37:15.433238', 'step': 9623, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 3322488817984}, 'timestamp': '2025-09-10 02:37:15.464557', 'step': 9623, 'epoch': 2} {'type': 'loss', 'content': 0.15911461412906647, 'timestamp': '2025-09-10 02:37:15.489594', 'step': 9624, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 3322488817984}, 'timestamp': '2025-09-10 02:37:15.521136', 'step': 9624, 'epoch': 2} {'type': 'loss', 'content': 0.14917178452014923, 'timestamp': '2025-09-10 02:37:15.524243', 'step': 9625, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 128], 'flops': 3797092544000}, 'timestamp': '2025-09-10 02:37:15.556569', 'step': 9625, 'epoch': 2} {'type': 'loss', 'content': 0.07296774536371231, 'timestamp': '2025-09-10 02:37:15.559007', 'step': 9626, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 128], 'flops': 3797092544000}, 'timestamp': '2025-09-10 02:37:15.588767', 'step': 9626, 'epoch': 2} {'type': 'loss', 'content': 0.11156649142503738, 'timestamp': '2025-09-10 02:37:15.591089', 'step': 9627, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 3322488817984}, 'timestamp': '2025-09-10 02:37:15.620397', 'step': 9627, 'epoch': 2} {'type': 'loss', 'content': 0.22922834753990173, 'timestamp': '2025-09-10 02:37:15.644119', 'step': 9628, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 3322488817984}, 'timestamp': '2025-09-10 02:37:15.673456', 'step': 9628, 'epoch': 2} {'type': 'loss', 'content': 0.14573174715042114, 'timestamp': '2025-09-10 02:37:15.675730', 'step': 9629, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 144], 'flops': 4271696270016}, 'timestamp': '2025-09-10 02:37:15.706634', 'step': 9629, 'epoch': 2} {'type': 'loss', 'content': 0.12263946980237961, 'timestamp': '2025-09-10 02:37:15.708754', 'step': 9630, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 2847885091968}, 'timestamp': '2025-09-10 02:37:15.738691', 'step': 9630, 'epoch': 2} {'type': 'loss', 'content': 0.08853539824485779, 'timestamp': '2025-09-10 02:37:15.741077', 'step': 9631, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 2847885091968}, 'timestamp': '2025-09-10 02:37:15.770772', 'step': 9631, 'epoch': 2} {'type': 'loss', 'content': 0.21139657497406006, 'timestamp': '2025-09-10 02:37:15.799085', 'step': 9632, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 3322488817984}, 'timestamp': '2025-09-10 02:37:15.832111', 'step': 9632, 'epoch': 2} {'type': 'loss', 'content': 0.19449768960475922, 'timestamp': '2025-09-10 02:37:15.834479', 'step': 9633, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 2847885091968}, 'timestamp': '2025-09-10 02:37:15.863593', 'step': 9633, 'epoch': 2} {'type': 'loss', 'content': 0.11293739080429077, 'timestamp': '2025-09-10 02:37:15.866198', 'step': 9634, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 128], 'flops': 3797092544000}, 'timestamp': '2025-09-10 02:37:15.895452', 'step': 9634, 'epoch': 2} {'type': 'loss', 'content': 0.14548040926456451, 'timestamp': '2025-09-10 02:37:15.897618', 'step': 9635, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 160], 'flops': 4746299996032}, 'timestamp': '2025-09-10 02:37:15.939051', 'step': 9635, 'epoch': 2} {'type': 'loss', 'content': 0.11950722336769104, 'timestamp': '2025-09-10 02:37:15.962685', 'step': 9636, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 128], 'flops': 3797092544000}, 'timestamp': '2025-09-10 02:37:15.992486', 'step': 9636, 'epoch': 2} {'type': 'loss', 'content': 0.11476056277751923, 'timestamp': '2025-09-10 02:37:15.994705', 'step': 9637, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 3322488817984}, 'timestamp': '2025-09-10 02:37:16.033589', 'step': 9637, 'epoch': 2} {'type': 'loss', 'content': 0.09382911026477814, 'timestamp': '2025-09-10 02:37:16.035916', 'step': 9638, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 3322488817984}, 'timestamp': '2025-09-10 02:37:16.065552', 'step': 9638, 'epoch': 2} {'type': 'loss', 'content': 0.07761947810649872, 'timestamp': '2025-09-10 02:37:16.067835', 'step': 9639, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 128], 'flops': 3797092544000}, 'timestamp': '2025-09-10 02:37:16.100030', 'step': 9639, 'epoch': 2} {'type': 'loss', 'content': 0.17232383787631989, 'timestamp': '2025-09-10 02:37:16.123547', 'step': 9640, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 128], 'flops': 3797092544000}, 'timestamp': '2025-09-10 02:37:16.182465', 'step': 9640, 'epoch': 2} {'type': 'loss', 'content': 0.08912640064954758, 'timestamp': '2025-09-10 02:37:16.187733', 'step': 9641, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 3322488817984}, 'timestamp': '2025-09-10 02:37:16.258817', 'step': 9641, 'epoch': 2} {'type': 'loss', 'content': 0.23407766222953796, 'timestamp': '2025-09-10 02:37:16.271718', 'step': 9642, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 3322488817984}, 'timestamp': '2025-09-10 02:37:16.314599', 'step': 9642, 'epoch': 2} {'type': 'loss', 'content': 0.06624720990657806, 'timestamp': '2025-09-10 02:37:16.317530', 'step': 9643, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 3322488817984}, 'timestamp': '2025-09-10 02:37:16.349998', 'step': 9643, 'epoch': 2} {'type': 'loss', 'content': 0.09556364268064499, 'timestamp': '2025-09-10 02:37:16.373602', 'step': 9644, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 3322488817984}, 'timestamp': '2025-09-10 02:37:16.413055', 'step': 9644, 'epoch': 2} {'type': 'loss', 'content': 0.20917437970638275, 'timestamp': '2025-09-10 02:37:16.417080', 'step': 9645, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 2847885091968}, 'timestamp': '2025-09-10 02:37:16.447004', 'step': 9645, 'epoch': 2} {'type': 'loss', 'content': 0.1377870887517929, 'timestamp': '2025-09-10 02:37:16.449421', 'step': 9646, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 3322488817984}, 'timestamp': '2025-09-10 02:37:16.484438', 'step': 9646, 'epoch': 2} {'type': 'loss', 'content': 0.10329050570726395, 'timestamp': '2025-09-10 02:37:16.487059', 'step': 9647, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 2847885091968}, 'timestamp': '2025-09-10 02:37:16.517991', 'step': 9647, 'epoch': 2} {'type': 'loss', 'content': 0.25808972120285034, 'timestamp': '2025-09-10 02:37:16.545560', 'step': 9648, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 128], 'flops': 3797092544000}, 'timestamp': '2025-09-10 02:37:16.580894', 'step': 9648, 'epoch': 2} {'type': 'loss', 'content': 0.07611668109893799, 'timestamp': '2025-09-10 02:37:16.584578', 'step': 9649, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 3322488817984}, 'timestamp': '2025-09-10 02:37:16.615447', 'step': 9649, 'epoch': 2} {'type': 'loss', 'content': 0.12431732565164566, 'timestamp': '2025-09-10 02:37:16.618294', 'step': 9650, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 144], 'flops': 4271696270016}, 'timestamp': '2025-09-10 02:37:16.649531', 'step': 9650, 'epoch': 2} {'type': 'loss', 'content': 0.07353578507900238, 'timestamp': '2025-09-10 02:37:16.651801', 'step': 9651, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 2847885091968}, 'timestamp': '2025-09-10 02:37:16.689437', 'step': 9651, 'epoch': 2} {'type': 'loss', 'content': 0.09964799880981445, 'timestamp': '2025-09-10 02:37:16.713416', 'step': 9652, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 2847885091968}, 'timestamp': '2025-09-10 02:37:16.744048', 'step': 9652, 'epoch': 2} {'type': 'loss', 'content': 0.10276048630475998, 'timestamp': '2025-09-10 02:37:16.746696', 'step': 9653, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 2847885091968}, 'timestamp': '2025-09-10 02:37:16.780376', 'step': 9653, 'epoch': 2} {'type': 'loss', 'content': 0.11928611248731613, 'timestamp': '2025-09-10 02:37:16.786252', 'step': 9654, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 2847885091968}, 'timestamp': '2025-09-10 02:37:16.827581', 'step': 9654, 'epoch': 2} {'type': 'loss', 'content': 0.1438215672969818, 'timestamp': '2025-09-10 02:37:16.829893', 'step': 9655, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 128], 'flops': 3797092544000}, 'timestamp': '2025-09-10 02:37:16.875907', 'step': 9655, 'epoch': 2} {'type': 'loss', 'content': 0.12219145894050598, 'timestamp': '2025-09-10 02:37:16.899503', 'step': 9656, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 3322488817984}, 'timestamp': '2025-09-10 02:37:16.929780', 'step': 9656, 'epoch': 2} {'type': 'loss', 'content': 0.11281841993331909, 'timestamp': '2025-09-10 02:37:16.931737', 'step': 9657, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 128], 'flops': 3797092544000}, 'timestamp': '2025-09-10 02:37:16.961417', 'step': 9657, 'epoch': 2} {'type': 'loss', 'content': 0.044669151306152344, 'timestamp': '2025-09-10 02:37:16.963552', 'step': 9658, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 3322488817984}, 'timestamp': '2025-09-10 02:37:16.993965', 'step': 9658, 'epoch': 2} {'type': 'loss', 'content': 0.12522418797016144, 'timestamp': '2025-09-10 02:37:16.996307', 'step': 9659, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 144], 'flops': 4271696270016}, 'timestamp': '2025-09-10 02:37:17.027767', 'step': 9659, 'epoch': 2} {'type': 'loss', 'content': 0.10293194651603699, 'timestamp': '2025-09-10 02:37:17.051568', 'step': 9660, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 2847885091968}, 'timestamp': '2025-09-10 02:37:17.083169', 'step': 9660, 'epoch': 2} {'type': 'loss', 'content': 0.10427513718605042, 'timestamp': '2025-09-10 02:37:17.088159', 'step': 9661, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 192], 'flops': 5695507448064}, 'timestamp': '2025-09-10 02:37:17.125468', 'step': 9661, 'epoch': 2} {'type': 'loss', 'content': 0.12417436391115189, 'timestamp': '2025-09-10 02:37:17.130221', 'step': 9662, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 2847885091968}, 'timestamp': '2025-09-10 02:37:17.161621', 'step': 9662, 'epoch': 2} {'type': 'loss', 'content': 0.10697013139724731, 'timestamp': '2025-09-10 02:37:17.165309', 'step': 9663, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 3322488817984}, 'timestamp': '2025-09-10 02:37:17.195391', 'step': 9663, 'epoch': 2} {'type': 'loss', 'content': 0.05933397263288498, 'timestamp': '2025-09-10 02:37:17.221849', 'step': 9664, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 3322488817984}, 'timestamp': '2025-09-10 02:37:17.251715', 'step': 9664, 'epoch': 2} {'type': 'loss', 'content': 0.09264858067035675, 'timestamp': '2025-09-10 02:37:17.255948', 'step': 9665, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 128], 'flops': 3797092544000}, 'timestamp': '2025-09-10 02:37:17.285701', 'step': 9665, 'epoch': 2} {'type': 'loss', 'content': 0.18391481041908264, 'timestamp': '2025-09-10 02:37:17.288211', 'step': 9666, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 128], 'flops': 3797092544000}, 'timestamp': '2025-09-10 02:37:17.319048', 'step': 9666, 'epoch': 2} {'type': 'loss', 'content': 0.11673679202795029, 'timestamp': '2025-09-10 02:37:17.321190', 'step': 9667, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 80], 'flops': 2373281365952}, 'timestamp': '2025-09-10 02:37:17.360028', 'step': 9667, 'epoch': 2} {'type': 'loss', 'content': 0.1391623467206955, 'timestamp': '2025-09-10 02:37:17.383429', 'step': 9668, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 2847885091968}, 'timestamp': '2025-09-10 02:37:17.421398', 'step': 9668, 'epoch': 2} {'type': 'loss', 'content': 0.09198956191539764, 'timestamp': '2025-09-10 02:37:17.423723', 'step': 9669, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 3322488817984}, 'timestamp': '2025-09-10 02:37:17.454679', 'step': 9669, 'epoch': 2} {'type': 'loss', 'content': 0.192796990275383, 'timestamp': '2025-09-10 02:37:17.459964', 'step': 9670, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 3322488817984}, 'timestamp': '2025-09-10 02:37:17.494538', 'step': 9670, 'epoch': 2} {'type': 'loss', 'content': 0.19956442713737488, 'timestamp': '2025-09-10 02:37:17.498098', 'step': 9671, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 3322488817984}, 'timestamp': '2025-09-10 02:37:17.529360', 'step': 9671, 'epoch': 2} {'type': 'loss', 'content': 0.23832637071609497, 'timestamp': '2025-09-10 02:37:17.555139', 'step': 9672, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 128], 'flops': 3797092544000}, 'timestamp': '2025-09-10 02:37:17.584671', 'step': 9672, 'epoch': 2} {'type': 'loss', 'content': 0.06265487521886826, 'timestamp': '2025-09-10 02:37:17.587115', 'step': 9673, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 2847885091968}, 'timestamp': '2025-09-10 02:37:17.617028', 'step': 9673, 'epoch': 2} {'type': 'loss', 'content': 0.11589597910642624, 'timestamp': '2025-09-10 02:37:17.620588', 'step': 9674, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 144], 'flops': 4271696270016}, 'timestamp': '2025-09-10 02:37:17.654124', 'step': 9674, 'epoch': 2} {'type': 'loss', 'content': 0.14203280210494995, 'timestamp': '2025-09-10 02:37:17.656515', 'step': 9675, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 128], 'flops': 3797092544000}, 'timestamp': '2025-09-10 02:37:17.686389', 'step': 9675, 'epoch': 2} {'type': 'loss', 'content': 0.16820423305034637, 'timestamp': '2025-09-10 02:37:17.709946', 'step': 9676, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 3322488817984}, 'timestamp': '2025-09-10 02:37:17.740428', 'step': 9676, 'epoch': 2} {'type': 'loss', 'content': 0.04393424466252327, 'timestamp': '2025-09-10 02:37:17.742897', 'step': 9677, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 128], 'flops': 3797092544000}, 'timestamp': '2025-09-10 02:37:17.773145', 'step': 9677, 'epoch': 2} {'type': 'loss', 'content': 0.18488599359989166, 'timestamp': '2025-09-10 02:37:17.775757', 'step': 9678, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 3322488817984}, 'timestamp': '2025-09-10 02:37:17.805233', 'step': 9678, 'epoch': 2} {'type': 'loss', 'content': 0.09550207108259201, 'timestamp': '2025-09-10 02:37:17.807933', 'step': 9679, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 176], 'flops': 5220903722048}, 'timestamp': '2025-09-10 02:37:17.838231', 'step': 9679, 'epoch': 2} {'type': 'loss', 'content': 0.1059122383594513, 'timestamp': '2025-09-10 02:37:17.863222', 'step': 9680, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 2847885091968}, 'timestamp': '2025-09-10 02:37:17.895409', 'step': 9680, 'epoch': 2} {'type': 'loss', 'content': 0.05612848699092865, 'timestamp': '2025-09-10 02:37:17.897432', 'step': 9681, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 128], 'flops': 3797092544000}, 'timestamp': '2025-09-10 02:37:17.927785', 'step': 9681, 'epoch': 2} {'type': 'loss', 'content': 0.07190165668725967, 'timestamp': '2025-09-10 02:37:17.930104', 'step': 9682, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 128], 'flops': 3797092544000}, 'timestamp': '2025-09-10 02:37:17.960622', 'step': 9682, 'epoch': 2} {'type': 'loss', 'content': 0.18505246937274933, 'timestamp': '2025-09-10 02:37:17.962711', 'step': 9683, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 2847885091968}, 'timestamp': '2025-09-10 02:37:17.992119', 'step': 9683, 'epoch': 2} {'type': 'loss', 'content': 0.16990913450717926, 'timestamp': '2025-09-10 02:37:18.015904', 'step': 9684, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 2847885091968}, 'timestamp': '2025-09-10 02:37:18.046176', 'step': 9684, 'epoch': 2} {'type': 'loss', 'content': 0.11830674111843109, 'timestamp': '2025-09-10 02:37:18.048478', 'step': 9685, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 128], 'flops': 3797092544000}, 'timestamp': '2025-09-10 02:37:18.078190', 'step': 9685, 'epoch': 2} {'type': 'loss', 'content': 0.16304822266101837, 'timestamp': '2025-09-10 02:37:18.080396', 'step': 9686, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 128], 'flops': 3797092544000}, 'timestamp': '2025-09-10 02:37:18.109293', 'step': 9686, 'epoch': 2} {'type': 'loss', 'content': 0.11072221398353577, 'timestamp': '2025-09-10 02:37:18.111420', 'step': 9687, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 144], 'flops': 4271696270016}, 'timestamp': '2025-09-10 02:37:18.141273', 'step': 9687, 'epoch': 2} {'type': 'loss', 'content': 0.11004306375980377, 'timestamp': '2025-09-10 02:37:18.164875', 'step': 9688, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 80], 'flops': 2373281365952}, 'timestamp': '2025-09-10 02:37:18.197165', 'step': 9688, 'epoch': 2} {'type': 'loss', 'content': 0.10369893908500671, 'timestamp': '2025-09-10 02:37:18.199411', 'step': 9689, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 3322488817984}, 'timestamp': '2025-09-10 02:37:18.229939', 'step': 9689, 'epoch': 2} {'type': 'loss', 'content': 0.09603181481361389, 'timestamp': '2025-09-10 02:37:18.232075', 'step': 9690, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 128], 'flops': 3797092544000}, 'timestamp': '2025-09-10 02:37:18.262317', 'step': 9690, 'epoch': 2} {'type': 'loss', 'content': 0.10517478734254837, 'timestamp': '2025-09-10 02:37:18.264799', 'step': 9691, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 3322488817984}, 'timestamp': '2025-09-10 02:37:18.294594', 'step': 9691, 'epoch': 2} {'type': 'loss', 'content': 0.10754204541444778, 'timestamp': '2025-09-10 02:37:18.318216', 'step': 9692, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 128], 'flops': 3797092544000}, 'timestamp': '2025-09-10 02:37:18.349186', 'step': 9692, 'epoch': 2} {'type': 'loss', 'content': 0.07766526937484741, 'timestamp': '2025-09-10 02:37:18.351442', 'step': 9693, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 3322488817984}, 'timestamp': '2025-09-10 02:37:18.380703', 'step': 9693, 'epoch': 2} {'type': 'loss', 'content': 0.15467454493045807, 'timestamp': '2025-09-10 02:37:18.382929', 'step': 9694, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 2847885091968}, 'timestamp': '2025-09-10 02:37:18.412589', 'step': 9694, 'epoch': 2} {'type': 'loss', 'content': 0.13398867845535278, 'timestamp': '2025-09-10 02:37:18.418140', 'step': 9695, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 3322488817984}, 'timestamp': '2025-09-10 02:37:18.448712', 'step': 9695, 'epoch': 2} {'type': 'loss', 'content': 0.10571466386318207, 'timestamp': '2025-09-10 02:37:18.471980', 'step': 9696, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 128], 'flops': 3797092544000}, 'timestamp': '2025-09-10 02:37:18.502466', 'step': 9696, 'epoch': 2} {'type': 'loss', 'content': 0.14134016633033752, 'timestamp': '2025-09-10 02:37:18.504675', 'step': 9697, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 2847885091968}, 'timestamp': '2025-09-10 02:37:18.534146', 'step': 9697, 'epoch': 2} {'type': 'loss', 'content': 0.13568195700645447, 'timestamp': '2025-09-10 02:37:18.536393', 'step': 9698, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 128], 'flops': 3797092544000}, 'timestamp': '2025-09-10 02:37:18.565992', 'step': 9698, 'epoch': 2} {'type': 'loss', 'content': 0.09669479727745056, 'timestamp': '2025-09-10 02:37:18.568098', 'step': 9699, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 3322488817984}, 'timestamp': '2025-09-10 02:37:18.598131', 'step': 9699, 'epoch': 2} {'type': 'loss', 'content': 0.05367732420563698, 'timestamp': '2025-09-10 02:37:18.621934', 'step': 9700, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 3322488817984}, 'timestamp': '2025-09-10 02:37:18.652307', 'step': 9700, 'epoch': 2} {'type': 'loss', 'content': 0.15374408662319183, 'timestamp': '2025-09-10 02:37:18.654460', 'step': 9701, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 128], 'flops': 3797092544000}, 'timestamp': '2025-09-10 02:37:18.684130', 'step': 9701, 'epoch': 2} {'type': 'loss', 'content': 0.14817702770233154, 'timestamp': '2025-09-10 02:37:18.686405', 'step': 9702, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 80], 'flops': 2373281365952}, 'timestamp': '2025-09-10 02:37:18.716623', 'step': 9702, 'epoch': 2} {'type': 'loss', 'content': 0.18027782440185547, 'timestamp': '2025-09-10 02:37:18.718441', 'step': 9703, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 80], 'flops': 2373281365952}, 'timestamp': '2025-09-10 02:37:18.747502', 'step': 9703, 'epoch': 2} {'type': 'loss', 'content': 0.050965696573257446, 'timestamp': '2025-09-10 02:37:18.771152', 'step': 9704, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 128], 'flops': 3797092544000}, 'timestamp': '2025-09-10 02:37:18.800955', 'step': 9704, 'epoch': 2} {'type': 'loss', 'content': 0.16953353583812714, 'timestamp': '2025-09-10 02:37:18.803602', 'step': 9705, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 160], 'flops': 4746299996032}, 'timestamp': '2025-09-10 02:37:18.834502', 'step': 9705, 'epoch': 2} {'type': 'loss', 'content': 0.11509936302900314, 'timestamp': '2025-09-10 02:37:18.837184', 'step': 9706, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 144], 'flops': 4271696270016}, 'timestamp': '2025-09-10 02:37:18.866591', 'step': 9706, 'epoch': 2} {'type': 'loss', 'content': 0.10831903666257858, 'timestamp': '2025-09-10 02:37:18.868979', 'step': 9707, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 144], 'flops': 4271696270016}, 'timestamp': '2025-09-10 02:37:18.899307', 'step': 9707, 'epoch': 2} {'type': 'loss', 'content': 0.09922092407941818, 'timestamp': '2025-09-10 02:37:18.922832', 'step': 9708, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 2847885091968}, 'timestamp': '2025-09-10 02:37:18.955601', 'step': 9708, 'epoch': 2} {'type': 'loss', 'content': 0.07349281013011932, 'timestamp': '2025-09-10 02:37:18.957774', 'step': 9709, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 144], 'flops': 4271696270016}, 'timestamp': '2025-09-10 02:37:18.987627', 'step': 9709, 'epoch': 2} {'type': 'loss', 'content': 0.14701491594314575, 'timestamp': '2025-09-10 02:37:18.990058', 'step': 9710, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 160], 'flops': 4746299996032}, 'timestamp': '2025-09-10 02:37:19.019972', 'step': 9710, 'epoch': 2} {'type': 'loss', 'content': 0.06551769375801086, 'timestamp': '2025-09-10 02:37:19.022813', 'step': 9711, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 3322488817984}, 'timestamp': '2025-09-10 02:37:19.053006', 'step': 9711, 'epoch': 2} {'type': 'loss', 'content': 0.11832718551158905, 'timestamp': '2025-09-10 02:37:19.076265', 'step': 9712, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 2847885091968}, 'timestamp': '2025-09-10 02:37:19.106743', 'step': 9712, 'epoch': 2} {'type': 'loss', 'content': 0.0913788378238678, 'timestamp': '2025-09-10 02:37:19.108903', 'step': 9713, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 3322488817984}, 'timestamp': '2025-09-10 02:37:19.138216', 'step': 9713, 'epoch': 2} {'type': 'loss', 'content': 0.11157555133104324, 'timestamp': '2025-09-10 02:37:19.140505', 'step': 9714, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 176], 'flops': 5220903722048}, 'timestamp': '2025-09-10 02:37:19.169692', 'step': 9714, 'epoch': 2} {'type': 'loss', 'content': 0.10955323278903961, 'timestamp': '2025-09-10 02:37:19.174113', 'step': 9715, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 128], 'flops': 3797092544000}, 'timestamp': '2025-09-10 02:37:19.204023', 'step': 9715, 'epoch': 2} {'type': 'loss', 'content': 0.07574544101953506, 'timestamp': '2025-09-10 02:37:19.229718', 'step': 9716, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 3322488817984}, 'timestamp': '2025-09-10 02:37:19.267689', 'step': 9716, 'epoch': 2} {'type': 'loss', 'content': 0.15428030490875244, 'timestamp': '2025-09-10 02:37:19.270602', 'step': 9717, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 2847885091968}, 'timestamp': '2025-09-10 02:37:19.303342', 'step': 9717, 'epoch': 2} {'type': 'loss', 'content': 0.05124853923916817, 'timestamp': '2025-09-10 02:37:19.307573', 'step': 9718, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 128], 'flops': 3797092544000}, 'timestamp': '2025-09-10 02:37:19.344563', 'step': 9718, 'epoch': 2} {'type': 'loss', 'content': 0.16821785271167755, 'timestamp': '2025-09-10 02:37:19.349729', 'step': 9719, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 128], 'flops': 3797092544000}, 'timestamp': '2025-09-10 02:37:19.390183', 'step': 9719, 'epoch': 2} {'type': 'loss', 'content': 0.0531810037791729, 'timestamp': '2025-09-10 02:37:19.416058', 'step': 9720, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 3322488817984}, 'timestamp': '2025-09-10 02:37:19.455372', 'step': 9720, 'epoch': 2} {'type': 'loss', 'content': 0.14265473186969757, 'timestamp': '2025-09-10 02:37:19.459863', 'step': 9721, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 2847885091968}, 'timestamp': '2025-09-10 02:37:19.496024', 'step': 9721, 'epoch': 2} {'type': 'loss', 'content': 0.1522003561258316, 'timestamp': '2025-09-10 02:37:19.498812', 'step': 9722, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 128], 'flops': 3797092544000}, 'timestamp': '2025-09-10 02:37:19.530803', 'step': 9722, 'epoch': 2} {'type': 'loss', 'content': 0.07569222897291183, 'timestamp': '2025-09-10 02:37:19.534834', 'step': 9723, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 2847885091968}, 'timestamp': '2025-09-10 02:37:19.566530', 'step': 9723, 'epoch': 2} {'type': 'loss', 'content': 0.09784993529319763, 'timestamp': '2025-09-10 02:37:19.592845', 'step': 9724, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 128], 'flops': 3797092544000}, 'timestamp': '2025-09-10 02:37:19.626402', 'step': 9724, 'epoch': 2} {'type': 'loss', 'content': 0.09883207827806473, 'timestamp': '2025-09-10 02:37:19.631201', 'step': 9725, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 3322488817984}, 'timestamp': '2025-09-10 02:37:19.667159', 'step': 9725, 'epoch': 2} {'type': 'loss', 'content': 0.12925899028778076, 'timestamp': '2025-09-10 02:37:19.669293', 'step': 9726, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 128], 'flops': 3797092544000}, 'timestamp': '2025-09-10 02:37:19.698406', 'step': 9726, 'epoch': 2} {'type': 'loss', 'content': 0.14993377029895782, 'timestamp': '2025-09-10 02:37:19.700694', 'step': 9727, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 3322488817984}, 'timestamp': '2025-09-10 02:37:19.730028', 'step': 9727, 'epoch': 2} {'type': 'loss', 'content': 0.04296604171395302, 'timestamp': '2025-09-10 02:37:19.753989', 'step': 9728, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 3322488817984}, 'timestamp': '2025-09-10 02:37:19.783637', 'step': 9728, 'epoch': 2} {'type': 'loss', 'content': 0.17365360260009766, 'timestamp': '2025-09-10 02:37:19.785985', 'step': 9729, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 2847885091968}, 'timestamp': '2025-09-10 02:37:19.816026', 'step': 9729, 'epoch': 2} {'type': 'loss', 'content': 0.06469112634658813, 'timestamp': '2025-09-10 02:37:19.818259', 'step': 9730, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 3322488817984}, 'timestamp': '2025-09-10 02:37:19.847807', 'step': 9730, 'epoch': 2} {'type': 'loss', 'content': 0.11484736204147339, 'timestamp': '2025-09-10 02:37:19.850184', 'step': 9731, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 3322488817984}, 'timestamp': '2025-09-10 02:37:19.879540', 'step': 9731, 'epoch': 2} {'type': 'loss', 'content': 0.09668514877557755, 'timestamp': '2025-09-10 02:37:19.903090', 'step': 9732, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 128], 'flops': 3797092544000}, 'timestamp': '2025-09-10 02:37:19.932985', 'step': 9732, 'epoch': 2} {'type': 'loss', 'content': 0.12741130590438843, 'timestamp': '2025-09-10 02:37:19.935741', 'step': 9733, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 3322488817984}, 'timestamp': '2025-09-10 02:37:19.965323', 'step': 9733, 'epoch': 2} {'type': 'loss', 'content': 0.08577106148004532, 'timestamp': '2025-09-10 02:37:19.967771', 'step': 9734, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 2847885091968}, 'timestamp': '2025-09-10 02:37:19.997289', 'step': 9734, 'epoch': 2} {'type': 'loss', 'content': 0.13110904395580292, 'timestamp': '2025-09-10 02:37:19.999758', 'step': 9735, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 2847885091968}, 'timestamp': '2025-09-10 02:37:20.029327', 'step': 9735, 'epoch': 2} {'type': 'loss', 'content': 0.17594456672668457, 'timestamp': '2025-09-10 02:37:20.053192', 'step': 9736, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 2847885091968}, 'timestamp': '2025-09-10 02:37:20.082785', 'step': 9736, 'epoch': 2} {'type': 'loss', 'content': 0.1234404519200325, 'timestamp': '2025-09-10 02:37:20.084983', 'step': 9737, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 2847885091968}, 'timestamp': '2025-09-10 02:37:20.114695', 'step': 9737, 'epoch': 2} {'type': 'loss', 'content': 0.13712178170681, 'timestamp': '2025-09-10 02:37:20.116723', 'step': 9738, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 2847885091968}, 'timestamp': '2025-09-10 02:37:20.146680', 'step': 9738, 'epoch': 2} {'type': 'loss', 'content': 0.16569629311561584, 'timestamp': '2025-09-10 02:37:20.149528', 'step': 9739, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 128], 'flops': 3797092544000}, 'timestamp': '2025-09-10 02:37:20.179557', 'step': 9739, 'epoch': 2} {'type': 'loss', 'content': 0.15211395919322968, 'timestamp': '2025-09-10 02:37:20.203057', 'step': 9740, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 2847885091968}, 'timestamp': '2025-09-10 02:37:20.233632', 'step': 9740, 'epoch': 2} {'type': 'loss', 'content': 0.15946342051029205, 'timestamp': '2025-09-10 02:37:20.236359', 'step': 9741, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 2847885091968}, 'timestamp': '2025-09-10 02:37:20.265890', 'step': 9741, 'epoch': 2} {'type': 'loss', 'content': 0.11355152726173401, 'timestamp': '2025-09-10 02:37:20.268073', 'step': 9742, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 3322488817984}, 'timestamp': '2025-09-10 02:37:20.297540', 'step': 9742, 'epoch': 2} {'type': 'loss', 'content': 0.15607759356498718, 'timestamp': '2025-09-10 02:37:20.299725', 'step': 9743, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 3322488817984}, 'timestamp': '2025-09-10 02:37:20.330430', 'step': 9743, 'epoch': 2} {'type': 'loss', 'content': 0.10878613591194153, 'timestamp': '2025-09-10 02:37:20.355762', 'step': 9744, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 2847885091968}, 'timestamp': '2025-09-10 02:37:20.386405', 'step': 9744, 'epoch': 2} {'type': 'loss', 'content': 0.07212609797716141, 'timestamp': '2025-09-10 02:37:20.388662', 'step': 9745, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 80], 'flops': 2373281365952}, 'timestamp': '2025-09-10 02:37:20.418528', 'step': 9745, 'epoch': 2} {'type': 'loss', 'content': 0.08253548294305801, 'timestamp': '2025-09-10 02:37:20.420706', 'step': 9746, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 128], 'flops': 3797092544000}, 'timestamp': '2025-09-10 02:37:20.449956', 'step': 9746, 'epoch': 2} {'type': 'loss', 'content': 0.04941627010703087, 'timestamp': '2025-09-10 02:37:20.452247', 'step': 9747, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 3322488817984}, 'timestamp': '2025-09-10 02:37:20.481775', 'step': 9747, 'epoch': 2} {'type': 'loss', 'content': 0.14267262816429138, 'timestamp': '2025-09-10 02:37:20.505218', 'step': 9748, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 80], 'flops': 2373281365952}, 'timestamp': '2025-09-10 02:37:20.535262', 'step': 9748, 'epoch': 2} {'type': 'loss', 'content': 0.1694789081811905, 'timestamp': '2025-09-10 02:37:20.538120', 'step': 9749, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 144], 'flops': 4271696270016}, 'timestamp': '2025-09-10 02:37:20.567496', 'step': 9749, 'epoch': 2} {'type': 'loss', 'content': 0.13747279345989227, 'timestamp': '2025-09-10 02:37:20.570058', 'step': 9750, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 2847885091968}, 'timestamp': '2025-09-10 02:37:20.599635', 'step': 9750, 'epoch': 2} {'type': 'loss', 'content': 0.19333812594413757, 'timestamp': '2025-09-10 02:37:20.601922', 'step': 9751, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 3322488817984}, 'timestamp': '2025-09-10 02:37:20.631645', 'step': 9751, 'epoch': 2} {'type': 'loss', 'content': 0.1283651888370514, 'timestamp': '2025-09-10 02:37:20.655320', 'step': 9752, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 2847885091968}, 'timestamp': '2025-09-10 02:37:20.687626', 'step': 9752, 'epoch': 2} {'type': 'loss', 'content': 0.10469198226928711, 'timestamp': '2025-09-10 02:37:20.689808', 'step': 9753, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 3322488817984}, 'timestamp': '2025-09-10 02:37:20.719026', 'step': 9753, 'epoch': 2} {'type': 'loss', 'content': 0.11136097460985184, 'timestamp': '2025-09-10 02:37:20.721186', 'step': 9754, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 128], 'flops': 3797092544000}, 'timestamp': '2025-09-10 02:37:20.750706', 'step': 9754, 'epoch': 2} {'type': 'loss', 'content': 0.1996278613805771, 'timestamp': '2025-09-10 02:37:20.753082', 'step': 9755, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 3322488817984}, 'timestamp': '2025-09-10 02:37:20.784302', 'step': 9755, 'epoch': 2} {'type': 'loss', 'content': 0.0827406570315361, 'timestamp': '2025-09-10 02:37:20.807684', 'step': 9756, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 3322488817984}, 'timestamp': '2025-09-10 02:37:20.837644', 'step': 9756, 'epoch': 2} {'type': 'loss', 'content': 0.10743752121925354, 'timestamp': '2025-09-10 02:37:20.843098', 'step': 9757, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 144], 'flops': 4271696270016}, 'timestamp': '2025-09-10 02:37:20.872820', 'step': 9757, 'epoch': 2} {'type': 'loss', 'content': 0.050703682005405426, 'timestamp': '2025-09-10 02:37:20.875788', 'step': 9758, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 3322488817984}, 'timestamp': '2025-09-10 02:37:20.911017', 'step': 9758, 'epoch': 2} {'type': 'loss', 'content': 0.15815484523773193, 'timestamp': '2025-09-10 02:37:20.913093', 'step': 9759, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 128], 'flops': 3797092544000}, 'timestamp': '2025-09-10 02:37:20.942218', 'step': 9759, 'epoch': 2} {'type': 'loss', 'content': 0.12595747411251068, 'timestamp': '2025-09-10 02:37:20.965567', 'step': 9760, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 2847885091968}, 'timestamp': '2025-09-10 02:37:20.995686', 'step': 9760, 'epoch': 2} {'type': 'loss', 'content': 0.08714081346988678, 'timestamp': '2025-09-10 02:37:20.997729', 'step': 9761, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 2847885091968}, 'timestamp': '2025-09-10 02:37:21.029774', 'step': 9761, 'epoch': 2} {'type': 'loss', 'content': 0.06319992244243622, 'timestamp': '2025-09-10 02:37:21.031493', 'step': 9762, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 128], 'flops': 3797092544000}, 'timestamp': '2025-09-10 02:37:21.060715', 'step': 9762, 'epoch': 2} {'type': 'loss', 'content': 0.1672060787677765, 'timestamp': '2025-09-10 02:37:21.062971', 'step': 9763, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 2847885091968}, 'timestamp': '2025-09-10 02:37:21.094375', 'step': 9763, 'epoch': 2} {'type': 'loss', 'content': 0.11496885120868683, 'timestamp': '2025-09-10 02:37:21.118232', 'step': 9764, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 128], 'flops': 3797092544000}, 'timestamp': '2025-09-10 02:37:21.147874', 'step': 9764, 'epoch': 2} {'type': 'loss', 'content': 0.12710507214069366, 'timestamp': '2025-09-10 02:37:21.150056', 'step': 9765, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 3322488817984}, 'timestamp': '2025-09-10 02:37:21.179473', 'step': 9765, 'epoch': 2} {'type': 'loss', 'content': 0.11142292618751526, 'timestamp': '2025-09-10 02:37:21.181593', 'step': 9766, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 2847885091968}, 'timestamp': '2025-09-10 02:37:21.210647', 'step': 9766, 'epoch': 2} {'type': 'loss', 'content': 0.06547553092241287, 'timestamp': '2025-09-10 02:37:21.213086', 'step': 9767, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 2847885091968}, 'timestamp': '2025-09-10 02:37:21.242030', 'step': 9767, 'epoch': 2} {'type': 'loss', 'content': 0.21344240009784698, 'timestamp': '2025-09-10 02:37:21.266941', 'step': 9768, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 144], 'flops': 4271696270016}, 'timestamp': '2025-09-10 02:37:21.297136', 'step': 9768, 'epoch': 2} {'type': 'loss', 'content': 0.17294707894325256, 'timestamp': '2025-09-10 02:37:21.299422', 'step': 9769, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 2847885091968}, 'timestamp': '2025-09-10 02:37:21.329123', 'step': 9769, 'epoch': 2} {'type': 'loss', 'content': 0.18096619844436646, 'timestamp': '2025-09-10 02:37:21.331562', 'step': 9770, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 3322488817984}, 'timestamp': '2025-09-10 02:37:21.361354', 'step': 9770, 'epoch': 2} {'type': 'loss', 'content': 0.05522841960191727, 'timestamp': '2025-09-10 02:37:21.363663', 'step': 9771, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 3322488817984}, 'timestamp': '2025-09-10 02:37:21.396252', 'step': 9771, 'epoch': 2} {'type': 'loss', 'content': 0.09037711471319199, 'timestamp': '2025-09-10 02:37:21.419969', 'step': 9772, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 128], 'flops': 3797092544000}, 'timestamp': '2025-09-10 02:37:21.461890', 'step': 9772, 'epoch': 2} {'type': 'loss', 'content': 0.09360580891370773, 'timestamp': '2025-09-10 02:37:21.463942', 'step': 9773, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 3322488817984}, 'timestamp': '2025-09-10 02:37:21.494372', 'step': 9773, 'epoch': 2} {'type': 'loss', 'content': 0.09098909050226212, 'timestamp': '2025-09-10 02:37:21.496824', 'step': 9774, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 144], 'flops': 4271696270016}, 'timestamp': '2025-09-10 02:37:21.526523', 'step': 9774, 'epoch': 2} {'type': 'loss', 'content': 0.0831984356045723, 'timestamp': '2025-09-10 02:37:21.528990', 'step': 9775, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 160], 'flops': 4746299996032}, 'timestamp': '2025-09-10 02:37:21.563377', 'step': 9775, 'epoch': 2} {'type': 'loss', 'content': 0.07787341624498367, 'timestamp': '2025-09-10 02:37:21.587001', 'step': 9776, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 144], 'flops': 4271696270016}, 'timestamp': '2025-09-10 02:37:21.622966', 'step': 9776, 'epoch': 2} {'type': 'loss', 'content': 0.08233153074979782, 'timestamp': '2025-09-10 02:37:21.625152', 'step': 9777, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 3322488817984}, 'timestamp': '2025-09-10 02:37:21.654340', 'step': 9777, 'epoch': 2} {'type': 'loss', 'content': 0.07453674077987671, 'timestamp': '2025-09-10 02:37:21.659676', 'step': 9778, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 2847885091968}, 'timestamp': '2025-09-10 02:37:21.694591', 'step': 9778, 'epoch': 2} {'type': 'loss', 'content': 0.15820184350013733, 'timestamp': '2025-09-10 02:37:21.697457', 'step': 9779, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 192], 'flops': 5695507448064}, 'timestamp': '2025-09-10 02:37:21.728636', 'step': 9779, 'epoch': 2} {'type': 'loss', 'content': 0.06562799960374832, 'timestamp': '2025-09-10 02:37:21.753918', 'step': 9780, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 2847885091968}, 'timestamp': '2025-09-10 02:37:21.785719', 'step': 9780, 'epoch': 2} {'type': 'loss', 'content': 0.13081808388233185, 'timestamp': '2025-09-10 02:37:21.793838', 'step': 9781, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 3322488817984}, 'timestamp': '2025-09-10 02:37:21.824803', 'step': 9781, 'epoch': 2} {'type': 'loss', 'content': 0.14000903069972992, 'timestamp': '2025-09-10 02:37:21.827723', 'step': 9782, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 3322488817984}, 'timestamp': '2025-09-10 02:37:21.860657', 'step': 9782, 'epoch': 2} {'type': 'loss', 'content': 0.10333395004272461, 'timestamp': '2025-09-10 02:37:21.867423', 'step': 9783, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 2847885091968}, 'timestamp': '2025-09-10 02:37:21.901318', 'step': 9783, 'epoch': 2} {'type': 'loss', 'content': 0.17785516381263733, 'timestamp': '2025-09-10 02:37:21.925725', 'step': 9784, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 144], 'flops': 4271696270016}, 'timestamp': '2025-09-10 02:37:21.963027', 'step': 9784, 'epoch': 2} {'type': 'loss', 'content': 0.18311552703380585, 'timestamp': '2025-09-10 02:37:21.965744', 'step': 9785, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 2847885091968}, 'timestamp': '2025-09-10 02:37:21.995129', 'step': 9785, 'epoch': 2} {'type': 'loss', 'content': 0.07498818635940552, 'timestamp': '2025-09-10 02:37:21.997326', 'step': 9786, 'epoch': 2} {'type': 'flops', 'content': [{'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1582003754624}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1898404492032}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1898404492032}, {'type': 'perplexity', 'in_batch_dim': [8, 144], 'batch_size': 8, 'flops': 2847606704256}, {'type': 'perplexity', 'in_batch_dim': [8, 64], 'batch_size': 8, 'flops': 1265603017216}, {'type': 'perplexity', 'in_batch_dim': [8, 112], 'batch_size': 8, 'flops': 2214805229440}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1898404492032}, {'type': 'perplexity', 'in_batch_dim': [8, 112], 'batch_size': 8, 'flops': 2214805229440}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1898404492032}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1898404492032}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1898404492032}, {'type': 'perplexity', 'in_batch_dim': [8, 112], 'batch_size': 8, 'flops': 2214805229440}, {'type': 'perplexity', 'in_batch_dim': [8, 112], 'batch_size': 8, 'flops': 2214805229440}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1582003754624}, {'type': 'perplexity', 'in_batch_dim': [8, 144], 'batch_size': 8, 'flops': 2847606704256}, {'type': 'perplexity', 'in_batch_dim': [8, 112], 'batch_size': 8, 'flops': 2214805229440}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1582003754624}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1898404492032}, {'type': 'perplexity', 'in_batch_dim': [8, 64], 'batch_size': 8, 'flops': 1265603017216}, {'type': 'perplexity', 'in_batch_dim': [8, 64], 'batch_size': 8, 'flops': 1265603017216}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1898404492032}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1582003754624}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1582003754624}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1582003754624}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1582003754624}, {'type': 'perplexity', 'in_batch_dim': [8, 64], 'batch_size': 8, 'flops': 1265603017216}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1898404492032}, {'type': 'perplexity', 'in_batch_dim': [8, 64], 'batch_size': 8, 'flops': 1265603017216}, {'type': 'perplexity', 'in_batch_dim': [8, 64], 'batch_size': 8, 'flops': 1265603017216}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1582003754624}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1582003754624}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1582003754624}, {'type': 'perplexity', 'in_batch_dim': [8, 112], 'batch_size': 8, 'flops': 2214805229440}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1582003754624}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1582003754624}, {'type': 'perplexity', 'in_batch_dim': [8, 160], 'batch_size': 8, 'flops': 3164007441664}, {'type': 'perplexity', 'in_batch_dim': [8, 64], 'batch_size': 8, 'flops': 1265603017216}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1898404492032}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1898404492032}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1898404492032}, {'type': 'perplexity', 'in_batch_dim': [8, 64], 'batch_size': 8, 'flops': 1265603017216}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1582003754624}, {'type': 'perplexity', 'in_batch_dim': [8, 64], 'batch_size': 8, 'flops': 1265603017216}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1582003754624}, {'type': 'perplexity', 'in_batch_dim': [8, 64], 'batch_size': 8, 'flops': 1265603017216}, {'type': 'perplexity', 'in_batch_dim': [8, 112], 'batch_size': 8, 'flops': 2214805229440}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1582003754624}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1898404492032}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1898404492032}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1898404492032}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1898404492032}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1898404492032}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1898404492032}, {'type': 'perplexity', 'in_batch_dim': [8, 64], 'batch_size': 8, 'flops': 1265603017216}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1898404492032}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1582003754624}, {'type': 'perplexity', 'in_batch_dim': [8, 64], 'batch_size': 8, 'flops': 1265603017216}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1898404492032}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1898404492032}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1898404492032}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1898404492032}, {'type': 'perplexity', 'in_batch_dim': [8, 128], 'batch_size': 8, 'flops': 2531205966848}, {'type': 'perplexity', 'in_batch_dim': [8, 112], 'batch_size': 8, 'flops': 2214805229440}, {'type': 'perplexity', 'in_batch_dim': [8, 64], 'batch_size': 8, 'flops': 1265603017216}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1582003754624}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1898404492032}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1582003754624}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1898404492032}, {'type': 'perplexity', 'in_batch_dim': [8, 64], 'batch_size': 8, 'flops': 1265603017216}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1898404492032}, {'type': 'perplexity', 'in_batch_dim': [8, 112], 'batch_size': 8, 'flops': 2214805229440}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1898404492032}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1582003754624}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1582003754624}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1582003754624}, {'type': 'perplexity', 'in_batch_dim': [8, 64], 'batch_size': 8, 'flops': 1265603017216}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1582003754624}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1898404492032}, {'type': 'perplexity', 'in_batch_dim': [8, 64], 'batch_size': 8, 'flops': 1265603017216}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1582003754624}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1898404492032}, {'type': 'perplexity', 'in_batch_dim': [8, 64], 'batch_size': 8, 'flops': 1265603017216}, {'type': 'perplexity', 'in_batch_dim': [8, 112], 'batch_size': 8, 'flops': 2214805229440}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1582003754624}, {'type': 'perplexity', 'in_batch_dim': [8, 144], 'batch_size': 8, 'flops': 2847606704256}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1582003754624}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1582003754624}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1582003754624}, {'type': 'perplexity', 'in_batch_dim': [8, 64], 'batch_size': 8, 'flops': 1265603017216}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1898404492032}, {'type': 'perplexity', 'in_batch_dim': [8, 112], 'batch_size': 8, 'flops': 2214805229440}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1898404492032}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1582003754624}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1582003754624}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1582003754624}, {'type': 'perplexity', 'in_batch_dim': [8, 112], 'batch_size': 8, 'flops': 2214805229440}, {'type': 'perplexity', 'in_batch_dim': [8, 64], 'batch_size': 8, 'flops': 1265603017216}, {'type': 'perplexity', 'in_batch_dim': [8, 112], 'batch_size': 8, 'flops': 2214805229440}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1582003754624}, {'type': 'perplexity', 'in_batch_dim': [8, 64], 'batch_size': 8, 'flops': 1265603017216}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1582003754624}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1582003754624}, {'type': 'perplexity', 'in_batch_dim': [8, 64], 'batch_size': 8, 'flops': 1265603017216}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1582003754624}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1582003754624}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1582003754624}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1898404492032}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1582003754624}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1582003754624}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1898404492032}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1898404492032}, {'type': 'perplexity', 'in_batch_dim': [8, 112], 'batch_size': 8, 'flops': 2214805229440}, {'type': 'perplexity', 'in_batch_dim': [8, 64], 'batch_size': 8, 'flops': 1265603017216}, {'type': 'perplexity', 'in_batch_dim': [8, 112], 'batch_size': 8, 'flops': 2214805229440}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1898404492032}, {'type': 'perplexity', 'in_batch_dim': [8, 112], 'batch_size': 8, 'flops': 2214805229440}, {'type': 'perplexity', 'in_batch_dim': [8, 128], 'batch_size': 8, 'flops': 2531205966848}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1582003754624}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1898404492032}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1898404492032}, {'type': 'perplexity', 'in_batch_dim': [8, 192], 'batch_size': 8, 'flops': 3796808916480}, {'type': 'perplexity', 'in_batch_dim': [8, 64], 'batch_size': 8, 'flops': 1265603017216}, {'type': 'perplexity', 'in_batch_dim': [8, 144], 'batch_size': 8, 'flops': 2847606704256}, {'type': 'perplexity', 'in_batch_dim': [8, 64], 'batch_size': 8, 'flops': 1265603017216}, {'type': 'perplexity', 'in_batch_dim': [8, 144], 'batch_size': 8, 'flops': 2847606704256}, {'type': 'perplexity', 'in_batch_dim': [8, 64], 'batch_size': 8, 'flops': 1265603017216}, {'type': 'perplexity', 'in_batch_dim': [8, 64], 'batch_size': 8, 'flops': 1265603017216}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1898404492032}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1898404492032}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1582003754624}, {'type': 'perplexity', 'in_batch_dim': [8, 128], 'batch_size': 8, 'flops': 2531205966848}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1898404492032}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1898404492032}, {'type': 'perplexity', 'in_batch_dim': [8, 112], 'batch_size': 8, 'flops': 2214805229440}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1582003754624}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1582003754624}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1898404492032}, {'type': 'perplexity', 'in_batch_dim': [8, 64], 'batch_size': 8, 'flops': 1265603017216}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1898404492032}, {'type': 'perplexity', 'in_batch_dim': [8, 112], 'batch_size': 8, 'flops': 2214805229440}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1898404492032}, {'type': 'perplexity', 'in_batch_dim': [8, 64], 'batch_size': 8, 'flops': 1265603017216}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1582003754624}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1898404492032}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1898404492032}, {'type': 'perplexity', 'in_batch_dim': [8, 64], 'batch_size': 8, 'flops': 1265603017216}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1898404492032}, {'type': 'perplexity', 'in_batch_dim': [8, 64], 'batch_size': 8, 'flops': 1265603017216}, {'type': 'perplexity', 'in_batch_dim': [8, 112], 'batch_size': 8, 'flops': 2214805229440}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1582003754624}, {'type': 'perplexity', 'in_batch_dim': [8, 128], 'batch_size': 8, 'flops': 2531205966848}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1582003754624}, {'type': 'perplexity', 'in_batch_dim': [8, 112], 'batch_size': 8, 'flops': 2214805229440}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1898404492032}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1582003754624}, {'type': 'perplexity', 'in_batch_dim': [8, 112], 'batch_size': 8, 'flops': 2214805229440}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1582003754624}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1898404492032}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1898404492032}, {'type': 'perplexity', 'in_batch_dim': [8, 128], 'batch_size': 8, 'flops': 2531205966848}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1898404492032}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1898404492032}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1582003754624}, {'type': 'perplexity', 'in_batch_dim': [8, 112], 'batch_size': 8, 'flops': 2214805229440}, {'type': 'perplexity', 'in_batch_dim': [8, 128], 'batch_size': 8, 'flops': 2531205966848}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1898404492032}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1582003754624}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1582003754624}, {'type': 'perplexity', 'in_batch_dim': [8, 128], 'batch_size': 8, 'flops': 2531205966848}, {'type': 'perplexity', 'in_batch_dim': [8, 112], 'batch_size': 8, 'flops': 2214805229440}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1582003754624}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1582003754624}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1898404492032}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1582003754624}, {'type': 'perplexity', 'in_batch_dim': [8, 64], 'batch_size': 8, 'flops': 1265603017216}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1582003754624}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1582003754624}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1582003754624}, {'type': 'perplexity', 'in_batch_dim': [8, 112], 'batch_size': 8, 'flops': 2214805229440}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1582003754624}, {'type': 'perplexity', 'in_batch_dim': [8, 64], 'batch_size': 8, 'flops': 1265603017216}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1582003754624}, {'type': 'perplexity', 'in_batch_dim': [8, 128], 'batch_size': 8, 'flops': 2531205966848}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1898404492032}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1898404492032}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1582003754624}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1898404492032}, {'type': 'perplexity', 'in_batch_dim': [8, 64], 'batch_size': 8, 'flops': 1265603017216}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1898404492032}, {'type': 'perplexity', 'in_batch_dim': [8, 112], 'batch_size': 8, 'flops': 2214805229440}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1898404492032}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1582003754624}, {'type': 'perplexity', 'in_batch_dim': [8, 112], 'batch_size': 8, 'flops': 2214805229440}, {'type': 'perplexity', 'in_batch_dim': [8, 112], 'batch_size': 8, 'flops': 2214805229440}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1898404492032}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1898404492032}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1898404492032}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1898404492032}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1582003754624}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1582003754624}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1582003754624}, {'type': 'perplexity', 'in_batch_dim': [8, 112], 'batch_size': 8, 'flops': 2214805229440}, {'type': 'perplexity', 'in_batch_dim': [8, 64], 'batch_size': 8, 'flops': 1265603017216}, {'type': 'perplexity', 'in_batch_dim': [8, 144], 'batch_size': 8, 'flops': 2847606704256}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1898404492032}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1898404492032}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1898404492032}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1898404492032}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1898404492032}, {'type': 'perplexity', 'in_batch_dim': [8, 112], 'batch_size': 8, 'flops': 2214805229440}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1582003754624}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1582003754624}, {'type': 'perplexity', 'in_batch_dim': [8, 64], 'batch_size': 8, 'flops': 1265603017216}, {'type': 'perplexity', 'in_batch_dim': [8, 112], 'batch_size': 8, 'flops': 2214805229440}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1582003754624}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1898404492032}, {'type': 'perplexity', 'in_batch_dim': [8, 64], 'batch_size': 8, 'flops': 1265603017216}, {'type': 'perplexity', 'in_batch_dim': [8, 112], 'batch_size': 8, 'flops': 2214805229440}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1898404492032}, {'type': 'perplexity', 'in_batch_dim': [8, 64], 'batch_size': 8, 'flops': 1265603017216}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1582003754624}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1582003754624}, {'type': 'perplexity', 'in_batch_dim': [8, 64], 'batch_size': 8, 'flops': 1265603017216}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1898404492032}, {'type': 'perplexity', 'in_batch_dim': [8, 64], 'batch_size': 8, 'flops': 1265603017216}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1582003754624}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1898404492032}, {'type': 'perplexity', 'in_batch_dim': [8, 112], 'batch_size': 8, 'flops': 2214805229440}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1582003754624}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1898404492032}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1582003754624}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1898404492032}, {'type': 'perplexity', 'in_batch_dim': [8, 112], 'batch_size': 8, 'flops': 2214805229440}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1898404492032}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1898404492032}, {'type': 'perplexity', 'in_batch_dim': [8, 64], 'batch_size': 8, 'flops': 1265603017216}, {'type': 'perplexity', 'in_batch_dim': [8, 64], 'batch_size': 8, 'flops': 1265603017216}, {'type': 'perplexity', 'in_batch_dim': [8, 128], 'batch_size': 8, 'flops': 2531205966848}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1582003754624}, {'type': 'perplexity', 'in_batch_dim': [8, 64], 'batch_size': 8, 'flops': 1265603017216}, {'type': 'perplexity', 'in_batch_dim': [8, 112], 'batch_size': 8, 'flops': 2214805229440}, {'type': 'perplexity', 'in_batch_dim': [8, 112], 'batch_size': 8, 'flops': 2214805229440}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1898404492032}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1582003754624}, {'type': 'perplexity', 'in_batch_dim': [8, 128], 'batch_size': 8, 'flops': 2531205966848}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1898404492032}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1582003754624}, {'type': 'perplexity', 'in_batch_dim': [8, 112], 'batch_size': 8, 'flops': 2214805229440}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1898404492032}, {'type': 'perplexity', 'in_batch_dim': [8, 64], 'batch_size': 8, 'flops': 1265603017216}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1582003754624}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1898404492032}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1898404492032}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1582003754624}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1582003754624}, {'type': 'perplexity', 'in_batch_dim': [8, 64], 'batch_size': 8, 'flops': 1265603017216}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1898404492032}, {'type': 'perplexity', 'in_batch_dim': [8, 112], 'batch_size': 8, 'flops': 2214805229440}, {'type': 'perplexity', 'in_batch_dim': [8, 64], 'batch_size': 8, 'flops': 1265603017216}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1898404492032}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1582003754624}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1582003754624}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1898404492032}, {'type': 'perplexity', 'in_batch_dim': [8, 112], 'batch_size': 8, 'flops': 2214805229440}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1898404492032}, {'type': 'perplexity', 'in_batch_dim': [8, 112], 'batch_size': 8, 'flops': 2214805229440}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1582003754624}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1582003754624}, {'type': 'perplexity', 'in_batch_dim': [8, 64], 'batch_size': 8, 'flops': 1265603017216}, {'type': 'perplexity', 'in_batch_dim': [8, 112], 'batch_size': 8, 'flops': 2214805229440}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1582003754624}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1898404492032}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1582003754624}, {'type': 'perplexity', 'in_batch_dim': [8, 112], 'batch_size': 8, 'flops': 2214805229440}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1898404492032}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1898404492032}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1582003754624}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1898404492032}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1582003754624}, {'type': 'perplexity', 'in_batch_dim': [8, 112], 'batch_size': 8, 'flops': 2214805229440}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1582003754624}, {'type': 'perplexity', 'in_batch_dim': [8, 64], 'batch_size': 8, 'flops': 1265603017216}, {'type': 'perplexity', 'in_batch_dim': [8, 64], 'batch_size': 8, 'flops': 1265603017216}, {'type': 'perplexity', 'in_batch_dim': [8, 48], 'batch_size': 8, 'flops': 949202279808}, {'type': 'perplexity', 'in_batch_dim': [8, 112], 'batch_size': 8, 'flops': 2214805229440}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1582003754624}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1898404492032}, {'type': 'perplexity', 'in_batch_dim': [8, 128], 'batch_size': 8, 'flops': 2531205966848}, {'type': 'perplexity', 'in_batch_dim': [8, 112], 'batch_size': 8, 'flops': 2214805229440}, {'type': 'perplexity', 'in_batch_dim': [8, 128], 'batch_size': 8, 'flops': 2531205966848}, {'type': 'perplexity', 'in_batch_dim': [8, 112], 'batch_size': 8, 'flops': 2214805229440}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1582003754624}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1898404492032}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1582003754624}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1582003754624}, {'type': 'perplexity', 'in_batch_dim': [8, 112], 'batch_size': 8, 'flops': 2214805229440}, {'type': 'perplexity', 'in_batch_dim': [8, 112], 'batch_size': 8, 'flops': 2214805229440}, {'type': 'perplexity', 'in_batch_dim': [8, 64], 'batch_size': 8, 'flops': 1265603017216}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1898404492032}, {'type': 'perplexity', 'in_batch_dim': [8, 112], 'batch_size': 8, 'flops': 2214805229440}, {'type': 'perplexity', 'in_batch_dim': [8, 64], 'batch_size': 8, 'flops': 1265603017216}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1898404492032}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1582003754624}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1898404492032}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1898404492032}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1898404492032}, {'type': 'perplexity', 'in_batch_dim': [8, 48], 'batch_size': 8, 'flops': 949202279808}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1898404492032}, {'type': 'perplexity', 'in_batch_dim': [8, 112], 'batch_size': 8, 'flops': 2214805229440}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1582003754624}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1898404492032}, {'type': 'perplexity', 'in_batch_dim': [8, 64], 'batch_size': 8, 'flops': 1265603017216}, {'type': 'perplexity', 'in_batch_dim': [8, 112], 'batch_size': 8, 'flops': 2214805229440}, {'type': 'perplexity', 'in_batch_dim': [8, 64], 'batch_size': 8, 'flops': 1265603017216}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1582003754624}, {'type': 'perplexity', 'in_batch_dim': [8, 64], 'batch_size': 8, 'flops': 1265603017216}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1582003754624}, {'type': 'perplexity', 'in_batch_dim': [8, 112], 'batch_size': 8, 'flops': 2214805229440}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1582003754624}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1582003754624}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1582003754624}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1582003754624}, {'type': 'perplexity', 'in_batch_dim': [8, 112], 'batch_size': 8, 'flops': 2214805229440}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1582003754624}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1898404492032}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1898404492032}, {'type': 'perplexity', 'in_batch_dim': [8, 112], 'batch_size': 8, 'flops': 2214805229440}, {'type': 'perplexity', 'in_batch_dim': [8, 144], 'batch_size': 8, 'flops': 2847606704256}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1582003754624}, {'type': 'perplexity', 'in_batch_dim': [8, 64], 'batch_size': 8, 'flops': 1265603017216}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1898404492032}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1898404492032}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1582003754624}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1898404492032}, {'type': 'perplexity', 'in_batch_dim': [8, 128], 'batch_size': 8, 'flops': 2531205966848}, {'type': 'perplexity', 'in_batch_dim': [8, 112], 'batch_size': 8, 'flops': 2214805229440}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1898404492032}, {'type': 'perplexity', 'in_batch_dim': [8, 64], 'batch_size': 8, 'flops': 1265603017216}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1898404492032}, {'type': 'perplexity', 'in_batch_dim': [8, 160], 'batch_size': 8, 'flops': 3164007441664}, {'type': 'perplexity', 'in_batch_dim': [8, 64], 'batch_size': 8, 'flops': 1265603017216}, {'type': 'perplexity', 'in_batch_dim': [8, 112], 'batch_size': 8, 'flops': 2214805229440}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1898404492032}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1898404492032}, {'type': 'perplexity', 'in_batch_dim': [8, 144], 'batch_size': 8, 'flops': 2847606704256}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1582003754624}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1898404492032}, {'type': 'perplexity', 'in_batch_dim': [8, 64], 'batch_size': 8, 'flops': 1265603017216}, {'type': 'perplexity', 'in_batch_dim': [8, 64], 'batch_size': 8, 'flops': 1265603017216}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1898404492032}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1898404492032}, {'type': 'perplexity', 'in_batch_dim': [8, 112], 'batch_size': 8, 'flops': 2214805229440}, {'type': 'perplexity', 'in_batch_dim': [8, 112], 'batch_size': 8, 'flops': 2214805229440}, {'type': 'perplexity', 'in_batch_dim': [8, 128], 'batch_size': 8, 'flops': 2531205966848}, {'type': 'perplexity', 'in_batch_dim': [8, 112], 'batch_size': 8, 'flops': 2214805229440}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1582003754624}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1582003754624}, {'type': 'perplexity', 'in_batch_dim': [8, 112], 'batch_size': 8, 'flops': 2214805229440}, {'type': 'perplexity', 'in_batch_dim': [8, 128], 'batch_size': 8, 'flops': 2531205966848}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1582003754624}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1582003754624}, {'type': 'perplexity', 'in_batch_dim': [8, 64], 'batch_size': 8, 'flops': 1265603017216}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1898404492032}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1582003754624}, {'type': 'perplexity', 'in_batch_dim': [8, 112], 'batch_size': 8, 'flops': 2214805229440}, {'type': 'perplexity', 'in_batch_dim': [8, 64], 'batch_size': 8, 'flops': 1265603017216}, {'type': 'perplexity', 'in_batch_dim': [8, 64], 'batch_size': 8, 'flops': 1265603017216}, {'type': 'perplexity', 'in_batch_dim': [8, 128], 'batch_size': 8, 'flops': 2531205966848}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1898404492032}, {'type': 'perplexity', 'in_batch_dim': [8, 112], 'batch_size': 8, 'flops': 2214805229440}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1582003754624}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1582003754624}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1582003754624}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1582003754624}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1582003754624}, {'type': 'perplexity', 'in_batch_dim': [8, 112], 'batch_size': 8, 'flops': 2214805229440}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1898404492032}, {'type': 'perplexity', 'in_batch_dim': [8, 112], 'batch_size': 8, 'flops': 2214805229440}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1898404492032}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1582003754624}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1582003754624}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1898404492032}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1898404492032}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1582003754624}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1582003754624}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1582003754624}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1582003754624}, {'type': 'perplexity', 'in_batch_dim': [8, 48], 'batch_size': 8, 'flops': 949202279808}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1582003754624}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1898404492032}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1582003754624}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1898404492032}, {'type': 'perplexity', 'in_batch_dim': [8, 112], 'batch_size': 8, 'flops': 2214805229440}, {'type': 'perplexity', 'in_batch_dim': [8, 128], 'batch_size': 8, 'flops': 2531205966848}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1898404492032}, {'type': 'perplexity', 'in_batch_dim': [8, 64], 'batch_size': 8, 'flops': 1265603017216}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1898404492032}, {'type': 'perplexity', 'in_batch_dim': [8, 64], 'batch_size': 8, 'flops': 1265603017216}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1582003754624}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1582003754624}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1582003754624}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1582003754624}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1898404492032}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1582003754624}, {'type': 'perplexity', 'in_batch_dim': [8, 112], 'batch_size': 8, 'flops': 2214805229440}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1898404492032}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1582003754624}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1898404492032}, {'type': 'perplexity', 'in_batch_dim': [8, 64], 'batch_size': 8, 'flops': 1265603017216}, {'type': 'perplexity', 'in_batch_dim': [8, 64], 'batch_size': 8, 'flops': 1265603017216}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1898404492032}, {'type': 'perplexity', 'in_batch_dim': [8, 64], 'batch_size': 8, 'flops': 1265603017216}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1582003754624}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1582003754624}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1582003754624}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1582003754624}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1582003754624}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1582003754624}, {'type': 'perplexity', 'in_batch_dim': [8, 64], 'batch_size': 8, 'flops': 1265603017216}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1582003754624}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1582003754624}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1898404492032}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1898404492032}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1898404492032}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1898404492032}, {'type': 'perplexity', 'in_batch_dim': [8, 128], 'batch_size': 8, 'flops': 2531205966848}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1582003754624}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1898404492032}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1898404492032}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1898404492032}, {'type': 'perplexity', 'in_batch_dim': [8, 64], 'batch_size': 8, 'flops': 1265603017216}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1898404492032}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1898404492032}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1582003754624}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1898404492032}, {'type': 'perplexity', 'in_batch_dim': [8, 112], 'batch_size': 8, 'flops': 2214805229440}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1898404492032}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1582003754624}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1898404492032}, {'type': 'perplexity', 'in_batch_dim': [8, 64], 'batch_size': 8, 'flops': 1265603017216}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1898404492032}, {'type': 'perplexity', 'in_batch_dim': [8, 112], 'batch_size': 8, 'flops': 2214805229440}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1898404492032}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1898404492032}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1582003754624}, {'type': 'perplexity', 'in_batch_dim': [8, 64], 'batch_size': 8, 'flops': 1265603017216}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1582003754624}, {'type': 'perplexity', 'in_batch_dim': [8, 128], 'batch_size': 8, 'flops': 2531205966848}, {'type': 'perplexity', 'in_batch_dim': [8, 64], 'batch_size': 8, 'flops': 1265603017216}, {'type': 'perplexity', 'in_batch_dim': [8, 128], 'batch_size': 8, 'flops': 2531205966848}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1898404492032}, {'type': 'perplexity', 'in_batch_dim': [8, 112], 'batch_size': 8, 'flops': 2214805229440}, {'type': 'perplexity', 'in_batch_dim': [8, 112], 'batch_size': 8, 'flops': 2214805229440}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1898404492032}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1898404492032}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1582003754624}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1582003754624}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1582003754624}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1898404492032}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1582003754624}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1582003754624}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1898404492032}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1582003754624}, {'type': 'perplexity', 'in_batch_dim': [8, 64], 'batch_size': 8, 'flops': 1265603017216}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1898404492032}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1898404492032}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1898404492032}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1898404492032}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1582003754624}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1898404492032}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1582003754624}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1582003754624}, {'type': 'perplexity', 'in_batch_dim': [8, 112], 'batch_size': 8, 'flops': 2214805229440}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1582003754624}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1898404492032}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1582003754624}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1582003754624}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1582003754624}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1898404492032}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1582003754624}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1898404492032}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1582003754624}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1582003754624}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1898404492032}, {'type': 'perplexity', 'in_batch_dim': [8, 112], 'batch_size': 8, 'flops': 2214805229440}, {'type': 'perplexity', 'in_batch_dim': [8, 128], 'batch_size': 8, 'flops': 2531205966848}, {'type': 'perplexity', 'in_batch_dim': [8, 112], 'batch_size': 8, 'flops': 2214805229440}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1582003754624}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1898404492032}, {'type': 'perplexity', 'in_batch_dim': [8, 64], 'batch_size': 8, 'flops': 1265603017216}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1898404492032}, {'type': 'perplexity', 'in_batch_dim': [8, 64], 'batch_size': 8, 'flops': 1265603017216}, {'type': 'perplexity', 'in_batch_dim': [8, 144], 'batch_size': 8, 'flops': 2847606704256}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1582003754624}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1898404492032}, {'type': 'perplexity', 'in_batch_dim': [8, 112], 'batch_size': 8, 'flops': 2214805229440}, {'type': 'perplexity', 'in_batch_dim': [8, 144], 'batch_size': 8, 'flops': 2847606704256}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1582003754624}, {'type': 'perplexity', 'in_batch_dim': [8, 112], 'batch_size': 8, 'flops': 2214805229440}, {'type': 'perplexity', 'in_batch_dim': [8, 64], 'batch_size': 8, 'flops': 1265603017216}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1582003754624}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1898404492032}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1898404492032}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1582003754624}, {'type': 'perplexity', 'in_batch_dim': [8, 112], 'batch_size': 8, 'flops': 2214805229440}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1582003754624}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1582003754624}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1582003754624}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1582003754624}, {'type': 'perplexity', 'in_batch_dim': [8, 112], 'batch_size': 8, 'flops': 2214805229440}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1898404492032}, {'type': 'perplexity', 'in_batch_dim': [8, 128], 'batch_size': 8, 'flops': 2531205966848}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1582003754624}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1582003754624}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1898404492032}, {'type': 'perplexity', 'in_batch_dim': [8, 64], 'batch_size': 8, 'flops': 1265603017216}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1898404492032}, {'type': 'perplexity', 'in_batch_dim': [8, 64], 'batch_size': 8, 'flops': 1265603017216}, {'type': 'perplexity', 'in_batch_dim': [8, 112], 'batch_size': 8, 'flops': 2214805229440}, {'type': 'perplexity', 'in_batch_dim': [8, 112], 'batch_size': 8, 'flops': 2214805229440}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1582003754624}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1898404492032}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1582003754624}, {'type': 'perplexity', 'in_batch_dim': [8, 64], 'batch_size': 8, 'flops': 1265603017216}, {'type': 'perplexity', 'in_batch_dim': [8, 64], 'batch_size': 8, 'flops': 1265603017216}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1898404492032}, {'type': 'perplexity', 'in_batch_dim': [8, 64], 'batch_size': 8, 'flops': 1265603017216}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1898404492032}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1898404492032}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1582003754624}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1898404492032}, {'type': 'perplexity', 'in_batch_dim': [8, 144], 'batch_size': 8, 'flops': 2847606704256}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1898404492032}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1582003754624}, {'type': 'perplexity', 'in_batch_dim': [8, 128], 'batch_size': 8, 'flops': 2531205966848}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1898404492032}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1898404492032}, {'type': 'perplexity', 'in_batch_dim': [8, 112], 'batch_size': 8, 'flops': 2214805229440}, {'type': 'perplexity', 'in_batch_dim': [8, 112], 'batch_size': 8, 'flops': 2214805229440}, {'type': 'perplexity', 'in_batch_dim': [8, 64], 'batch_size': 8, 'flops': 1265603017216}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1582003754624}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1898404492032}, {'type': 'perplexity', 'in_batch_dim': [8, 128], 'batch_size': 8, 'flops': 2531205966848}, {'type': 'perplexity', 'in_batch_dim': [8, 144], 'batch_size': 8, 'flops': 2847606704256}, {'type': 'perplexity', 'in_batch_dim': [8, 112], 'batch_size': 8, 'flops': 2214805229440}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1582003754624}, {'type': 'perplexity', 'in_batch_dim': [8, 64], 'batch_size': 8, 'flops': 1265603017216}, {'type': 'perplexity', 'in_batch_dim': [8, 64], 'batch_size': 8, 'flops': 1265603017216}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1898404492032}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1582003754624}, {'type': 'perplexity', 'in_batch_dim': [8, 64], 'batch_size': 8, 'flops': 1265603017216}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1898404492032}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1582003754624}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1898404492032}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1898404492032}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1582003754624}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1582003754624}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1582003754624}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1582003754624}, {'type': 'perplexity', 'in_batch_dim': [3, 48], 'batch_size': 8, 'flops': 949202279808}], 'timestamp': '2025-09-10 02:37:30.023235', 'step': 9786, 'epoch': 2} {'type': 'pplx', 'content': 11510.468632835948, 'timestamp': '2025-09-10 02:37:30.026552', 'step': 9786, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 3322488817984}, 'timestamp': '2025-09-10 02:37:30.056115', 'step': 9786, 'epoch': 2} {'type': 'loss', 'content': 0.1098022386431694, 'timestamp': '2025-09-10 02:37:30.059184', 'step': 9787, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 144], 'flops': 4271696270016}, 'timestamp': '2025-09-10 02:37:30.090506', 'step': 9787, 'epoch': 2} {'type': 'loss', 'content': 0.06125546991825104, 'timestamp': '2025-09-10 02:37:30.119231', 'step': 9788, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 3322488817984}, 'timestamp': '2025-09-10 02:37:30.157855', 'step': 9788, 'epoch': 2} {'type': 'loss', 'content': 0.11127591878175735, 'timestamp': '2025-09-10 02:37:30.161422', 'step': 9789, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 2847885091968}, 'timestamp': '2025-09-10 02:37:30.199993', 'step': 9789, 'epoch': 2} {'type': 'loss', 'content': 0.06794183701276779, 'timestamp': '2025-09-10 02:37:30.203271', 'step': 9790, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 2847885091968}, 'timestamp': '2025-09-10 02:37:30.248220', 'step': 9790, 'epoch': 2} {'type': 'loss', 'content': 0.08561870455741882, 'timestamp': '2025-09-10 02:37:30.250945', 'step': 9791, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 128], 'flops': 3797092544000}, 'timestamp': '2025-09-10 02:37:30.286626', 'step': 9791, 'epoch': 2} {'type': 'loss', 'content': 0.20925015211105347, 'timestamp': '2025-09-10 02:37:30.310437', 'step': 9792, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 128], 'flops': 3797092544000}, 'timestamp': '2025-09-10 02:37:30.343649', 'step': 9792, 'epoch': 2} {'type': 'loss', 'content': 0.09009213745594025, 'timestamp': '2025-09-10 02:37:30.347250', 'step': 9793, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 80], 'flops': 2373281365952}, 'timestamp': '2025-09-10 02:37:30.377874', 'step': 9793, 'epoch': 2} {'type': 'loss', 'content': 0.13034962117671967, 'timestamp': '2025-09-10 02:37:30.380231', 'step': 9794, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 3322488817984}, 'timestamp': '2025-09-10 02:37:30.410139', 'step': 9794, 'epoch': 2} {'type': 'loss', 'content': 0.1259884387254715, 'timestamp': '2025-09-10 02:37:30.417425', 'step': 9795, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 2847885091968}, 'timestamp': '2025-09-10 02:37:30.452194', 'step': 9795, 'epoch': 2} {'type': 'loss', 'content': 0.11212339252233505, 'timestamp': '2025-09-10 02:37:30.483534', 'step': 9796, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 128], 'flops': 3797092544000}, 'timestamp': '2025-09-10 02:37:30.513821', 'step': 9796, 'epoch': 2} {'type': 'loss', 'content': 0.10848776996135712, 'timestamp': '2025-09-10 02:37:30.516260', 'step': 9797, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 144], 'flops': 4271696270016}, 'timestamp': '2025-09-10 02:37:30.547204', 'step': 9797, 'epoch': 2} {'type': 'loss', 'content': 0.08388497680425644, 'timestamp': '2025-09-10 02:37:30.549465', 'step': 9798, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 128], 'flops': 3797092544000}, 'timestamp': '2025-09-10 02:37:30.579731', 'step': 9798, 'epoch': 2} {'type': 'loss', 'content': 0.08389998972415924, 'timestamp': '2025-09-10 02:37:30.581954', 'step': 9799, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 3322488817984}, 'timestamp': '2025-09-10 02:37:30.612252', 'step': 9799, 'epoch': 2} {'type': 'loss', 'content': 0.1461707055568695, 'timestamp': '2025-09-10 02:37:30.635816', 'step': 9800, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 128], 'flops': 3797092544000}, 'timestamp': '2025-09-10 02:37:30.665339', 'step': 9800, 'epoch': 2} {'type': 'loss', 'content': 0.11008358746767044, 'timestamp': '2025-09-10 02:37:30.669281', 'step': 9801, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 3322488817984}, 'timestamp': '2025-09-10 02:37:30.705570', 'step': 9801, 'epoch': 2} {'type': 'loss', 'content': 0.11073289066553116, 'timestamp': '2025-09-10 02:37:30.708168', 'step': 9802, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 3322488817984}, 'timestamp': '2025-09-10 02:37:30.739045', 'step': 9802, 'epoch': 2} {'type': 'loss', 'content': 0.1254826933145523, 'timestamp': '2025-09-10 02:37:30.741255', 'step': 9803, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 2847885091968}, 'timestamp': '2025-09-10 02:37:30.771388', 'step': 9803, 'epoch': 2} {'type': 'loss', 'content': 0.11041096597909927, 'timestamp': '2025-09-10 02:37:30.794899', 'step': 9804, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 3322488817984}, 'timestamp': '2025-09-10 02:37:30.831315', 'step': 9804, 'epoch': 2} {'type': 'loss', 'content': 0.1407817006111145, 'timestamp': '2025-09-10 02:37:30.834227', 'step': 9805, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 128], 'flops': 3797092544000}, 'timestamp': '2025-09-10 02:37:30.872038', 'step': 9805, 'epoch': 2} {'type': 'loss', 'content': 0.09966772049665451, 'timestamp': '2025-09-10 02:37:30.874250', 'step': 9806, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 3322488817984}, 'timestamp': '2025-09-10 02:37:30.905944', 'step': 9806, 'epoch': 2} {'type': 'loss', 'content': 0.048916932195425034, 'timestamp': '2025-09-10 02:37:30.908522', 'step': 9807, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 3322488817984}, 'timestamp': '2025-09-10 02:37:30.944142', 'step': 9807, 'epoch': 2} {'type': 'loss', 'content': 0.08358698338270187, 'timestamp': '2025-09-10 02:37:30.967945', 'step': 9808, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 2847885091968}, 'timestamp': '2025-09-10 02:37:30.998204', 'step': 9808, 'epoch': 2} {'type': 'loss', 'content': 0.17329540848731995, 'timestamp': '2025-09-10 02:37:31.000864', 'step': 9809, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 128], 'flops': 3797092544000}, 'timestamp': '2025-09-10 02:37:31.031671', 'step': 9809, 'epoch': 2} {'type': 'loss', 'content': 0.0789133608341217, 'timestamp': '2025-09-10 02:37:31.033942', 'step': 9810, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 2847885091968}, 'timestamp': '2025-09-10 02:37:31.063827', 'step': 9810, 'epoch': 2} {'type': 'loss', 'content': 0.17050352692604065, 'timestamp': '2025-09-10 02:37:31.066111', 'step': 9811, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 2847885091968}, 'timestamp': '2025-09-10 02:37:31.098651', 'step': 9811, 'epoch': 2} {'type': 'loss', 'content': 0.05461004003882408, 'timestamp': '2025-09-10 02:37:31.122546', 'step': 9812, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 2847885091968}, 'timestamp': '2025-09-10 02:37:31.157054', 'step': 9812, 'epoch': 2} {'type': 'loss', 'content': 0.06612851470708847, 'timestamp': '2025-09-10 02:37:31.159523', 'step': 9813, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 80], 'flops': 2373281365952}, 'timestamp': '2025-09-10 02:37:31.192994', 'step': 9813, 'epoch': 2} {'type': 'loss', 'content': 0.0395456962287426, 'timestamp': '2025-09-10 02:37:31.195124', 'step': 9814, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 3322488817984}, 'timestamp': '2025-09-10 02:37:31.225511', 'step': 9814, 'epoch': 2} {'type': 'loss', 'content': 0.09440876543521881, 'timestamp': '2025-09-10 02:37:31.228421', 'step': 9815, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 3322488817984}, 'timestamp': '2025-09-10 02:37:31.259520', 'step': 9815, 'epoch': 2} {'type': 'loss', 'content': 0.09585543721914291, 'timestamp': '2025-09-10 02:37:31.283288', 'step': 9816, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 3322488817984}, 'timestamp': '2025-09-10 02:37:31.314387', 'step': 9816, 'epoch': 2} {'type': 'loss', 'content': 0.1059928685426712, 'timestamp': '2025-09-10 02:37:31.317716', 'step': 9817, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 128], 'flops': 3797092544000}, 'timestamp': '2025-09-10 02:37:31.350616', 'step': 9817, 'epoch': 2} {'type': 'loss', 'content': 0.10100982338190079, 'timestamp': '2025-09-10 02:37:31.353093', 'step': 9818, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 80], 'flops': 2373281365952}, 'timestamp': '2025-09-10 02:37:31.384330', 'step': 9818, 'epoch': 2} {'type': 'loss', 'content': 0.1159893199801445, 'timestamp': '2025-09-10 02:37:31.389293', 'step': 9819, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 3322488817984}, 'timestamp': '2025-09-10 02:37:31.421336', 'step': 9819, 'epoch': 2} {'type': 'loss', 'content': 0.13549648225307465, 'timestamp': '2025-09-10 02:37:31.444768', 'step': 9820, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 2847885091968}, 'timestamp': '2025-09-10 02:37:31.480722', 'step': 9820, 'epoch': 2} {'type': 'loss', 'content': 0.12837694585323334, 'timestamp': '2025-09-10 02:37:31.488806', 'step': 9821, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 2847885091968}, 'timestamp': '2025-09-10 02:37:31.525864', 'step': 9821, 'epoch': 2} {'type': 'loss', 'content': 0.1399344503879547, 'timestamp': '2025-09-10 02:37:31.528418', 'step': 9822, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 128], 'flops': 3797092544000}, 'timestamp': '2025-09-10 02:37:31.559409', 'step': 9822, 'epoch': 2} {'type': 'loss', 'content': 0.12205372750759125, 'timestamp': '2025-09-10 02:37:31.561572', 'step': 9823, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 176], 'flops': 5220903722048}, 'timestamp': '2025-09-10 02:37:31.592053', 'step': 9823, 'epoch': 2} {'type': 'loss', 'content': 0.2647876441478729, 'timestamp': '2025-09-10 02:37:31.617093', 'step': 9824, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 3322488817984}, 'timestamp': '2025-09-10 02:37:31.647222', 'step': 9824, 'epoch': 2} {'type': 'loss', 'content': 0.11298869550228119, 'timestamp': '2025-09-10 02:37:31.649517', 'step': 9825, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 128], 'flops': 3797092544000}, 'timestamp': '2025-09-10 02:37:31.678769', 'step': 9825, 'epoch': 2} {'type': 'loss', 'content': 0.17061108350753784, 'timestamp': '2025-09-10 02:37:31.681382', 'step': 9826, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 128], 'flops': 3797092544000}, 'timestamp': '2025-09-10 02:37:31.712953', 'step': 9826, 'epoch': 2} {'type': 'loss', 'content': 0.14671239256858826, 'timestamp': '2025-09-10 02:37:31.715538', 'step': 9827, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 2847885091968}, 'timestamp': '2025-09-10 02:37:31.747731', 'step': 9827, 'epoch': 2} {'type': 'loss', 'content': 0.10527947545051575, 'timestamp': '2025-09-10 02:37:31.771881', 'step': 9828, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 3322488817984}, 'timestamp': '2025-09-10 02:37:31.804908', 'step': 9828, 'epoch': 2} {'type': 'loss', 'content': 0.15435746312141418, 'timestamp': '2025-09-10 02:37:31.807404', 'step': 9829, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 128], 'flops': 3797092544000}, 'timestamp': '2025-09-10 02:37:31.837422', 'step': 9829, 'epoch': 2} {'type': 'loss', 'content': 0.0666501447558403, 'timestamp': '2025-09-10 02:37:31.841347', 'step': 9830, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 3322488817984}, 'timestamp': '2025-09-10 02:37:31.871029', 'step': 9830, 'epoch': 2} {'type': 'loss', 'content': 0.04526688531041145, 'timestamp': '2025-09-10 02:37:31.873542', 'step': 9831, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 3322488817984}, 'timestamp': '2025-09-10 02:37:31.907429', 'step': 9831, 'epoch': 2} {'type': 'loss', 'content': 0.07878008484840393, 'timestamp': '2025-09-10 02:37:31.931203', 'step': 9832, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 2847885091968}, 'timestamp': '2025-09-10 02:37:31.960773', 'step': 9832, 'epoch': 2} {'type': 'loss', 'content': 0.13488973677158356, 'timestamp': '2025-09-10 02:37:31.967125', 'step': 9833, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 2847885091968}, 'timestamp': '2025-09-10 02:37:31.999640', 'step': 9833, 'epoch': 2} {'type': 'loss', 'content': 0.13274499773979187, 'timestamp': '2025-09-10 02:37:32.001914', 'step': 9834, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 3322488817984}, 'timestamp': '2025-09-10 02:37:32.032355', 'step': 9834, 'epoch': 2} {'type': 'loss', 'content': 0.0715823620557785, 'timestamp': '2025-09-10 02:37:32.038093', 'step': 9835, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 128], 'flops': 3797092544000}, 'timestamp': '2025-09-10 02:37:32.070784', 'step': 9835, 'epoch': 2} {'type': 'loss', 'content': 0.15319426357746124, 'timestamp': '2025-09-10 02:37:32.094138', 'step': 9836, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 2847885091968}, 'timestamp': '2025-09-10 02:37:32.126289', 'step': 9836, 'epoch': 2} {'type': 'loss', 'content': 0.15384341776371002, 'timestamp': '2025-09-10 02:37:32.128473', 'step': 9837, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 2847885091968}, 'timestamp': '2025-09-10 02:37:32.162136', 'step': 9837, 'epoch': 2} {'type': 'loss', 'content': 0.1232481524348259, 'timestamp': '2025-09-10 02:37:32.164708', 'step': 9838, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 3322488817984}, 'timestamp': '2025-09-10 02:37:32.198823', 'step': 9838, 'epoch': 2} {'type': 'loss', 'content': 0.08932642638683319, 'timestamp': '2025-09-10 02:37:32.201303', 'step': 9839, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 2847885091968}, 'timestamp': '2025-09-10 02:37:32.231130', 'step': 9839, 'epoch': 2} {'type': 'loss', 'content': 0.07630197703838348, 'timestamp': '2025-09-10 02:37:32.255846', 'step': 9840, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 3322488817984}, 'timestamp': '2025-09-10 02:37:32.287023', 'step': 9840, 'epoch': 2} {'type': 'loss', 'content': 0.08938276767730713, 'timestamp': '2025-09-10 02:37:32.290051', 'step': 9841, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 80], 'flops': 2373281365952}, 'timestamp': '2025-09-10 02:37:32.320049', 'step': 9841, 'epoch': 2} {'type': 'loss', 'content': 0.1480337381362915, 'timestamp': '2025-09-10 02:37:32.322355', 'step': 9842, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 3322488817984}, 'timestamp': '2025-09-10 02:37:32.352789', 'step': 9842, 'epoch': 2} {'type': 'loss', 'content': 0.19232827425003052, 'timestamp': '2025-09-10 02:37:32.354954', 'step': 9843, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 80], 'flops': 2373281365952}, 'timestamp': '2025-09-10 02:37:32.386590', 'step': 9843, 'epoch': 2} {'type': 'loss', 'content': 0.17538802325725555, 'timestamp': '2025-09-10 02:37:32.420055', 'step': 9844, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 128], 'flops': 3797092544000}, 'timestamp': '2025-09-10 02:37:32.452130', 'step': 9844, 'epoch': 2} {'type': 'loss', 'content': 0.0860314667224884, 'timestamp': '2025-09-10 02:37:32.460875', 'step': 9845, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 144], 'flops': 4271696270016}, 'timestamp': '2025-09-10 02:37:32.495441', 'step': 9845, 'epoch': 2} {'type': 'loss', 'content': 0.14615899324417114, 'timestamp': '2025-09-10 02:37:32.497875', 'step': 9846, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 2847885091968}, 'timestamp': '2025-09-10 02:37:32.528023', 'step': 9846, 'epoch': 2} {'type': 'loss', 'content': 0.11503512412309647, 'timestamp': '2025-09-10 02:37:32.530598', 'step': 9847, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 128], 'flops': 3797092544000}, 'timestamp': '2025-09-10 02:37:32.563574', 'step': 9847, 'epoch': 2} {'type': 'loss', 'content': 0.08605150133371353, 'timestamp': '2025-09-10 02:37:32.588546', 'step': 9848, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 3322488817984}, 'timestamp': '2025-09-10 02:37:32.622184', 'step': 9848, 'epoch': 2} {'type': 'loss', 'content': 0.15139955282211304, 'timestamp': '2025-09-10 02:37:32.626063', 'step': 9849, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 3322488817984}, 'timestamp': '2025-09-10 02:37:32.663621', 'step': 9849, 'epoch': 2} {'type': 'loss', 'content': 0.14349369704723358, 'timestamp': '2025-09-10 02:37:32.666386', 'step': 9850, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 80], 'flops': 2373281365952}, 'timestamp': '2025-09-10 02:37:32.696559', 'step': 9850, 'epoch': 2} {'type': 'loss', 'content': 0.20039242506027222, 'timestamp': '2025-09-10 02:37:32.699242', 'step': 9851, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 2847885091968}, 'timestamp': '2025-09-10 02:37:32.729263', 'step': 9851, 'epoch': 2} {'type': 'loss', 'content': 0.16590853035449982, 'timestamp': '2025-09-10 02:37:32.752880', 'step': 9852, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 2847885091968}, 'timestamp': '2025-09-10 02:37:32.790819', 'step': 9852, 'epoch': 2} {'type': 'loss', 'content': 0.18890276551246643, 'timestamp': '2025-09-10 02:37:32.793628', 'step': 9853, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 3322488817984}, 'timestamp': '2025-09-10 02:37:32.831577', 'step': 9853, 'epoch': 2} {'type': 'loss', 'content': 0.08952938765287399, 'timestamp': '2025-09-10 02:37:32.834403', 'step': 9854, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 2847885091968}, 'timestamp': '2025-09-10 02:37:32.864093', 'step': 9854, 'epoch': 2} {'type': 'loss', 'content': 0.17503774166107178, 'timestamp': '2025-09-10 02:37:32.866564', 'step': 9855, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 2847885091968}, 'timestamp': '2025-09-10 02:37:32.896581', 'step': 9855, 'epoch': 2} {'type': 'loss', 'content': 0.1377943605184555, 'timestamp': '2025-09-10 02:37:32.920266', 'step': 9856, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 3322488817984}, 'timestamp': '2025-09-10 02:37:32.956284', 'step': 9856, 'epoch': 2} {'type': 'loss', 'content': 0.08002623915672302, 'timestamp': '2025-09-10 02:37:32.959237', 'step': 9857, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 128], 'flops': 3797092544000}, 'timestamp': '2025-09-10 02:37:32.990044', 'step': 9857, 'epoch': 2} {'type': 'loss', 'content': 0.0777493417263031, 'timestamp': '2025-09-10 02:37:32.994993', 'step': 9858, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 2847885091968}, 'timestamp': '2025-09-10 02:37:33.030853', 'step': 9858, 'epoch': 2} {'type': 'loss', 'content': 0.1792866289615631, 'timestamp': '2025-09-10 02:37:33.037394', 'step': 9859, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 80], 'flops': 2373281365952}, 'timestamp': '2025-09-10 02:37:33.070625', 'step': 9859, 'epoch': 2} {'type': 'loss', 'content': 0.080710269510746, 'timestamp': '2025-09-10 02:37:33.094431', 'step': 9860, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 3322488817984}, 'timestamp': '2025-09-10 02:37:33.125952', 'step': 9860, 'epoch': 2} {'type': 'loss', 'content': 0.11648672819137573, 'timestamp': '2025-09-10 02:37:33.130117', 'step': 9861, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 2847885091968}, 'timestamp': '2025-09-10 02:37:33.159633', 'step': 9861, 'epoch': 2} {'type': 'loss', 'content': 0.09865575283765793, 'timestamp': '2025-09-10 02:37:33.162072', 'step': 9862, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 3322488817984}, 'timestamp': '2025-09-10 02:37:33.192122', 'step': 9862, 'epoch': 2} {'type': 'loss', 'content': 0.13403230905532837, 'timestamp': '2025-09-10 02:37:33.194563', 'step': 9863, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 3322488817984}, 'timestamp': '2025-09-10 02:37:33.226086', 'step': 9863, 'epoch': 2} {'type': 'loss', 'content': 0.10505907982587814, 'timestamp': '2025-09-10 02:37:33.250949', 'step': 9864, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 2847885091968}, 'timestamp': '2025-09-10 02:37:33.281182', 'step': 9864, 'epoch': 2} {'type': 'loss', 'content': 0.07058990746736526, 'timestamp': '2025-09-10 02:37:33.283898', 'step': 9865, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 3322488817984}, 'timestamp': '2025-09-10 02:37:33.314898', 'step': 9865, 'epoch': 2} {'type': 'loss', 'content': 0.07416925579309464, 'timestamp': '2025-09-10 02:37:33.317229', 'step': 9866, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 3322488817984}, 'timestamp': '2025-09-10 02:37:33.347165', 'step': 9866, 'epoch': 2} {'type': 'loss', 'content': 0.08679085224866867, 'timestamp': '2025-09-10 02:37:33.349642', 'step': 9867, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 144], 'flops': 4271696270016}, 'timestamp': '2025-09-10 02:37:33.380039', 'step': 9867, 'epoch': 2} {'type': 'loss', 'content': 0.06444638967514038, 'timestamp': '2025-09-10 02:37:33.403759', 'step': 9868, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 128], 'flops': 3797092544000}, 'timestamp': '2025-09-10 02:37:33.434506', 'step': 9868, 'epoch': 2} {'type': 'loss', 'content': 0.199141263961792, 'timestamp': '2025-09-10 02:37:33.436635', 'step': 9869, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 144], 'flops': 4271696270016}, 'timestamp': '2025-09-10 02:37:33.466481', 'step': 9869, 'epoch': 2} {'type': 'loss', 'content': 0.11911113560199738, 'timestamp': '2025-09-10 02:37:33.469340', 'step': 9870, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 2847885091968}, 'timestamp': '2025-09-10 02:37:33.498977', 'step': 9870, 'epoch': 2} {'type': 'loss', 'content': 0.22023876011371613, 'timestamp': '2025-09-10 02:37:33.503148', 'step': 9871, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 128], 'flops': 3797092544000}, 'timestamp': '2025-09-10 02:37:33.535408', 'step': 9871, 'epoch': 2} {'type': 'loss', 'content': 0.17234204709529877, 'timestamp': '2025-09-10 02:37:33.558987', 'step': 9872, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 3322488817984}, 'timestamp': '2025-09-10 02:37:33.589214', 'step': 9872, 'epoch': 2} {'type': 'loss', 'content': 0.0967676118016243, 'timestamp': '2025-09-10 02:37:33.591982', 'step': 9873, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 128], 'flops': 3797092544000}, 'timestamp': '2025-09-10 02:37:33.622719', 'step': 9873, 'epoch': 2} {'type': 'loss', 'content': 0.11278051137924194, 'timestamp': '2025-09-10 02:37:33.625036', 'step': 9874, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 2847885091968}, 'timestamp': '2025-09-10 02:37:33.656148', 'step': 9874, 'epoch': 2} {'type': 'loss', 'content': 0.16153337061405182, 'timestamp': '2025-09-10 02:37:33.658589', 'step': 9875, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 80], 'flops': 2373281365952}, 'timestamp': '2025-09-10 02:37:33.688911', 'step': 9875, 'epoch': 2} {'type': 'loss', 'content': 0.20816819369792938, 'timestamp': '2025-09-10 02:37:33.712533', 'step': 9876, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 80], 'flops': 2373281365952}, 'timestamp': '2025-09-10 02:37:33.743872', 'step': 9876, 'epoch': 2} {'type': 'loss', 'content': 0.1336735188961029, 'timestamp': '2025-09-10 02:37:33.746212', 'step': 9877, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 2847885091968}, 'timestamp': '2025-09-10 02:37:33.776770', 'step': 9877, 'epoch': 2} {'type': 'loss', 'content': 0.20041486620903015, 'timestamp': '2025-09-10 02:37:33.778984', 'step': 9878, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 3322488817984}, 'timestamp': '2025-09-10 02:37:33.808906', 'step': 9878, 'epoch': 2} {'type': 'loss', 'content': 0.10194091498851776, 'timestamp': '2025-09-10 02:37:33.811842', 'step': 9879, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 3322488817984}, 'timestamp': '2025-09-10 02:37:33.842257', 'step': 9879, 'epoch': 2} {'type': 'loss', 'content': 0.09628517180681229, 'timestamp': '2025-09-10 02:37:33.865530', 'step': 9880, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 144], 'flops': 4271696270016}, 'timestamp': '2025-09-10 02:37:33.896359', 'step': 9880, 'epoch': 2} {'type': 'loss', 'content': 0.14511631429195404, 'timestamp': '2025-09-10 02:37:33.898763', 'step': 9881, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 2847885091968}, 'timestamp': '2025-09-10 02:37:33.930584', 'step': 9881, 'epoch': 2} {'type': 'loss', 'content': 0.09925436228513718, 'timestamp': '2025-09-10 02:37:33.932842', 'step': 9882, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 144], 'flops': 4271696270016}, 'timestamp': '2025-09-10 02:37:33.965222', 'step': 9882, 'epoch': 2} {'type': 'loss', 'content': 0.15541478991508484, 'timestamp': '2025-09-10 02:37:33.968347', 'step': 9883, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 2847885091968}, 'timestamp': '2025-09-10 02:37:34.001524', 'step': 9883, 'epoch': 2} {'type': 'loss', 'content': 0.07569469511508942, 'timestamp': '2025-09-10 02:37:34.025124', 'step': 9884, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 3322488817984}, 'timestamp': '2025-09-10 02:37:34.056126', 'step': 9884, 'epoch': 2} {'type': 'loss', 'content': 0.16592639684677124, 'timestamp': '2025-09-10 02:37:34.058472', 'step': 9885, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 3322488817984}, 'timestamp': '2025-09-10 02:37:34.089192', 'step': 9885, 'epoch': 2} {'type': 'loss', 'content': 0.13183508813381195, 'timestamp': '2025-09-10 02:37:34.091766', 'step': 9886, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 3322488817984}, 'timestamp': '2025-09-10 02:37:34.123125', 'step': 9886, 'epoch': 2} {'type': 'loss', 'content': 0.0694739893078804, 'timestamp': '2025-09-10 02:37:34.125461', 'step': 9887, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 128], 'flops': 3797092544000}, 'timestamp': '2025-09-10 02:37:34.158307', 'step': 9887, 'epoch': 2} {'type': 'loss', 'content': 0.17121461033821106, 'timestamp': '2025-09-10 02:37:34.182157', 'step': 9888, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 3322488817984}, 'timestamp': '2025-09-10 02:37:34.213291', 'step': 9888, 'epoch': 2} {'type': 'loss', 'content': 0.08317747712135315, 'timestamp': '2025-09-10 02:37:34.215740', 'step': 9889, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 80], 'flops': 2373281365952}, 'timestamp': '2025-09-10 02:37:34.246150', 'step': 9889, 'epoch': 2} {'type': 'loss', 'content': 0.0839763805270195, 'timestamp': '2025-09-10 02:37:34.248519', 'step': 9890, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 3322488817984}, 'timestamp': '2025-09-10 02:37:34.279201', 'step': 9890, 'epoch': 2} {'type': 'loss', 'content': 0.16511653363704681, 'timestamp': '2025-09-10 02:37:34.281295', 'step': 9891, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 3322488817984}, 'timestamp': '2025-09-10 02:37:34.310928', 'step': 9891, 'epoch': 2} {'type': 'loss', 'content': 0.10454869270324707, 'timestamp': '2025-09-10 02:37:34.335721', 'step': 9892, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 3322488817984}, 'timestamp': '2025-09-10 02:37:34.366673', 'step': 9892, 'epoch': 2} {'type': 'loss', 'content': 0.11201275140047073, 'timestamp': '2025-09-10 02:37:34.369339', 'step': 9893, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 128], 'flops': 3797092544000}, 'timestamp': '2025-09-10 02:37:34.399972', 'step': 9893, 'epoch': 2} {'type': 'loss', 'content': 0.0960448607802391, 'timestamp': '2025-09-10 02:37:34.402474', 'step': 9894, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 3322488817984}, 'timestamp': '2025-09-10 02:37:34.437474', 'step': 9894, 'epoch': 2} {'type': 'loss', 'content': 0.2280230075120926, 'timestamp': '2025-09-10 02:37:34.439755', 'step': 9895, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 3322488817984}, 'timestamp': '2025-09-10 02:37:34.469874', 'step': 9895, 'epoch': 2} {'type': 'loss', 'content': 0.07701591402292252, 'timestamp': '2025-09-10 02:37:34.493671', 'step': 9896, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 3322488817984}, 'timestamp': '2025-09-10 02:37:34.523931', 'step': 9896, 'epoch': 2} {'type': 'loss', 'content': 0.0840180441737175, 'timestamp': '2025-09-10 02:37:34.527381', 'step': 9897, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 3322488817984}, 'timestamp': '2025-09-10 02:37:34.559549', 'step': 9897, 'epoch': 2} {'type': 'loss', 'content': 0.15315355360507965, 'timestamp': '2025-09-10 02:37:34.561744', 'step': 9898, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 2847885091968}, 'timestamp': '2025-09-10 02:37:34.591595', 'step': 9898, 'epoch': 2} {'type': 'loss', 'content': 0.12202887237071991, 'timestamp': '2025-09-10 02:37:34.593724', 'step': 9899, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 2847885091968}, 'timestamp': '2025-09-10 02:37:34.624025', 'step': 9899, 'epoch': 2} {'type': 'loss', 'content': 0.14567428827285767, 'timestamp': '2025-09-10 02:37:34.647570', 'step': 9900, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 3322488817984}, 'timestamp': '2025-09-10 02:37:34.677276', 'step': 9900, 'epoch': 2} {'type': 'loss', 'content': 0.18565936386585236, 'timestamp': '2025-09-10 02:37:34.679489', 'step': 9901, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 3322488817984}, 'timestamp': '2025-09-10 02:37:34.710269', 'step': 9901, 'epoch': 2} {'type': 'loss', 'content': 0.1496904492378235, 'timestamp': '2025-09-10 02:37:34.712806', 'step': 9902, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 80], 'flops': 2373281365952}, 'timestamp': '2025-09-10 02:37:34.744128', 'step': 9902, 'epoch': 2} {'type': 'loss', 'content': 0.11482802778482437, 'timestamp': '2025-09-10 02:37:34.746468', 'step': 9903, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 2847885091968}, 'timestamp': '2025-09-10 02:37:34.775929', 'step': 9903, 'epoch': 2} {'type': 'loss', 'content': 0.15077805519104004, 'timestamp': '2025-09-10 02:37:34.799932', 'step': 9904, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 80], 'flops': 2373281365952}, 'timestamp': '2025-09-10 02:37:34.830369', 'step': 9904, 'epoch': 2} {'type': 'loss', 'content': 0.022175107151269913, 'timestamp': '2025-09-10 02:37:34.832727', 'step': 9905, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 2847885091968}, 'timestamp': '2025-09-10 02:37:34.862841', 'step': 9905, 'epoch': 2} {'type': 'loss', 'content': 0.14368052780628204, 'timestamp': '2025-09-10 02:37:34.865077', 'step': 9906, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 128], 'flops': 3797092544000}, 'timestamp': '2025-09-10 02:37:34.895269', 'step': 9906, 'epoch': 2} {'type': 'loss', 'content': 0.05652232840657234, 'timestamp': '2025-09-10 02:37:34.897424', 'step': 9907, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 3322488817984}, 'timestamp': '2025-09-10 02:37:34.927146', 'step': 9907, 'epoch': 2} {'type': 'loss', 'content': 0.1312946081161499, 'timestamp': '2025-09-10 02:37:34.950693', 'step': 9908, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 2847885091968}, 'timestamp': '2025-09-10 02:37:34.980706', 'step': 9908, 'epoch': 2} {'type': 'loss', 'content': 0.08694984018802643, 'timestamp': '2025-09-10 02:37:34.982966', 'step': 9909, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 128], 'flops': 3797092544000}, 'timestamp': '2025-09-10 02:37:35.012775', 'step': 9909, 'epoch': 2} {'type': 'loss', 'content': 0.12861384451389313, 'timestamp': '2025-09-10 02:37:35.015212', 'step': 9910, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 2847885091968}, 'timestamp': '2025-09-10 02:37:35.045630', 'step': 9910, 'epoch': 2} {'type': 'loss', 'content': 0.07086179405450821, 'timestamp': '2025-09-10 02:37:35.047695', 'step': 9911, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 3322488817984}, 'timestamp': '2025-09-10 02:37:35.080168', 'step': 9911, 'epoch': 2} {'type': 'loss', 'content': 0.11971200257539749, 'timestamp': '2025-09-10 02:37:35.103648', 'step': 9912, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 3322488817984}, 'timestamp': '2025-09-10 02:37:35.133727', 'step': 9912, 'epoch': 2} {'type': 'loss', 'content': 0.1605108380317688, 'timestamp': '2025-09-10 02:37:35.136181', 'step': 9913, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 2847885091968}, 'timestamp': '2025-09-10 02:37:35.166310', 'step': 9913, 'epoch': 2} {'type': 'loss', 'content': 0.05923977121710777, 'timestamp': '2025-09-10 02:37:35.168610', 'step': 9914, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 3322488817984}, 'timestamp': '2025-09-10 02:37:35.198182', 'step': 9914, 'epoch': 2} {'type': 'loss', 'content': 0.12561200559139252, 'timestamp': '2025-09-10 02:37:35.200901', 'step': 9915, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 144], 'flops': 4271696270016}, 'timestamp': '2025-09-10 02:37:35.231452', 'step': 9915, 'epoch': 2} {'type': 'loss', 'content': 0.12272825092077255, 'timestamp': '2025-09-10 02:37:35.255180', 'step': 9916, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 3322488817984}, 'timestamp': '2025-09-10 02:37:35.285481', 'step': 9916, 'epoch': 2} {'type': 'loss', 'content': 0.08713287860155106, 'timestamp': '2025-09-10 02:37:35.288072', 'step': 9917, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 3322488817984}, 'timestamp': '2025-09-10 02:37:35.318811', 'step': 9917, 'epoch': 2} {'type': 'loss', 'content': 0.08851668983697891, 'timestamp': '2025-09-10 02:37:35.321164', 'step': 9918, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 3322488817984}, 'timestamp': '2025-09-10 02:37:35.352682', 'step': 9918, 'epoch': 2} {'type': 'loss', 'content': 0.09629404544830322, 'timestamp': '2025-09-10 02:37:35.355133', 'step': 9919, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 3322488817984}, 'timestamp': '2025-09-10 02:37:35.384731', 'step': 9919, 'epoch': 2} {'type': 'loss', 'content': 0.08896119892597198, 'timestamp': '2025-09-10 02:37:35.408097', 'step': 9920, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 3322488817984}, 'timestamp': '2025-09-10 02:37:35.438269', 'step': 9920, 'epoch': 2} {'type': 'loss', 'content': 0.1441316455602646, 'timestamp': '2025-09-10 02:37:35.440315', 'step': 9921, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 2847885091968}, 'timestamp': '2025-09-10 02:37:35.470341', 'step': 9921, 'epoch': 2} {'type': 'loss', 'content': 0.0995902493596077, 'timestamp': '2025-09-10 02:37:35.472500', 'step': 9922, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 2847885091968}, 'timestamp': '2025-09-10 02:37:35.503195', 'step': 9922, 'epoch': 2} {'type': 'loss', 'content': 0.1699066460132599, 'timestamp': '2025-09-10 02:37:35.505457', 'step': 9923, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 2847885091968}, 'timestamp': '2025-09-10 02:37:35.535688', 'step': 9923, 'epoch': 2} {'type': 'loss', 'content': 0.07507158815860748, 'timestamp': '2025-09-10 02:37:35.559104', 'step': 9924, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 80], 'flops': 2373281365952}, 'timestamp': '2025-09-10 02:37:35.589267', 'step': 9924, 'epoch': 2} {'type': 'loss', 'content': 0.15238694846630096, 'timestamp': '2025-09-10 02:37:35.591594', 'step': 9925, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 2847885091968}, 'timestamp': '2025-09-10 02:37:35.621810', 'step': 9925, 'epoch': 2} {'type': 'loss', 'content': 0.09230498969554901, 'timestamp': '2025-09-10 02:37:35.624243', 'step': 9926, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 3322488817984}, 'timestamp': '2025-09-10 02:37:35.655331', 'step': 9926, 'epoch': 2} {'type': 'loss', 'content': 0.05928468331694603, 'timestamp': '2025-09-10 02:37:35.660769', 'step': 9927, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 144], 'flops': 4271696270016}, 'timestamp': '2025-09-10 02:37:35.701257', 'step': 9927, 'epoch': 2} {'type': 'loss', 'content': 0.15925195813179016, 'timestamp': '2025-09-10 02:37:35.728222', 'step': 9928, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 128], 'flops': 3797092544000}, 'timestamp': '2025-09-10 02:37:35.759779', 'step': 9928, 'epoch': 2} {'type': 'loss', 'content': 0.07850052416324615, 'timestamp': '2025-09-10 02:37:35.761999', 'step': 9929, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 2847885091968}, 'timestamp': '2025-09-10 02:37:35.792034', 'step': 9929, 'epoch': 2} {'type': 'loss', 'content': 0.2683466374874115, 'timestamp': '2025-09-10 02:37:35.794468', 'step': 9930, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 3322488817984}, 'timestamp': '2025-09-10 02:37:35.824531', 'step': 9930, 'epoch': 2} {'type': 'loss', 'content': 0.1026257649064064, 'timestamp': '2025-09-10 02:37:35.826940', 'step': 9931, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 2847885091968}, 'timestamp': '2025-09-10 02:37:35.858932', 'step': 9931, 'epoch': 2} {'type': 'loss', 'content': 0.1133737713098526, 'timestamp': '2025-09-10 02:37:35.882380', 'step': 9932, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 3322488817984}, 'timestamp': '2025-09-10 02:37:35.912577', 'step': 9932, 'epoch': 2} {'type': 'loss', 'content': 0.09271649271249771, 'timestamp': '2025-09-10 02:37:35.914886', 'step': 9933, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 3322488817984}, 'timestamp': '2025-09-10 02:37:35.944863', 'step': 9933, 'epoch': 2} {'type': 'loss', 'content': 0.1651378571987152, 'timestamp': '2025-09-10 02:37:35.946970', 'step': 9934, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 80], 'flops': 2373281365952}, 'timestamp': '2025-09-10 02:37:35.977097', 'step': 9934, 'epoch': 2} {'type': 'loss', 'content': 0.051198434084653854, 'timestamp': '2025-09-10 02:37:35.979555', 'step': 9935, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 128], 'flops': 3797092544000}, 'timestamp': '2025-09-10 02:37:36.011020', 'step': 9935, 'epoch': 2} {'type': 'loss', 'content': 0.10504170507192612, 'timestamp': '2025-09-10 02:37:36.034987', 'step': 9936, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 128], 'flops': 3797092544000}, 'timestamp': '2025-09-10 02:37:36.065203', 'step': 9936, 'epoch': 2} {'type': 'loss', 'content': 0.14746803045272827, 'timestamp': '2025-09-10 02:37:36.067497', 'step': 9937, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 128], 'flops': 3797092544000}, 'timestamp': '2025-09-10 02:37:36.096865', 'step': 9937, 'epoch': 2} {'type': 'loss', 'content': 0.03445609286427498, 'timestamp': '2025-09-10 02:37:36.099385', 'step': 9938, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 3322488817984}, 'timestamp': '2025-09-10 02:37:36.128989', 'step': 9938, 'epoch': 2} {'type': 'loss', 'content': 0.09114054590463638, 'timestamp': '2025-09-10 02:37:36.131192', 'step': 9939, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 2847885091968}, 'timestamp': '2025-09-10 02:37:36.161662', 'step': 9939, 'epoch': 2} {'type': 'loss', 'content': 0.07557302713394165, 'timestamp': '2025-09-10 02:37:36.185083', 'step': 9940, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 144], 'flops': 4271696270016}, 'timestamp': '2025-09-10 02:37:36.214956', 'step': 9940, 'epoch': 2} {'type': 'loss', 'content': 0.1742551624774933, 'timestamp': '2025-09-10 02:37:36.217309', 'step': 9941, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 80], 'flops': 2373281365952}, 'timestamp': '2025-09-10 02:37:36.247009', 'step': 9941, 'epoch': 2} {'type': 'loss', 'content': 0.06476539373397827, 'timestamp': '2025-09-10 02:37:36.249304', 'step': 9942, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 2847885091968}, 'timestamp': '2025-09-10 02:37:36.278000', 'step': 9942, 'epoch': 2} {'type': 'loss', 'content': 0.15178729593753815, 'timestamp': '2025-09-10 02:37:36.280531', 'step': 9943, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 3322488817984}, 'timestamp': '2025-09-10 02:37:36.310869', 'step': 9943, 'epoch': 2} {'type': 'loss', 'content': 0.11559624969959259, 'timestamp': '2025-09-10 02:37:36.336015', 'step': 9944, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 128], 'flops': 3797092544000}, 'timestamp': '2025-09-10 02:37:36.368066', 'step': 9944, 'epoch': 2} {'type': 'loss', 'content': 0.15927284955978394, 'timestamp': '2025-09-10 02:37:36.370456', 'step': 9945, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 2847885091968}, 'timestamp': '2025-09-10 02:37:36.400802', 'step': 9945, 'epoch': 2} {'type': 'loss', 'content': 0.13379056751728058, 'timestamp': '2025-09-10 02:37:36.403255', 'step': 9946, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 2847885091968}, 'timestamp': '2025-09-10 02:37:36.433446', 'step': 9946, 'epoch': 2} {'type': 'loss', 'content': 0.07526523619890213, 'timestamp': '2025-09-10 02:37:36.436877', 'step': 9947, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 144], 'flops': 4271696270016}, 'timestamp': '2025-09-10 02:37:36.470856', 'step': 9947, 'epoch': 2} {'type': 'loss', 'content': 0.10976042598485947, 'timestamp': '2025-09-10 02:37:36.497947', 'step': 9948, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 3322488817984}, 'timestamp': '2025-09-10 02:37:36.531439', 'step': 9948, 'epoch': 2} {'type': 'loss', 'content': 0.13002067804336548, 'timestamp': '2025-09-10 02:37:36.533716', 'step': 9949, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 128], 'flops': 3797092544000}, 'timestamp': '2025-09-10 02:37:36.568924', 'step': 9949, 'epoch': 2} {'type': 'loss', 'content': 0.0717698186635971, 'timestamp': '2025-09-10 02:37:36.571102', 'step': 9950, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 128], 'flops': 3797092544000}, 'timestamp': '2025-09-10 02:37:36.605016', 'step': 9950, 'epoch': 2} {'type': 'loss', 'content': 0.12719500064849854, 'timestamp': '2025-09-10 02:37:36.610849', 'step': 9951, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 2847885091968}, 'timestamp': '2025-09-10 02:37:36.643603', 'step': 9951, 'epoch': 2} {'type': 'loss', 'content': 0.11484706401824951, 'timestamp': '2025-09-10 02:37:36.667041', 'step': 9952, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 128], 'flops': 3797092544000}, 'timestamp': '2025-09-10 02:37:36.697387', 'step': 9952, 'epoch': 2} {'type': 'loss', 'content': 0.04794082045555115, 'timestamp': '2025-09-10 02:37:36.700639', 'step': 9953, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 3322488817984}, 'timestamp': '2025-09-10 02:37:36.732529', 'step': 9953, 'epoch': 2} {'type': 'loss', 'content': 0.23598527908325195, 'timestamp': '2025-09-10 02:37:36.735767', 'step': 9954, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 3322488817984}, 'timestamp': '2025-09-10 02:37:36.766712', 'step': 9954, 'epoch': 2} {'type': 'loss', 'content': 0.20263274013996124, 'timestamp': '2025-09-10 02:37:36.769093', 'step': 9955, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 128], 'flops': 3797092544000}, 'timestamp': '2025-09-10 02:37:36.801064', 'step': 9955, 'epoch': 2} {'type': 'loss', 'content': 0.08528739213943481, 'timestamp': '2025-09-10 02:37:36.824416', 'step': 9956, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 3322488817984}, 'timestamp': '2025-09-10 02:37:36.855367', 'step': 9956, 'epoch': 2} {'type': 'loss', 'content': 0.09992950409650803, 'timestamp': '2025-09-10 02:37:36.857311', 'step': 9957, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 160], 'flops': 4746299996032}, 'timestamp': '2025-09-10 02:37:36.886680', 'step': 9957, 'epoch': 2} {'type': 'loss', 'content': 0.16235339641571045, 'timestamp': '2025-09-10 02:37:36.889764', 'step': 9958, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 3322488817984}, 'timestamp': '2025-09-10 02:37:36.919748', 'step': 9958, 'epoch': 2} {'type': 'loss', 'content': 0.06833116710186005, 'timestamp': '2025-09-10 02:37:36.922173', 'step': 9959, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 128], 'flops': 3797092544000}, 'timestamp': '2025-09-10 02:37:36.953900', 'step': 9959, 'epoch': 2} {'type': 'loss', 'content': 0.12991410493850708, 'timestamp': '2025-09-10 02:37:36.979837', 'step': 9960, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 3322488817984}, 'timestamp': '2025-09-10 02:37:37.016954', 'step': 9960, 'epoch': 2} {'type': 'loss', 'content': 0.1325157731771469, 'timestamp': '2025-09-10 02:37:37.019470', 'step': 9961, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 3322488817984}, 'timestamp': '2025-09-10 02:37:37.049929', 'step': 9961, 'epoch': 2} {'type': 'loss', 'content': 0.13688062131404877, 'timestamp': '2025-09-10 02:37:37.059445', 'step': 9962, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 3322488817984}, 'timestamp': '2025-09-10 02:37:37.092451', 'step': 9962, 'epoch': 2} {'type': 'loss', 'content': 0.0763622522354126, 'timestamp': '2025-09-10 02:37:37.094958', 'step': 9963, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 144], 'flops': 4271696270016}, 'timestamp': '2025-09-10 02:37:37.127301', 'step': 9963, 'epoch': 2} {'type': 'loss', 'content': 0.07638195157051086, 'timestamp': '2025-09-10 02:37:37.151914', 'step': 9964, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 128], 'flops': 3797092544000}, 'timestamp': '2025-09-10 02:37:37.181250', 'step': 9964, 'epoch': 2} {'type': 'loss', 'content': 0.16196927428245544, 'timestamp': '2025-09-10 02:37:37.183571', 'step': 9965, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 3322488817984}, 'timestamp': '2025-09-10 02:37:37.214123', 'step': 9965, 'epoch': 2} {'type': 'loss', 'content': 0.06214376166462898, 'timestamp': '2025-09-10 02:37:37.216166', 'step': 9966, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 2847885091968}, 'timestamp': '2025-09-10 02:37:37.246135', 'step': 9966, 'epoch': 2} {'type': 'loss', 'content': 0.06800991296768188, 'timestamp': '2025-09-10 02:37:37.249703', 'step': 9967, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 3322488817984}, 'timestamp': '2025-09-10 02:37:37.279513', 'step': 9967, 'epoch': 2} {'type': 'loss', 'content': 0.26295626163482666, 'timestamp': '2025-09-10 02:37:37.303010', 'step': 9968, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 128], 'flops': 3797092544000}, 'timestamp': '2025-09-10 02:37:37.333032', 'step': 9968, 'epoch': 2} {'type': 'loss', 'content': 0.04858876019716263, 'timestamp': '2025-09-10 02:37:37.337389', 'step': 9969, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 2847885091968}, 'timestamp': '2025-09-10 02:37:37.378452', 'step': 9969, 'epoch': 2} {'type': 'loss', 'content': 0.0946541428565979, 'timestamp': '2025-09-10 02:37:37.382433', 'step': 9970, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 3322488817984}, 'timestamp': '2025-09-10 02:37:37.418314', 'step': 9970, 'epoch': 2} {'type': 'loss', 'content': 0.14260224997997284, 'timestamp': '2025-09-10 02:37:37.420401', 'step': 9971, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 128], 'flops': 3797092544000}, 'timestamp': '2025-09-10 02:37:37.450579', 'step': 9971, 'epoch': 2} {'type': 'loss', 'content': 0.07147283852100372, 'timestamp': '2025-09-10 02:37:37.474922', 'step': 9972, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 2847885091968}, 'timestamp': '2025-09-10 02:37:37.509698', 'step': 9972, 'epoch': 2} {'type': 'loss', 'content': 0.19571177661418915, 'timestamp': '2025-09-10 02:37:37.512092', 'step': 9973, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 2847885091968}, 'timestamp': '2025-09-10 02:37:37.543571', 'step': 9973, 'epoch': 2} {'type': 'loss', 'content': 0.11313870549201965, 'timestamp': '2025-09-10 02:37:37.545749', 'step': 9974, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 3322488817984}, 'timestamp': '2025-09-10 02:37:37.575177', 'step': 9974, 'epoch': 2} {'type': 'loss', 'content': 0.12642276287078857, 'timestamp': '2025-09-10 02:37:37.577680', 'step': 9975, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 3322488817984}, 'timestamp': '2025-09-10 02:37:37.608018', 'step': 9975, 'epoch': 2} {'type': 'loss', 'content': 0.11779326945543289, 'timestamp': '2025-09-10 02:37:37.631679', 'step': 9976, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 2847885091968}, 'timestamp': '2025-09-10 02:37:37.666289', 'step': 9976, 'epoch': 2} {'type': 'loss', 'content': 0.11780226230621338, 'timestamp': '2025-09-10 02:37:37.670581', 'step': 9977, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 2847885091968}, 'timestamp': '2025-09-10 02:37:37.701734', 'step': 9977, 'epoch': 2} {'type': 'loss', 'content': 0.08224204927682877, 'timestamp': '2025-09-10 02:37:37.703945', 'step': 9978, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 144], 'flops': 4271696270016}, 'timestamp': '2025-09-10 02:37:37.735133', 'step': 9978, 'epoch': 2} {'type': 'loss', 'content': 0.13179761171340942, 'timestamp': '2025-09-10 02:37:37.739899', 'step': 9979, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 2847885091968}, 'timestamp': '2025-09-10 02:37:37.774431', 'step': 9979, 'epoch': 2} {'type': 'loss', 'content': 0.08553291857242584, 'timestamp': '2025-09-10 02:37:37.799540', 'step': 9980, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 128], 'flops': 3797092544000}, 'timestamp': '2025-09-10 02:37:37.833296', 'step': 9980, 'epoch': 2} {'type': 'loss', 'content': 0.17804409563541412, 'timestamp': '2025-09-10 02:37:37.836089', 'step': 9981, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 3322488817984}, 'timestamp': '2025-09-10 02:37:37.868672', 'step': 9981, 'epoch': 2} {'type': 'loss', 'content': 0.05961376056075096, 'timestamp': '2025-09-10 02:37:37.872397', 'step': 9982, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 128], 'flops': 3797092544000}, 'timestamp': '2025-09-10 02:37:37.904182', 'step': 9982, 'epoch': 2} {'type': 'loss', 'content': 0.07486835867166519, 'timestamp': '2025-09-10 02:37:37.906500', 'step': 9983, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 2847885091968}, 'timestamp': '2025-09-10 02:37:37.936722', 'step': 9983, 'epoch': 2} {'type': 'loss', 'content': 0.09461023658514023, 'timestamp': '2025-09-10 02:37:37.960899', 'step': 9984, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 3322488817984}, 'timestamp': '2025-09-10 02:37:37.990611', 'step': 9984, 'epoch': 2} {'type': 'loss', 'content': 0.12293368577957153, 'timestamp': '2025-09-10 02:37:37.992797', 'step': 9985, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 2847885091968}, 'timestamp': '2025-09-10 02:37:38.023885', 'step': 9985, 'epoch': 2} {'type': 'loss', 'content': 0.07664470374584198, 'timestamp': '2025-09-10 02:37:38.026059', 'step': 9986, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 3322488817984}, 'timestamp': '2025-09-10 02:37:38.058137', 'step': 9986, 'epoch': 2} {'type': 'loss', 'content': 0.13189032673835754, 'timestamp': '2025-09-10 02:37:38.061381', 'step': 9987, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 128], 'flops': 3797092544000}, 'timestamp': '2025-09-10 02:37:38.090659', 'step': 9987, 'epoch': 2} {'type': 'loss', 'content': 0.14546062052249908, 'timestamp': '2025-09-10 02:37:38.114008', 'step': 9988, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 144], 'flops': 4271696270016}, 'timestamp': '2025-09-10 02:37:38.150341', 'step': 9988, 'epoch': 2} {'type': 'loss', 'content': 0.18390122056007385, 'timestamp': '2025-09-10 02:37:38.152850', 'step': 9989, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 2847885091968}, 'timestamp': '2025-09-10 02:37:38.183710', 'step': 9989, 'epoch': 2} {'type': 'loss', 'content': 0.15181928873062134, 'timestamp': '2025-09-10 02:37:38.186286', 'step': 9990, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 144], 'flops': 4271696270016}, 'timestamp': '2025-09-10 02:37:38.218031', 'step': 9990, 'epoch': 2} {'type': 'loss', 'content': 0.13942210376262665, 'timestamp': '2025-09-10 02:37:38.220442', 'step': 9991, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 128], 'flops': 3797092544000}, 'timestamp': '2025-09-10 02:37:38.251566', 'step': 9991, 'epoch': 2} {'type': 'loss', 'content': 0.13328011333942413, 'timestamp': '2025-09-10 02:37:38.275620', 'step': 9992, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 128], 'flops': 3797092544000}, 'timestamp': '2025-09-10 02:37:38.306923', 'step': 9992, 'epoch': 2} {'type': 'loss', 'content': 0.12922921776771545, 'timestamp': '2025-09-10 02:37:38.309124', 'step': 9993, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 128], 'flops': 3797092544000}, 'timestamp': '2025-09-10 02:37:38.339026', 'step': 9993, 'epoch': 2} {'type': 'loss', 'content': 0.09693543612957001, 'timestamp': '2025-09-10 02:37:38.341705', 'step': 9994, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 2847885091968}, 'timestamp': '2025-09-10 02:37:38.372315', 'step': 9994, 'epoch': 2} {'type': 'loss', 'content': 0.13464435935020447, 'timestamp': '2025-09-10 02:37:38.374766', 'step': 9995, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 2847885091968}, 'timestamp': '2025-09-10 02:37:38.409320', 'step': 9995, 'epoch': 2} {'type': 'loss', 'content': 0.1564594805240631, 'timestamp': '2025-09-10 02:37:38.432658', 'step': 9996, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 2847885091968}, 'timestamp': '2025-09-10 02:37:38.462092', 'step': 9996, 'epoch': 2} {'type': 'loss', 'content': 0.048143185675144196, 'timestamp': '2025-09-10 02:37:38.466243', 'step': 9997, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 128], 'flops': 3797092544000}, 'timestamp': '2025-09-10 02:37:38.497990', 'step': 9997, 'epoch': 2} {'type': 'loss', 'content': 0.07013771682977676, 'timestamp': '2025-09-10 02:37:38.500441', 'step': 9998, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 2847885091968}, 'timestamp': '2025-09-10 02:37:38.529912', 'step': 9998, 'epoch': 2} {'type': 'loss', 'content': 0.1025652289390564, 'timestamp': '2025-09-10 02:37:38.532091', 'step': 9999, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 128], 'flops': 3797092544000}, 'timestamp': '2025-09-10 02:37:38.562025', 'step': 9999, 'epoch': 2} {'type': 'loss', 'content': 0.10463839024305344, 'timestamp': '2025-09-10 02:37:38.588874', 'step': 10000, 'epoch': 2} {'type': 'info', 'content': 'Checkpoint saved at step 10000', 'timestamp': '2025-09-10 02:37:46.084474', 'step': 10000, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 144], 'flops': 4271696270016}, 'timestamp': '2025-09-10 02:37:46.115930', 'step': 10000, 'epoch': 2} {'type': 'loss', 'content': 0.14900465309619904, 'timestamp': '2025-09-10 02:37:46.118233', 'step': 10001, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 128], 'flops': 3797092544000}, 'timestamp': '2025-09-10 02:37:46.148455', 'step': 10001, 'epoch': 2} {'type': 'loss', 'content': 0.09117783606052399, 'timestamp': '2025-09-10 02:37:46.150850', 'step': 10002, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 2847885091968}, 'timestamp': '2025-09-10 02:37:46.182426', 'step': 10002, 'epoch': 2} {'type': 'loss', 'content': 0.09285281598567963, 'timestamp': '2025-09-10 02:37:46.184822', 'step': 10003, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 2847885091968}, 'timestamp': '2025-09-10 02:37:46.214861', 'step': 10003, 'epoch': 2} {'type': 'loss', 'content': 0.1295929253101349, 'timestamp': '2025-09-10 02:37:46.238663', 'step': 10004, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 2847885091968}, 'timestamp': '2025-09-10 02:37:46.268540', 'step': 10004, 'epoch': 2} {'type': 'loss', 'content': 0.12096875160932541, 'timestamp': '2025-09-10 02:37:46.270891', 'step': 10005, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 3322488817984}, 'timestamp': '2025-09-10 02:37:46.300646', 'step': 10005, 'epoch': 2} {'type': 'loss', 'content': 0.1255132108926773, 'timestamp': '2025-09-10 02:37:46.302943', 'step': 10006, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 128], 'flops': 3797092544000}, 'timestamp': '2025-09-10 02:37:46.332286', 'step': 10006, 'epoch': 2} {'type': 'loss', 'content': 0.14149267971515656, 'timestamp': '2025-09-10 02:37:46.334472', 'step': 10007, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 3322488817984}, 'timestamp': '2025-09-10 02:37:46.365045', 'step': 10007, 'epoch': 2} {'type': 'loss', 'content': 0.2236076295375824, 'timestamp': '2025-09-10 02:37:46.388877', 'step': 10008, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 3322488817984}, 'timestamp': '2025-09-10 02:37:46.419555', 'step': 10008, 'epoch': 2} {'type': 'loss', 'content': 0.11354134976863861, 'timestamp': '2025-09-10 02:37:46.421950', 'step': 10009, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 144], 'flops': 4271696270016}, 'timestamp': '2025-09-10 02:37:46.452176', 'step': 10009, 'epoch': 2} {'type': 'loss', 'content': 0.07418520748615265, 'timestamp': '2025-09-10 02:37:46.454575', 'step': 10010, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 2847885091968}, 'timestamp': '2025-09-10 02:37:46.485042', 'step': 10010, 'epoch': 2} {'type': 'loss', 'content': 0.14360038936138153, 'timestamp': '2025-09-10 02:37:46.487321', 'step': 10011, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 2847885091968}, 'timestamp': '2025-09-10 02:37:46.518116', 'step': 10011, 'epoch': 2} {'type': 'loss', 'content': 0.057281773537397385, 'timestamp': '2025-09-10 02:37:46.541685', 'step': 10012, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 128], 'flops': 3797092544000}, 'timestamp': '2025-09-10 02:37:46.572212', 'step': 10012, 'epoch': 2} {'type': 'loss', 'content': 0.12967713177204132, 'timestamp': '2025-09-10 02:37:46.574484', 'step': 10013, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 160], 'flops': 4746299996032}, 'timestamp': '2025-09-10 02:37:46.605046', 'step': 10013, 'epoch': 2} {'type': 'loss', 'content': 0.11471547931432724, 'timestamp': '2025-09-10 02:37:46.608011', 'step': 10014, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 3322488817984}, 'timestamp': '2025-09-10 02:37:46.637677', 'step': 10014, 'epoch': 2} {'type': 'loss', 'content': 0.1162693053483963, 'timestamp': '2025-09-10 02:37:46.640462', 'step': 10015, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 2847885091968}, 'timestamp': '2025-09-10 02:37:46.669868', 'step': 10015, 'epoch': 2} {'type': 'loss', 'content': 0.07014419138431549, 'timestamp': '2025-09-10 02:37:46.693440', 'step': 10016, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 128], 'flops': 3797092544000}, 'timestamp': '2025-09-10 02:37:46.723886', 'step': 10016, 'epoch': 2} {'type': 'loss', 'content': 0.05034778267145157, 'timestamp': '2025-09-10 02:37:46.726248', 'step': 10017, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 80], 'flops': 2373281365952}, 'timestamp': '2025-09-10 02:37:46.756145', 'step': 10017, 'epoch': 2} {'type': 'loss', 'content': 0.10139477998018265, 'timestamp': '2025-09-10 02:37:46.758518', 'step': 10018, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 3322488817984}, 'timestamp': '2025-09-10 02:37:46.797111', 'step': 10018, 'epoch': 2} {'type': 'loss', 'content': 0.04408568516373634, 'timestamp': '2025-09-10 02:37:46.799999', 'step': 10019, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 3322488817984}, 'timestamp': '2025-09-10 02:37:46.831157', 'step': 10019, 'epoch': 2} {'type': 'loss', 'content': 0.1383105218410492, 'timestamp': '2025-09-10 02:37:46.854884', 'step': 10020, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 128], 'flops': 3797092544000}, 'timestamp': '2025-09-10 02:37:46.884736', 'step': 10020, 'epoch': 2} {'type': 'loss', 'content': 0.061606116592884064, 'timestamp': '2025-09-10 02:37:46.886919', 'step': 10021, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 80], 'flops': 2373281365952}, 'timestamp': '2025-09-10 02:37:46.919456', 'step': 10021, 'epoch': 2} {'type': 'loss', 'content': 0.0252909567207098, 'timestamp': '2025-09-10 02:37:46.921797', 'step': 10022, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 3322488817984}, 'timestamp': '2025-09-10 02:37:46.952194', 'step': 10022, 'epoch': 2} {'type': 'loss', 'content': 0.09903854876756668, 'timestamp': '2025-09-10 02:37:46.954454', 'step': 10023, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 80], 'flops': 2373281365952}, 'timestamp': '2025-09-10 02:37:46.994154', 'step': 10023, 'epoch': 2} {'type': 'loss', 'content': 0.03723129257559776, 'timestamp': '2025-09-10 02:37:47.018373', 'step': 10024, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 128], 'flops': 3797092544000}, 'timestamp': '2025-09-10 02:37:47.052872', 'step': 10024, 'epoch': 2} {'type': 'loss', 'content': 0.11167212575674057, 'timestamp': '2025-09-10 02:37:47.056700', 'step': 10025, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 144], 'flops': 4271696270016}, 'timestamp': '2025-09-10 02:37:47.098369', 'step': 10025, 'epoch': 2} {'type': 'loss', 'content': 0.08681406080722809, 'timestamp': '2025-09-10 02:37:47.103236', 'step': 10026, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 2847885091968}, 'timestamp': '2025-09-10 02:37:47.138013', 'step': 10026, 'epoch': 2} {'type': 'loss', 'content': 0.1117217168211937, 'timestamp': '2025-09-10 02:37:47.140271', 'step': 10027, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 128], 'flops': 3797092544000}, 'timestamp': '2025-09-10 02:37:47.170277', 'step': 10027, 'epoch': 2} {'type': 'loss', 'content': 0.11031969636678696, 'timestamp': '2025-09-10 02:37:47.193718', 'step': 10028, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 128], 'flops': 3797092544000}, 'timestamp': '2025-09-10 02:37:47.225640', 'step': 10028, 'epoch': 2} {'type': 'loss', 'content': 0.08367941528558731, 'timestamp': '2025-09-10 02:37:47.230582', 'step': 10029, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 128], 'flops': 3797092544000}, 'timestamp': '2025-09-10 02:37:47.263484', 'step': 10029, 'epoch': 2} {'type': 'loss', 'content': 0.10219187289476395, 'timestamp': '2025-09-10 02:37:47.265700', 'step': 10030, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 144], 'flops': 4271696270016}, 'timestamp': '2025-09-10 02:37:47.295726', 'step': 10030, 'epoch': 2} {'type': 'loss', 'content': 0.16200891137123108, 'timestamp': '2025-09-10 02:37:47.298050', 'step': 10031, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 2847885091968}, 'timestamp': '2025-09-10 02:37:47.327623', 'step': 10031, 'epoch': 2} {'type': 'loss', 'content': 0.07240249961614609, 'timestamp': '2025-09-10 02:37:47.352328', 'step': 10032, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 3322488817984}, 'timestamp': '2025-09-10 02:37:47.382450', 'step': 10032, 'epoch': 2} {'type': 'loss', 'content': 0.07841506600379944, 'timestamp': '2025-09-10 02:37:47.385693', 'step': 10033, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 2847885091968}, 'timestamp': '2025-09-10 02:37:47.422487', 'step': 10033, 'epoch': 2} {'type': 'loss', 'content': 0.04898032918572426, 'timestamp': '2025-09-10 02:37:47.425082', 'step': 10034, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 176], 'flops': 5220903722048}, 'timestamp': '2025-09-10 02:37:47.458847', 'step': 10034, 'epoch': 2} {'type': 'loss', 'content': 0.10461632907390594, 'timestamp': '2025-09-10 02:37:47.462855', 'step': 10035, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 3322488817984}, 'timestamp': '2025-09-10 02:37:47.492739', 'step': 10035, 'epoch': 2} {'type': 'loss', 'content': 0.04646069556474686, 'timestamp': '2025-09-10 02:37:47.518027', 'step': 10036, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 3322488817984}, 'timestamp': '2025-09-10 02:37:47.555919', 'step': 10036, 'epoch': 2} {'type': 'loss', 'content': 0.05801832675933838, 'timestamp': '2025-09-10 02:37:47.559330', 'step': 10037, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 80], 'flops': 2373281365952}, 'timestamp': '2025-09-10 02:37:47.597361', 'step': 10037, 'epoch': 2} {'type': 'loss', 'content': 0.14204277098178864, 'timestamp': '2025-09-10 02:37:47.600725', 'step': 10038, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 2847885091968}, 'timestamp': '2025-09-10 02:37:47.632797', 'step': 10038, 'epoch': 2} {'type': 'loss', 'content': 0.14542819559574127, 'timestamp': '2025-09-10 02:37:47.636817', 'step': 10039, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 128], 'flops': 3797092544000}, 'timestamp': '2025-09-10 02:37:47.669332', 'step': 10039, 'epoch': 2} {'type': 'loss', 'content': 0.05044190585613251, 'timestamp': '2025-09-10 02:37:47.695070', 'step': 10040, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 128], 'flops': 3797092544000}, 'timestamp': '2025-09-10 02:37:47.730795', 'step': 10040, 'epoch': 2} {'type': 'loss', 'content': 0.09793489426374435, 'timestamp': '2025-09-10 02:37:47.733376', 'step': 10041, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 128], 'flops': 3797092544000}, 'timestamp': '2025-09-10 02:37:47.768385', 'step': 10041, 'epoch': 2} {'type': 'loss', 'content': 0.1429620385169983, 'timestamp': '2025-09-10 02:37:47.773067', 'step': 10042, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 144], 'flops': 4271696270016}, 'timestamp': '2025-09-10 02:37:47.812903', 'step': 10042, 'epoch': 2} {'type': 'loss', 'content': 0.13518522679805756, 'timestamp': '2025-09-10 02:37:47.817978', 'step': 10043, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 3322488817984}, 'timestamp': '2025-09-10 02:37:47.855499', 'step': 10043, 'epoch': 2} {'type': 'loss', 'content': 0.11333313584327698, 'timestamp': '2025-09-10 02:37:47.881040', 'step': 10044, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 144], 'flops': 4271696270016}, 'timestamp': '2025-09-10 02:37:47.914672', 'step': 10044, 'epoch': 2} {'type': 'loss', 'content': 0.11995730549097061, 'timestamp': '2025-09-10 02:37:47.916934', 'step': 10045, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 2847885091968}, 'timestamp': '2025-09-10 02:37:47.949517', 'step': 10045, 'epoch': 2} {'type': 'loss', 'content': 0.09853775799274445, 'timestamp': '2025-09-10 02:37:47.951693', 'step': 10046, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 3322488817984}, 'timestamp': '2025-09-10 02:37:47.986625', 'step': 10046, 'epoch': 2} {'type': 'loss', 'content': 0.12156567722558975, 'timestamp': '2025-09-10 02:37:47.988595', 'step': 10047, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 3322488817984}, 'timestamp': '2025-09-10 02:37:48.020158', 'step': 10047, 'epoch': 2} {'type': 'loss', 'content': 0.11892993748188019, 'timestamp': '2025-09-10 02:37:48.044115', 'step': 10048, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 3322488817984}, 'timestamp': '2025-09-10 02:37:48.074187', 'step': 10048, 'epoch': 2} {'type': 'loss', 'content': 0.09188074618577957, 'timestamp': '2025-09-10 02:37:48.076301', 'step': 10049, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 128], 'flops': 3797092544000}, 'timestamp': '2025-09-10 02:37:48.105654', 'step': 10049, 'epoch': 2} {'type': 'loss', 'content': 0.05187668278813362, 'timestamp': '2025-09-10 02:37:48.107819', 'step': 10050, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 128], 'flops': 3797092544000}, 'timestamp': '2025-09-10 02:37:48.137752', 'step': 10050, 'epoch': 2} {'type': 'loss', 'content': 0.08952448517084122, 'timestamp': '2025-09-10 02:37:48.139991', 'step': 10051, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 3322488817984}, 'timestamp': '2025-09-10 02:37:48.169574', 'step': 10051, 'epoch': 2} {'type': 'loss', 'content': 0.07119672745466232, 'timestamp': '2025-09-10 02:37:48.193189', 'step': 10052, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 128], 'flops': 3797092544000}, 'timestamp': '2025-09-10 02:37:48.223850', 'step': 10052, 'epoch': 2} {'type': 'loss', 'content': 0.1377916932106018, 'timestamp': '2025-09-10 02:37:48.226351', 'step': 10053, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 3322488817984}, 'timestamp': '2025-09-10 02:37:48.256567', 'step': 10053, 'epoch': 2} {'type': 'loss', 'content': 0.08955740183591843, 'timestamp': '2025-09-10 02:37:48.259368', 'step': 10054, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 3322488817984}, 'timestamp': '2025-09-10 02:37:48.290536', 'step': 10054, 'epoch': 2} {'type': 'loss', 'content': 0.037918947637081146, 'timestamp': '2025-09-10 02:37:48.292803', 'step': 10055, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 3322488817984}, 'timestamp': '2025-09-10 02:37:48.321768', 'step': 10055, 'epoch': 2} {'type': 'loss', 'content': 0.12221904844045639, 'timestamp': '2025-09-10 02:37:48.345161', 'step': 10056, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 3322488817984}, 'timestamp': '2025-09-10 02:37:48.375123', 'step': 10056, 'epoch': 2} {'type': 'loss', 'content': 0.027524719014763832, 'timestamp': '2025-09-10 02:37:48.377480', 'step': 10057, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 2847885091968}, 'timestamp': '2025-09-10 02:37:48.407379', 'step': 10057, 'epoch': 2} {'type': 'loss', 'content': 0.08236464112997055, 'timestamp': '2025-09-10 02:37:48.409581', 'step': 10058, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 3322488817984}, 'timestamp': '2025-09-10 02:37:48.441013', 'step': 10058, 'epoch': 2} {'type': 'loss', 'content': 0.10109485685825348, 'timestamp': '2025-09-10 02:37:48.443378', 'step': 10059, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 128], 'flops': 3797092544000}, 'timestamp': '2025-09-10 02:37:48.473136', 'step': 10059, 'epoch': 2} {'type': 'loss', 'content': 0.09417301416397095, 'timestamp': '2025-09-10 02:37:48.496557', 'step': 10060, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 3322488817984}, 'timestamp': '2025-09-10 02:37:48.527651', 'step': 10060, 'epoch': 2} {'type': 'loss', 'content': 0.07669045776128769, 'timestamp': '2025-09-10 02:37:48.529960', 'step': 10061, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 128], 'flops': 3797092544000}, 'timestamp': '2025-09-10 02:37:48.559887', 'step': 10061, 'epoch': 2} {'type': 'loss', 'content': 0.05322079360485077, 'timestamp': '2025-09-10 02:37:48.562161', 'step': 10062, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 2847885091968}, 'timestamp': '2025-09-10 02:37:48.592163', 'step': 10062, 'epoch': 2} {'type': 'loss', 'content': 0.060867901891469955, 'timestamp': '2025-09-10 02:37:48.595706', 'step': 10063, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 128], 'flops': 3797092544000}, 'timestamp': '2025-09-10 02:37:48.625329', 'step': 10063, 'epoch': 2} {'type': 'loss', 'content': 0.07930001616477966, 'timestamp': '2025-09-10 02:37:48.648749', 'step': 10064, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 160], 'flops': 4746299996032}, 'timestamp': '2025-09-10 02:37:48.679113', 'step': 10064, 'epoch': 2} {'type': 'loss', 'content': 0.06429623812437057, 'timestamp': '2025-09-10 02:37:48.681267', 'step': 10065, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 144], 'flops': 4271696270016}, 'timestamp': '2025-09-10 02:37:48.711367', 'step': 10065, 'epoch': 2} {'type': 'loss', 'content': 0.17583654820919037, 'timestamp': '2025-09-10 02:37:48.713735', 'step': 10066, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 3322488817984}, 'timestamp': '2025-09-10 02:37:48.744318', 'step': 10066, 'epoch': 2} {'type': 'loss', 'content': 0.05540015175938606, 'timestamp': '2025-09-10 02:37:48.746568', 'step': 10067, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 3322488817984}, 'timestamp': '2025-09-10 02:37:48.776785', 'step': 10067, 'epoch': 2} {'type': 'loss', 'content': 0.07448022067546844, 'timestamp': '2025-09-10 02:37:48.800540', 'step': 10068, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 128], 'flops': 3797092544000}, 'timestamp': '2025-09-10 02:37:48.830941', 'step': 10068, 'epoch': 2} {'type': 'loss', 'content': 0.1895943135023117, 'timestamp': '2025-09-10 02:37:48.833195', 'step': 10069, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 2847885091968}, 'timestamp': '2025-09-10 02:37:48.863245', 'step': 10069, 'epoch': 2} {'type': 'loss', 'content': 0.135633647441864, 'timestamp': '2025-09-10 02:37:48.868932', 'step': 10070, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 128], 'flops': 3797092544000}, 'timestamp': '2025-09-10 02:37:48.898487', 'step': 10070, 'epoch': 2} {'type': 'loss', 'content': 0.06366658955812454, 'timestamp': '2025-09-10 02:37:48.901547', 'step': 10071, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 128], 'flops': 3797092544000}, 'timestamp': '2025-09-10 02:37:48.932921', 'step': 10071, 'epoch': 2} {'type': 'loss', 'content': 0.0683165043592453, 'timestamp': '2025-09-10 02:37:48.957674', 'step': 10072, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 144], 'flops': 4271696270016}, 'timestamp': '2025-09-10 02:37:48.991217', 'step': 10072, 'epoch': 2} {'type': 'loss', 'content': 0.08135892450809479, 'timestamp': '2025-09-10 02:37:48.993375', 'step': 10073, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 128], 'flops': 3797092544000}, 'timestamp': '2025-09-10 02:37:49.027139', 'step': 10073, 'epoch': 2} {'type': 'loss', 'content': 0.0877142995595932, 'timestamp': '2025-09-10 02:37:49.029912', 'step': 10074, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 2847885091968}, 'timestamp': '2025-09-10 02:37:49.060677', 'step': 10074, 'epoch': 2} {'type': 'loss', 'content': 0.14587801694869995, 'timestamp': '2025-09-10 02:37:49.063081', 'step': 10075, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 2847885091968}, 'timestamp': '2025-09-10 02:37:49.093544', 'step': 10075, 'epoch': 2} {'type': 'loss', 'content': 0.13237245380878448, 'timestamp': '2025-09-10 02:37:49.117118', 'step': 10076, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 144], 'flops': 4271696270016}, 'timestamp': '2025-09-10 02:37:49.147535', 'step': 10076, 'epoch': 2} {'type': 'loss', 'content': 0.17779433727264404, 'timestamp': '2025-09-10 02:37:49.149910', 'step': 10077, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 128], 'flops': 3797092544000}, 'timestamp': '2025-09-10 02:37:49.179867', 'step': 10077, 'epoch': 2} {'type': 'loss', 'content': 0.07829441875219345, 'timestamp': '2025-09-10 02:37:49.182399', 'step': 10078, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 2847885091968}, 'timestamp': '2025-09-10 02:37:49.211067', 'step': 10078, 'epoch': 2} {'type': 'loss', 'content': 0.1650867909193039, 'timestamp': '2025-09-10 02:37:49.213544', 'step': 10079, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 80], 'flops': 2373281365952}, 'timestamp': '2025-09-10 02:37:49.243881', 'step': 10079, 'epoch': 2} {'type': 'loss', 'content': 0.08320567011833191, 'timestamp': '2025-09-10 02:37:49.267481', 'step': 10080, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 2847885091968}, 'timestamp': '2025-09-10 02:37:49.298540', 'step': 10080, 'epoch': 2} {'type': 'loss', 'content': 0.06069346144795418, 'timestamp': '2025-09-10 02:37:49.300619', 'step': 10081, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 128], 'flops': 3797092544000}, 'timestamp': '2025-09-10 02:37:49.339119', 'step': 10081, 'epoch': 2} {'type': 'loss', 'content': 0.06614483892917633, 'timestamp': '2025-09-10 02:37:49.342321', 'step': 10082, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 2847885091968}, 'timestamp': '2025-09-10 02:37:49.379379', 'step': 10082, 'epoch': 2} {'type': 'loss', 'content': 0.11537749320268631, 'timestamp': '2025-09-10 02:37:49.382695', 'step': 10083, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 128], 'flops': 3797092544000}, 'timestamp': '2025-09-10 02:37:49.412657', 'step': 10083, 'epoch': 2} {'type': 'loss', 'content': 0.1478659212589264, 'timestamp': '2025-09-10 02:37:49.436102', 'step': 10084, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 2847885091968}, 'timestamp': '2025-09-10 02:37:49.466545', 'step': 10084, 'epoch': 2} {'type': 'loss', 'content': 0.09227633476257324, 'timestamp': '2025-09-10 02:37:49.469800', 'step': 10085, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 144], 'flops': 4271696270016}, 'timestamp': '2025-09-10 02:37:49.499403', 'step': 10085, 'epoch': 2} {'type': 'loss', 'content': 0.07428885996341705, 'timestamp': '2025-09-10 02:37:49.502514', 'step': 10086, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 128], 'flops': 3797092544000}, 'timestamp': '2025-09-10 02:37:49.533313', 'step': 10086, 'epoch': 2} {'type': 'loss', 'content': 0.09930344671010971, 'timestamp': '2025-09-10 02:37:49.535801', 'step': 10087, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 3322488817984}, 'timestamp': '2025-09-10 02:37:49.566531', 'step': 10087, 'epoch': 2} {'type': 'loss', 'content': 0.057465922087430954, 'timestamp': '2025-09-10 02:37:49.590219', 'step': 10088, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 80], 'flops': 2373281365952}, 'timestamp': '2025-09-10 02:37:49.619552', 'step': 10088, 'epoch': 2} {'type': 'loss', 'content': 0.09199493378400803, 'timestamp': '2025-09-10 02:37:49.621496', 'step': 10089, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 128], 'flops': 3797092544000}, 'timestamp': '2025-09-10 02:37:49.650206', 'step': 10089, 'epoch': 2} {'type': 'loss', 'content': 0.05957602709531784, 'timestamp': '2025-09-10 02:37:49.653093', 'step': 10090, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 2847885091968}, 'timestamp': '2025-09-10 02:37:49.683572', 'step': 10090, 'epoch': 2} {'type': 'loss', 'content': 0.14945438504219055, 'timestamp': '2025-09-10 02:37:49.685548', 'step': 10091, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 128], 'flops': 3797092544000}, 'timestamp': '2025-09-10 02:37:49.714311', 'step': 10091, 'epoch': 2} {'type': 'loss', 'content': 0.09685028344392776, 'timestamp': '2025-09-10 02:37:49.738155', 'step': 10092, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 3322488817984}, 'timestamp': '2025-09-10 02:37:49.768579', 'step': 10092, 'epoch': 2} {'type': 'loss', 'content': 0.10566836595535278, 'timestamp': '2025-09-10 02:37:49.771018', 'step': 10093, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 128], 'flops': 3797092544000}, 'timestamp': '2025-09-10 02:37:49.800353', 'step': 10093, 'epoch': 2} {'type': 'loss', 'content': 0.11510317772626877, 'timestamp': '2025-09-10 02:37:49.802853', 'step': 10094, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 3322488817984}, 'timestamp': '2025-09-10 02:37:49.835937', 'step': 10094, 'epoch': 2} {'type': 'loss', 'content': 0.089279405772686, 'timestamp': '2025-09-10 02:37:49.838095', 'step': 10095, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 3322488817984}, 'timestamp': '2025-09-10 02:37:49.867120', 'step': 10095, 'epoch': 2} {'type': 'loss', 'content': 0.10897177457809448, 'timestamp': '2025-09-10 02:37:49.891710', 'step': 10096, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 2847885091968}, 'timestamp': '2025-09-10 02:37:49.920474', 'step': 10096, 'epoch': 2} {'type': 'loss', 'content': 0.0650768056511879, 'timestamp': '2025-09-10 02:37:49.922746', 'step': 10097, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 128], 'flops': 3797092544000}, 'timestamp': '2025-09-10 02:37:49.952280', 'step': 10097, 'epoch': 2} {'type': 'loss', 'content': 0.0683048889040947, 'timestamp': '2025-09-10 02:37:49.954472', 'step': 10098, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 3322488817984}, 'timestamp': '2025-09-10 02:37:49.983690', 'step': 10098, 'epoch': 2} {'type': 'loss', 'content': 0.13452576100826263, 'timestamp': '2025-09-10 02:37:49.985648', 'step': 10099, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 128], 'flops': 3797092544000}, 'timestamp': '2025-09-10 02:37:50.015188', 'step': 10099, 'epoch': 2} {'type': 'loss', 'content': 0.12534132599830627, 'timestamp': '2025-09-10 02:37:50.038661', 'step': 10100, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 2847885091968}, 'timestamp': '2025-09-10 02:37:50.069649', 'step': 10100, 'epoch': 2} {'type': 'loss', 'content': 0.13663874566555023, 'timestamp': '2025-09-10 02:37:50.071948', 'step': 10101, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 2847885091968}, 'timestamp': '2025-09-10 02:37:50.102260', 'step': 10101, 'epoch': 2} {'type': 'loss', 'content': 0.09108427166938782, 'timestamp': '2025-09-10 02:37:50.104686', 'step': 10102, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 144], 'flops': 4271696270016}, 'timestamp': '2025-09-10 02:37:50.135037', 'step': 10102, 'epoch': 2} {'type': 'loss', 'content': 0.08024236559867859, 'timestamp': '2025-09-10 02:37:50.137817', 'step': 10103, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 3322488817984}, 'timestamp': '2025-09-10 02:37:50.167882', 'step': 10103, 'epoch': 2} {'type': 'loss', 'content': 0.06611975282430649, 'timestamp': '2025-09-10 02:37:50.191634', 'step': 10104, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 128], 'flops': 3797092544000}, 'timestamp': '2025-09-10 02:37:50.221920', 'step': 10104, 'epoch': 2} {'type': 'loss', 'content': 0.11940596997737885, 'timestamp': '2025-09-10 02:37:50.224502', 'step': 10105, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 128], 'flops': 3797092544000}, 'timestamp': '2025-09-10 02:37:50.255256', 'step': 10105, 'epoch': 2} {'type': 'loss', 'content': 0.10461796820163727, 'timestamp': '2025-09-10 02:37:50.257488', 'step': 10106, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 3322488817984}, 'timestamp': '2025-09-10 02:37:50.288522', 'step': 10106, 'epoch': 2} {'type': 'loss', 'content': 0.09977526217699051, 'timestamp': '2025-09-10 02:37:50.290819', 'step': 10107, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 128], 'flops': 3797092544000}, 'timestamp': '2025-09-10 02:37:50.320009', 'step': 10107, 'epoch': 2} {'type': 'loss', 'content': 0.05075100436806679, 'timestamp': '2025-09-10 02:37:50.343569', 'step': 10108, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 2847885091968}, 'timestamp': '2025-09-10 02:37:50.374202', 'step': 10108, 'epoch': 2} {'type': 'loss', 'content': 0.10994043946266174, 'timestamp': '2025-09-10 02:37:50.376410', 'step': 10109, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 2847885091968}, 'timestamp': '2025-09-10 02:37:50.406418', 'step': 10109, 'epoch': 2} {'type': 'loss', 'content': 0.1375785768032074, 'timestamp': '2025-09-10 02:37:50.408532', 'step': 10110, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 2847885091968}, 'timestamp': '2025-09-10 02:37:50.438200', 'step': 10110, 'epoch': 2} {'type': 'loss', 'content': 0.14155538380146027, 'timestamp': '2025-09-10 02:37:50.440588', 'step': 10111, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 128], 'flops': 3797092544000}, 'timestamp': '2025-09-10 02:37:50.470948', 'step': 10111, 'epoch': 2} {'type': 'loss', 'content': 0.06285610795021057, 'timestamp': '2025-09-10 02:37:50.496337', 'step': 10112, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 3322488817984}, 'timestamp': '2025-09-10 02:37:50.527013', 'step': 10112, 'epoch': 2} {'type': 'loss', 'content': 0.10465510934591293, 'timestamp': '2025-09-10 02:37:50.529115', 'step': 10113, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 2847885091968}, 'timestamp': '2025-09-10 02:37:50.560270', 'step': 10113, 'epoch': 2} {'type': 'loss', 'content': 0.043657366186380386, 'timestamp': '2025-09-10 02:37:50.562618', 'step': 10114, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 144], 'flops': 4271696270016}, 'timestamp': '2025-09-10 02:37:50.593961', 'step': 10114, 'epoch': 2} {'type': 'loss', 'content': 0.09126890450716019, 'timestamp': '2025-09-10 02:37:50.596546', 'step': 10115, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 2847885091968}, 'timestamp': '2025-09-10 02:37:50.625886', 'step': 10115, 'epoch': 2} {'type': 'loss', 'content': 0.07609950006008148, 'timestamp': '2025-09-10 02:37:50.649387', 'step': 10116, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 176], 'flops': 5220903722048}, 'timestamp': '2025-09-10 02:37:50.680271', 'step': 10116, 'epoch': 2} {'type': 'loss', 'content': 0.08650276809930801, 'timestamp': '2025-09-10 02:37:50.684672', 'step': 10117, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 3322488817984}, 'timestamp': '2025-09-10 02:37:50.713366', 'step': 10117, 'epoch': 2} {'type': 'loss', 'content': 0.050166621804237366, 'timestamp': '2025-09-10 02:37:50.715588', 'step': 10118, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 2847885091968}, 'timestamp': '2025-09-10 02:37:50.746026', 'step': 10118, 'epoch': 2} {'type': 'loss', 'content': 0.0613989382982254, 'timestamp': '2025-09-10 02:37:50.748363', 'step': 10119, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 128], 'flops': 3797092544000}, 'timestamp': '2025-09-10 02:37:50.778281', 'step': 10119, 'epoch': 2} {'type': 'loss', 'content': 0.16659779846668243, 'timestamp': '2025-09-10 02:37:50.802341', 'step': 10120, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 3322488817984}, 'timestamp': '2025-09-10 02:37:50.833171', 'step': 10120, 'epoch': 2} {'type': 'loss', 'content': 0.19881336390972137, 'timestamp': '2025-09-10 02:37:50.835616', 'step': 10121, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 2847885091968}, 'timestamp': '2025-09-10 02:37:50.864951', 'step': 10121, 'epoch': 2} {'type': 'loss', 'content': 0.09245987236499786, 'timestamp': '2025-09-10 02:37:50.866919', 'step': 10122, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 144], 'flops': 4271696270016}, 'timestamp': '2025-09-10 02:37:50.896675', 'step': 10122, 'epoch': 2} {'type': 'loss', 'content': 0.08774035423994064, 'timestamp': '2025-09-10 02:37:50.898996', 'step': 10123, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 2847885091968}, 'timestamp': '2025-09-10 02:37:50.927526', 'step': 10123, 'epoch': 2} {'type': 'loss', 'content': 0.1000155434012413, 'timestamp': '2025-09-10 02:37:50.950996', 'step': 10124, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 2847885091968}, 'timestamp': '2025-09-10 02:37:50.981381', 'step': 10124, 'epoch': 2} {'type': 'loss', 'content': 0.14270052313804626, 'timestamp': '2025-09-10 02:37:50.983573', 'step': 10125, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 2847885091968}, 'timestamp': '2025-09-10 02:37:51.012726', 'step': 10125, 'epoch': 2} {'type': 'loss', 'content': 0.07369611412286758, 'timestamp': '2025-09-10 02:37:51.015395', 'step': 10126, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 144], 'flops': 4271696270016}, 'timestamp': '2025-09-10 02:37:51.045314', 'step': 10126, 'epoch': 2} {'type': 'loss', 'content': 0.15709181129932404, 'timestamp': '2025-09-10 02:37:51.047733', 'step': 10127, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 128], 'flops': 3797092544000}, 'timestamp': '2025-09-10 02:37:51.076859', 'step': 10127, 'epoch': 2} {'type': 'loss', 'content': 0.10941164195537567, 'timestamp': '2025-09-10 02:37:51.100470', 'step': 10128, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 144], 'flops': 4271696270016}, 'timestamp': '2025-09-10 02:37:51.130128', 'step': 10128, 'epoch': 2} {'type': 'loss', 'content': 0.15494674444198608, 'timestamp': '2025-09-10 02:37:51.132287', 'step': 10129, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 128], 'flops': 3797092544000}, 'timestamp': '2025-09-10 02:37:51.161920', 'step': 10129, 'epoch': 2} {'type': 'loss', 'content': 0.09389735013246536, 'timestamp': '2025-09-10 02:37:51.164181', 'step': 10130, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 3322488817984}, 'timestamp': '2025-09-10 02:37:51.193396', 'step': 10130, 'epoch': 2} {'type': 'loss', 'content': 0.09594642370939255, 'timestamp': '2025-09-10 02:37:51.195137', 'step': 10131, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 2847885091968}, 'timestamp': '2025-09-10 02:37:51.223848', 'step': 10131, 'epoch': 2} {'type': 'loss', 'content': 0.06561321765184402, 'timestamp': '2025-09-10 02:37:51.247349', 'step': 10132, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 3322488817984}, 'timestamp': '2025-09-10 02:37:51.277528', 'step': 10132, 'epoch': 2} {'type': 'loss', 'content': 0.08322518318891525, 'timestamp': '2025-09-10 02:37:51.280272', 'step': 10133, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 3322488817984}, 'timestamp': '2025-09-10 02:37:51.309416', 'step': 10133, 'epoch': 2} {'type': 'loss', 'content': 0.08246272802352905, 'timestamp': '2025-09-10 02:37:51.311655', 'step': 10134, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 2847885091968}, 'timestamp': '2025-09-10 02:37:51.341277', 'step': 10134, 'epoch': 2} {'type': 'loss', 'content': 0.12397921830415726, 'timestamp': '2025-09-10 02:37:51.343781', 'step': 10135, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 192], 'flops': 5695507448064}, 'timestamp': '2025-09-10 02:37:51.375027', 'step': 10135, 'epoch': 2} {'type': 'loss', 'content': 0.06816443055868149, 'timestamp': '2025-09-10 02:37:51.400440', 'step': 10136, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 128], 'flops': 3797092544000}, 'timestamp': '2025-09-10 02:37:51.430961', 'step': 10136, 'epoch': 2} {'type': 'loss', 'content': 0.11133412271738052, 'timestamp': '2025-09-10 02:37:51.433234', 'step': 10137, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 2847885091968}, 'timestamp': '2025-09-10 02:37:51.462787', 'step': 10137, 'epoch': 2} {'type': 'loss', 'content': 0.07872959971427917, 'timestamp': '2025-09-10 02:37:51.465273', 'step': 10138, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 128], 'flops': 3797092544000}, 'timestamp': '2025-09-10 02:37:51.495746', 'step': 10138, 'epoch': 2} {'type': 'loss', 'content': 0.06301572918891907, 'timestamp': '2025-09-10 02:37:51.497720', 'step': 10139, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 3322488817984}, 'timestamp': '2025-09-10 02:37:51.527661', 'step': 10139, 'epoch': 2} {'type': 'loss', 'content': 0.12429996579885483, 'timestamp': '2025-09-10 02:37:51.555155', 'step': 10140, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 3322488817984}, 'timestamp': '2025-09-10 02:37:51.586391', 'step': 10140, 'epoch': 2} {'type': 'loss', 'content': 0.036431197077035904, 'timestamp': '2025-09-10 02:37:51.588441', 'step': 10141, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 144], 'flops': 4271696270016}, 'timestamp': '2025-09-10 02:37:51.617153', 'step': 10141, 'epoch': 2} {'type': 'loss', 'content': 0.09778819233179092, 'timestamp': '2025-09-10 02:37:51.619662', 'step': 10142, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 2847885091968}, 'timestamp': '2025-09-10 02:37:51.649454', 'step': 10142, 'epoch': 2} {'type': 'loss', 'content': 0.12171780318021774, 'timestamp': '2025-09-10 02:37:51.651708', 'step': 10143, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 160], 'flops': 4746299996032}, 'timestamp': '2025-09-10 02:37:51.682004', 'step': 10143, 'epoch': 2} {'type': 'loss', 'content': 0.11913655698299408, 'timestamp': '2025-09-10 02:37:51.705659', 'step': 10144, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 3322488817984}, 'timestamp': '2025-09-10 02:37:51.736061', 'step': 10144, 'epoch': 2} {'type': 'loss', 'content': 0.045672591775655746, 'timestamp': '2025-09-10 02:37:51.738296', 'step': 10145, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 2847885091968}, 'timestamp': '2025-09-10 02:37:51.768189', 'step': 10145, 'epoch': 2} {'type': 'loss', 'content': 0.0862615630030632, 'timestamp': '2025-09-10 02:37:51.770410', 'step': 10146, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 2847885091968}, 'timestamp': '2025-09-10 02:37:51.801303', 'step': 10146, 'epoch': 2} {'type': 'loss', 'content': 0.11531222611665726, 'timestamp': '2025-09-10 02:37:51.803327', 'step': 10147, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 3322488817984}, 'timestamp': '2025-09-10 02:37:51.833931', 'step': 10147, 'epoch': 2} {'type': 'loss', 'content': 0.10963452607393265, 'timestamp': '2025-09-10 02:37:51.858617', 'step': 10148, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 3322488817984}, 'timestamp': '2025-09-10 02:37:51.888702', 'step': 10148, 'epoch': 2} {'type': 'loss', 'content': 0.0732884407043457, 'timestamp': '2025-09-10 02:37:51.891044', 'step': 10149, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 3322488817984}, 'timestamp': '2025-09-10 02:37:51.921747', 'step': 10149, 'epoch': 2} {'type': 'loss', 'content': 0.11073093116283417, 'timestamp': '2025-09-10 02:37:51.924034', 'step': 10150, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 2847885091968}, 'timestamp': '2025-09-10 02:37:51.954377', 'step': 10150, 'epoch': 2} {'type': 'loss', 'content': 0.12980562448501587, 'timestamp': '2025-09-10 02:37:51.956655', 'step': 10151, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 2847885091968}, 'timestamp': '2025-09-10 02:37:51.985996', 'step': 10151, 'epoch': 2} {'type': 'loss', 'content': 0.1529008150100708, 'timestamp': '2025-09-10 02:37:52.011302', 'step': 10152, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 3322488817984}, 'timestamp': '2025-09-10 02:37:52.041204', 'step': 10152, 'epoch': 2} {'type': 'loss', 'content': 0.033713553100824356, 'timestamp': '2025-09-10 02:37:52.043658', 'step': 10153, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 144], 'flops': 4271696270016}, 'timestamp': '2025-09-10 02:37:52.074409', 'step': 10153, 'epoch': 2} {'type': 'loss', 'content': 0.11348424106836319, 'timestamp': '2025-09-10 02:37:52.076848', 'step': 10154, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 3322488817984}, 'timestamp': '2025-09-10 02:37:52.106295', 'step': 10154, 'epoch': 2} {'type': 'loss', 'content': 0.08330027014017105, 'timestamp': '2025-09-10 02:37:52.108491', 'step': 10155, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 2847885091968}, 'timestamp': '2025-09-10 02:37:52.137814', 'step': 10155, 'epoch': 2} {'type': 'loss', 'content': 0.08756085485219955, 'timestamp': '2025-09-10 02:37:52.162442', 'step': 10156, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 2847885091968}, 'timestamp': '2025-09-10 02:37:52.192793', 'step': 10156, 'epoch': 2} {'type': 'loss', 'content': 0.10567104816436768, 'timestamp': '2025-09-10 02:37:52.195375', 'step': 10157, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 144], 'flops': 4271696270016}, 'timestamp': '2025-09-10 02:37:52.224445', 'step': 10157, 'epoch': 2} {'type': 'loss', 'content': 0.11021140217781067, 'timestamp': '2025-09-10 02:37:52.226986', 'step': 10158, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 160], 'flops': 4746299996032}, 'timestamp': '2025-09-10 02:37:52.256819', 'step': 10158, 'epoch': 2} {'type': 'loss', 'content': 0.12398161739110947, 'timestamp': '2025-09-10 02:37:52.259399', 'step': 10159, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 3322488817984}, 'timestamp': '2025-09-10 02:37:52.289506', 'step': 10159, 'epoch': 2} {'type': 'loss', 'content': 0.07459305226802826, 'timestamp': '2025-09-10 02:37:52.314378', 'step': 10160, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 128], 'flops': 3797092544000}, 'timestamp': '2025-09-10 02:37:52.347426', 'step': 10160, 'epoch': 2} {'type': 'loss', 'content': 0.048481423407793045, 'timestamp': '2025-09-10 02:37:52.350810', 'step': 10161, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 2847885091968}, 'timestamp': '2025-09-10 02:37:52.380847', 'step': 10161, 'epoch': 2} {'type': 'loss', 'content': 0.08791790157556534, 'timestamp': '2025-09-10 02:37:52.383376', 'step': 10162, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 3322488817984}, 'timestamp': '2025-09-10 02:37:52.416624', 'step': 10162, 'epoch': 2} {'type': 'loss', 'content': 0.18844303488731384, 'timestamp': '2025-09-10 02:37:52.418763', 'step': 10163, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 128], 'flops': 3797092544000}, 'timestamp': '2025-09-10 02:37:52.448845', 'step': 10163, 'epoch': 2} {'type': 'loss', 'content': 0.11570164561271667, 'timestamp': '2025-09-10 02:37:52.472381', 'step': 10164, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 128], 'flops': 3797092544000}, 'timestamp': '2025-09-10 02:37:52.504751', 'step': 10164, 'epoch': 2} {'type': 'loss', 'content': 0.18404991924762726, 'timestamp': '2025-09-10 02:37:52.507687', 'step': 10165, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 3322488817984}, 'timestamp': '2025-09-10 02:37:52.540794', 'step': 10165, 'epoch': 2} {'type': 'loss', 'content': 0.12107322365045547, 'timestamp': '2025-09-10 02:37:52.542999', 'step': 10166, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 3322488817984}, 'timestamp': '2025-09-10 02:37:52.575278', 'step': 10166, 'epoch': 2} {'type': 'loss', 'content': 0.17012466490268707, 'timestamp': '2025-09-10 02:37:52.577772', 'step': 10167, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 80], 'flops': 2373281365952}, 'timestamp': '2025-09-10 02:37:52.608023', 'step': 10167, 'epoch': 2} {'type': 'loss', 'content': 0.11221951991319656, 'timestamp': '2025-09-10 02:37:52.631778', 'step': 10168, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 144], 'flops': 4271696270016}, 'timestamp': '2025-09-10 02:37:52.661676', 'step': 10168, 'epoch': 2} {'type': 'loss', 'content': 0.10810358077287674, 'timestamp': '2025-09-10 02:37:52.664127', 'step': 10169, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 2847885091968}, 'timestamp': '2025-09-10 02:37:52.693876', 'step': 10169, 'epoch': 2} {'type': 'loss', 'content': 0.21120576560497284, 'timestamp': '2025-09-10 02:37:52.696007', 'step': 10170, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 3322488817984}, 'timestamp': '2025-09-10 02:37:52.725641', 'step': 10170, 'epoch': 2} {'type': 'loss', 'content': 0.14012503623962402, 'timestamp': '2025-09-10 02:37:52.728142', 'step': 10171, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 2847885091968}, 'timestamp': '2025-09-10 02:37:52.757097', 'step': 10171, 'epoch': 2} {'type': 'loss', 'content': 0.13203991949558258, 'timestamp': '2025-09-10 02:37:52.780681', 'step': 10172, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 144], 'flops': 4271696270016}, 'timestamp': '2025-09-10 02:37:52.811863', 'step': 10172, 'epoch': 2} {'type': 'loss', 'content': 0.17032934725284576, 'timestamp': '2025-09-10 02:37:52.814866', 'step': 10173, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 128], 'flops': 3797092544000}, 'timestamp': '2025-09-10 02:37:52.844900', 'step': 10173, 'epoch': 2} {'type': 'loss', 'content': 0.05715232342481613, 'timestamp': '2025-09-10 02:37:52.846941', 'step': 10174, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 128], 'flops': 3797092544000}, 'timestamp': '2025-09-10 02:37:52.877849', 'step': 10174, 'epoch': 2} {'type': 'loss', 'content': 0.1459740251302719, 'timestamp': '2025-09-10 02:37:52.880500', 'step': 10175, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 144], 'flops': 4271696270016}, 'timestamp': '2025-09-10 02:37:52.910570', 'step': 10175, 'epoch': 2} {'type': 'loss', 'content': 0.11158350110054016, 'timestamp': '2025-09-10 02:37:52.934330', 'step': 10176, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 3322488817984}, 'timestamp': '2025-09-10 02:37:52.966647', 'step': 10176, 'epoch': 2} {'type': 'loss', 'content': 0.12510451674461365, 'timestamp': '2025-09-10 02:37:52.969707', 'step': 10177, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 2847885091968}, 'timestamp': '2025-09-10 02:37:53.003956', 'step': 10177, 'epoch': 2} {'type': 'loss', 'content': 0.1514432281255722, 'timestamp': '2025-09-10 02:37:53.006720', 'step': 10178, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 128], 'flops': 3797092544000}, 'timestamp': '2025-09-10 02:37:53.038576', 'step': 10178, 'epoch': 2} {'type': 'loss', 'content': 0.057289935648441315, 'timestamp': '2025-09-10 02:37:53.044101', 'step': 10179, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 3322488817984}, 'timestamp': '2025-09-10 02:37:53.078717', 'step': 10179, 'epoch': 2} {'type': 'loss', 'content': 0.2442038655281067, 'timestamp': '2025-09-10 02:37:53.103297', 'step': 10180, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 160], 'flops': 4746299996032}, 'timestamp': '2025-09-10 02:37:53.135736', 'step': 10180, 'epoch': 2} {'type': 'loss', 'content': 0.14941401779651642, 'timestamp': '2025-09-10 02:37:53.137984', 'step': 10181, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 3322488817984}, 'timestamp': '2025-09-10 02:37:53.168349', 'step': 10181, 'epoch': 2} {'type': 'loss', 'content': 0.12661728262901306, 'timestamp': '2025-09-10 02:37:53.170422', 'step': 10182, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 128], 'flops': 3797092544000}, 'timestamp': '2025-09-10 02:37:53.200581', 'step': 10182, 'epoch': 2} {'type': 'loss', 'content': 0.13638074696063995, 'timestamp': '2025-09-10 02:37:53.202821', 'step': 10183, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 2847885091968}, 'timestamp': '2025-09-10 02:37:53.232642', 'step': 10183, 'epoch': 2} {'type': 'loss', 'content': 0.08703190088272095, 'timestamp': '2025-09-10 02:37:53.255948', 'step': 10184, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 2847885091968}, 'timestamp': '2025-09-10 02:37:53.286979', 'step': 10184, 'epoch': 2} {'type': 'loss', 'content': 0.18694022297859192, 'timestamp': '2025-09-10 02:37:53.288995', 'step': 10185, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 3322488817984}, 'timestamp': '2025-09-10 02:37:53.318206', 'step': 10185, 'epoch': 2} {'type': 'loss', 'content': 0.07801104336977005, 'timestamp': '2025-09-10 02:37:53.320487', 'step': 10186, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 128], 'flops': 3797092544000}, 'timestamp': '2025-09-10 02:37:53.350634', 'step': 10186, 'epoch': 2} {'type': 'loss', 'content': 0.1467355638742447, 'timestamp': '2025-09-10 02:37:53.352848', 'step': 10187, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 3322488817984}, 'timestamp': '2025-09-10 02:37:53.384251', 'step': 10187, 'epoch': 2} {'type': 'loss', 'content': 0.13762862980365753, 'timestamp': '2025-09-10 02:37:53.407687', 'step': 10188, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 2847885091968}, 'timestamp': '2025-09-10 02:37:53.437823', 'step': 10188, 'epoch': 2} {'type': 'loss', 'content': 0.09485220164060593, 'timestamp': '2025-09-10 02:37:53.440018', 'step': 10189, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 3322488817984}, 'timestamp': '2025-09-10 02:37:53.469565', 'step': 10189, 'epoch': 2} {'type': 'loss', 'content': 0.0789148136973381, 'timestamp': '2025-09-10 02:37:53.471946', 'step': 10190, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 3322488817984}, 'timestamp': '2025-09-10 02:37:53.501684', 'step': 10190, 'epoch': 2} {'type': 'loss', 'content': 0.26625770330429077, 'timestamp': '2025-09-10 02:37:53.503781', 'step': 10191, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 3322488817984}, 'timestamp': '2025-09-10 02:37:53.533487', 'step': 10191, 'epoch': 2} {'type': 'loss', 'content': 0.0972108319401741, 'timestamp': '2025-09-10 02:37:53.558146', 'step': 10192, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 144], 'flops': 4271696270016}, 'timestamp': '2025-09-10 02:37:53.589186', 'step': 10192, 'epoch': 2} {'type': 'loss', 'content': 0.09817919135093689, 'timestamp': '2025-09-10 02:37:53.591123', 'step': 10193, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 128], 'flops': 3797092544000}, 'timestamp': '2025-09-10 02:37:53.621369', 'step': 10193, 'epoch': 2} {'type': 'loss', 'content': 0.13279765844345093, 'timestamp': '2025-09-10 02:37:53.624230', 'step': 10194, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 3322488817984}, 'timestamp': '2025-09-10 02:37:53.654760', 'step': 10194, 'epoch': 2} {'type': 'loss', 'content': 0.1591004729270935, 'timestamp': '2025-09-10 02:37:53.656984', 'step': 10195, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 128], 'flops': 3797092544000}, 'timestamp': '2025-09-10 02:37:53.688240', 'step': 10195, 'epoch': 2} {'type': 'loss', 'content': 0.16601645946502686, 'timestamp': '2025-09-10 02:37:53.711901', 'step': 10196, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 160], 'flops': 4746299996032}, 'timestamp': '2025-09-10 02:37:53.742405', 'step': 10196, 'epoch': 2} {'type': 'loss', 'content': 0.05237995460629463, 'timestamp': '2025-09-10 02:37:53.744766', 'step': 10197, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 128], 'flops': 3797092544000}, 'timestamp': '2025-09-10 02:37:53.774753', 'step': 10197, 'epoch': 2} {'type': 'loss', 'content': 0.14735715091228485, 'timestamp': '2025-09-10 02:37:53.778424', 'step': 10198, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 128], 'flops': 3797092544000}, 'timestamp': '2025-09-10 02:37:53.808653', 'step': 10198, 'epoch': 2} {'type': 'loss', 'content': 0.08179470151662827, 'timestamp': '2025-09-10 02:37:53.811272', 'step': 10199, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 3322488817984}, 'timestamp': '2025-09-10 02:37:53.842280', 'step': 10199, 'epoch': 2} {'type': 'loss', 'content': 0.1494063287973404, 'timestamp': '2025-09-10 02:37:53.865875', 'step': 10200, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 144], 'flops': 4271696270016}, 'timestamp': '2025-09-10 02:37:53.896280', 'step': 10200, 'epoch': 2} {'type': 'loss', 'content': 0.1557106375694275, 'timestamp': '2025-09-10 02:37:53.898496', 'step': 10201, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 128], 'flops': 3797092544000}, 'timestamp': '2025-09-10 02:37:53.928431', 'step': 10201, 'epoch': 2} {'type': 'loss', 'content': 0.06552287936210632, 'timestamp': '2025-09-10 02:37:53.930713', 'step': 10202, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 3322488817984}, 'timestamp': '2025-09-10 02:37:53.960825', 'step': 10202, 'epoch': 2} {'type': 'loss', 'content': 0.11155761778354645, 'timestamp': '2025-09-10 02:37:53.964070', 'step': 10203, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 80], 'flops': 2373281365952}, 'timestamp': '2025-09-10 02:37:53.993602', 'step': 10203, 'epoch': 2} {'type': 'loss', 'content': 0.08009286224842072, 'timestamp': '2025-09-10 02:37:54.016876', 'step': 10204, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 128], 'flops': 3797092544000}, 'timestamp': '2025-09-10 02:37:54.047634', 'step': 10204, 'epoch': 2} {'type': 'loss', 'content': 0.13846923410892487, 'timestamp': '2025-09-10 02:37:54.049772', 'step': 10205, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 2847885091968}, 'timestamp': '2025-09-10 02:37:54.079129', 'step': 10205, 'epoch': 2} {'type': 'loss', 'content': 0.0694720521569252, 'timestamp': '2025-09-10 02:37:54.081520', 'step': 10206, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 144], 'flops': 4271696270016}, 'timestamp': '2025-09-10 02:37:54.111709', 'step': 10206, 'epoch': 2} {'type': 'loss', 'content': 0.0933488979935646, 'timestamp': '2025-09-10 02:37:54.114148', 'step': 10207, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 2847885091968}, 'timestamp': '2025-09-10 02:37:54.145305', 'step': 10207, 'epoch': 2} {'type': 'loss', 'content': 0.1127547100186348, 'timestamp': '2025-09-10 02:37:54.168754', 'step': 10208, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 208], 'flops': 6170111174080}, 'timestamp': '2025-09-10 02:37:54.198922', 'step': 10208, 'epoch': 2} {'type': 'loss', 'content': 0.11396604776382446, 'timestamp': '2025-09-10 02:37:54.203649', 'step': 10209, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 2847885091968}, 'timestamp': '2025-09-10 02:37:54.233671', 'step': 10209, 'epoch': 2} {'type': 'loss', 'content': 0.10325267165899277, 'timestamp': '2025-09-10 02:37:54.236237', 'step': 10210, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 3322488817984}, 'timestamp': '2025-09-10 02:37:54.266180', 'step': 10210, 'epoch': 2} {'type': 'loss', 'content': 0.0898650586605072, 'timestamp': '2025-09-10 02:37:54.268673', 'step': 10211, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 144], 'flops': 4271696270016}, 'timestamp': '2025-09-10 02:37:54.298710', 'step': 10211, 'epoch': 2} {'type': 'loss', 'content': 0.07101092487573624, 'timestamp': '2025-09-10 02:37:54.321860', 'step': 10212, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 80], 'flops': 2373281365952}, 'timestamp': '2025-09-10 02:37:54.351660', 'step': 10212, 'epoch': 2} {'type': 'loss', 'content': 0.06043264642357826, 'timestamp': '2025-09-10 02:37:54.353699', 'step': 10213, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 3322488817984}, 'timestamp': '2025-09-10 02:37:54.383906', 'step': 10213, 'epoch': 2} {'type': 'loss', 'content': 0.19749853014945984, 'timestamp': '2025-09-10 02:37:54.386342', 'step': 10214, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 128], 'flops': 3797092544000}, 'timestamp': '2025-09-10 02:37:54.415771', 'step': 10214, 'epoch': 2} {'type': 'loss', 'content': 0.15775088965892792, 'timestamp': '2025-09-10 02:37:54.418083', 'step': 10215, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 2847885091968}, 'timestamp': '2025-09-10 02:37:54.447584', 'step': 10215, 'epoch': 2} {'type': 'loss', 'content': 0.08679604530334473, 'timestamp': '2025-09-10 02:37:54.471233', 'step': 10216, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 128], 'flops': 3797092544000}, 'timestamp': '2025-09-10 02:37:54.501967', 'step': 10216, 'epoch': 2} {'type': 'loss', 'content': 0.1118827536702156, 'timestamp': '2025-09-10 02:37:54.504142', 'step': 10217, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 3322488817984}, 'timestamp': '2025-09-10 02:37:54.532804', 'step': 10217, 'epoch': 2} {'type': 'loss', 'content': 0.16483406722545624, 'timestamp': '2025-09-10 02:37:54.541093', 'step': 10218, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 144], 'flops': 4271696270016}, 'timestamp': '2025-09-10 02:37:54.572553', 'step': 10218, 'epoch': 2} {'type': 'loss', 'content': 0.1106887012720108, 'timestamp': '2025-09-10 02:37:54.575273', 'step': 10219, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 144], 'flops': 4271696270016}, 'timestamp': '2025-09-10 02:37:54.606086', 'step': 10219, 'epoch': 2} {'type': 'loss', 'content': 0.05164306238293648, 'timestamp': '2025-09-10 02:37:54.629272', 'step': 10220, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 2847885091968}, 'timestamp': '2025-09-10 02:37:54.659196', 'step': 10220, 'epoch': 2} {'type': 'loss', 'content': 0.13548725843429565, 'timestamp': '2025-09-10 02:37:54.661528', 'step': 10221, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 3322488817984}, 'timestamp': '2025-09-10 02:37:54.691092', 'step': 10221, 'epoch': 2} {'type': 'loss', 'content': 0.08061617612838745, 'timestamp': '2025-09-10 02:37:54.693121', 'step': 10222, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 80], 'flops': 2373281365952}, 'timestamp': '2025-09-10 02:37:54.722034', 'step': 10222, 'epoch': 2} {'type': 'loss', 'content': 0.08501455187797546, 'timestamp': '2025-09-10 02:37:54.724265', 'step': 10223, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 128], 'flops': 3797092544000}, 'timestamp': '2025-09-10 02:37:54.754288', 'step': 10223, 'epoch': 2} {'type': 'loss', 'content': 0.13933023810386658, 'timestamp': '2025-09-10 02:37:54.777664', 'step': 10224, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 3322488817984}, 'timestamp': '2025-09-10 02:37:54.813674', 'step': 10224, 'epoch': 2} {'type': 'loss', 'content': 0.13908132910728455, 'timestamp': '2025-09-10 02:37:54.816706', 'step': 10225, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 160], 'flops': 4746299996032}, 'timestamp': '2025-09-10 02:37:54.848718', 'step': 10225, 'epoch': 2} {'type': 'loss', 'content': 0.16168265044689178, 'timestamp': '2025-09-10 02:37:54.850914', 'step': 10226, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 160], 'flops': 4746299996032}, 'timestamp': '2025-09-10 02:37:54.880473', 'step': 10226, 'epoch': 2} {'type': 'loss', 'content': 0.1414187103509903, 'timestamp': '2025-09-10 02:37:54.883258', 'step': 10227, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 3322488817984}, 'timestamp': '2025-09-10 02:37:54.913477', 'step': 10227, 'epoch': 2} {'type': 'loss', 'content': 0.07739084213972092, 'timestamp': '2025-09-10 02:37:54.937145', 'step': 10228, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 3322488817984}, 'timestamp': '2025-09-10 02:37:54.967364', 'step': 10228, 'epoch': 2} {'type': 'loss', 'content': 0.12787030637264252, 'timestamp': '2025-09-10 02:37:54.969656', 'step': 10229, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 2847885091968}, 'timestamp': '2025-09-10 02:37:55.000425', 'step': 10229, 'epoch': 2} {'type': 'loss', 'content': 0.16663570702075958, 'timestamp': '2025-09-10 02:37:55.002265', 'step': 10230, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 3322488817984}, 'timestamp': '2025-09-10 02:37:55.031749', 'step': 10230, 'epoch': 2} {'type': 'loss', 'content': 0.08014454692602158, 'timestamp': '2025-09-10 02:37:55.033949', 'step': 10231, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 3322488817984}, 'timestamp': '2025-09-10 02:37:55.062950', 'step': 10231, 'epoch': 2} {'type': 'loss', 'content': 0.18987472355365753, 'timestamp': '2025-09-10 02:37:55.086314', 'step': 10232, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 128], 'flops': 3797092544000}, 'timestamp': '2025-09-10 02:37:55.122249', 'step': 10232, 'epoch': 2} {'type': 'loss', 'content': 0.0543396957218647, 'timestamp': '2025-09-10 02:37:55.125351', 'step': 10233, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 2847885091968}, 'timestamp': '2025-09-10 02:37:55.156929', 'step': 10233, 'epoch': 2} {'type': 'loss', 'content': 0.17026309669017792, 'timestamp': '2025-09-10 02:37:55.159002', 'step': 10234, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 160], 'flops': 4746299996032}, 'timestamp': '2025-09-10 02:37:55.187742', 'step': 10234, 'epoch': 2} {'type': 'loss', 'content': 0.11663138121366501, 'timestamp': '2025-09-10 02:37:55.190578', 'step': 10235, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 2847885091968}, 'timestamp': '2025-09-10 02:37:55.220711', 'step': 10235, 'epoch': 2} {'type': 'loss', 'content': 0.07802949100732803, 'timestamp': '2025-09-10 02:37:55.244640', 'step': 10236, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 144], 'flops': 4271696270016}, 'timestamp': '2025-09-10 02:37:55.275213', 'step': 10236, 'epoch': 2} {'type': 'loss', 'content': 0.14509569108486176, 'timestamp': '2025-09-10 02:37:55.281743', 'step': 10237, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 3322488817984}, 'timestamp': '2025-09-10 02:37:55.315188', 'step': 10237, 'epoch': 2} {'type': 'loss', 'content': 0.11761777848005295, 'timestamp': '2025-09-10 02:37:55.317647', 'step': 10238, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 3322488817984}, 'timestamp': '2025-09-10 02:37:55.347336', 'step': 10238, 'epoch': 2} {'type': 'loss', 'content': 0.0724129006266594, 'timestamp': '2025-09-10 02:37:55.350911', 'step': 10239, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 3322488817984}, 'timestamp': '2025-09-10 02:37:55.384036', 'step': 10239, 'epoch': 2} {'type': 'loss', 'content': 0.16677328944206238, 'timestamp': '2025-09-10 02:37:55.408593', 'step': 10240, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 2847885091968}, 'timestamp': '2025-09-10 02:37:55.440437', 'step': 10240, 'epoch': 2} {'type': 'loss', 'content': 0.08598494529724121, 'timestamp': '2025-09-10 02:37:55.442527', 'step': 10241, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 2847885091968}, 'timestamp': '2025-09-10 02:37:55.472185', 'step': 10241, 'epoch': 2} {'type': 'loss', 'content': 0.1458459347486496, 'timestamp': '2025-09-10 02:37:55.474250', 'step': 10242, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 2847885091968}, 'timestamp': '2025-09-10 02:37:55.515435', 'step': 10242, 'epoch': 2} {'type': 'loss', 'content': 0.1551988124847412, 'timestamp': '2025-09-10 02:37:55.517592', 'step': 10243, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 2847885091968}, 'timestamp': '2025-09-10 02:37:55.547958', 'step': 10243, 'epoch': 2} {'type': 'loss', 'content': 0.14143849909305573, 'timestamp': '2025-09-10 02:37:55.571859', 'step': 10244, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 3322488817984}, 'timestamp': '2025-09-10 02:37:55.601656', 'step': 10244, 'epoch': 2} {'type': 'loss', 'content': 0.05072515457868576, 'timestamp': '2025-09-10 02:37:55.604255', 'step': 10245, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 3322488817984}, 'timestamp': '2025-09-10 02:37:55.639185', 'step': 10245, 'epoch': 2} {'type': 'loss', 'content': 0.1914493441581726, 'timestamp': '2025-09-10 02:37:55.641475', 'step': 10246, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 3322488817984}, 'timestamp': '2025-09-10 02:37:55.670780', 'step': 10246, 'epoch': 2} {'type': 'loss', 'content': 0.11176837980747223, 'timestamp': '2025-09-10 02:37:55.673068', 'step': 10247, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 3322488817984}, 'timestamp': '2025-09-10 02:37:55.701938', 'step': 10247, 'epoch': 2} {'type': 'loss', 'content': 0.08520820736885071, 'timestamp': '2025-09-10 02:37:55.725533', 'step': 10248, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 144], 'flops': 4271696270016}, 'timestamp': '2025-09-10 02:37:55.755480', 'step': 10248, 'epoch': 2} {'type': 'loss', 'content': 0.09032727777957916, 'timestamp': '2025-09-10 02:37:55.757599', 'step': 10249, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 3322488817984}, 'timestamp': '2025-09-10 02:37:55.787407', 'step': 10249, 'epoch': 2} {'type': 'loss', 'content': 0.15207995474338531, 'timestamp': '2025-09-10 02:37:55.789341', 'step': 10250, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 3322488817984}, 'timestamp': '2025-09-10 02:37:55.820528', 'step': 10250, 'epoch': 2} {'type': 'loss', 'content': 0.08409188687801361, 'timestamp': '2025-09-10 02:37:55.823366', 'step': 10251, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 144], 'flops': 4271696270016}, 'timestamp': '2025-09-10 02:37:55.852024', 'step': 10251, 'epoch': 2} {'type': 'loss', 'content': 0.2772921621799469, 'timestamp': '2025-09-10 02:37:55.875387', 'step': 10252, 'epoch': 2} {'type': 'flops', 'content': [{'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1582003754624}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1898404492032}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1898404492032}, {'type': 'perplexity', 'in_batch_dim': [8, 144], 'batch_size': 8, 'flops': 2847606704256}, {'type': 'perplexity', 'in_batch_dim': [8, 64], 'batch_size': 8, 'flops': 1265603017216}, {'type': 'perplexity', 'in_batch_dim': [8, 112], 'batch_size': 8, 'flops': 2214805229440}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1898404492032}, {'type': 'perplexity', 'in_batch_dim': [8, 112], 'batch_size': 8, 'flops': 2214805229440}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1898404492032}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1898404492032}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1898404492032}, {'type': 'perplexity', 'in_batch_dim': [8, 112], 'batch_size': 8, 'flops': 2214805229440}, {'type': 'perplexity', 'in_batch_dim': [8, 112], 'batch_size': 8, 'flops': 2214805229440}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1582003754624}, {'type': 'perplexity', 'in_batch_dim': [8, 144], 'batch_size': 8, 'flops': 2847606704256}, {'type': 'perplexity', 'in_batch_dim': [8, 112], 'batch_size': 8, 'flops': 2214805229440}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1582003754624}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1898404492032}, {'type': 'perplexity', 'in_batch_dim': [8, 64], 'batch_size': 8, 'flops': 1265603017216}, {'type': 'perplexity', 'in_batch_dim': [8, 64], 'batch_size': 8, 'flops': 1265603017216}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1898404492032}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1582003754624}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1582003754624}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1582003754624}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1582003754624}, {'type': 'perplexity', 'in_batch_dim': [8, 64], 'batch_size': 8, 'flops': 1265603017216}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1898404492032}, {'type': 'perplexity', 'in_batch_dim': [8, 64], 'batch_size': 8, 'flops': 1265603017216}, {'type': 'perplexity', 'in_batch_dim': [8, 64], 'batch_size': 8, 'flops': 1265603017216}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1582003754624}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1582003754624}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1582003754624}, {'type': 'perplexity', 'in_batch_dim': [8, 112], 'batch_size': 8, 'flops': 2214805229440}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1582003754624}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1582003754624}, {'type': 'perplexity', 'in_batch_dim': [8, 160], 'batch_size': 8, 'flops': 3164007441664}, {'type': 'perplexity', 'in_batch_dim': [8, 64], 'batch_size': 8, 'flops': 1265603017216}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1898404492032}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1898404492032}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1898404492032}, {'type': 'perplexity', 'in_batch_dim': [8, 64], 'batch_size': 8, 'flops': 1265603017216}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1582003754624}, {'type': 'perplexity', 'in_batch_dim': [8, 64], 'batch_size': 8, 'flops': 1265603017216}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1582003754624}, {'type': 'perplexity', 'in_batch_dim': [8, 64], 'batch_size': 8, 'flops': 1265603017216}, {'type': 'perplexity', 'in_batch_dim': [8, 112], 'batch_size': 8, 'flops': 2214805229440}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1582003754624}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1898404492032}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1898404492032}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1898404492032}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1898404492032}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1898404492032}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1898404492032}, {'type': 'perplexity', 'in_batch_dim': [8, 64], 'batch_size': 8, 'flops': 1265603017216}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1898404492032}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1582003754624}, {'type': 'perplexity', 'in_batch_dim': [8, 64], 'batch_size': 8, 'flops': 1265603017216}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1898404492032}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1898404492032}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1898404492032}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1898404492032}, {'type': 'perplexity', 'in_batch_dim': [8, 128], 'batch_size': 8, 'flops': 2531205966848}, {'type': 'perplexity', 'in_batch_dim': [8, 112], 'batch_size': 8, 'flops': 2214805229440}, {'type': 'perplexity', 'in_batch_dim': [8, 64], 'batch_size': 8, 'flops': 1265603017216}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1582003754624}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1898404492032}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1582003754624}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1898404492032}, {'type': 'perplexity', 'in_batch_dim': [8, 64], 'batch_size': 8, 'flops': 1265603017216}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1898404492032}, {'type': 'perplexity', 'in_batch_dim': [8, 112], 'batch_size': 8, 'flops': 2214805229440}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1898404492032}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1582003754624}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1582003754624}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1582003754624}, {'type': 'perplexity', 'in_batch_dim': [8, 64], 'batch_size': 8, 'flops': 1265603017216}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1582003754624}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1898404492032}, {'type': 'perplexity', 'in_batch_dim': [8, 64], 'batch_size': 8, 'flops': 1265603017216}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1582003754624}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1898404492032}, {'type': 'perplexity', 'in_batch_dim': [8, 64], 'batch_size': 8, 'flops': 1265603017216}, {'type': 'perplexity', 'in_batch_dim': [8, 112], 'batch_size': 8, 'flops': 2214805229440}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1582003754624}, {'type': 'perplexity', 'in_batch_dim': [8, 144], 'batch_size': 8, 'flops': 2847606704256}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1582003754624}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1582003754624}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1582003754624}, {'type': 'perplexity', 'in_batch_dim': [8, 64], 'batch_size': 8, 'flops': 1265603017216}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1898404492032}, {'type': 'perplexity', 'in_batch_dim': [8, 112], 'batch_size': 8, 'flops': 2214805229440}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1898404492032}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1582003754624}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1582003754624}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1582003754624}, {'type': 'perplexity', 'in_batch_dim': [8, 112], 'batch_size': 8, 'flops': 2214805229440}, {'type': 'perplexity', 'in_batch_dim': [8, 64], 'batch_size': 8, 'flops': 1265603017216}, {'type': 'perplexity', 'in_batch_dim': [8, 112], 'batch_size': 8, 'flops': 2214805229440}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1582003754624}, {'type': 'perplexity', 'in_batch_dim': [8, 64], 'batch_size': 8, 'flops': 1265603017216}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1582003754624}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1582003754624}, {'type': 'perplexity', 'in_batch_dim': [8, 64], 'batch_size': 8, 'flops': 1265603017216}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1582003754624}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1582003754624}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1582003754624}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1898404492032}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1582003754624}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1582003754624}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1898404492032}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1898404492032}, {'type': 'perplexity', 'in_batch_dim': [8, 112], 'batch_size': 8, 'flops': 2214805229440}, {'type': 'perplexity', 'in_batch_dim': [8, 64], 'batch_size': 8, 'flops': 1265603017216}, {'type': 'perplexity', 'in_batch_dim': [8, 112], 'batch_size': 8, 'flops': 2214805229440}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1898404492032}, {'type': 'perplexity', 'in_batch_dim': [8, 112], 'batch_size': 8, 'flops': 2214805229440}, {'type': 'perplexity', 'in_batch_dim': [8, 128], 'batch_size': 8, 'flops': 2531205966848}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1582003754624}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1898404492032}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1898404492032}, {'type': 'perplexity', 'in_batch_dim': [8, 192], 'batch_size': 8, 'flops': 3796808916480}, {'type': 'perplexity', 'in_batch_dim': [8, 64], 'batch_size': 8, 'flops': 1265603017216}, {'type': 'perplexity', 'in_batch_dim': [8, 144], 'batch_size': 8, 'flops': 2847606704256}, {'type': 'perplexity', 'in_batch_dim': [8, 64], 'batch_size': 8, 'flops': 1265603017216}, {'type': 'perplexity', 'in_batch_dim': [8, 144], 'batch_size': 8, 'flops': 2847606704256}, {'type': 'perplexity', 'in_batch_dim': [8, 64], 'batch_size': 8, 'flops': 1265603017216}, {'type': 'perplexity', 'in_batch_dim': [8, 64], 'batch_size': 8, 'flops': 1265603017216}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1898404492032}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1898404492032}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1582003754624}, {'type': 'perplexity', 'in_batch_dim': [8, 128], 'batch_size': 8, 'flops': 2531205966848}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1898404492032}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1898404492032}, {'type': 'perplexity', 'in_batch_dim': [8, 112], 'batch_size': 8, 'flops': 2214805229440}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1582003754624}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1582003754624}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1898404492032}, {'type': 'perplexity', 'in_batch_dim': [8, 64], 'batch_size': 8, 'flops': 1265603017216}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1898404492032}, {'type': 'perplexity', 'in_batch_dim': [8, 112], 'batch_size': 8, 'flops': 2214805229440}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1898404492032}, {'type': 'perplexity', 'in_batch_dim': [8, 64], 'batch_size': 8, 'flops': 1265603017216}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1582003754624}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1898404492032}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1898404492032}, {'type': 'perplexity', 'in_batch_dim': [8, 64], 'batch_size': 8, 'flops': 1265603017216}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1898404492032}, {'type': 'perplexity', 'in_batch_dim': [8, 64], 'batch_size': 8, 'flops': 1265603017216}, {'type': 'perplexity', 'in_batch_dim': [8, 112], 'batch_size': 8, 'flops': 2214805229440}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1582003754624}, {'type': 'perplexity', 'in_batch_dim': [8, 128], 'batch_size': 8, 'flops': 2531205966848}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1582003754624}, {'type': 'perplexity', 'in_batch_dim': [8, 112], 'batch_size': 8, 'flops': 2214805229440}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1898404492032}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1582003754624}, {'type': 'perplexity', 'in_batch_dim': [8, 112], 'batch_size': 8, 'flops': 2214805229440}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1582003754624}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1898404492032}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1898404492032}, {'type': 'perplexity', 'in_batch_dim': [8, 128], 'batch_size': 8, 'flops': 2531205966848}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1898404492032}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1898404492032}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1582003754624}, {'type': 'perplexity', 'in_batch_dim': [8, 112], 'batch_size': 8, 'flops': 2214805229440}, {'type': 'perplexity', 'in_batch_dim': [8, 128], 'batch_size': 8, 'flops': 2531205966848}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1898404492032}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1582003754624}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1582003754624}, {'type': 'perplexity', 'in_batch_dim': [8, 128], 'batch_size': 8, 'flops': 2531205966848}, {'type': 'perplexity', 'in_batch_dim': [8, 112], 'batch_size': 8, 'flops': 2214805229440}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1582003754624}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1582003754624}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1898404492032}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1582003754624}, {'type': 'perplexity', 'in_batch_dim': [8, 64], 'batch_size': 8, 'flops': 1265603017216}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1582003754624}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1582003754624}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1582003754624}, {'type': 'perplexity', 'in_batch_dim': [8, 112], 'batch_size': 8, 'flops': 2214805229440}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1582003754624}, {'type': 'perplexity', 'in_batch_dim': [8, 64], 'batch_size': 8, 'flops': 1265603017216}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1582003754624}, {'type': 'perplexity', 'in_batch_dim': [8, 128], 'batch_size': 8, 'flops': 2531205966848}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1898404492032}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1898404492032}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1582003754624}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1898404492032}, {'type': 'perplexity', 'in_batch_dim': [8, 64], 'batch_size': 8, 'flops': 1265603017216}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1898404492032}, {'type': 'perplexity', 'in_batch_dim': [8, 112], 'batch_size': 8, 'flops': 2214805229440}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1898404492032}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1582003754624}, {'type': 'perplexity', 'in_batch_dim': [8, 112], 'batch_size': 8, 'flops': 2214805229440}, {'type': 'perplexity', 'in_batch_dim': [8, 112], 'batch_size': 8, 'flops': 2214805229440}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1898404492032}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1898404492032}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1898404492032}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1898404492032}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1582003754624}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1582003754624}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1582003754624}, {'type': 'perplexity', 'in_batch_dim': [8, 112], 'batch_size': 8, 'flops': 2214805229440}, {'type': 'perplexity', 'in_batch_dim': [8, 64], 'batch_size': 8, 'flops': 1265603017216}, {'type': 'perplexity', 'in_batch_dim': [8, 144], 'batch_size': 8, 'flops': 2847606704256}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1898404492032}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1898404492032}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1898404492032}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1898404492032}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1898404492032}, {'type': 'perplexity', 'in_batch_dim': [8, 112], 'batch_size': 8, 'flops': 2214805229440}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1582003754624}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1582003754624}, {'type': 'perplexity', 'in_batch_dim': [8, 64], 'batch_size': 8, 'flops': 1265603017216}, {'type': 'perplexity', 'in_batch_dim': [8, 112], 'batch_size': 8, 'flops': 2214805229440}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1582003754624}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1898404492032}, {'type': 'perplexity', 'in_batch_dim': [8, 64], 'batch_size': 8, 'flops': 1265603017216}, {'type': 'perplexity', 'in_batch_dim': [8, 112], 'batch_size': 8, 'flops': 2214805229440}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1898404492032}, {'type': 'perplexity', 'in_batch_dim': [8, 64], 'batch_size': 8, 'flops': 1265603017216}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1582003754624}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1582003754624}, {'type': 'perplexity', 'in_batch_dim': [8, 64], 'batch_size': 8, 'flops': 1265603017216}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1898404492032}, {'type': 'perplexity', 'in_batch_dim': [8, 64], 'batch_size': 8, 'flops': 1265603017216}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1582003754624}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1898404492032}, {'type': 'perplexity', 'in_batch_dim': [8, 112], 'batch_size': 8, 'flops': 2214805229440}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1582003754624}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1898404492032}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1582003754624}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1898404492032}, {'type': 'perplexity', 'in_batch_dim': [8, 112], 'batch_size': 8, 'flops': 2214805229440}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1898404492032}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1898404492032}, {'type': 'perplexity', 'in_batch_dim': [8, 64], 'batch_size': 8, 'flops': 1265603017216}, {'type': 'perplexity', 'in_batch_dim': [8, 64], 'batch_size': 8, 'flops': 1265603017216}, {'type': 'perplexity', 'in_batch_dim': [8, 128], 'batch_size': 8, 'flops': 2531205966848}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1582003754624}, {'type': 'perplexity', 'in_batch_dim': [8, 64], 'batch_size': 8, 'flops': 1265603017216}, {'type': 'perplexity', 'in_batch_dim': [8, 112], 'batch_size': 8, 'flops': 2214805229440}, {'type': 'perplexity', 'in_batch_dim': [8, 112], 'batch_size': 8, 'flops': 2214805229440}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1898404492032}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1582003754624}, {'type': 'perplexity', 'in_batch_dim': [8, 128], 'batch_size': 8, 'flops': 2531205966848}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1898404492032}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1582003754624}, {'type': 'perplexity', 'in_batch_dim': [8, 112], 'batch_size': 8, 'flops': 2214805229440}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1898404492032}, {'type': 'perplexity', 'in_batch_dim': [8, 64], 'batch_size': 8, 'flops': 1265603017216}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1582003754624}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1898404492032}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1898404492032}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1582003754624}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1582003754624}, {'type': 'perplexity', 'in_batch_dim': [8, 64], 'batch_size': 8, 'flops': 1265603017216}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1898404492032}, {'type': 'perplexity', 'in_batch_dim': [8, 112], 'batch_size': 8, 'flops': 2214805229440}, {'type': 'perplexity', 'in_batch_dim': [8, 64], 'batch_size': 8, 'flops': 1265603017216}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1898404492032}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1582003754624}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1582003754624}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1898404492032}, {'type': 'perplexity', 'in_batch_dim': [8, 112], 'batch_size': 8, 'flops': 2214805229440}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1898404492032}, {'type': 'perplexity', 'in_batch_dim': [8, 112], 'batch_size': 8, 'flops': 2214805229440}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1582003754624}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1582003754624}, {'type': 'perplexity', 'in_batch_dim': [8, 64], 'batch_size': 8, 'flops': 1265603017216}, {'type': 'perplexity', 'in_batch_dim': [8, 112], 'batch_size': 8, 'flops': 2214805229440}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1582003754624}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1898404492032}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1582003754624}, {'type': 'perplexity', 'in_batch_dim': [8, 112], 'batch_size': 8, 'flops': 2214805229440}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1898404492032}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1898404492032}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1582003754624}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1898404492032}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1582003754624}, {'type': 'perplexity', 'in_batch_dim': [8, 112], 'batch_size': 8, 'flops': 2214805229440}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1582003754624}, {'type': 'perplexity', 'in_batch_dim': [8, 64], 'batch_size': 8, 'flops': 1265603017216}, {'type': 'perplexity', 'in_batch_dim': [8, 64], 'batch_size': 8, 'flops': 1265603017216}, {'type': 'perplexity', 'in_batch_dim': [8, 48], 'batch_size': 8, 'flops': 949202279808}, {'type': 'perplexity', 'in_batch_dim': [8, 112], 'batch_size': 8, 'flops': 2214805229440}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1582003754624}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1898404492032}, {'type': 'perplexity', 'in_batch_dim': [8, 128], 'batch_size': 8, 'flops': 2531205966848}, {'type': 'perplexity', 'in_batch_dim': [8, 112], 'batch_size': 8, 'flops': 2214805229440}, {'type': 'perplexity', 'in_batch_dim': [8, 128], 'batch_size': 8, 'flops': 2531205966848}, {'type': 'perplexity', 'in_batch_dim': [8, 112], 'batch_size': 8, 'flops': 2214805229440}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1582003754624}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1898404492032}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1582003754624}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1582003754624}, {'type': 'perplexity', 'in_batch_dim': [8, 112], 'batch_size': 8, 'flops': 2214805229440}, {'type': 'perplexity', 'in_batch_dim': [8, 112], 'batch_size': 8, 'flops': 2214805229440}, {'type': 'perplexity', 'in_batch_dim': [8, 64], 'batch_size': 8, 'flops': 1265603017216}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1898404492032}, {'type': 'perplexity', 'in_batch_dim': [8, 112], 'batch_size': 8, 'flops': 2214805229440}, {'type': 'perplexity', 'in_batch_dim': [8, 64], 'batch_size': 8, 'flops': 1265603017216}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1898404492032}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1582003754624}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1898404492032}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1898404492032}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1898404492032}, {'type': 'perplexity', 'in_batch_dim': [8, 48], 'batch_size': 8, 'flops': 949202279808}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1898404492032}, {'type': 'perplexity', 'in_batch_dim': [8, 112], 'batch_size': 8, 'flops': 2214805229440}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1582003754624}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1898404492032}, {'type': 'perplexity', 'in_batch_dim': [8, 64], 'batch_size': 8, 'flops': 1265603017216}, {'type': 'perplexity', 'in_batch_dim': [8, 112], 'batch_size': 8, 'flops': 2214805229440}, {'type': 'perplexity', 'in_batch_dim': [8, 64], 'batch_size': 8, 'flops': 1265603017216}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1582003754624}, {'type': 'perplexity', 'in_batch_dim': [8, 64], 'batch_size': 8, 'flops': 1265603017216}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1582003754624}, {'type': 'perplexity', 'in_batch_dim': [8, 112], 'batch_size': 8, 'flops': 2214805229440}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1582003754624}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1582003754624}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1582003754624}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1582003754624}, {'type': 'perplexity', 'in_batch_dim': [8, 112], 'batch_size': 8, 'flops': 2214805229440}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1582003754624}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1898404492032}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1898404492032}, {'type': 'perplexity', 'in_batch_dim': [8, 112], 'batch_size': 8, 'flops': 2214805229440}, {'type': 'perplexity', 'in_batch_dim': [8, 144], 'batch_size': 8, 'flops': 2847606704256}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1582003754624}, {'type': 'perplexity', 'in_batch_dim': [8, 64], 'batch_size': 8, 'flops': 1265603017216}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1898404492032}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1898404492032}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1582003754624}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1898404492032}, {'type': 'perplexity', 'in_batch_dim': [8, 128], 'batch_size': 8, 'flops': 2531205966848}, {'type': 'perplexity', 'in_batch_dim': [8, 112], 'batch_size': 8, 'flops': 2214805229440}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1898404492032}, {'type': 'perplexity', 'in_batch_dim': [8, 64], 'batch_size': 8, 'flops': 1265603017216}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1898404492032}, {'type': 'perplexity', 'in_batch_dim': [8, 160], 'batch_size': 8, 'flops': 3164007441664}, {'type': 'perplexity', 'in_batch_dim': [8, 64], 'batch_size': 8, 'flops': 1265603017216}, {'type': 'perplexity', 'in_batch_dim': [8, 112], 'batch_size': 8, 'flops': 2214805229440}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1898404492032}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1898404492032}, {'type': 'perplexity', 'in_batch_dim': [8, 144], 'batch_size': 8, 'flops': 2847606704256}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1582003754624}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1898404492032}, {'type': 'perplexity', 'in_batch_dim': [8, 64], 'batch_size': 8, 'flops': 1265603017216}, {'type': 'perplexity', 'in_batch_dim': [8, 64], 'batch_size': 8, 'flops': 1265603017216}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1898404492032}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1898404492032}, {'type': 'perplexity', 'in_batch_dim': [8, 112], 'batch_size': 8, 'flops': 2214805229440}, {'type': 'perplexity', 'in_batch_dim': [8, 112], 'batch_size': 8, 'flops': 2214805229440}, {'type': 'perplexity', 'in_batch_dim': [8, 128], 'batch_size': 8, 'flops': 2531205966848}, {'type': 'perplexity', 'in_batch_dim': [8, 112], 'batch_size': 8, 'flops': 2214805229440}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1582003754624}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1582003754624}, {'type': 'perplexity', 'in_batch_dim': [8, 112], 'batch_size': 8, 'flops': 2214805229440}, {'type': 'perplexity', 'in_batch_dim': [8, 128], 'batch_size': 8, 'flops': 2531205966848}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1582003754624}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1582003754624}, {'type': 'perplexity', 'in_batch_dim': [8, 64], 'batch_size': 8, 'flops': 1265603017216}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1898404492032}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1582003754624}, {'type': 'perplexity', 'in_batch_dim': [8, 112], 'batch_size': 8, 'flops': 2214805229440}, {'type': 'perplexity', 'in_batch_dim': [8, 64], 'batch_size': 8, 'flops': 1265603017216}, {'type': 'perplexity', 'in_batch_dim': [8, 64], 'batch_size': 8, 'flops': 1265603017216}, {'type': 'perplexity', 'in_batch_dim': [8, 128], 'batch_size': 8, 'flops': 2531205966848}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1898404492032}, {'type': 'perplexity', 'in_batch_dim': [8, 112], 'batch_size': 8, 'flops': 2214805229440}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1582003754624}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1582003754624}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1582003754624}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1582003754624}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1582003754624}, {'type': 'perplexity', 'in_batch_dim': [8, 112], 'batch_size': 8, 'flops': 2214805229440}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1898404492032}, {'type': 'perplexity', 'in_batch_dim': [8, 112], 'batch_size': 8, 'flops': 2214805229440}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1898404492032}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1582003754624}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1582003754624}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1898404492032}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1898404492032}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1582003754624}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1582003754624}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1582003754624}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1582003754624}, {'type': 'perplexity', 'in_batch_dim': [8, 48], 'batch_size': 8, 'flops': 949202279808}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1582003754624}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1898404492032}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1582003754624}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1898404492032}, {'type': 'perplexity', 'in_batch_dim': [8, 112], 'batch_size': 8, 'flops': 2214805229440}, {'type': 'perplexity', 'in_batch_dim': [8, 128], 'batch_size': 8, 'flops': 2531205966848}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1898404492032}, {'type': 'perplexity', 'in_batch_dim': [8, 64], 'batch_size': 8, 'flops': 1265603017216}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1898404492032}, {'type': 'perplexity', 'in_batch_dim': [8, 64], 'batch_size': 8, 'flops': 1265603017216}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1582003754624}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1582003754624}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1582003754624}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1582003754624}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1898404492032}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1582003754624}, {'type': 'perplexity', 'in_batch_dim': [8, 112], 'batch_size': 8, 'flops': 2214805229440}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1898404492032}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1582003754624}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1898404492032}, {'type': 'perplexity', 'in_batch_dim': [8, 64], 'batch_size': 8, 'flops': 1265603017216}, {'type': 'perplexity', 'in_batch_dim': [8, 64], 'batch_size': 8, 'flops': 1265603017216}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1898404492032}, {'type': 'perplexity', 'in_batch_dim': [8, 64], 'batch_size': 8, 'flops': 1265603017216}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1582003754624}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1582003754624}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1582003754624}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1582003754624}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1582003754624}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1582003754624}, {'type': 'perplexity', 'in_batch_dim': [8, 64], 'batch_size': 8, 'flops': 1265603017216}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1582003754624}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1582003754624}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1898404492032}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1898404492032}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1898404492032}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1898404492032}, {'type': 'perplexity', 'in_batch_dim': [8, 128], 'batch_size': 8, 'flops': 2531205966848}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1582003754624}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1898404492032}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1898404492032}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1898404492032}, {'type': 'perplexity', 'in_batch_dim': [8, 64], 'batch_size': 8, 'flops': 1265603017216}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1898404492032}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1898404492032}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1582003754624}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1898404492032}, {'type': 'perplexity', 'in_batch_dim': [8, 112], 'batch_size': 8, 'flops': 2214805229440}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1898404492032}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1582003754624}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1898404492032}, {'type': 'perplexity', 'in_batch_dim': [8, 64], 'batch_size': 8, 'flops': 1265603017216}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1898404492032}, {'type': 'perplexity', 'in_batch_dim': [8, 112], 'batch_size': 8, 'flops': 2214805229440}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1898404492032}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1898404492032}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1582003754624}, {'type': 'perplexity', 'in_batch_dim': [8, 64], 'batch_size': 8, 'flops': 1265603017216}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1582003754624}, {'type': 'perplexity', 'in_batch_dim': [8, 128], 'batch_size': 8, 'flops': 2531205966848}, {'type': 'perplexity', 'in_batch_dim': [8, 64], 'batch_size': 8, 'flops': 1265603017216}, {'type': 'perplexity', 'in_batch_dim': [8, 128], 'batch_size': 8, 'flops': 2531205966848}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1898404492032}, {'type': 'perplexity', 'in_batch_dim': [8, 112], 'batch_size': 8, 'flops': 2214805229440}, {'type': 'perplexity', 'in_batch_dim': [8, 112], 'batch_size': 8, 'flops': 2214805229440}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1898404492032}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1898404492032}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1582003754624}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1582003754624}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1582003754624}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1898404492032}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1582003754624}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1582003754624}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1898404492032}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1582003754624}, {'type': 'perplexity', 'in_batch_dim': [8, 64], 'batch_size': 8, 'flops': 1265603017216}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1898404492032}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1898404492032}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1898404492032}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1898404492032}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1582003754624}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1898404492032}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1582003754624}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1582003754624}, {'type': 'perplexity', 'in_batch_dim': [8, 112], 'batch_size': 8, 'flops': 2214805229440}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1582003754624}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1898404492032}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1582003754624}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1582003754624}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1582003754624}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1898404492032}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1582003754624}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1898404492032}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1582003754624}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1582003754624}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1898404492032}, {'type': 'perplexity', 'in_batch_dim': [8, 112], 'batch_size': 8, 'flops': 2214805229440}, {'type': 'perplexity', 'in_batch_dim': [8, 128], 'batch_size': 8, 'flops': 2531205966848}, {'type': 'perplexity', 'in_batch_dim': [8, 112], 'batch_size': 8, 'flops': 2214805229440}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1582003754624}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1898404492032}, {'type': 'perplexity', 'in_batch_dim': [8, 64], 'batch_size': 8, 'flops': 1265603017216}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1898404492032}, {'type': 'perplexity', 'in_batch_dim': [8, 64], 'batch_size': 8, 'flops': 1265603017216}, {'type': 'perplexity', 'in_batch_dim': [8, 144], 'batch_size': 8, 'flops': 2847606704256}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1582003754624}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1898404492032}, {'type': 'perplexity', 'in_batch_dim': [8, 112], 'batch_size': 8, 'flops': 2214805229440}, {'type': 'perplexity', 'in_batch_dim': [8, 144], 'batch_size': 8, 'flops': 2847606704256}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1582003754624}, {'type': 'perplexity', 'in_batch_dim': [8, 112], 'batch_size': 8, 'flops': 2214805229440}, {'type': 'perplexity', 'in_batch_dim': [8, 64], 'batch_size': 8, 'flops': 1265603017216}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1582003754624}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1898404492032}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1898404492032}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1582003754624}, {'type': 'perplexity', 'in_batch_dim': [8, 112], 'batch_size': 8, 'flops': 2214805229440}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1582003754624}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1582003754624}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1582003754624}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1582003754624}, {'type': 'perplexity', 'in_batch_dim': [8, 112], 'batch_size': 8, 'flops': 2214805229440}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1898404492032}, {'type': 'perplexity', 'in_batch_dim': [8, 128], 'batch_size': 8, 'flops': 2531205966848}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1582003754624}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1582003754624}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1898404492032}, {'type': 'perplexity', 'in_batch_dim': [8, 64], 'batch_size': 8, 'flops': 1265603017216}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1898404492032}, {'type': 'perplexity', 'in_batch_dim': [8, 64], 'batch_size': 8, 'flops': 1265603017216}, {'type': 'perplexity', 'in_batch_dim': [8, 112], 'batch_size': 8, 'flops': 2214805229440}, {'type': 'perplexity', 'in_batch_dim': [8, 112], 'batch_size': 8, 'flops': 2214805229440}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1582003754624}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1898404492032}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1582003754624}, {'type': 'perplexity', 'in_batch_dim': [8, 64], 'batch_size': 8, 'flops': 1265603017216}, {'type': 'perplexity', 'in_batch_dim': [8, 64], 'batch_size': 8, 'flops': 1265603017216}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1898404492032}, {'type': 'perplexity', 'in_batch_dim': [8, 64], 'batch_size': 8, 'flops': 1265603017216}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1898404492032}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1898404492032}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1582003754624}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1898404492032}, {'type': 'perplexity', 'in_batch_dim': [8, 144], 'batch_size': 8, 'flops': 2847606704256}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1898404492032}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1582003754624}, {'type': 'perplexity', 'in_batch_dim': [8, 128], 'batch_size': 8, 'flops': 2531205966848}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1898404492032}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1898404492032}, {'type': 'perplexity', 'in_batch_dim': [8, 112], 'batch_size': 8, 'flops': 2214805229440}, {'type': 'perplexity', 'in_batch_dim': [8, 112], 'batch_size': 8, 'flops': 2214805229440}, {'type': 'perplexity', 'in_batch_dim': [8, 64], 'batch_size': 8, 'flops': 1265603017216}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1582003754624}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1898404492032}, {'type': 'perplexity', 'in_batch_dim': [8, 128], 'batch_size': 8, 'flops': 2531205966848}, {'type': 'perplexity', 'in_batch_dim': [8, 144], 'batch_size': 8, 'flops': 2847606704256}, {'type': 'perplexity', 'in_batch_dim': [8, 112], 'batch_size': 8, 'flops': 2214805229440}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1582003754624}, {'type': 'perplexity', 'in_batch_dim': [8, 64], 'batch_size': 8, 'flops': 1265603017216}, {'type': 'perplexity', 'in_batch_dim': [8, 64], 'batch_size': 8, 'flops': 1265603017216}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1898404492032}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1582003754624}, {'type': 'perplexity', 'in_batch_dim': [8, 64], 'batch_size': 8, 'flops': 1265603017216}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1898404492032}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1582003754624}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1898404492032}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1898404492032}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1582003754624}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1582003754624}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1582003754624}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1582003754624}, {'type': 'perplexity', 'in_batch_dim': [3, 48], 'batch_size': 8, 'flops': 949202279808}], 'timestamp': '2025-09-10 02:38:03.823875', 'step': 10252, 'epoch': 2} {'type': 'pplx', 'content': 13125.243165981052, 'timestamp': '2025-09-10 02:38:03.829197', 'step': 10252, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 128], 'flops': 3797092544000}, 'timestamp': '2025-09-10 02:38:03.863186', 'step': 10252, 'epoch': 2} {'type': 'loss', 'content': 0.12517577409744263, 'timestamp': '2025-09-10 02:38:03.870224', 'step': 10253, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 144], 'flops': 4271696270016}, 'timestamp': '2025-09-10 02:38:03.906874', 'step': 10253, 'epoch': 2} {'type': 'loss', 'content': 0.1255587935447693, 'timestamp': '2025-09-10 02:38:03.909307', 'step': 10254, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 2847885091968}, 'timestamp': '2025-09-10 02:38:03.939691', 'step': 10254, 'epoch': 2} {'type': 'loss', 'content': 0.07639027386903763, 'timestamp': '2025-09-10 02:38:03.942072', 'step': 10255, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 2847885091968}, 'timestamp': '2025-09-10 02:38:03.975825', 'step': 10255, 'epoch': 2} {'type': 'loss', 'content': 0.09676049649715424, 'timestamp': '2025-09-10 02:38:03.999620', 'step': 10256, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 128], 'flops': 3797092544000}, 'timestamp': '2025-09-10 02:38:04.031205', 'step': 10256, 'epoch': 2} {'type': 'loss', 'content': 0.09802374988794327, 'timestamp': '2025-09-10 02:38:04.033262', 'step': 10257, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 176], 'flops': 5220903722048}, 'timestamp': '2025-09-10 02:38:04.063134', 'step': 10257, 'epoch': 2} {'type': 'loss', 'content': 0.13014298677444458, 'timestamp': '2025-09-10 02:38:04.067460', 'step': 10258, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 128], 'flops': 3797092544000}, 'timestamp': '2025-09-10 02:38:04.098302', 'step': 10258, 'epoch': 2} {'type': 'loss', 'content': 0.052452437579631805, 'timestamp': '2025-09-10 02:38:04.101076', 'step': 10259, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 3322488817984}, 'timestamp': '2025-09-10 02:38:04.132596', 'step': 10259, 'epoch': 2} {'type': 'loss', 'content': 0.092806875705719, 'timestamp': '2025-09-10 02:38:04.160427', 'step': 10260, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 80], 'flops': 2373281365952}, 'timestamp': '2025-09-10 02:38:04.193954', 'step': 10260, 'epoch': 2} {'type': 'loss', 'content': 0.05424332246184349, 'timestamp': '2025-09-10 02:38:04.199996', 'step': 10261, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 3322488817984}, 'timestamp': '2025-09-10 02:38:04.244039', 'step': 10261, 'epoch': 2} {'type': 'loss', 'content': 0.12110109627246857, 'timestamp': '2025-09-10 02:38:04.261502', 'step': 10262, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 80], 'flops': 2373281365952}, 'timestamp': '2025-09-10 02:38:04.342783', 'step': 10262, 'epoch': 2} {'type': 'loss', 'content': 0.04575898498296738, 'timestamp': '2025-09-10 02:38:04.345782', 'step': 10263, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 3322488817984}, 'timestamp': '2025-09-10 02:38:04.420684', 'step': 10263, 'epoch': 2} {'type': 'loss', 'content': 0.14471159875392914, 'timestamp': '2025-09-10 02:38:04.457434', 'step': 10264, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 2847885091968}, 'timestamp': '2025-09-10 02:38:04.492677', 'step': 10264, 'epoch': 2} {'type': 'loss', 'content': 0.13736046850681305, 'timestamp': '2025-09-10 02:38:04.494885', 'step': 10265, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 3322488817984}, 'timestamp': '2025-09-10 02:38:04.530059', 'step': 10265, 'epoch': 2} {'type': 'loss', 'content': 0.07797751575708389, 'timestamp': '2025-09-10 02:38:04.532747', 'step': 10266, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 144], 'flops': 4271696270016}, 'timestamp': '2025-09-10 02:38:04.563395', 'step': 10266, 'epoch': 2} {'type': 'loss', 'content': 0.16497965157032013, 'timestamp': '2025-09-10 02:38:04.568833', 'step': 10267, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 3322488817984}, 'timestamp': '2025-09-10 02:38:04.600137', 'step': 10267, 'epoch': 2} {'type': 'loss', 'content': 0.1540786474943161, 'timestamp': '2025-09-10 02:38:04.624348', 'step': 10268, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 128], 'flops': 3797092544000}, 'timestamp': '2025-09-10 02:38:04.657198', 'step': 10268, 'epoch': 2} {'type': 'loss', 'content': 0.1659795194864273, 'timestamp': '2025-09-10 02:38:04.659744', 'step': 10269, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 2847885091968}, 'timestamp': '2025-09-10 02:38:04.690180', 'step': 10269, 'epoch': 2} {'type': 'loss', 'content': 0.14055342972278595, 'timestamp': '2025-09-10 02:38:04.692561', 'step': 10270, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 2847885091968}, 'timestamp': '2025-09-10 02:38:04.724028', 'step': 10270, 'epoch': 2} {'type': 'loss', 'content': 0.07657773047685623, 'timestamp': '2025-09-10 02:38:04.734988', 'step': 10271, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 3322488817984}, 'timestamp': '2025-09-10 02:38:04.824889', 'step': 10271, 'epoch': 2} {'type': 'loss', 'content': 0.0372280590236187, 'timestamp': '2025-09-10 02:38:04.866757', 'step': 10272, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 160], 'flops': 4746299996032}, 'timestamp': '2025-09-10 02:38:04.936744', 'step': 10272, 'epoch': 2} {'type': 'loss', 'content': 0.10661493241786957, 'timestamp': '2025-09-10 02:38:04.940716', 'step': 10273, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 2847885091968}, 'timestamp': '2025-09-10 02:38:04.986417', 'step': 10273, 'epoch': 2} {'type': 'loss', 'content': 0.08329315483570099, 'timestamp': '2025-09-10 02:38:04.996461', 'step': 10274, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 2847885091968}, 'timestamp': '2025-09-10 02:38:05.051872', 'step': 10274, 'epoch': 2} {'type': 'loss', 'content': 0.10282719135284424, 'timestamp': '2025-09-10 02:38:05.054370', 'step': 10275, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 3322488817984}, 'timestamp': '2025-09-10 02:38:05.087159', 'step': 10275, 'epoch': 2} {'type': 'loss', 'content': 0.12715862691402435, 'timestamp': '2025-09-10 02:38:05.110766', 'step': 10276, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 128], 'flops': 3797092544000}, 'timestamp': '2025-09-10 02:38:05.145356', 'step': 10276, 'epoch': 2} {'type': 'loss', 'content': 0.08561083674430847, 'timestamp': '2025-09-10 02:38:05.148263', 'step': 10277, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 2847885091968}, 'timestamp': '2025-09-10 02:38:05.179046', 'step': 10277, 'epoch': 2} {'type': 'loss', 'content': 0.09013872593641281, 'timestamp': '2025-09-10 02:38:05.181672', 'step': 10278, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 144], 'flops': 4271696270016}, 'timestamp': '2025-09-10 02:38:05.212931', 'step': 10278, 'epoch': 2} {'type': 'loss', 'content': 0.21639484167099, 'timestamp': '2025-09-10 02:38:05.216680', 'step': 10279, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 3322488817984}, 'timestamp': '2025-09-10 02:38:05.253372', 'step': 10279, 'epoch': 2} {'type': 'loss', 'content': 0.13400866091251373, 'timestamp': '2025-09-10 02:38:05.277351', 'step': 10280, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 2847885091968}, 'timestamp': '2025-09-10 02:38:05.307718', 'step': 10280, 'epoch': 2} {'type': 'loss', 'content': 0.14165426790714264, 'timestamp': '2025-09-10 02:38:05.309966', 'step': 10281, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 2847885091968}, 'timestamp': '2025-09-10 02:38:05.340362', 'step': 10281, 'epoch': 2} {'type': 'loss', 'content': 0.18336166441440582, 'timestamp': '2025-09-10 02:38:05.345537', 'step': 10282, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 128], 'flops': 3797092544000}, 'timestamp': '2025-09-10 02:38:05.405136', 'step': 10282, 'epoch': 2} {'type': 'loss', 'content': 0.08916481584310532, 'timestamp': '2025-09-10 02:38:05.419279', 'step': 10283, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 2847885091968}, 'timestamp': '2025-09-10 02:38:05.485348', 'step': 10283, 'epoch': 2} {'type': 'loss', 'content': 0.05793339014053345, 'timestamp': '2025-09-10 02:38:05.510909', 'step': 10284, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 3322488817984}, 'timestamp': '2025-09-10 02:38:05.545999', 'step': 10284, 'epoch': 2} {'type': 'loss', 'content': 0.17212624847888947, 'timestamp': '2025-09-10 02:38:05.549125', 'step': 10285, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 144], 'flops': 4271696270016}, 'timestamp': '2025-09-10 02:38:05.584453', 'step': 10285, 'epoch': 2} {'type': 'loss', 'content': 0.11738192290067673, 'timestamp': '2025-09-10 02:38:05.588258', 'step': 10286, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 2847885091968}, 'timestamp': '2025-09-10 02:38:05.621934', 'step': 10286, 'epoch': 2} {'type': 'loss', 'content': 0.12371936440467834, 'timestamp': '2025-09-10 02:38:05.626174', 'step': 10287, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 144], 'flops': 4271696270016}, 'timestamp': '2025-09-10 02:38:05.667139', 'step': 10287, 'epoch': 2} {'type': 'loss', 'content': 0.06712199747562408, 'timestamp': '2025-09-10 02:38:05.693148', 'step': 10288, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 2847885091968}, 'timestamp': '2025-09-10 02:38:05.737424', 'step': 10288, 'epoch': 2} {'type': 'loss', 'content': 0.03267059102654457, 'timestamp': '2025-09-10 02:38:05.741251', 'step': 10289, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 2847885091968}, 'timestamp': '2025-09-10 02:38:05.783758', 'step': 10289, 'epoch': 2} {'type': 'loss', 'content': 0.18478785455226898, 'timestamp': '2025-09-10 02:38:05.792233', 'step': 10290, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 2847885091968}, 'timestamp': '2025-09-10 02:38:05.845864', 'step': 10290, 'epoch': 2} {'type': 'loss', 'content': 0.12648114562034607, 'timestamp': '2025-09-10 02:38:05.848771', 'step': 10291, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 2847885091968}, 'timestamp': '2025-09-10 02:38:05.897125', 'step': 10291, 'epoch': 2} {'type': 'loss', 'content': 0.09123072773218155, 'timestamp': '2025-09-10 02:38:05.930692', 'step': 10292, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 2847885091968}, 'timestamp': '2025-09-10 02:38:05.986318', 'step': 10292, 'epoch': 2} {'type': 'loss', 'content': 0.06151891499757767, 'timestamp': '2025-09-10 02:38:05.989971', 'step': 10293, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 3322488817984}, 'timestamp': '2025-09-10 02:38:06.024377', 'step': 10293, 'epoch': 2} {'type': 'loss', 'content': 0.06792113184928894, 'timestamp': '2025-09-10 02:38:06.027847', 'step': 10294, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 144], 'flops': 4271696270016}, 'timestamp': '2025-09-10 02:38:06.066868', 'step': 10294, 'epoch': 2} {'type': 'loss', 'content': 0.051836226135492325, 'timestamp': '2025-09-10 02:38:06.071075', 'step': 10295, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 3322488817984}, 'timestamp': '2025-09-10 02:38:06.106329', 'step': 10295, 'epoch': 2} {'type': 'loss', 'content': 0.12621045112609863, 'timestamp': '2025-09-10 02:38:06.131906', 'step': 10296, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 3322488817984}, 'timestamp': '2025-09-10 02:38:06.171469', 'step': 10296, 'epoch': 2} {'type': 'loss', 'content': 0.14527037739753723, 'timestamp': '2025-09-10 02:38:06.177319', 'step': 10297, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 3322488817984}, 'timestamp': '2025-09-10 02:38:06.213655', 'step': 10297, 'epoch': 2} {'type': 'loss', 'content': 0.14719603955745697, 'timestamp': '2025-09-10 02:38:06.223698', 'step': 10298, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 2847885091968}, 'timestamp': '2025-09-10 02:38:06.277417', 'step': 10298, 'epoch': 2} {'type': 'loss', 'content': 0.10035613179206848, 'timestamp': '2025-09-10 02:38:06.281620', 'step': 10299, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 2847885091968}, 'timestamp': '2025-09-10 02:38:06.321971', 'step': 10299, 'epoch': 2} {'type': 'loss', 'content': 0.13502657413482666, 'timestamp': '2025-09-10 02:38:06.345177', 'step': 10300, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 128], 'flops': 3797092544000}, 'timestamp': '2025-09-10 02:38:06.374814', 'step': 10300, 'epoch': 2} {'type': 'loss', 'content': 0.19676929712295532, 'timestamp': '2025-09-10 02:38:06.376904', 'step': 10301, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 2847885091968}, 'timestamp': '2025-09-10 02:38:06.406471', 'step': 10301, 'epoch': 2} {'type': 'loss', 'content': 0.09577248245477676, 'timestamp': '2025-09-10 02:38:06.408947', 'step': 10302, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 144], 'flops': 4271696270016}, 'timestamp': '2025-09-10 02:38:06.439641', 'step': 10302, 'epoch': 2} {'type': 'loss', 'content': 0.07495413720607758, 'timestamp': '2025-09-10 02:38:06.441961', 'step': 10303, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 128], 'flops': 3797092544000}, 'timestamp': '2025-09-10 02:38:06.472897', 'step': 10303, 'epoch': 2} {'type': 'loss', 'content': 0.11578577011823654, 'timestamp': '2025-09-10 02:38:06.496443', 'step': 10304, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 3322488817984}, 'timestamp': '2025-09-10 02:38:06.527339', 'step': 10304, 'epoch': 2} {'type': 'loss', 'content': 0.09473706781864166, 'timestamp': '2025-09-10 02:38:06.529503', 'step': 10305, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 3322488817984}, 'timestamp': '2025-09-10 02:38:06.560488', 'step': 10305, 'epoch': 2} {'type': 'loss', 'content': 0.1633971929550171, 'timestamp': '2025-09-10 02:38:06.563178', 'step': 10306, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 3322488817984}, 'timestamp': '2025-09-10 02:38:06.602268', 'step': 10306, 'epoch': 2} {'type': 'loss', 'content': 0.10139155387878418, 'timestamp': '2025-09-10 02:38:06.604779', 'step': 10307, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 2847885091968}, 'timestamp': '2025-09-10 02:38:06.636943', 'step': 10307, 'epoch': 2} {'type': 'loss', 'content': 0.07345302402973175, 'timestamp': '2025-09-10 02:38:06.660004', 'step': 10308, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 3322488817984}, 'timestamp': '2025-09-10 02:38:06.692337', 'step': 10308, 'epoch': 2} {'type': 'loss', 'content': 0.13241474330425262, 'timestamp': '2025-09-10 02:38:06.694252', 'step': 10309, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 3322488817984}, 'timestamp': '2025-09-10 02:38:06.727896', 'step': 10309, 'epoch': 2} {'type': 'loss', 'content': 0.09812776744365692, 'timestamp': '2025-09-10 02:38:06.729959', 'step': 10310, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 3322488817984}, 'timestamp': '2025-09-10 02:38:06.759820', 'step': 10310, 'epoch': 2} {'type': 'loss', 'content': 0.09868806600570679, 'timestamp': '2025-09-10 02:38:06.761448', 'step': 10311, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 128], 'flops': 3797092544000}, 'timestamp': '2025-09-10 02:38:06.790936', 'step': 10311, 'epoch': 2} {'type': 'loss', 'content': 0.083004891872406, 'timestamp': '2025-09-10 02:38:06.814270', 'step': 10312, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 128], 'flops': 3797092544000}, 'timestamp': '2025-09-10 02:38:06.843970', 'step': 10312, 'epoch': 2} {'type': 'loss', 'content': 0.09999413788318634, 'timestamp': '2025-09-10 02:38:06.845912', 'step': 10313, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 2847885091968}, 'timestamp': '2025-09-10 02:38:06.876221', 'step': 10313, 'epoch': 2} {'type': 'loss', 'content': 0.09229810535907745, 'timestamp': '2025-09-10 02:38:06.878737', 'step': 10314, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 2847885091968}, 'timestamp': '2025-09-10 02:38:06.908970', 'step': 10314, 'epoch': 2} {'type': 'loss', 'content': 0.09647367894649506, 'timestamp': '2025-09-10 02:38:06.910942', 'step': 10315, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 3322488817984}, 'timestamp': '2025-09-10 02:38:06.941394', 'step': 10315, 'epoch': 2} {'type': 'loss', 'content': 0.08479029685258865, 'timestamp': '2025-09-10 02:38:06.966433', 'step': 10316, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 128], 'flops': 3797092544000}, 'timestamp': '2025-09-10 02:38:07.019423', 'step': 10316, 'epoch': 2} {'type': 'loss', 'content': 0.14550599455833435, 'timestamp': '2025-09-10 02:38:07.029388', 'step': 10317, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 128], 'flops': 3797092544000}, 'timestamp': '2025-09-10 02:38:07.069414', 'step': 10317, 'epoch': 2} {'type': 'loss', 'content': 0.09111721068620682, 'timestamp': '2025-09-10 02:38:07.081390', 'step': 10318, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 128], 'flops': 3797092544000}, 'timestamp': '2025-09-10 02:38:07.122904', 'step': 10318, 'epoch': 2} {'type': 'loss', 'content': 0.17927931249141693, 'timestamp': '2025-09-10 02:38:07.128304', 'step': 10319, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 3322488817984}, 'timestamp': '2025-09-10 02:38:07.174319', 'step': 10319, 'epoch': 2} {'type': 'loss', 'content': 0.14687244594097137, 'timestamp': '2025-09-10 02:38:07.199245', 'step': 10320, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 3322488817984}, 'timestamp': '2025-09-10 02:38:07.240133', 'step': 10320, 'epoch': 2} {'type': 'loss', 'content': 0.0981561541557312, 'timestamp': '2025-09-10 02:38:07.247426', 'step': 10321, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 3322488817984}, 'timestamp': '2025-09-10 02:38:07.286567', 'step': 10321, 'epoch': 2} {'type': 'loss', 'content': 0.15336374938488007, 'timestamp': '2025-09-10 02:38:07.293271', 'step': 10322, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 2847885091968}, 'timestamp': '2025-09-10 02:38:07.339221', 'step': 10322, 'epoch': 2} {'type': 'loss', 'content': 0.060966119170188904, 'timestamp': '2025-09-10 02:38:07.343220', 'step': 10323, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 2847885091968}, 'timestamp': '2025-09-10 02:38:07.382915', 'step': 10323, 'epoch': 2} {'type': 'loss', 'content': 0.10884664952754974, 'timestamp': '2025-09-10 02:38:07.410584', 'step': 10324, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 3322488817984}, 'timestamp': '2025-09-10 02:38:07.450335', 'step': 10324, 'epoch': 2} {'type': 'loss', 'content': 0.09792518615722656, 'timestamp': '2025-09-10 02:38:07.460459', 'step': 10325, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 128], 'flops': 3797092544000}, 'timestamp': '2025-09-10 02:38:07.510851', 'step': 10325, 'epoch': 2} {'type': 'loss', 'content': 0.12592533230781555, 'timestamp': '2025-09-10 02:38:07.515174', 'step': 10326, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 2847885091968}, 'timestamp': '2025-09-10 02:38:07.561124', 'step': 10326, 'epoch': 2} {'type': 'loss', 'content': 0.11562740802764893, 'timestamp': '2025-09-10 02:38:07.567350', 'step': 10327, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 2847885091968}, 'timestamp': '2025-09-10 02:38:07.622766', 'step': 10327, 'epoch': 2} {'type': 'loss', 'content': 0.11876554042100906, 'timestamp': '2025-09-10 02:38:07.653245', 'step': 10328, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 176], 'flops': 5220903722048}, 'timestamp': '2025-09-10 02:38:07.713338', 'step': 10328, 'epoch': 2} {'type': 'loss', 'content': 0.16973468661308289, 'timestamp': '2025-09-10 02:38:07.732725', 'step': 10329, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 3322488817984}, 'timestamp': '2025-09-10 02:38:07.791375', 'step': 10329, 'epoch': 2} {'type': 'loss', 'content': 0.06835363060235977, 'timestamp': '2025-09-10 02:38:07.802600', 'step': 10330, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 2847885091968}, 'timestamp': '2025-09-10 02:38:07.878162', 'step': 10330, 'epoch': 2} {'type': 'loss', 'content': 0.08905930072069168, 'timestamp': '2025-09-10 02:38:07.885459', 'step': 10331, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 2847885091968}, 'timestamp': '2025-09-10 02:38:07.930781', 'step': 10331, 'epoch': 2} {'type': 'loss', 'content': 0.0775165855884552, 'timestamp': '2025-09-10 02:38:07.955205', 'step': 10332, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 2847885091968}, 'timestamp': '2025-09-10 02:38:07.985808', 'step': 10332, 'epoch': 2} {'type': 'loss', 'content': 0.15717463195323944, 'timestamp': '2025-09-10 02:38:07.989463', 'step': 10333, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 3322488817984}, 'timestamp': '2025-09-10 02:38:08.023299', 'step': 10333, 'epoch': 2} {'type': 'loss', 'content': 0.08446953445672989, 'timestamp': '2025-09-10 02:38:08.026651', 'step': 10334, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 80], 'flops': 2373281365952}, 'timestamp': '2025-09-10 02:38:08.060712', 'step': 10334, 'epoch': 2} {'type': 'loss', 'content': 0.07043173164129257, 'timestamp': '2025-09-10 02:38:08.063288', 'step': 10335, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 128], 'flops': 3797092544000}, 'timestamp': '2025-09-10 02:38:08.095861', 'step': 10335, 'epoch': 2} {'type': 'loss', 'content': 0.12710855901241302, 'timestamp': '2025-09-10 02:38:08.124643', 'step': 10336, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 3322488817984}, 'timestamp': '2025-09-10 02:38:08.180079', 'step': 10336, 'epoch': 2} {'type': 'loss', 'content': 0.1730620265007019, 'timestamp': '2025-09-10 02:38:08.184265', 'step': 10337, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 128], 'flops': 3797092544000}, 'timestamp': '2025-09-10 02:38:08.229344', 'step': 10337, 'epoch': 2} {'type': 'loss', 'content': 0.17054621875286102, 'timestamp': '2025-09-10 02:38:08.234272', 'step': 10338, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 144], 'flops': 4271696270016}, 'timestamp': '2025-09-10 02:38:08.280364', 'step': 10338, 'epoch': 2} {'type': 'loss', 'content': 0.10774119198322296, 'timestamp': '2025-09-10 02:38:08.286543', 'step': 10339, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 128], 'flops': 3797092544000}, 'timestamp': '2025-09-10 02:38:08.325396', 'step': 10339, 'epoch': 2} {'type': 'loss', 'content': 0.11887434870004654, 'timestamp': '2025-09-10 02:38:08.352628', 'step': 10340, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 80], 'flops': 2373281365952}, 'timestamp': '2025-09-10 02:38:08.406083', 'step': 10340, 'epoch': 2} {'type': 'loss', 'content': 0.10937051475048065, 'timestamp': '2025-09-10 02:38:08.408362', 'step': 10341, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 3322488817984}, 'timestamp': '2025-09-10 02:38:08.451512', 'step': 10341, 'epoch': 2} {'type': 'loss', 'content': 0.16631782054901123, 'timestamp': '2025-09-10 02:38:08.453712', 'step': 10342, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 128], 'flops': 3797092544000}, 'timestamp': '2025-09-10 02:38:08.487306', 'step': 10342, 'epoch': 2} {'type': 'loss', 'content': 0.10811605304479599, 'timestamp': '2025-09-10 02:38:08.492988', 'step': 10343, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 144], 'flops': 4271696270016}, 'timestamp': '2025-09-10 02:38:08.526472', 'step': 10343, 'epoch': 2} {'type': 'loss', 'content': 0.18297700583934784, 'timestamp': '2025-09-10 02:38:08.553744', 'step': 10344, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 144], 'flops': 4271696270016}, 'timestamp': '2025-09-10 02:38:08.588414', 'step': 10344, 'epoch': 2} {'type': 'loss', 'content': 0.07290302962064743, 'timestamp': '2025-09-10 02:38:08.604251', 'step': 10345, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 3322488817984}, 'timestamp': '2025-09-10 02:38:08.647453', 'step': 10345, 'epoch': 2} {'type': 'loss', 'content': 0.057456303387880325, 'timestamp': '2025-09-10 02:38:08.660342', 'step': 10346, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 3322488817984}, 'timestamp': '2025-09-10 02:38:08.706431', 'step': 10346, 'epoch': 2} {'type': 'loss', 'content': 0.12960174679756165, 'timestamp': '2025-09-10 02:38:08.714412', 'step': 10347, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 2847885091968}, 'timestamp': '2025-09-10 02:38:08.765841', 'step': 10347, 'epoch': 2} {'type': 'loss', 'content': 0.07898709177970886, 'timestamp': '2025-09-10 02:38:08.793534', 'step': 10348, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 128], 'flops': 3797092544000}, 'timestamp': '2025-09-10 02:38:08.827183', 'step': 10348, 'epoch': 2} {'type': 'loss', 'content': 0.1304563730955124, 'timestamp': '2025-09-10 02:38:08.829369', 'step': 10349, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 2847885091968}, 'timestamp': '2025-09-10 02:38:08.877112', 'step': 10349, 'epoch': 2} {'type': 'loss', 'content': 0.09373027831315994, 'timestamp': '2025-09-10 02:38:08.882965', 'step': 10350, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 128], 'flops': 3797092544000}, 'timestamp': '2025-09-10 02:38:08.914905', 'step': 10350, 'epoch': 2} {'type': 'loss', 'content': 0.06594499200582504, 'timestamp': '2025-09-10 02:38:08.917639', 'step': 10351, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 80], 'flops': 2373281365952}, 'timestamp': '2025-09-10 02:38:08.948471', 'step': 10351, 'epoch': 2} {'type': 'loss', 'content': 0.0801335945725441, 'timestamp': '2025-09-10 02:38:08.972517', 'step': 10352, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 2847885091968}, 'timestamp': '2025-09-10 02:38:09.006614', 'step': 10352, 'epoch': 2} {'type': 'loss', 'content': 0.07349817454814911, 'timestamp': '2025-09-10 02:38:09.009758', 'step': 10353, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 3322488817984}, 'timestamp': '2025-09-10 02:38:09.042360', 'step': 10353, 'epoch': 2} {'type': 'loss', 'content': 0.10556722432374954, 'timestamp': '2025-09-10 02:38:09.045086', 'step': 10354, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 3322488817984}, 'timestamp': '2025-09-10 02:38:09.081192', 'step': 10354, 'epoch': 2} {'type': 'loss', 'content': 0.10021277517080307, 'timestamp': '2025-09-10 02:38:09.088952', 'step': 10355, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 192], 'flops': 5695507448064}, 'timestamp': '2025-09-10 02:38:09.124873', 'step': 10355, 'epoch': 2} {'type': 'loss', 'content': 0.17662353813648224, 'timestamp': '2025-09-10 02:38:09.150329', 'step': 10356, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 3322488817984}, 'timestamp': '2025-09-10 02:38:09.187150', 'step': 10356, 'epoch': 2} {'type': 'loss', 'content': 0.06335372477769852, 'timestamp': '2025-09-10 02:38:09.190609', 'step': 10357, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 3322488817984}, 'timestamp': '2025-09-10 02:38:09.231269', 'step': 10357, 'epoch': 2} {'type': 'loss', 'content': 0.11476399004459381, 'timestamp': '2025-09-10 02:38:09.234166', 'step': 10358, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 3322488817984}, 'timestamp': '2025-09-10 02:38:09.294282', 'step': 10358, 'epoch': 2} {'type': 'loss', 'content': 0.20109297335147858, 'timestamp': '2025-09-10 02:38:09.300788', 'step': 10359, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 128], 'flops': 3797092544000}, 'timestamp': '2025-09-10 02:38:09.337551', 'step': 10359, 'epoch': 2} {'type': 'loss', 'content': 0.10369828343391418, 'timestamp': '2025-09-10 02:38:09.362069', 'step': 10360, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 3322488817984}, 'timestamp': '2025-09-10 02:38:09.398941', 'step': 10360, 'epoch': 2} {'type': 'loss', 'content': 0.12119249254465103, 'timestamp': '2025-09-10 02:38:09.402095', 'step': 10361, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 3322488817984}, 'timestamp': '2025-09-10 02:38:09.441724', 'step': 10361, 'epoch': 2} {'type': 'loss', 'content': 0.05995628237724304, 'timestamp': '2025-09-10 02:38:09.447275', 'step': 10362, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 3322488817984}, 'timestamp': '2025-09-10 02:38:09.492243', 'step': 10362, 'epoch': 2} {'type': 'loss', 'content': 0.1351364403963089, 'timestamp': '2025-09-10 02:38:09.494493', 'step': 10363, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 128], 'flops': 3797092544000}, 'timestamp': '2025-09-10 02:38:09.531400', 'step': 10363, 'epoch': 2} {'type': 'loss', 'content': 0.03558792918920517, 'timestamp': '2025-09-10 02:38:09.554904', 'step': 10364, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 144], 'flops': 4271696270016}, 'timestamp': '2025-09-10 02:38:09.588337', 'step': 10364, 'epoch': 2} {'type': 'loss', 'content': 0.10650550574064255, 'timestamp': '2025-09-10 02:38:09.591388', 'step': 10365, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 3322488817984}, 'timestamp': '2025-09-10 02:38:09.629674', 'step': 10365, 'epoch': 2} {'type': 'loss', 'content': 0.1553202122449875, 'timestamp': '2025-09-10 02:38:09.632540', 'step': 10366, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 128], 'flops': 3797092544000}, 'timestamp': '2025-09-10 02:38:09.669740', 'step': 10366, 'epoch': 2} {'type': 'loss', 'content': 0.08134491741657257, 'timestamp': '2025-09-10 02:38:09.673734', 'step': 10367, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 128], 'flops': 3797092544000}, 'timestamp': '2025-09-10 02:38:09.714628', 'step': 10367, 'epoch': 2} {'type': 'loss', 'content': 0.10445431619882584, 'timestamp': '2025-09-10 02:38:09.740672', 'step': 10368, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 3322488817984}, 'timestamp': '2025-09-10 02:38:09.779389', 'step': 10368, 'epoch': 2} {'type': 'loss', 'content': 0.09401865303516388, 'timestamp': '2025-09-10 02:38:09.787260', 'step': 10369, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 128], 'flops': 3797092544000}, 'timestamp': '2025-09-10 02:38:09.836494', 'step': 10369, 'epoch': 2} {'type': 'loss', 'content': 0.07615845650434494, 'timestamp': '2025-09-10 02:38:09.840281', 'step': 10370, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 128], 'flops': 3797092544000}, 'timestamp': '2025-09-10 02:38:09.877390', 'step': 10370, 'epoch': 2} {'type': 'loss', 'content': 0.10026860237121582, 'timestamp': '2025-09-10 02:38:09.896215', 'step': 10371, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 2847885091968}, 'timestamp': '2025-09-10 02:38:09.940310', 'step': 10371, 'epoch': 2} {'type': 'loss', 'content': 0.13076408207416534, 'timestamp': '2025-09-10 02:38:09.965254', 'step': 10372, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 3322488817984}, 'timestamp': '2025-09-10 02:38:10.004704', 'step': 10372, 'epoch': 2} {'type': 'loss', 'content': 0.09619802236557007, 'timestamp': '2025-09-10 02:38:10.008470', 'step': 10373, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 3322488817984}, 'timestamp': '2025-09-10 02:38:10.044880', 'step': 10373, 'epoch': 2} {'type': 'loss', 'content': 0.10701342672109604, 'timestamp': '2025-09-10 02:38:10.049549', 'step': 10374, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 128], 'flops': 3797092544000}, 'timestamp': '2025-09-10 02:38:10.082294', 'step': 10374, 'epoch': 2} {'type': 'loss', 'content': 0.11628828197717667, 'timestamp': '2025-09-10 02:38:10.085296', 'step': 10375, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 2847885091968}, 'timestamp': '2025-09-10 02:38:10.122113', 'step': 10375, 'epoch': 2} {'type': 'loss', 'content': 0.10958501696586609, 'timestamp': '2025-09-10 02:38:10.146482', 'step': 10376, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 128], 'flops': 3797092544000}, 'timestamp': '2025-09-10 02:38:10.180693', 'step': 10376, 'epoch': 2} {'type': 'loss', 'content': 0.062304768711328506, 'timestamp': '2025-09-10 02:38:10.183111', 'step': 10377, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 2847885091968}, 'timestamp': '2025-09-10 02:38:10.213367', 'step': 10377, 'epoch': 2} {'type': 'loss', 'content': 0.11652999371290207, 'timestamp': '2025-09-10 02:38:10.216246', 'step': 10378, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 3322488817984}, 'timestamp': '2025-09-10 02:38:10.246191', 'step': 10378, 'epoch': 2} {'type': 'loss', 'content': 0.13136959075927734, 'timestamp': '2025-09-10 02:38:10.250109', 'step': 10379, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 128], 'flops': 3797092544000}, 'timestamp': '2025-09-10 02:38:10.289383', 'step': 10379, 'epoch': 2} {'type': 'loss', 'content': 0.17069405317306519, 'timestamp': '2025-09-10 02:38:10.325497', 'step': 10380, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 2847885091968}, 'timestamp': '2025-09-10 02:38:10.407483', 'step': 10380, 'epoch': 2} {'type': 'loss', 'content': 0.16394095122814178, 'timestamp': '2025-09-10 02:38:10.411617', 'step': 10381, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 2847885091968}, 'timestamp': '2025-09-10 02:38:10.452544', 'step': 10381, 'epoch': 2} {'type': 'loss', 'content': 0.07591640949249268, 'timestamp': '2025-09-10 02:38:10.458053', 'step': 10382, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 128], 'flops': 3797092544000}, 'timestamp': '2025-09-10 02:38:10.497937', 'step': 10382, 'epoch': 2} {'type': 'loss', 'content': 0.08622315526008606, 'timestamp': '2025-09-10 02:38:10.502094', 'step': 10383, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 80], 'flops': 2373281365952}, 'timestamp': '2025-09-10 02:38:10.544365', 'step': 10383, 'epoch': 2} {'type': 'loss', 'content': 0.13742737472057343, 'timestamp': '2025-09-10 02:38:10.567832', 'step': 10384, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 144], 'flops': 4271696270016}, 'timestamp': '2025-09-10 02:38:10.613699', 'step': 10384, 'epoch': 2} {'type': 'loss', 'content': 0.17243440449237823, 'timestamp': '2025-09-10 02:38:10.619151', 'step': 10385, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 160], 'flops': 4746299996032}, 'timestamp': '2025-09-10 02:38:10.660464', 'step': 10385, 'epoch': 2} {'type': 'loss', 'content': 0.12185101211071014, 'timestamp': '2025-09-10 02:38:10.666478', 'step': 10386, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 2847885091968}, 'timestamp': '2025-09-10 02:38:10.709105', 'step': 10386, 'epoch': 2} {'type': 'loss', 'content': 0.04832784831523895, 'timestamp': '2025-09-10 02:38:10.717352', 'step': 10387, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 3322488817984}, 'timestamp': '2025-09-10 02:38:10.763557', 'step': 10387, 'epoch': 2} {'type': 'loss', 'content': 0.06606128811836243, 'timestamp': '2025-09-10 02:38:10.791621', 'step': 10388, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 2847885091968}, 'timestamp': '2025-09-10 02:38:10.829145', 'step': 10388, 'epoch': 2} {'type': 'loss', 'content': 0.1671290546655655, 'timestamp': '2025-09-10 02:38:10.831823', 'step': 10389, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 2847885091968}, 'timestamp': '2025-09-10 02:38:10.862398', 'step': 10389, 'epoch': 2} {'type': 'loss', 'content': 0.09606319665908813, 'timestamp': '2025-09-10 02:38:10.864738', 'step': 10390, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 3322488817984}, 'timestamp': '2025-09-10 02:38:10.902514', 'step': 10390, 'epoch': 2} {'type': 'loss', 'content': 0.08808925002813339, 'timestamp': '2025-09-10 02:38:10.920264', 'step': 10391, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 2847885091968}, 'timestamp': '2025-09-10 02:38:10.988173', 'step': 10391, 'epoch': 2} {'type': 'loss', 'content': 0.11281935125589371, 'timestamp': '2025-09-10 02:38:11.013971', 'step': 10392, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 3322488817984}, 'timestamp': '2025-09-10 02:38:11.051657', 'step': 10392, 'epoch': 2} {'type': 'loss', 'content': 0.14711694419384003, 'timestamp': '2025-09-10 02:38:11.055592', 'step': 10393, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 3322488817984}, 'timestamp': '2025-09-10 02:38:11.090706', 'step': 10393, 'epoch': 2} {'type': 'loss', 'content': 0.19338981807231903, 'timestamp': '2025-09-10 02:38:11.094259', 'step': 10394, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 3322488817984}, 'timestamp': '2025-09-10 02:38:11.130404', 'step': 10394, 'epoch': 2} {'type': 'loss', 'content': 0.08239046484231949, 'timestamp': '2025-09-10 02:38:11.133956', 'step': 10395, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 192], 'flops': 5695507448064}, 'timestamp': '2025-09-10 02:38:11.168554', 'step': 10395, 'epoch': 2} {'type': 'loss', 'content': 0.16065740585327148, 'timestamp': '2025-09-10 02:38:11.198394', 'step': 10396, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 128], 'flops': 3797092544000}, 'timestamp': '2025-09-10 02:38:11.239930', 'step': 10396, 'epoch': 2} {'type': 'loss', 'content': 0.18275494873523712, 'timestamp': '2025-09-10 02:38:11.248763', 'step': 10397, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 144], 'flops': 4271696270016}, 'timestamp': '2025-09-10 02:38:11.314824', 'step': 10397, 'epoch': 2} {'type': 'loss', 'content': 0.17849908769130707, 'timestamp': '2025-09-10 02:38:11.330071', 'step': 10398, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 3322488817984}, 'timestamp': '2025-09-10 02:38:11.397198', 'step': 10398, 'epoch': 2} {'type': 'loss', 'content': 0.12802723050117493, 'timestamp': '2025-09-10 02:38:11.403712', 'step': 10399, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 2847885091968}, 'timestamp': '2025-09-10 02:38:11.442984', 'step': 10399, 'epoch': 2} {'type': 'loss', 'content': 0.15426988899707794, 'timestamp': '2025-09-10 02:38:11.469234', 'step': 10400, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 3322488817984}, 'timestamp': '2025-09-10 02:38:11.530499', 'step': 10400, 'epoch': 2} {'type': 'loss', 'content': 0.12038063257932663, 'timestamp': '2025-09-10 02:38:11.539858', 'step': 10401, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 3322488817984}, 'timestamp': '2025-09-10 02:38:11.578059', 'step': 10401, 'epoch': 2} {'type': 'loss', 'content': 0.08211234956979752, 'timestamp': '2025-09-10 02:38:11.580811', 'step': 10402, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 3322488817984}, 'timestamp': '2025-09-10 02:38:11.620179', 'step': 10402, 'epoch': 2} {'type': 'loss', 'content': 0.09122148901224136, 'timestamp': '2025-09-10 02:38:11.623384', 'step': 10403, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 2847885091968}, 'timestamp': '2025-09-10 02:38:11.654259', 'step': 10403, 'epoch': 2} {'type': 'loss', 'content': 0.06924968212842941, 'timestamp': '2025-09-10 02:38:11.677846', 'step': 10404, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 3322488817984}, 'timestamp': '2025-09-10 02:38:11.714951', 'step': 10404, 'epoch': 2} {'type': 'loss', 'content': 0.11406690627336502, 'timestamp': '2025-09-10 02:38:11.718166', 'step': 10405, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 128], 'flops': 3797092544000}, 'timestamp': '2025-09-10 02:38:11.754131', 'step': 10405, 'epoch': 2} {'type': 'loss', 'content': 0.12015616148710251, 'timestamp': '2025-09-10 02:38:11.757383', 'step': 10406, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 144], 'flops': 4271696270016}, 'timestamp': '2025-09-10 02:38:11.787894', 'step': 10406, 'epoch': 2} {'type': 'loss', 'content': 0.09903205931186676, 'timestamp': '2025-09-10 02:38:11.791817', 'step': 10407, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 3322488817984}, 'timestamp': '2025-09-10 02:38:11.828934', 'step': 10407, 'epoch': 2} {'type': 'loss', 'content': 0.10248454660177231, 'timestamp': '2025-09-10 02:38:11.856652', 'step': 10408, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 2847885091968}, 'timestamp': '2025-09-10 02:38:11.898715', 'step': 10408, 'epoch': 2} {'type': 'loss', 'content': 0.06924520432949066, 'timestamp': '2025-09-10 02:38:11.902848', 'step': 10409, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 3322488817984}, 'timestamp': '2025-09-10 02:38:11.937891', 'step': 10409, 'epoch': 2} {'type': 'loss', 'content': 0.07644527405500412, 'timestamp': '2025-09-10 02:38:11.942282', 'step': 10410, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 3322488817984}, 'timestamp': '2025-09-10 02:38:11.975171', 'step': 10410, 'epoch': 2} {'type': 'loss', 'content': 0.11247479170560837, 'timestamp': '2025-09-10 02:38:11.978792', 'step': 10411, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 144], 'flops': 4271696270016}, 'timestamp': '2025-09-10 02:38:12.009678', 'step': 10411, 'epoch': 2} {'type': 'loss', 'content': 0.20779208838939667, 'timestamp': '2025-09-10 02:38:12.035201', 'step': 10412, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 3322488817984}, 'timestamp': '2025-09-10 02:38:12.068298', 'step': 10412, 'epoch': 2} {'type': 'loss', 'content': 0.12439840286970139, 'timestamp': '2025-09-10 02:38:12.070439', 'step': 10413, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 3322488817984}, 'timestamp': '2025-09-10 02:38:12.103871', 'step': 10413, 'epoch': 2} {'type': 'loss', 'content': 0.11038508266210556, 'timestamp': '2025-09-10 02:38:12.106087', 'step': 10414, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 128], 'flops': 3797092544000}, 'timestamp': '2025-09-10 02:38:12.144974', 'step': 10414, 'epoch': 2} {'type': 'loss', 'content': 0.0824587270617485, 'timestamp': '2025-09-10 02:38:12.150086', 'step': 10415, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 2847885091968}, 'timestamp': '2025-09-10 02:38:12.181977', 'step': 10415, 'epoch': 2} {'type': 'loss', 'content': 0.07393058389425278, 'timestamp': '2025-09-10 02:38:12.205852', 'step': 10416, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 3322488817984}, 'timestamp': '2025-09-10 02:38:12.238471', 'step': 10416, 'epoch': 2} {'type': 'loss', 'content': 0.1318727284669876, 'timestamp': '2025-09-10 02:38:12.240841', 'step': 10417, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 128], 'flops': 3797092544000}, 'timestamp': '2025-09-10 02:38:12.272518', 'step': 10417, 'epoch': 2} {'type': 'loss', 'content': 0.21090757846832275, 'timestamp': '2025-09-10 02:38:12.274857', 'step': 10418, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 3322488817984}, 'timestamp': '2025-09-10 02:38:12.305190', 'step': 10418, 'epoch': 2} {'type': 'loss', 'content': 0.1204613745212555, 'timestamp': '2025-09-10 02:38:12.307614', 'step': 10419, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 2847885091968}, 'timestamp': '2025-09-10 02:38:12.337650', 'step': 10419, 'epoch': 2} {'type': 'loss', 'content': 0.1780083030462265, 'timestamp': '2025-09-10 02:38:12.361410', 'step': 10420, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 2847885091968}, 'timestamp': '2025-09-10 02:38:12.391730', 'step': 10420, 'epoch': 2} {'type': 'loss', 'content': 0.13134229183197021, 'timestamp': '2025-09-10 02:38:12.394074', 'step': 10421, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 2847885091968}, 'timestamp': '2025-09-10 02:38:12.425285', 'step': 10421, 'epoch': 2} {'type': 'loss', 'content': 0.1512930989265442, 'timestamp': '2025-09-10 02:38:12.428026', 'step': 10422, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 128], 'flops': 3797092544000}, 'timestamp': '2025-09-10 02:38:12.459105', 'step': 10422, 'epoch': 2} {'type': 'loss', 'content': 0.14200809597969055, 'timestamp': '2025-09-10 02:38:12.461444', 'step': 10423, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 3322488817984}, 'timestamp': '2025-09-10 02:38:12.491327', 'step': 10423, 'epoch': 2} {'type': 'loss', 'content': 0.12711212038993835, 'timestamp': '2025-09-10 02:38:12.514839', 'step': 10424, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 3322488817984}, 'timestamp': '2025-09-10 02:38:12.545564', 'step': 10424, 'epoch': 2} {'type': 'loss', 'content': 0.14728263020515442, 'timestamp': '2025-09-10 02:38:12.548217', 'step': 10425, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 128], 'flops': 3797092544000}, 'timestamp': '2025-09-10 02:38:12.578758', 'step': 10425, 'epoch': 2} {'type': 'loss', 'content': 0.15472356975078583, 'timestamp': '2025-09-10 02:38:12.581784', 'step': 10426, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 3322488817984}, 'timestamp': '2025-09-10 02:38:12.611512', 'step': 10426, 'epoch': 2} {'type': 'loss', 'content': 0.11232365667819977, 'timestamp': '2025-09-10 02:38:12.613851', 'step': 10427, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 2847885091968}, 'timestamp': '2025-09-10 02:38:12.644015', 'step': 10427, 'epoch': 2} {'type': 'loss', 'content': 0.0624237023293972, 'timestamp': '2025-09-10 02:38:12.667973', 'step': 10428, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 3322488817984}, 'timestamp': '2025-09-10 02:38:12.698694', 'step': 10428, 'epoch': 2} {'type': 'loss', 'content': 0.10795874893665314, 'timestamp': '2025-09-10 02:38:12.701031', 'step': 10429, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 2847885091968}, 'timestamp': '2025-09-10 02:38:12.732551', 'step': 10429, 'epoch': 2} {'type': 'loss', 'content': 0.11206122487783432, 'timestamp': '2025-09-10 02:38:12.734758', 'step': 10430, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 2847885091968}, 'timestamp': '2025-09-10 02:38:12.765666', 'step': 10430, 'epoch': 2} {'type': 'loss', 'content': 0.18420273065567017, 'timestamp': '2025-09-10 02:38:12.767488', 'step': 10431, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 3322488817984}, 'timestamp': '2025-09-10 02:38:12.797623', 'step': 10431, 'epoch': 2} {'type': 'loss', 'content': 0.1078898161649704, 'timestamp': '2025-09-10 02:38:12.821756', 'step': 10432, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 128], 'flops': 3797092544000}, 'timestamp': '2025-09-10 02:38:12.852352', 'step': 10432, 'epoch': 2} {'type': 'loss', 'content': 0.1365405172109604, 'timestamp': '2025-09-10 02:38:12.854936', 'step': 10433, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 160], 'flops': 4746299996032}, 'timestamp': '2025-09-10 02:38:12.885886', 'step': 10433, 'epoch': 2} {'type': 'loss', 'content': 0.08035816252231598, 'timestamp': '2025-09-10 02:38:12.888678', 'step': 10434, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 3322488817984}, 'timestamp': '2025-09-10 02:38:12.918859', 'step': 10434, 'epoch': 2} {'type': 'loss', 'content': 0.10908710211515427, 'timestamp': '2025-09-10 02:38:12.921102', 'step': 10435, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 144], 'flops': 4271696270016}, 'timestamp': '2025-09-10 02:38:12.951648', 'step': 10435, 'epoch': 2} {'type': 'loss', 'content': 0.05983613431453705, 'timestamp': '2025-09-10 02:38:12.975123', 'step': 10436, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 208], 'flops': 6170111174080}, 'timestamp': '2025-09-10 02:38:13.006650', 'step': 10436, 'epoch': 2} {'type': 'loss', 'content': 0.1371583789587021, 'timestamp': '2025-09-10 02:38:13.011328', 'step': 10437, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 2847885091968}, 'timestamp': '2025-09-10 02:38:13.041617', 'step': 10437, 'epoch': 2} {'type': 'loss', 'content': 0.07263298332691193, 'timestamp': '2025-09-10 02:38:13.043973', 'step': 10438, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 3322488817984}, 'timestamp': '2025-09-10 02:38:13.074577', 'step': 10438, 'epoch': 2} {'type': 'loss', 'content': 0.04208563640713692, 'timestamp': '2025-09-10 02:38:13.077238', 'step': 10439, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 128], 'flops': 3797092544000}, 'timestamp': '2025-09-10 02:38:13.108370', 'step': 10439, 'epoch': 2} {'type': 'loss', 'content': 0.1678408533334732, 'timestamp': '2025-09-10 02:38:13.135116', 'step': 10440, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 144], 'flops': 4271696270016}, 'timestamp': '2025-09-10 02:38:13.165654', 'step': 10440, 'epoch': 2} {'type': 'loss', 'content': 0.08580084145069122, 'timestamp': '2025-09-10 02:38:13.167750', 'step': 10441, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 128], 'flops': 3797092544000}, 'timestamp': '2025-09-10 02:38:13.197827', 'step': 10441, 'epoch': 2} {'type': 'loss', 'content': 0.09938719123601913, 'timestamp': '2025-09-10 02:38:13.200285', 'step': 10442, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 3322488817984}, 'timestamp': '2025-09-10 02:38:13.232116', 'step': 10442, 'epoch': 2} {'type': 'loss', 'content': 0.08757013827562332, 'timestamp': '2025-09-10 02:38:13.234610', 'step': 10443, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 128], 'flops': 3797092544000}, 'timestamp': '2025-09-10 02:38:13.266401', 'step': 10443, 'epoch': 2} {'type': 'loss', 'content': 0.11020780354738235, 'timestamp': '2025-09-10 02:38:13.289753', 'step': 10444, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 3322488817984}, 'timestamp': '2025-09-10 02:38:13.320246', 'step': 10444, 'epoch': 2} {'type': 'loss', 'content': 0.07839076966047287, 'timestamp': '2025-09-10 02:38:13.323102', 'step': 10445, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 160], 'flops': 4746299996032}, 'timestamp': '2025-09-10 02:38:13.353504', 'step': 10445, 'epoch': 2} {'type': 'loss', 'content': 0.08945511281490326, 'timestamp': '2025-09-10 02:38:13.356283', 'step': 10446, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 144], 'flops': 4271696270016}, 'timestamp': '2025-09-10 02:38:13.386175', 'step': 10446, 'epoch': 2} {'type': 'loss', 'content': 0.059296734631061554, 'timestamp': '2025-09-10 02:38:13.388841', 'step': 10447, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 176], 'flops': 5220903722048}, 'timestamp': '2025-09-10 02:38:13.419188', 'step': 10447, 'epoch': 2} {'type': 'loss', 'content': 0.13361231982707977, 'timestamp': '2025-09-10 02:38:13.444254', 'step': 10448, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 2847885091968}, 'timestamp': '2025-09-10 02:38:13.474191', 'step': 10448, 'epoch': 2} {'type': 'loss', 'content': 0.1813025325536728, 'timestamp': '2025-09-10 02:38:13.484767', 'step': 10449, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 144], 'flops': 4271696270016}, 'timestamp': '2025-09-10 02:38:13.515548', 'step': 10449, 'epoch': 2} {'type': 'loss', 'content': 0.11720463633537292, 'timestamp': '2025-09-10 02:38:13.518245', 'step': 10450, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 128], 'flops': 3797092544000}, 'timestamp': '2025-09-10 02:38:13.548783', 'step': 10450, 'epoch': 2} {'type': 'loss', 'content': 0.09820221364498138, 'timestamp': '2025-09-10 02:38:13.550739', 'step': 10451, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 3322488817984}, 'timestamp': '2025-09-10 02:38:13.581133', 'step': 10451, 'epoch': 2} {'type': 'loss', 'content': 0.17992275953292847, 'timestamp': '2025-09-10 02:38:13.606738', 'step': 10452, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 2847885091968}, 'timestamp': '2025-09-10 02:38:13.643931', 'step': 10452, 'epoch': 2} {'type': 'loss', 'content': 0.15976233780384064, 'timestamp': '2025-09-10 02:38:13.646387', 'step': 10453, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 3322488817984}, 'timestamp': '2025-09-10 02:38:13.677947', 'step': 10453, 'epoch': 2} {'type': 'loss', 'content': 0.13911420106887817, 'timestamp': '2025-09-10 02:38:13.680076', 'step': 10454, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 3322488817984}, 'timestamp': '2025-09-10 02:38:13.710481', 'step': 10454, 'epoch': 2} {'type': 'loss', 'content': 0.12790842354297638, 'timestamp': '2025-09-10 02:38:13.712869', 'step': 10455, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 2847885091968}, 'timestamp': '2025-09-10 02:38:13.748121', 'step': 10455, 'epoch': 2} {'type': 'loss', 'content': 0.09041140228509903, 'timestamp': '2025-09-10 02:38:13.771635', 'step': 10456, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 3322488817984}, 'timestamp': '2025-09-10 02:38:13.801649', 'step': 10456, 'epoch': 2} {'type': 'loss', 'content': 0.0905340164899826, 'timestamp': '2025-09-10 02:38:13.803937', 'step': 10457, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 128], 'flops': 3797092544000}, 'timestamp': '2025-09-10 02:38:13.833849', 'step': 10457, 'epoch': 2} {'type': 'loss', 'content': 0.11373438686132431, 'timestamp': '2025-09-10 02:38:13.836157', 'step': 10458, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 176], 'flops': 5220903722048}, 'timestamp': '2025-09-10 02:38:13.867872', 'step': 10458, 'epoch': 2} {'type': 'loss', 'content': 0.10650631785392761, 'timestamp': '2025-09-10 02:38:13.872132', 'step': 10459, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 2847885091968}, 'timestamp': '2025-09-10 02:38:13.902238', 'step': 10459, 'epoch': 2} {'type': 'loss', 'content': 0.08553118258714676, 'timestamp': '2025-09-10 02:38:13.925738', 'step': 10460, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 3322488817984}, 'timestamp': '2025-09-10 02:38:13.956248', 'step': 10460, 'epoch': 2} {'type': 'loss', 'content': 0.06725585460662842, 'timestamp': '2025-09-10 02:38:13.958628', 'step': 10461, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 3322488817984}, 'timestamp': '2025-09-10 02:38:13.988399', 'step': 10461, 'epoch': 2} {'type': 'loss', 'content': 0.13944606482982635, 'timestamp': '2025-09-10 02:38:13.991163', 'step': 10462, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 3322488817984}, 'timestamp': '2025-09-10 02:38:14.020696', 'step': 10462, 'epoch': 2} {'type': 'loss', 'content': 0.1468036025762558, 'timestamp': '2025-09-10 02:38:14.024357', 'step': 10463, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 3322488817984}, 'timestamp': '2025-09-10 02:38:14.056030', 'step': 10463, 'epoch': 2} {'type': 'loss', 'content': 0.06693672388792038, 'timestamp': '2025-09-10 02:38:14.079931', 'step': 10464, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 2847885091968}, 'timestamp': '2025-09-10 02:38:14.111004', 'step': 10464, 'epoch': 2} {'type': 'loss', 'content': 0.03165054693818092, 'timestamp': '2025-09-10 02:38:14.113335', 'step': 10465, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 3322488817984}, 'timestamp': '2025-09-10 02:38:14.148856', 'step': 10465, 'epoch': 2} {'type': 'loss', 'content': 0.13031722605228424, 'timestamp': '2025-09-10 02:38:14.151075', 'step': 10466, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 144], 'flops': 4271696270016}, 'timestamp': '2025-09-10 02:38:14.181809', 'step': 10466, 'epoch': 2} {'type': 'loss', 'content': 0.13056857883930206, 'timestamp': '2025-09-10 02:38:14.184137', 'step': 10467, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 3322488817984}, 'timestamp': '2025-09-10 02:38:14.214408', 'step': 10467, 'epoch': 2} {'type': 'loss', 'content': 0.044097136706113815, 'timestamp': '2025-09-10 02:38:14.237886', 'step': 10468, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 2847885091968}, 'timestamp': '2025-09-10 02:38:14.268477', 'step': 10468, 'epoch': 2} {'type': 'loss', 'content': 0.11761309951543808, 'timestamp': '2025-09-10 02:38:14.271303', 'step': 10469, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 160], 'flops': 4746299996032}, 'timestamp': '2025-09-10 02:38:14.301524', 'step': 10469, 'epoch': 2} {'type': 'loss', 'content': 0.19986161589622498, 'timestamp': '2025-09-10 02:38:14.304545', 'step': 10470, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 128], 'flops': 3797092544000}, 'timestamp': '2025-09-10 02:38:14.334647', 'step': 10470, 'epoch': 2} {'type': 'loss', 'content': 0.06657498329877853, 'timestamp': '2025-09-10 02:38:14.336912', 'step': 10471, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 3322488817984}, 'timestamp': '2025-09-10 02:38:14.367209', 'step': 10471, 'epoch': 2} {'type': 'loss', 'content': 0.029947245493531227, 'timestamp': '2025-09-10 02:38:14.390668', 'step': 10472, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 3322488817984}, 'timestamp': '2025-09-10 02:38:14.420645', 'step': 10472, 'epoch': 2} {'type': 'loss', 'content': 0.09590106457471848, 'timestamp': '2025-09-10 02:38:14.422974', 'step': 10473, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 2847885091968}, 'timestamp': '2025-09-10 02:38:14.454753', 'step': 10473, 'epoch': 2} {'type': 'loss', 'content': 0.058901477605104446, 'timestamp': '2025-09-10 02:38:14.456818', 'step': 10474, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 3322488817984}, 'timestamp': '2025-09-10 02:38:14.486318', 'step': 10474, 'epoch': 2} {'type': 'loss', 'content': 0.042888741940259933, 'timestamp': '2025-09-10 02:38:14.488659', 'step': 10475, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 3322488817984}, 'timestamp': '2025-09-10 02:38:14.519187', 'step': 10475, 'epoch': 2} {'type': 'loss', 'content': 0.1434161216020584, 'timestamp': '2025-09-10 02:38:14.542814', 'step': 10476, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 128], 'flops': 3797092544000}, 'timestamp': '2025-09-10 02:38:14.572858', 'step': 10476, 'epoch': 2} {'type': 'loss', 'content': 0.11081298440694809, 'timestamp': '2025-09-10 02:38:14.575345', 'step': 10477, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 144], 'flops': 4271696270016}, 'timestamp': '2025-09-10 02:38:14.605429', 'step': 10477, 'epoch': 2} {'type': 'loss', 'content': 0.1595432162284851, 'timestamp': '2025-09-10 02:38:14.607987', 'step': 10478, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 3322488817984}, 'timestamp': '2025-09-10 02:38:14.637631', 'step': 10478, 'epoch': 2} {'type': 'loss', 'content': 0.04901636391878128, 'timestamp': '2025-09-10 02:38:14.639736', 'step': 10479, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 144], 'flops': 4271696270016}, 'timestamp': '2025-09-10 02:38:14.670563', 'step': 10479, 'epoch': 2} {'type': 'loss', 'content': 0.10547683387994766, 'timestamp': '2025-09-10 02:38:14.694059', 'step': 10480, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 3322488817984}, 'timestamp': '2025-09-10 02:38:14.724674', 'step': 10480, 'epoch': 2} {'type': 'loss', 'content': 0.1192917749285698, 'timestamp': '2025-09-10 02:38:14.728552', 'step': 10481, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 144], 'flops': 4271696270016}, 'timestamp': '2025-09-10 02:38:14.760242', 'step': 10481, 'epoch': 2} {'type': 'loss', 'content': 0.12319119274616241, 'timestamp': '2025-09-10 02:38:14.762748', 'step': 10482, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 3322488817984}, 'timestamp': '2025-09-10 02:38:14.793177', 'step': 10482, 'epoch': 2} {'type': 'loss', 'content': 0.11981434375047684, 'timestamp': '2025-09-10 02:38:14.795413', 'step': 10483, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 2847885091968}, 'timestamp': '2025-09-10 02:38:14.825928', 'step': 10483, 'epoch': 2} {'type': 'loss', 'content': 0.08082487434148788, 'timestamp': '2025-09-10 02:38:14.849676', 'step': 10484, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 144], 'flops': 4271696270016}, 'timestamp': '2025-09-10 02:38:14.881016', 'step': 10484, 'epoch': 2} {'type': 'loss', 'content': 0.12943555414676666, 'timestamp': '2025-09-10 02:38:14.883247', 'step': 10485, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 3322488817984}, 'timestamp': '2025-09-10 02:38:14.913158', 'step': 10485, 'epoch': 2} {'type': 'loss', 'content': 0.03308984264731407, 'timestamp': '2025-09-10 02:38:14.915281', 'step': 10486, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 128], 'flops': 3797092544000}, 'timestamp': '2025-09-10 02:38:14.945032', 'step': 10486, 'epoch': 2} {'type': 'loss', 'content': 0.201543927192688, 'timestamp': '2025-09-10 02:38:14.947834', 'step': 10487, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 128], 'flops': 3797092544000}, 'timestamp': '2025-09-10 02:38:14.978739', 'step': 10487, 'epoch': 2} {'type': 'loss', 'content': 0.14933890104293823, 'timestamp': '2025-09-10 02:38:15.002219', 'step': 10488, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 3322488817984}, 'timestamp': '2025-09-10 02:38:15.032117', 'step': 10488, 'epoch': 2} {'type': 'loss', 'content': 0.05206457898020744, 'timestamp': '2025-09-10 02:38:15.036248', 'step': 10489, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 3322488817984}, 'timestamp': '2025-09-10 02:38:15.069982', 'step': 10489, 'epoch': 2} {'type': 'loss', 'content': 0.13395610451698303, 'timestamp': '2025-09-10 02:38:15.072442', 'step': 10490, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 3322488817984}, 'timestamp': '2025-09-10 02:38:15.103184', 'step': 10490, 'epoch': 2} {'type': 'loss', 'content': 0.12421572953462601, 'timestamp': '2025-09-10 02:38:15.105310', 'step': 10491, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 3322488817984}, 'timestamp': '2025-09-10 02:38:15.135070', 'step': 10491, 'epoch': 2} {'type': 'loss', 'content': 0.09975293278694153, 'timestamp': '2025-09-10 02:38:15.159335', 'step': 10492, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 128], 'flops': 3797092544000}, 'timestamp': '2025-09-10 02:38:15.189153', 'step': 10492, 'epoch': 2} {'type': 'loss', 'content': 0.13255095481872559, 'timestamp': '2025-09-10 02:38:15.191411', 'step': 10493, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 3322488817984}, 'timestamp': '2025-09-10 02:38:15.220998', 'step': 10493, 'epoch': 2} {'type': 'loss', 'content': 0.11385759711265564, 'timestamp': '2025-09-10 02:38:15.223145', 'step': 10494, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 144], 'flops': 4271696270016}, 'timestamp': '2025-09-10 02:38:15.253823', 'step': 10494, 'epoch': 2} {'type': 'loss', 'content': 0.10895610600709915, 'timestamp': '2025-09-10 02:38:15.256256', 'step': 10495, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 2847885091968}, 'timestamp': '2025-09-10 02:38:15.285198', 'step': 10495, 'epoch': 2} {'type': 'loss', 'content': 0.15465569496154785, 'timestamp': '2025-09-10 02:38:15.309142', 'step': 10496, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 2847885091968}, 'timestamp': '2025-09-10 02:38:15.339116', 'step': 10496, 'epoch': 2} {'type': 'loss', 'content': 0.0860256627202034, 'timestamp': '2025-09-10 02:38:15.341258', 'step': 10497, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 2847885091968}, 'timestamp': '2025-09-10 02:38:15.370345', 'step': 10497, 'epoch': 2} {'type': 'loss', 'content': 0.16483265161514282, 'timestamp': '2025-09-10 02:38:15.372492', 'step': 10498, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 3322488817984}, 'timestamp': '2025-09-10 02:38:15.401974', 'step': 10498, 'epoch': 2} {'type': 'loss', 'content': 0.06331228464841843, 'timestamp': '2025-09-10 02:38:15.404336', 'step': 10499, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 80], 'flops': 2373281365952}, 'timestamp': '2025-09-10 02:38:15.433582', 'step': 10499, 'epoch': 2} {'type': 'loss', 'content': 0.14984259009361267, 'timestamp': '2025-09-10 02:38:15.456937', 'step': 10500, 'epoch': 2} {'type': 'info', 'content': 'Checkpoint saved at step 10500', 'timestamp': '2025-09-10 02:38:22.012526', 'step': 10500, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 128], 'flops': 3797092544000}, 'timestamp': '2025-09-10 02:38:22.060264', 'step': 10500, 'epoch': 2} {'type': 'loss', 'content': 0.07941896468400955, 'timestamp': '2025-09-10 02:38:22.062467', 'step': 10501, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 3322488817984}, 'timestamp': '2025-09-10 02:38:22.095031', 'step': 10501, 'epoch': 2} {'type': 'loss', 'content': 0.08963551372289658, 'timestamp': '2025-09-10 02:38:22.097025', 'step': 10502, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 3322488817984}, 'timestamp': '2025-09-10 02:38:22.127950', 'step': 10502, 'epoch': 2} {'type': 'loss', 'content': 0.12146313488483429, 'timestamp': '2025-09-10 02:38:22.130439', 'step': 10503, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 2847885091968}, 'timestamp': '2025-09-10 02:38:22.162370', 'step': 10503, 'epoch': 2} {'type': 'loss', 'content': 0.16238640248775482, 'timestamp': '2025-09-10 02:38:22.186089', 'step': 10504, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 128], 'flops': 3797092544000}, 'timestamp': '2025-09-10 02:38:22.218061', 'step': 10504, 'epoch': 2} {'type': 'loss', 'content': 0.2238515019416809, 'timestamp': '2025-09-10 02:38:22.220374', 'step': 10505, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 3322488817984}, 'timestamp': '2025-09-10 02:38:22.252148', 'step': 10505, 'epoch': 2} {'type': 'loss', 'content': 0.0738881379365921, 'timestamp': '2025-09-10 02:38:22.254633', 'step': 10506, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 128], 'flops': 3797092544000}, 'timestamp': '2025-09-10 02:38:22.285433', 'step': 10506, 'epoch': 2} {'type': 'loss', 'content': 0.18272274732589722, 'timestamp': '2025-09-10 02:38:22.287630', 'step': 10507, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 80], 'flops': 2373281365952}, 'timestamp': '2025-09-10 02:38:22.317535', 'step': 10507, 'epoch': 2} {'type': 'loss', 'content': 0.08474952727556229, 'timestamp': '2025-09-10 02:38:22.341210', 'step': 10508, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 128], 'flops': 3797092544000}, 'timestamp': '2025-09-10 02:38:22.371123', 'step': 10508, 'epoch': 2} {'type': 'loss', 'content': 0.09476213902235031, 'timestamp': '2025-09-10 02:38:22.373279', 'step': 10509, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 2847885091968}, 'timestamp': '2025-09-10 02:38:22.403287', 'step': 10509, 'epoch': 2} {'type': 'loss', 'content': 0.06266390532255173, 'timestamp': '2025-09-10 02:38:22.405544', 'step': 10510, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 3322488817984}, 'timestamp': '2025-09-10 02:38:22.435713', 'step': 10510, 'epoch': 2} {'type': 'loss', 'content': 0.15368859469890594, 'timestamp': '2025-09-10 02:38:22.437994', 'step': 10511, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 3322488817984}, 'timestamp': '2025-09-10 02:38:22.467721', 'step': 10511, 'epoch': 2} {'type': 'loss', 'content': 0.1088946983218193, 'timestamp': '2025-09-10 02:38:22.491313', 'step': 10512, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 3322488817984}, 'timestamp': '2025-09-10 02:38:22.521966', 'step': 10512, 'epoch': 2} {'type': 'loss', 'content': 0.12653084099292755, 'timestamp': '2025-09-10 02:38:22.524359', 'step': 10513, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 128], 'flops': 3797092544000}, 'timestamp': '2025-09-10 02:38:22.555555', 'step': 10513, 'epoch': 2} {'type': 'loss', 'content': 0.10615858435630798, 'timestamp': '2025-09-10 02:38:22.557822', 'step': 10514, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 3322488817984}, 'timestamp': '2025-09-10 02:38:22.587714', 'step': 10514, 'epoch': 2} {'type': 'loss', 'content': 0.06144321337342262, 'timestamp': '2025-09-10 02:38:22.589963', 'step': 10515, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 2847885091968}, 'timestamp': '2025-09-10 02:38:22.619449', 'step': 10515, 'epoch': 2} {'type': 'loss', 'content': 0.12962603569030762, 'timestamp': '2025-09-10 02:38:22.643204', 'step': 10516, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 2847885091968}, 'timestamp': '2025-09-10 02:38:22.675185', 'step': 10516, 'epoch': 2} {'type': 'loss', 'content': 0.049169208854436874, 'timestamp': '2025-09-10 02:38:22.677497', 'step': 10517, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 2847885091968}, 'timestamp': '2025-09-10 02:38:22.707978', 'step': 10517, 'epoch': 2} {'type': 'loss', 'content': 0.16417230665683746, 'timestamp': '2025-09-10 02:38:22.710114', 'step': 10518, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 128], 'flops': 3797092544000}, 'timestamp': '2025-09-10 02:38:22.740729', 'step': 10518, 'epoch': 2} {'type': 'loss', 'content': 0.10003340244293213, 'timestamp': '2025-09-10 02:38:22.743530', 'step': 10519, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 2847885091968}, 'timestamp': '2025-09-10 02:38:22.774915', 'step': 10519, 'epoch': 2} {'type': 'loss', 'content': 0.17381836473941803, 'timestamp': '2025-09-10 02:38:22.799827', 'step': 10520, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 144], 'flops': 4271696270016}, 'timestamp': '2025-09-10 02:38:22.830482', 'step': 10520, 'epoch': 2} {'type': 'loss', 'content': 0.014006301760673523, 'timestamp': '2025-09-10 02:38:22.834440', 'step': 10521, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 2847885091968}, 'timestamp': '2025-09-10 02:38:22.864490', 'step': 10521, 'epoch': 2} {'type': 'loss', 'content': 0.05717941373586655, 'timestamp': '2025-09-10 02:38:22.867101', 'step': 10522, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 3322488817984}, 'timestamp': '2025-09-10 02:38:22.897385', 'step': 10522, 'epoch': 2} {'type': 'loss', 'content': 0.2111319899559021, 'timestamp': '2025-09-10 02:38:22.899765', 'step': 10523, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 2847885091968}, 'timestamp': '2025-09-10 02:38:22.930273', 'step': 10523, 'epoch': 2} {'type': 'loss', 'content': 0.1224982813000679, 'timestamp': '2025-09-10 02:38:22.953771', 'step': 10524, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 3322488817984}, 'timestamp': '2025-09-10 02:38:22.985329', 'step': 10524, 'epoch': 2} {'type': 'loss', 'content': 0.08313566446304321, 'timestamp': '2025-09-10 02:38:22.987359', 'step': 10525, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 3322488817984}, 'timestamp': '2025-09-10 02:38:23.016906', 'step': 10525, 'epoch': 2} {'type': 'loss', 'content': 0.06905335932970047, 'timestamp': '2025-09-10 02:38:23.019075', 'step': 10526, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 128], 'flops': 3797092544000}, 'timestamp': '2025-09-10 02:38:23.049016', 'step': 10526, 'epoch': 2} {'type': 'loss', 'content': 0.08858390897512436, 'timestamp': '2025-09-10 02:38:23.051017', 'step': 10527, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 3322488817984}, 'timestamp': '2025-09-10 02:38:23.080909', 'step': 10527, 'epoch': 2} {'type': 'loss', 'content': 0.07598540186882019, 'timestamp': '2025-09-10 02:38:23.104845', 'step': 10528, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 3322488817984}, 'timestamp': '2025-09-10 02:38:23.135035', 'step': 10528, 'epoch': 2} {'type': 'loss', 'content': 0.18772675096988678, 'timestamp': '2025-09-10 02:38:23.138767', 'step': 10529, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 2847885091968}, 'timestamp': '2025-09-10 02:38:23.170022', 'step': 10529, 'epoch': 2} {'type': 'loss', 'content': 0.13052721321582794, 'timestamp': '2025-09-10 02:38:23.172250', 'step': 10530, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 2847885091968}, 'timestamp': '2025-09-10 02:38:23.202938', 'step': 10530, 'epoch': 2} {'type': 'loss', 'content': 0.1400141716003418, 'timestamp': '2025-09-10 02:38:23.205243', 'step': 10531, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 2847885091968}, 'timestamp': '2025-09-10 02:38:23.234968', 'step': 10531, 'epoch': 2} {'type': 'loss', 'content': 0.1073557659983635, 'timestamp': '2025-09-10 02:38:23.258505', 'step': 10532, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 3322488817984}, 'timestamp': '2025-09-10 02:38:23.289559', 'step': 10532, 'epoch': 2} {'type': 'loss', 'content': 0.14163760840892792, 'timestamp': '2025-09-10 02:38:23.291808', 'step': 10533, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 2847885091968}, 'timestamp': '2025-09-10 02:38:23.322136', 'step': 10533, 'epoch': 2} {'type': 'loss', 'content': 0.09723731130361557, 'timestamp': '2025-09-10 02:38:23.324815', 'step': 10534, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 144], 'flops': 4271696270016}, 'timestamp': '2025-09-10 02:38:23.355362', 'step': 10534, 'epoch': 2} {'type': 'loss', 'content': 0.06643659621477127, 'timestamp': '2025-09-10 02:38:23.357705', 'step': 10535, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 2847885091968}, 'timestamp': '2025-09-10 02:38:23.387739', 'step': 10535, 'epoch': 2} {'type': 'loss', 'content': 0.0610317662358284, 'timestamp': '2025-09-10 02:38:23.411214', 'step': 10536, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 128], 'flops': 3797092544000}, 'timestamp': '2025-09-10 02:38:23.442690', 'step': 10536, 'epoch': 2} {'type': 'loss', 'content': 0.12628832459449768, 'timestamp': '2025-09-10 02:38:23.444897', 'step': 10537, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 2847885091968}, 'timestamp': '2025-09-10 02:38:23.475406', 'step': 10537, 'epoch': 2} {'type': 'loss', 'content': 0.09173787385225296, 'timestamp': '2025-09-10 02:38:23.477604', 'step': 10538, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 3322488817984}, 'timestamp': '2025-09-10 02:38:23.507759', 'step': 10538, 'epoch': 2} {'type': 'loss', 'content': 0.09308594465255737, 'timestamp': '2025-09-10 02:38:23.510287', 'step': 10539, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 3322488817984}, 'timestamp': '2025-09-10 02:38:23.541644', 'step': 10539, 'epoch': 2} {'type': 'loss', 'content': 0.2513562738895416, 'timestamp': '2025-09-10 02:38:23.566290', 'step': 10540, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 2847885091968}, 'timestamp': '2025-09-10 02:38:23.596788', 'step': 10540, 'epoch': 2} {'type': 'loss', 'content': 0.18136684596538544, 'timestamp': '2025-09-10 02:38:23.600140', 'step': 10541, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 3322488817984}, 'timestamp': '2025-09-10 02:38:23.630517', 'step': 10541, 'epoch': 2} {'type': 'loss', 'content': 0.09457667917013168, 'timestamp': '2025-09-10 02:38:23.633443', 'step': 10542, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 2847885091968}, 'timestamp': '2025-09-10 02:38:23.679935', 'step': 10542, 'epoch': 2} {'type': 'loss', 'content': 0.17391063272953033, 'timestamp': '2025-09-10 02:38:23.682094', 'step': 10543, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 3322488817984}, 'timestamp': '2025-09-10 02:38:23.712596', 'step': 10543, 'epoch': 2} {'type': 'loss', 'content': 0.07889407873153687, 'timestamp': '2025-09-10 02:38:23.736076', 'step': 10544, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 160], 'flops': 4746299996032}, 'timestamp': '2025-09-10 02:38:23.766505', 'step': 10544, 'epoch': 2} {'type': 'loss', 'content': 0.06104505807161331, 'timestamp': '2025-09-10 02:38:23.768816', 'step': 10545, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 144], 'flops': 4271696270016}, 'timestamp': '2025-09-10 02:38:23.799630', 'step': 10545, 'epoch': 2} {'type': 'loss', 'content': 0.10369972139596939, 'timestamp': '2025-09-10 02:38:23.801981', 'step': 10546, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 3322488817984}, 'timestamp': '2025-09-10 02:38:23.832656', 'step': 10546, 'epoch': 2} {'type': 'loss', 'content': 0.09850190579891205, 'timestamp': '2025-09-10 02:38:23.835278', 'step': 10547, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 144], 'flops': 4271696270016}, 'timestamp': '2025-09-10 02:38:23.866828', 'step': 10547, 'epoch': 2} {'type': 'loss', 'content': 0.1525646150112152, 'timestamp': '2025-09-10 02:38:23.890510', 'step': 10548, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 2847885091968}, 'timestamp': '2025-09-10 02:38:23.921655', 'step': 10548, 'epoch': 2} {'type': 'loss', 'content': 0.15321743488311768, 'timestamp': '2025-09-10 02:38:23.924187', 'step': 10549, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 128], 'flops': 3797092544000}, 'timestamp': '2025-09-10 02:38:23.954279', 'step': 10549, 'epoch': 2} {'type': 'loss', 'content': 0.05212859436869621, 'timestamp': '2025-09-10 02:38:23.956476', 'step': 10550, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 3322488817984}, 'timestamp': '2025-09-10 02:38:23.987185', 'step': 10550, 'epoch': 2} {'type': 'loss', 'content': 0.1686609387397766, 'timestamp': '2025-09-10 02:38:23.989376', 'step': 10551, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 128], 'flops': 3797092544000}, 'timestamp': '2025-09-10 02:38:24.019553', 'step': 10551, 'epoch': 2} {'type': 'loss', 'content': 0.12713415920734406, 'timestamp': '2025-09-10 02:38:24.043159', 'step': 10552, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 2847885091968}, 'timestamp': '2025-09-10 02:38:24.075468', 'step': 10552, 'epoch': 2} {'type': 'loss', 'content': 0.15418769419193268, 'timestamp': '2025-09-10 02:38:24.078435', 'step': 10553, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 2847885091968}, 'timestamp': '2025-09-10 02:38:24.108409', 'step': 10553, 'epoch': 2} {'type': 'loss', 'content': 0.08195716142654419, 'timestamp': '2025-09-10 02:38:24.111406', 'step': 10554, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 3322488817984}, 'timestamp': '2025-09-10 02:38:24.144956', 'step': 10554, 'epoch': 2} {'type': 'loss', 'content': 0.11428198963403702, 'timestamp': '2025-09-10 02:38:24.149537', 'step': 10555, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 3322488817984}, 'timestamp': '2025-09-10 02:38:24.188864', 'step': 10555, 'epoch': 2} {'type': 'loss', 'content': 0.10858280956745148, 'timestamp': '2025-09-10 02:38:24.213037', 'step': 10556, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 3322488817984}, 'timestamp': '2025-09-10 02:38:24.245895', 'step': 10556, 'epoch': 2} {'type': 'loss', 'content': 0.08819474279880524, 'timestamp': '2025-09-10 02:38:24.249135', 'step': 10557, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 2847885091968}, 'timestamp': '2025-09-10 02:38:24.283702', 'step': 10557, 'epoch': 2} {'type': 'loss', 'content': 0.08614391088485718, 'timestamp': '2025-09-10 02:38:24.286877', 'step': 10558, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 2847885091968}, 'timestamp': '2025-09-10 02:38:24.321135', 'step': 10558, 'epoch': 2} {'type': 'loss', 'content': 0.10901077091693878, 'timestamp': '2025-09-10 02:38:24.323551', 'step': 10559, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 128], 'flops': 3797092544000}, 'timestamp': '2025-09-10 02:38:24.353790', 'step': 10559, 'epoch': 2} {'type': 'loss', 'content': 0.0933903381228447, 'timestamp': '2025-09-10 02:38:24.378104', 'step': 10560, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 144], 'flops': 4271696270016}, 'timestamp': '2025-09-10 02:38:24.412184', 'step': 10560, 'epoch': 2} {'type': 'loss', 'content': 0.1573394536972046, 'timestamp': '2025-09-10 02:38:24.414535', 'step': 10561, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 2847885091968}, 'timestamp': '2025-09-10 02:38:24.444304', 'step': 10561, 'epoch': 2} {'type': 'loss', 'content': 0.08049661666154861, 'timestamp': '2025-09-10 02:38:24.446922', 'step': 10562, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 3322488817984}, 'timestamp': '2025-09-10 02:38:24.477211', 'step': 10562, 'epoch': 2} {'type': 'loss', 'content': 0.17569753527641296, 'timestamp': '2025-09-10 02:38:24.480577', 'step': 10563, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 2847885091968}, 'timestamp': '2025-09-10 02:38:24.511237', 'step': 10563, 'epoch': 2} {'type': 'loss', 'content': 0.09361935406923294, 'timestamp': '2025-09-10 02:38:24.534768', 'step': 10564, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 128], 'flops': 3797092544000}, 'timestamp': '2025-09-10 02:38:24.567622', 'step': 10564, 'epoch': 2} {'type': 'loss', 'content': 0.10854554921388626, 'timestamp': '2025-09-10 02:38:24.570241', 'step': 10565, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 144], 'flops': 4271696270016}, 'timestamp': '2025-09-10 02:38:24.602445', 'step': 10565, 'epoch': 2} {'type': 'loss', 'content': 0.12297110259532928, 'timestamp': '2025-09-10 02:38:24.605351', 'step': 10566, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 128], 'flops': 3797092544000}, 'timestamp': '2025-09-10 02:38:24.639020', 'step': 10566, 'epoch': 2} {'type': 'loss', 'content': 0.09900424629449844, 'timestamp': '2025-09-10 02:38:24.645070', 'step': 10567, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 3322488817984}, 'timestamp': '2025-09-10 02:38:24.679661', 'step': 10567, 'epoch': 2} {'type': 'loss', 'content': 0.07839663326740265, 'timestamp': '2025-09-10 02:38:24.703295', 'step': 10568, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 2847885091968}, 'timestamp': '2025-09-10 02:38:24.735984', 'step': 10568, 'epoch': 2} {'type': 'loss', 'content': 0.06936249136924744, 'timestamp': '2025-09-10 02:38:24.738645', 'step': 10569, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 3322488817984}, 'timestamp': '2025-09-10 02:38:24.769469', 'step': 10569, 'epoch': 2} {'type': 'loss', 'content': 0.07620812952518463, 'timestamp': '2025-09-10 02:38:24.772198', 'step': 10570, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 3322488817984}, 'timestamp': '2025-09-10 02:38:24.810137', 'step': 10570, 'epoch': 2} {'type': 'loss', 'content': 0.07651203870773315, 'timestamp': '2025-09-10 02:38:24.812268', 'step': 10571, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 2847885091968}, 'timestamp': '2025-09-10 02:38:24.843460', 'step': 10571, 'epoch': 2} {'type': 'loss', 'content': 0.13965246081352234, 'timestamp': '2025-09-10 02:38:24.867508', 'step': 10572, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 2847885091968}, 'timestamp': '2025-09-10 02:38:24.898378', 'step': 10572, 'epoch': 2} {'type': 'loss', 'content': 0.11695902049541473, 'timestamp': '2025-09-10 02:38:24.900813', 'step': 10573, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 128], 'flops': 3797092544000}, 'timestamp': '2025-09-10 02:38:24.932054', 'step': 10573, 'epoch': 2} {'type': 'loss', 'content': 0.11207092553377151, 'timestamp': '2025-09-10 02:38:24.934591', 'step': 10574, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 128], 'flops': 3797092544000}, 'timestamp': '2025-09-10 02:38:24.965002', 'step': 10574, 'epoch': 2} {'type': 'loss', 'content': 0.11515363305807114, 'timestamp': '2025-09-10 02:38:24.967458', 'step': 10575, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 160], 'flops': 4746299996032}, 'timestamp': '2025-09-10 02:38:24.998219', 'step': 10575, 'epoch': 2} {'type': 'loss', 'content': 0.1789514273405075, 'timestamp': '2025-09-10 02:38:25.021748', 'step': 10576, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 128], 'flops': 3797092544000}, 'timestamp': '2025-09-10 02:38:25.054301', 'step': 10576, 'epoch': 2} {'type': 'loss', 'content': 0.152138352394104, 'timestamp': '2025-09-10 02:38:25.056940', 'step': 10577, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 2847885091968}, 'timestamp': '2025-09-10 02:38:25.089751', 'step': 10577, 'epoch': 2} {'type': 'loss', 'content': 0.05052768066525459, 'timestamp': '2025-09-10 02:38:25.092219', 'step': 10578, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 144], 'flops': 4271696270016}, 'timestamp': '2025-09-10 02:38:25.124019', 'step': 10578, 'epoch': 2} {'type': 'loss', 'content': 0.1178954541683197, 'timestamp': '2025-09-10 02:38:25.126383', 'step': 10579, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 3322488817984}, 'timestamp': '2025-09-10 02:38:25.156970', 'step': 10579, 'epoch': 2} {'type': 'loss', 'content': 0.008857511915266514, 'timestamp': '2025-09-10 02:38:25.180591', 'step': 10580, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 2847885091968}, 'timestamp': '2025-09-10 02:38:25.212241', 'step': 10580, 'epoch': 2} {'type': 'loss', 'content': 0.07794875651597977, 'timestamp': '2025-09-10 02:38:25.214434', 'step': 10581, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 144], 'flops': 4271696270016}, 'timestamp': '2025-09-10 02:38:25.244836', 'step': 10581, 'epoch': 2} {'type': 'loss', 'content': 0.1121463030576706, 'timestamp': '2025-09-10 02:38:25.247348', 'step': 10582, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 128], 'flops': 3797092544000}, 'timestamp': '2025-09-10 02:38:25.277917', 'step': 10582, 'epoch': 2} {'type': 'loss', 'content': 0.05897730588912964, 'timestamp': '2025-09-10 02:38:25.280575', 'step': 10583, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 128], 'flops': 3797092544000}, 'timestamp': '2025-09-10 02:38:25.310359', 'step': 10583, 'epoch': 2} {'type': 'loss', 'content': 0.07069318741559982, 'timestamp': '2025-09-10 02:38:25.334073', 'step': 10584, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 3322488817984}, 'timestamp': '2025-09-10 02:38:25.364540', 'step': 10584, 'epoch': 2} {'type': 'loss', 'content': 0.10743670910596848, 'timestamp': '2025-09-10 02:38:25.368568', 'step': 10585, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 2847885091968}, 'timestamp': '2025-09-10 02:38:25.400745', 'step': 10585, 'epoch': 2} {'type': 'loss', 'content': 0.12205299735069275, 'timestamp': '2025-09-10 02:38:25.402983', 'step': 10586, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 160], 'flops': 4746299996032}, 'timestamp': '2025-09-10 02:38:25.433506', 'step': 10586, 'epoch': 2} {'type': 'loss', 'content': 0.06714049726724625, 'timestamp': '2025-09-10 02:38:25.436737', 'step': 10587, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 176], 'flops': 5220903722048}, 'timestamp': '2025-09-10 02:38:25.467238', 'step': 10587, 'epoch': 2} {'type': 'loss', 'content': 0.06829233467578888, 'timestamp': '2025-09-10 02:38:25.492741', 'step': 10588, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 144], 'flops': 4271696270016}, 'timestamp': '2025-09-10 02:38:25.522760', 'step': 10588, 'epoch': 2} {'type': 'loss', 'content': 0.09670628607273102, 'timestamp': '2025-09-10 02:38:25.524952', 'step': 10589, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 128], 'flops': 3797092544000}, 'timestamp': '2025-09-10 02:38:25.554971', 'step': 10589, 'epoch': 2} {'type': 'loss', 'content': 0.11428461968898773, 'timestamp': '2025-09-10 02:38:25.557193', 'step': 10590, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 128], 'flops': 3797092544000}, 'timestamp': '2025-09-10 02:38:25.588347', 'step': 10590, 'epoch': 2} {'type': 'loss', 'content': 0.12860272824764252, 'timestamp': '2025-09-10 02:38:25.590623', 'step': 10591, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 3322488817984}, 'timestamp': '2025-09-10 02:38:25.620486', 'step': 10591, 'epoch': 2} {'type': 'loss', 'content': 0.12435941398143768, 'timestamp': '2025-09-10 02:38:25.644075', 'step': 10592, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 128], 'flops': 3797092544000}, 'timestamp': '2025-09-10 02:38:25.675579', 'step': 10592, 'epoch': 2} {'type': 'loss', 'content': 0.16602754592895508, 'timestamp': '2025-09-10 02:38:25.677802', 'step': 10593, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 2847885091968}, 'timestamp': '2025-09-10 02:38:25.708131', 'step': 10593, 'epoch': 2} {'type': 'loss', 'content': 0.08699730038642883, 'timestamp': '2025-09-10 02:38:25.710296', 'step': 10594, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 128], 'flops': 3797092544000}, 'timestamp': '2025-09-10 02:38:25.740659', 'step': 10594, 'epoch': 2} {'type': 'loss', 'content': 0.11987672746181488, 'timestamp': '2025-09-10 02:38:25.742906', 'step': 10595, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 128], 'flops': 3797092544000}, 'timestamp': '2025-09-10 02:38:25.773180', 'step': 10595, 'epoch': 2} {'type': 'loss', 'content': 0.14407023787498474, 'timestamp': '2025-09-10 02:38:25.796653', 'step': 10596, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 144], 'flops': 4271696270016}, 'timestamp': '2025-09-10 02:38:25.827384', 'step': 10596, 'epoch': 2} {'type': 'loss', 'content': 0.09600736945867538, 'timestamp': '2025-09-10 02:38:25.829777', 'step': 10597, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 2847885091968}, 'timestamp': '2025-09-10 02:38:25.860059', 'step': 10597, 'epoch': 2} {'type': 'loss', 'content': 0.08402096480131149, 'timestamp': '2025-09-10 02:38:25.862362', 'step': 10598, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 128], 'flops': 3797092544000}, 'timestamp': '2025-09-10 02:38:25.892637', 'step': 10598, 'epoch': 2} {'type': 'loss', 'content': 0.06251755356788635, 'timestamp': '2025-09-10 02:38:25.894927', 'step': 10599, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 3322488817984}, 'timestamp': '2025-09-10 02:38:25.925779', 'step': 10599, 'epoch': 2} {'type': 'loss', 'content': 0.16244274377822876, 'timestamp': '2025-09-10 02:38:25.949083', 'step': 10600, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 192], 'flops': 5695507448064}, 'timestamp': '2025-09-10 02:38:25.979909', 'step': 10600, 'epoch': 2} {'type': 'loss', 'content': 0.16025590896606445, 'timestamp': '2025-09-10 02:38:25.982249', 'step': 10601, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 2847885091968}, 'timestamp': '2025-09-10 02:38:26.013136', 'step': 10601, 'epoch': 2} {'type': 'loss', 'content': 0.055279139429330826, 'timestamp': '2025-09-10 02:38:26.015712', 'step': 10602, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 128], 'flops': 3797092544000}, 'timestamp': '2025-09-10 02:38:26.046645', 'step': 10602, 'epoch': 2} {'type': 'loss', 'content': 0.1515922099351883, 'timestamp': '2025-09-10 02:38:26.048540', 'step': 10603, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 3322488817984}, 'timestamp': '2025-09-10 02:38:26.078898', 'step': 10603, 'epoch': 2} {'type': 'loss', 'content': 0.08741997182369232, 'timestamp': '2025-09-10 02:38:26.102443', 'step': 10604, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 128], 'flops': 3797092544000}, 'timestamp': '2025-09-10 02:38:26.132808', 'step': 10604, 'epoch': 2} {'type': 'loss', 'content': 0.11597293615341187, 'timestamp': '2025-09-10 02:38:26.135185', 'step': 10605, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 2847885091968}, 'timestamp': '2025-09-10 02:38:26.165760', 'step': 10605, 'epoch': 2} {'type': 'loss', 'content': 0.16094569861888885, 'timestamp': '2025-09-10 02:38:26.168205', 'step': 10606, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 2847885091968}, 'timestamp': '2025-09-10 02:38:26.198463', 'step': 10606, 'epoch': 2} {'type': 'loss', 'content': 0.10542629659175873, 'timestamp': '2025-09-10 02:38:26.200692', 'step': 10607, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 3322488817984}, 'timestamp': '2025-09-10 02:38:26.230645', 'step': 10607, 'epoch': 2} {'type': 'loss', 'content': 0.15249483287334442, 'timestamp': '2025-09-10 02:38:26.254720', 'step': 10608, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 3322488817984}, 'timestamp': '2025-09-10 02:38:26.285125', 'step': 10608, 'epoch': 2} {'type': 'loss', 'content': 0.1503596007823944, 'timestamp': '2025-09-10 02:38:26.288533', 'step': 10609, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 2847885091968}, 'timestamp': '2025-09-10 02:38:26.319274', 'step': 10609, 'epoch': 2} {'type': 'loss', 'content': 0.1114991083741188, 'timestamp': '2025-09-10 02:38:26.321583', 'step': 10610, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 2847885091968}, 'timestamp': '2025-09-10 02:38:26.352280', 'step': 10610, 'epoch': 2} {'type': 'loss', 'content': 0.17534154653549194, 'timestamp': '2025-09-10 02:38:26.354799', 'step': 10611, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 3322488817984}, 'timestamp': '2025-09-10 02:38:26.385635', 'step': 10611, 'epoch': 2} {'type': 'loss', 'content': 0.08739645034074783, 'timestamp': '2025-09-10 02:38:26.409253', 'step': 10612, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 3322488817984}, 'timestamp': '2025-09-10 02:38:26.439593', 'step': 10612, 'epoch': 2} {'type': 'loss', 'content': 0.13199466466903687, 'timestamp': '2025-09-10 02:38:26.441784', 'step': 10613, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 2847885091968}, 'timestamp': '2025-09-10 02:38:26.473187', 'step': 10613, 'epoch': 2} {'type': 'loss', 'content': 0.11499711871147156, 'timestamp': '2025-09-10 02:38:26.475431', 'step': 10614, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 3322488817984}, 'timestamp': '2025-09-10 02:38:26.507735', 'step': 10614, 'epoch': 2} {'type': 'loss', 'content': 0.14216817915439606, 'timestamp': '2025-09-10 02:38:26.509940', 'step': 10615, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 2847885091968}, 'timestamp': '2025-09-10 02:38:26.539805', 'step': 10615, 'epoch': 2} {'type': 'loss', 'content': 0.12727579474449158, 'timestamp': '2025-09-10 02:38:26.563255', 'step': 10616, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 144], 'flops': 4271696270016}, 'timestamp': '2025-09-10 02:38:26.593854', 'step': 10616, 'epoch': 2} {'type': 'loss', 'content': 0.11620516330003738, 'timestamp': '2025-09-10 02:38:26.596197', 'step': 10617, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 2847885091968}, 'timestamp': '2025-09-10 02:38:26.626286', 'step': 10617, 'epoch': 2} {'type': 'loss', 'content': 0.10042952001094818, 'timestamp': '2025-09-10 02:38:26.628904', 'step': 10618, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 3322488817984}, 'timestamp': '2025-09-10 02:38:26.658932', 'step': 10618, 'epoch': 2} {'type': 'loss', 'content': 0.08592551946640015, 'timestamp': '2025-09-10 02:38:26.669917', 'step': 10619, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 2847885091968}, 'timestamp': '2025-09-10 02:38:26.708263', 'step': 10619, 'epoch': 2} {'type': 'loss', 'content': 0.14871561527252197, 'timestamp': '2025-09-10 02:38:26.731611', 'step': 10620, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 160], 'flops': 4746299996032}, 'timestamp': '2025-09-10 02:38:26.762289', 'step': 10620, 'epoch': 2} {'type': 'loss', 'content': 0.14781278371810913, 'timestamp': '2025-09-10 02:38:26.764609', 'step': 10621, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 2847885091968}, 'timestamp': '2025-09-10 02:38:26.795249', 'step': 10621, 'epoch': 2} {'type': 'loss', 'content': 0.093239925801754, 'timestamp': '2025-09-10 02:38:26.797639', 'step': 10622, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 3322488817984}, 'timestamp': '2025-09-10 02:38:26.828158', 'step': 10622, 'epoch': 2} {'type': 'loss', 'content': 0.1669907569885254, 'timestamp': '2025-09-10 02:38:26.830405', 'step': 10623, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 128], 'flops': 3797092544000}, 'timestamp': '2025-09-10 02:38:26.860024', 'step': 10623, 'epoch': 2} {'type': 'loss', 'content': 0.15307386219501495, 'timestamp': '2025-09-10 02:38:26.883544', 'step': 10624, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 144], 'flops': 4271696270016}, 'timestamp': '2025-09-10 02:38:26.915263', 'step': 10624, 'epoch': 2} {'type': 'loss', 'content': 0.17000184953212738, 'timestamp': '2025-09-10 02:38:26.917515', 'step': 10625, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 3322488817984}, 'timestamp': '2025-09-10 02:38:26.948611', 'step': 10625, 'epoch': 2} {'type': 'loss', 'content': 0.11500541865825653, 'timestamp': '2025-09-10 02:38:26.950781', 'step': 10626, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 2847885091968}, 'timestamp': '2025-09-10 02:38:26.980393', 'step': 10626, 'epoch': 2} {'type': 'loss', 'content': 0.15604610741138458, 'timestamp': '2025-09-10 02:38:26.982634', 'step': 10627, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 144], 'flops': 4271696270016}, 'timestamp': '2025-09-10 02:38:27.013491', 'step': 10627, 'epoch': 2} {'type': 'loss', 'content': 0.21562352776527405, 'timestamp': '2025-09-10 02:38:27.037094', 'step': 10628, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 128], 'flops': 3797092544000}, 'timestamp': '2025-09-10 02:38:27.067212', 'step': 10628, 'epoch': 2} {'type': 'loss', 'content': 0.06181739270687103, 'timestamp': '2025-09-10 02:38:27.069540', 'step': 10629, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 2847885091968}, 'timestamp': '2025-09-10 02:38:27.100123', 'step': 10629, 'epoch': 2} {'type': 'loss', 'content': 0.08075685054063797, 'timestamp': '2025-09-10 02:38:27.102234', 'step': 10630, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 2847885091968}, 'timestamp': '2025-09-10 02:38:27.132517', 'step': 10630, 'epoch': 2} {'type': 'loss', 'content': 0.08537600189447403, 'timestamp': '2025-09-10 02:38:27.134835', 'step': 10631, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 128], 'flops': 3797092544000}, 'timestamp': '2025-09-10 02:38:27.165209', 'step': 10631, 'epoch': 2} {'type': 'loss', 'content': 0.13839752972126007, 'timestamp': '2025-09-10 02:38:27.188637', 'step': 10632, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 2847885091968}, 'timestamp': '2025-09-10 02:38:27.218494', 'step': 10632, 'epoch': 2} {'type': 'loss', 'content': 0.14130103588104248, 'timestamp': '2025-09-10 02:38:27.220779', 'step': 10633, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 3322488817984}, 'timestamp': '2025-09-10 02:38:27.250327', 'step': 10633, 'epoch': 2} {'type': 'loss', 'content': 0.12746231257915497, 'timestamp': '2025-09-10 02:38:27.252790', 'step': 10634, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 160], 'flops': 4746299996032}, 'timestamp': '2025-09-10 02:38:27.283857', 'step': 10634, 'epoch': 2} {'type': 'loss', 'content': 0.13603726029396057, 'timestamp': '2025-09-10 02:38:27.286537', 'step': 10635, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 2847885091968}, 'timestamp': '2025-09-10 02:38:27.319206', 'step': 10635, 'epoch': 2} {'type': 'loss', 'content': 0.16550080478191376, 'timestamp': '2025-09-10 02:38:27.342807', 'step': 10636, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 2847885091968}, 'timestamp': '2025-09-10 02:38:27.373309', 'step': 10636, 'epoch': 2} {'type': 'loss', 'content': 0.10324234515428543, 'timestamp': '2025-09-10 02:38:27.375459', 'step': 10637, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 3322488817984}, 'timestamp': '2025-09-10 02:38:27.405626', 'step': 10637, 'epoch': 2} {'type': 'loss', 'content': 0.15437623858451843, 'timestamp': '2025-09-10 02:38:27.407919', 'step': 10638, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 3322488817984}, 'timestamp': '2025-09-10 02:38:27.438512', 'step': 10638, 'epoch': 2} {'type': 'loss', 'content': 0.15439732372760773, 'timestamp': '2025-09-10 02:38:27.440744', 'step': 10639, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 3322488817984}, 'timestamp': '2025-09-10 02:38:27.470330', 'step': 10639, 'epoch': 2} {'type': 'loss', 'content': 0.08410347998142242, 'timestamp': '2025-09-10 02:38:27.493690', 'step': 10640, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 128], 'flops': 3797092544000}, 'timestamp': '2025-09-10 02:38:27.524215', 'step': 10640, 'epoch': 2} {'type': 'loss', 'content': 0.11884882301092148, 'timestamp': '2025-09-10 02:38:27.526361', 'step': 10641, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 3322488817984}, 'timestamp': '2025-09-10 02:38:27.557004', 'step': 10641, 'epoch': 2} {'type': 'loss', 'content': 0.06496737152338028, 'timestamp': '2025-09-10 02:38:27.559610', 'step': 10642, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 2847885091968}, 'timestamp': '2025-09-10 02:38:27.590018', 'step': 10642, 'epoch': 2} {'type': 'loss', 'content': 0.1021023839712143, 'timestamp': '2025-09-10 02:38:27.592406', 'step': 10643, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 3322488817984}, 'timestamp': '2025-09-10 02:38:27.628377', 'step': 10643, 'epoch': 2} {'type': 'loss', 'content': 0.1393793374300003, 'timestamp': '2025-09-10 02:38:27.652460', 'step': 10644, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 128], 'flops': 3797092544000}, 'timestamp': '2025-09-10 02:38:27.688590', 'step': 10644, 'epoch': 2} {'type': 'loss', 'content': 0.20468682050704956, 'timestamp': '2025-09-10 02:38:27.691045', 'step': 10645, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 2847885091968}, 'timestamp': '2025-09-10 02:38:27.722353', 'step': 10645, 'epoch': 2} {'type': 'loss', 'content': 0.08783552050590515, 'timestamp': '2025-09-10 02:38:27.724527', 'step': 10646, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 3322488817984}, 'timestamp': '2025-09-10 02:38:27.759009', 'step': 10646, 'epoch': 2} {'type': 'loss', 'content': 0.1383209228515625, 'timestamp': '2025-09-10 02:38:27.761685', 'step': 10647, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 128], 'flops': 3797092544000}, 'timestamp': '2025-09-10 02:38:27.793206', 'step': 10647, 'epoch': 2} {'type': 'loss', 'content': 0.15755420923233032, 'timestamp': '2025-09-10 02:38:27.817342', 'step': 10648, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 2847885091968}, 'timestamp': '2025-09-10 02:38:27.852530', 'step': 10648, 'epoch': 2} {'type': 'loss', 'content': 0.09412046521902084, 'timestamp': '2025-09-10 02:38:27.855404', 'step': 10649, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 2847885091968}, 'timestamp': '2025-09-10 02:38:27.886138', 'step': 10649, 'epoch': 2} {'type': 'loss', 'content': 0.05719039961695671, 'timestamp': '2025-09-10 02:38:27.893679', 'step': 10650, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 2847885091968}, 'timestamp': '2025-09-10 02:38:27.924553', 'step': 10650, 'epoch': 2} {'type': 'loss', 'content': 0.09320046752691269, 'timestamp': '2025-09-10 02:38:27.926647', 'step': 10651, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 2847885091968}, 'timestamp': '2025-09-10 02:38:27.960678', 'step': 10651, 'epoch': 2} {'type': 'loss', 'content': 0.08295668661594391, 'timestamp': '2025-09-10 02:38:27.984228', 'step': 10652, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 128], 'flops': 3797092544000}, 'timestamp': '2025-09-10 02:38:28.023516', 'step': 10652, 'epoch': 2} {'type': 'loss', 'content': 0.06853796541690826, 'timestamp': '2025-09-10 02:38:28.026234', 'step': 10653, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 80], 'flops': 2373281365952}, 'timestamp': '2025-09-10 02:38:28.062680', 'step': 10653, 'epoch': 2} {'type': 'loss', 'content': 0.12441932410001755, 'timestamp': '2025-09-10 02:38:28.064818', 'step': 10654, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 3322488817984}, 'timestamp': '2025-09-10 02:38:28.101313', 'step': 10654, 'epoch': 2} {'type': 'loss', 'content': 0.0785825327038765, 'timestamp': '2025-09-10 02:38:28.109580', 'step': 10655, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 3322488817984}, 'timestamp': '2025-09-10 02:38:28.141296', 'step': 10655, 'epoch': 2} {'type': 'loss', 'content': 0.07166888564825058, 'timestamp': '2025-09-10 02:38:28.168837', 'step': 10656, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 3322488817984}, 'timestamp': '2025-09-10 02:38:28.200873', 'step': 10656, 'epoch': 2} {'type': 'loss', 'content': 0.04441417381167412, 'timestamp': '2025-09-10 02:38:28.203342', 'step': 10657, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 80], 'flops': 2373281365952}, 'timestamp': '2025-09-10 02:38:28.234290', 'step': 10657, 'epoch': 2} {'type': 'loss', 'content': 0.0646846666932106, 'timestamp': '2025-09-10 02:38:28.236807', 'step': 10658, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 3322488817984}, 'timestamp': '2025-09-10 02:38:28.267749', 'step': 10658, 'epoch': 2} {'type': 'loss', 'content': 0.12694968283176422, 'timestamp': '2025-09-10 02:38:28.269903', 'step': 10659, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 2847885091968}, 'timestamp': '2025-09-10 02:38:28.300630', 'step': 10659, 'epoch': 2} {'type': 'loss', 'content': 0.09019022434949875, 'timestamp': '2025-09-10 02:38:28.324077', 'step': 10660, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 3322488817984}, 'timestamp': '2025-09-10 02:38:28.355569', 'step': 10660, 'epoch': 2} {'type': 'loss', 'content': 0.12318132817745209, 'timestamp': '2025-09-10 02:38:28.358085', 'step': 10661, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 3322488817984}, 'timestamp': '2025-09-10 02:38:28.391601', 'step': 10661, 'epoch': 2} {'type': 'loss', 'content': 0.14151668548583984, 'timestamp': '2025-09-10 02:38:28.393996', 'step': 10662, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 2847885091968}, 'timestamp': '2025-09-10 02:38:28.433018', 'step': 10662, 'epoch': 2} {'type': 'loss', 'content': 0.17505578696727753, 'timestamp': '2025-09-10 02:38:28.438420', 'step': 10663, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 2847885091968}, 'timestamp': '2025-09-10 02:38:28.469543', 'step': 10663, 'epoch': 2} {'type': 'loss', 'content': 0.1865515559911728, 'timestamp': '2025-09-10 02:38:28.493624', 'step': 10664, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 2847885091968}, 'timestamp': '2025-09-10 02:38:28.525018', 'step': 10664, 'epoch': 2} {'type': 'loss', 'content': 0.05951149761676788, 'timestamp': '2025-09-10 02:38:28.529394', 'step': 10665, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 3322488817984}, 'timestamp': '2025-09-10 02:38:28.560682', 'step': 10665, 'epoch': 2} {'type': 'loss', 'content': 0.09807742387056351, 'timestamp': '2025-09-10 02:38:28.563942', 'step': 10666, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 160], 'flops': 4746299996032}, 'timestamp': '2025-09-10 02:38:28.600321', 'step': 10666, 'epoch': 2} {'type': 'loss', 'content': 0.13121677935123444, 'timestamp': '2025-09-10 02:38:28.602986', 'step': 10667, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 144], 'flops': 4271696270016}, 'timestamp': '2025-09-10 02:38:28.641854', 'step': 10667, 'epoch': 2} {'type': 'loss', 'content': 0.07711698114871979, 'timestamp': '2025-09-10 02:38:28.667194', 'step': 10668, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 2847885091968}, 'timestamp': '2025-09-10 02:38:28.700876', 'step': 10668, 'epoch': 2} {'type': 'loss', 'content': 0.056508563458919525, 'timestamp': '2025-09-10 02:38:28.703081', 'step': 10669, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 3322488817984}, 'timestamp': '2025-09-10 02:38:28.734131', 'step': 10669, 'epoch': 2} {'type': 'loss', 'content': 0.14858724176883698, 'timestamp': '2025-09-10 02:38:28.736337', 'step': 10670, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 2847885091968}, 'timestamp': '2025-09-10 02:38:28.766025', 'step': 10670, 'epoch': 2} {'type': 'loss', 'content': 0.13220234215259552, 'timestamp': '2025-09-10 02:38:28.768539', 'step': 10671, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 128], 'flops': 3797092544000}, 'timestamp': '2025-09-10 02:38:28.801331', 'step': 10671, 'epoch': 2} {'type': 'loss', 'content': 0.0652964860200882, 'timestamp': '2025-09-10 02:38:28.824734', 'step': 10672, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 3322488817984}, 'timestamp': '2025-09-10 02:38:28.856172', 'step': 10672, 'epoch': 2} {'type': 'loss', 'content': 0.10207891464233398, 'timestamp': '2025-09-10 02:38:28.863406', 'step': 10673, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 3322488817984}, 'timestamp': '2025-09-10 02:38:28.896929', 'step': 10673, 'epoch': 2} {'type': 'loss', 'content': 0.100792795419693, 'timestamp': '2025-09-10 02:38:28.898984', 'step': 10674, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 2847885091968}, 'timestamp': '2025-09-10 02:38:28.934623', 'step': 10674, 'epoch': 2} {'type': 'loss', 'content': 0.13144172728061676, 'timestamp': '2025-09-10 02:38:28.945268', 'step': 10675, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 3322488817984}, 'timestamp': '2025-09-10 02:38:28.978592', 'step': 10675, 'epoch': 2} {'type': 'loss', 'content': 0.11233970522880554, 'timestamp': '2025-09-10 02:38:29.002693', 'step': 10676, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 3322488817984}, 'timestamp': '2025-09-10 02:38:29.033024', 'step': 10676, 'epoch': 2} {'type': 'loss', 'content': 0.1820024847984314, 'timestamp': '2025-09-10 02:38:29.036129', 'step': 10677, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 144], 'flops': 4271696270016}, 'timestamp': '2025-09-10 02:38:29.066801', 'step': 10677, 'epoch': 2} {'type': 'loss', 'content': 0.0887240394949913, 'timestamp': '2025-09-10 02:38:29.070075', 'step': 10678, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 2847885091968}, 'timestamp': '2025-09-10 02:38:29.100526', 'step': 10678, 'epoch': 2} {'type': 'loss', 'content': 0.12372829020023346, 'timestamp': '2025-09-10 02:38:29.103966', 'step': 10679, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 128], 'flops': 3797092544000}, 'timestamp': '2025-09-10 02:38:29.135825', 'step': 10679, 'epoch': 2} {'type': 'loss', 'content': 0.10638653486967087, 'timestamp': '2025-09-10 02:38:29.160131', 'step': 10680, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 128], 'flops': 3797092544000}, 'timestamp': '2025-09-10 02:38:29.195087', 'step': 10680, 'epoch': 2} {'type': 'loss', 'content': 0.16481104493141174, 'timestamp': '2025-09-10 02:38:29.197979', 'step': 10681, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 3322488817984}, 'timestamp': '2025-09-10 02:38:29.227970', 'step': 10681, 'epoch': 2} {'type': 'loss', 'content': 0.12274910509586334, 'timestamp': '2025-09-10 02:38:29.231984', 'step': 10682, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 128], 'flops': 3797092544000}, 'timestamp': '2025-09-10 02:38:29.265828', 'step': 10682, 'epoch': 2} {'type': 'loss', 'content': 0.15729686617851257, 'timestamp': '2025-09-10 02:38:29.270032', 'step': 10683, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 192], 'flops': 5695507448064}, 'timestamp': '2025-09-10 02:38:29.301482', 'step': 10683, 'epoch': 2} {'type': 'loss', 'content': 0.13495014607906342, 'timestamp': '2025-09-10 02:38:29.326966', 'step': 10684, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 144], 'flops': 4271696270016}, 'timestamp': '2025-09-10 02:38:29.358751', 'step': 10684, 'epoch': 2} {'type': 'loss', 'content': 0.08001191169023514, 'timestamp': '2025-09-10 02:38:29.361099', 'step': 10685, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 3322488817984}, 'timestamp': '2025-09-10 02:38:29.392366', 'step': 10685, 'epoch': 2} {'type': 'loss', 'content': 0.07883081585168839, 'timestamp': '2025-09-10 02:38:29.394948', 'step': 10686, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 2847885091968}, 'timestamp': '2025-09-10 02:38:29.426897', 'step': 10686, 'epoch': 2} {'type': 'loss', 'content': 0.16971415281295776, 'timestamp': '2025-09-10 02:38:29.431073', 'step': 10687, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 3322488817984}, 'timestamp': '2025-09-10 02:38:29.462185', 'step': 10687, 'epoch': 2} {'type': 'loss', 'content': 0.19356800615787506, 'timestamp': '2025-09-10 02:38:29.485911', 'step': 10688, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 128], 'flops': 3797092544000}, 'timestamp': '2025-09-10 02:38:29.516165', 'step': 10688, 'epoch': 2} {'type': 'loss', 'content': 0.08368922024965286, 'timestamp': '2025-09-10 02:38:29.518690', 'step': 10689, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 3322488817984}, 'timestamp': '2025-09-10 02:38:29.553631', 'step': 10689, 'epoch': 2} {'type': 'loss', 'content': 0.12343678623437881, 'timestamp': '2025-09-10 02:38:29.557250', 'step': 10690, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 176], 'flops': 5220903722048}, 'timestamp': '2025-09-10 02:38:29.592744', 'step': 10690, 'epoch': 2} {'type': 'loss', 'content': 0.0883195549249649, 'timestamp': '2025-09-10 02:38:29.597098', 'step': 10691, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 128], 'flops': 3797092544000}, 'timestamp': '2025-09-10 02:38:29.633211', 'step': 10691, 'epoch': 2} {'type': 'loss', 'content': 0.1304401308298111, 'timestamp': '2025-09-10 02:38:29.662664', 'step': 10692, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 160], 'flops': 4746299996032}, 'timestamp': '2025-09-10 02:38:29.699798', 'step': 10692, 'epoch': 2} {'type': 'loss', 'content': 0.05402424558997154, 'timestamp': '2025-09-10 02:38:29.701927', 'step': 10693, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 2847885091968}, 'timestamp': '2025-09-10 02:38:29.732131', 'step': 10693, 'epoch': 2} {'type': 'loss', 'content': 0.07520616799592972, 'timestamp': '2025-09-10 02:38:29.734632', 'step': 10694, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 80], 'flops': 2373281365952}, 'timestamp': '2025-09-10 02:38:29.768193', 'step': 10694, 'epoch': 2} {'type': 'loss', 'content': 0.21222610771656036, 'timestamp': '2025-09-10 02:38:29.770695', 'step': 10695, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 2847885091968}, 'timestamp': '2025-09-10 02:38:29.802352', 'step': 10695, 'epoch': 2} {'type': 'loss', 'content': 0.08355937153100967, 'timestamp': '2025-09-10 02:38:29.825741', 'step': 10696, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 128], 'flops': 3797092544000}, 'timestamp': '2025-09-10 02:38:29.863518', 'step': 10696, 'epoch': 2} {'type': 'loss', 'content': 0.12743450701236725, 'timestamp': '2025-09-10 02:38:29.865980', 'step': 10697, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 2847885091968}, 'timestamp': '2025-09-10 02:38:29.897349', 'step': 10697, 'epoch': 2} {'type': 'loss', 'content': 0.14268215000629425, 'timestamp': '2025-09-10 02:38:29.899561', 'step': 10698, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 128], 'flops': 3797092544000}, 'timestamp': '2025-09-10 02:38:29.929892', 'step': 10698, 'epoch': 2} {'type': 'loss', 'content': 0.08323287963867188, 'timestamp': '2025-09-10 02:38:29.935432', 'step': 10699, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 3322488817984}, 'timestamp': '2025-09-10 02:38:29.973704', 'step': 10699, 'epoch': 2} {'type': 'loss', 'content': 0.09117882698774338, 'timestamp': '2025-09-10 02:38:29.997317', 'step': 10700, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 2847885091968}, 'timestamp': '2025-09-10 02:38:30.030394', 'step': 10700, 'epoch': 2} {'type': 'loss', 'content': 0.10901104658842087, 'timestamp': '2025-09-10 02:38:30.032705', 'step': 10701, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 3322488817984}, 'timestamp': '2025-09-10 02:38:30.066538', 'step': 10701, 'epoch': 2} {'type': 'loss', 'content': 0.20179852843284607, 'timestamp': '2025-09-10 02:38:30.069093', 'step': 10702, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 3322488817984}, 'timestamp': '2025-09-10 02:38:30.105487', 'step': 10702, 'epoch': 2} {'type': 'loss', 'content': 0.06108994036912918, 'timestamp': '2025-09-10 02:38:30.107639', 'step': 10703, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 2847885091968}, 'timestamp': '2025-09-10 02:38:30.138522', 'step': 10703, 'epoch': 2} {'type': 'loss', 'content': 0.15915845334529877, 'timestamp': '2025-09-10 02:38:30.162832', 'step': 10704, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 3322488817984}, 'timestamp': '2025-09-10 02:38:30.196963', 'step': 10704, 'epoch': 2} {'type': 'loss', 'content': 0.07584751397371292, 'timestamp': '2025-09-10 02:38:30.199488', 'step': 10705, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 2847885091968}, 'timestamp': '2025-09-10 02:38:30.230204', 'step': 10705, 'epoch': 2} {'type': 'loss', 'content': 0.16087615489959717, 'timestamp': '2025-09-10 02:38:30.232612', 'step': 10706, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 2847885091968}, 'timestamp': '2025-09-10 02:38:30.263489', 'step': 10706, 'epoch': 2} {'type': 'loss', 'content': 0.038626741617918015, 'timestamp': '2025-09-10 02:38:30.265973', 'step': 10707, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 128], 'flops': 3797092544000}, 'timestamp': '2025-09-10 02:38:30.295989', 'step': 10707, 'epoch': 2} {'type': 'loss', 'content': 0.20347186923027039, 'timestamp': '2025-09-10 02:38:30.319674', 'step': 10708, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 2847885091968}, 'timestamp': '2025-09-10 02:38:30.351177', 'step': 10708, 'epoch': 2} {'type': 'loss', 'content': 0.16217106580734253, 'timestamp': '2025-09-10 02:38:30.353847', 'step': 10709, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 3322488817984}, 'timestamp': '2025-09-10 02:38:30.383950', 'step': 10709, 'epoch': 2} {'type': 'loss', 'content': 0.09841594099998474, 'timestamp': '2025-09-10 02:38:30.386218', 'step': 10710, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 2847885091968}, 'timestamp': '2025-09-10 02:38:30.416797', 'step': 10710, 'epoch': 2} {'type': 'loss', 'content': 0.08001507073640823, 'timestamp': '2025-09-10 02:38:30.422075', 'step': 10711, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 128], 'flops': 3797092544000}, 'timestamp': '2025-09-10 02:38:30.460289', 'step': 10711, 'epoch': 2} {'type': 'loss', 'content': 0.07466836273670197, 'timestamp': '2025-09-10 02:38:30.483894', 'step': 10712, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 144], 'flops': 4271696270016}, 'timestamp': '2025-09-10 02:38:30.515383', 'step': 10712, 'epoch': 2} {'type': 'loss', 'content': 0.07236737757921219, 'timestamp': '2025-09-10 02:38:30.517681', 'step': 10713, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 3322488817984}, 'timestamp': '2025-09-10 02:38:30.548701', 'step': 10713, 'epoch': 2} {'type': 'loss', 'content': 0.08969074487686157, 'timestamp': '2025-09-10 02:38:30.550920', 'step': 10714, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 3322488817984}, 'timestamp': '2025-09-10 02:38:30.580984', 'step': 10714, 'epoch': 2} {'type': 'loss', 'content': 0.08970291167497635, 'timestamp': '2025-09-10 02:38:30.583385', 'step': 10715, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 128], 'flops': 3797092544000}, 'timestamp': '2025-09-10 02:38:30.613781', 'step': 10715, 'epoch': 2} {'type': 'loss', 'content': 0.07992887496948242, 'timestamp': '2025-09-10 02:38:30.637569', 'step': 10716, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 128], 'flops': 3797092544000}, 'timestamp': '2025-09-10 02:38:30.669522', 'step': 10716, 'epoch': 2} {'type': 'loss', 'content': 0.10009463876485825, 'timestamp': '2025-09-10 02:38:30.672106', 'step': 10717, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 128], 'flops': 3797092544000}, 'timestamp': '2025-09-10 02:38:30.702605', 'step': 10717, 'epoch': 2} {'type': 'loss', 'content': 0.10221099853515625, 'timestamp': '2025-09-10 02:38:30.704906', 'step': 10718, 'epoch': 2} {'type': 'flops', 'content': [{'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1582003754624}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1898404492032}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1898404492032}, {'type': 'perplexity', 'in_batch_dim': [8, 144], 'batch_size': 8, 'flops': 2847606704256}, {'type': 'perplexity', 'in_batch_dim': [8, 64], 'batch_size': 8, 'flops': 1265603017216}, {'type': 'perplexity', 'in_batch_dim': [8, 112], 'batch_size': 8, 'flops': 2214805229440}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1898404492032}, {'type': 'perplexity', 'in_batch_dim': [8, 112], 'batch_size': 8, 'flops': 2214805229440}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1898404492032}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1898404492032}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1898404492032}, {'type': 'perplexity', 'in_batch_dim': [8, 112], 'batch_size': 8, 'flops': 2214805229440}, {'type': 'perplexity', 'in_batch_dim': [8, 112], 'batch_size': 8, 'flops': 2214805229440}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1582003754624}, {'type': 'perplexity', 'in_batch_dim': [8, 144], 'batch_size': 8, 'flops': 2847606704256}, {'type': 'perplexity', 'in_batch_dim': [8, 112], 'batch_size': 8, 'flops': 2214805229440}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1582003754624}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1898404492032}, {'type': 'perplexity', 'in_batch_dim': [8, 64], 'batch_size': 8, 'flops': 1265603017216}, {'type': 'perplexity', 'in_batch_dim': [8, 64], 'batch_size': 8, 'flops': 1265603017216}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1898404492032}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1582003754624}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1582003754624}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1582003754624}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1582003754624}, {'type': 'perplexity', 'in_batch_dim': [8, 64], 'batch_size': 8, 'flops': 1265603017216}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1898404492032}, {'type': 'perplexity', 'in_batch_dim': [8, 64], 'batch_size': 8, 'flops': 1265603017216}, {'type': 'perplexity', 'in_batch_dim': [8, 64], 'batch_size': 8, 'flops': 1265603017216}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1582003754624}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1582003754624}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1582003754624}, {'type': 'perplexity', 'in_batch_dim': [8, 112], 'batch_size': 8, 'flops': 2214805229440}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1582003754624}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1582003754624}, {'type': 'perplexity', 'in_batch_dim': [8, 160], 'batch_size': 8, 'flops': 3164007441664}, {'type': 'perplexity', 'in_batch_dim': [8, 64], 'batch_size': 8, 'flops': 1265603017216}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1898404492032}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1898404492032}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1898404492032}, {'type': 'perplexity', 'in_batch_dim': [8, 64], 'batch_size': 8, 'flops': 1265603017216}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1582003754624}, {'type': 'perplexity', 'in_batch_dim': [8, 64], 'batch_size': 8, 'flops': 1265603017216}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1582003754624}, {'type': 'perplexity', 'in_batch_dim': [8, 64], 'batch_size': 8, 'flops': 1265603017216}, {'type': 'perplexity', 'in_batch_dim': [8, 112], 'batch_size': 8, 'flops': 2214805229440}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1582003754624}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1898404492032}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1898404492032}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1898404492032}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1898404492032}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1898404492032}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1898404492032}, {'type': 'perplexity', 'in_batch_dim': [8, 64], 'batch_size': 8, 'flops': 1265603017216}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1898404492032}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1582003754624}, {'type': 'perplexity', 'in_batch_dim': [8, 64], 'batch_size': 8, 'flops': 1265603017216}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1898404492032}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1898404492032}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1898404492032}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1898404492032}, {'type': 'perplexity', 'in_batch_dim': [8, 128], 'batch_size': 8, 'flops': 2531205966848}, {'type': 'perplexity', 'in_batch_dim': [8, 112], 'batch_size': 8, 'flops': 2214805229440}, {'type': 'perplexity', 'in_batch_dim': [8, 64], 'batch_size': 8, 'flops': 1265603017216}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1582003754624}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1898404492032}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1582003754624}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1898404492032}, {'type': 'perplexity', 'in_batch_dim': [8, 64], 'batch_size': 8, 'flops': 1265603017216}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1898404492032}, {'type': 'perplexity', 'in_batch_dim': [8, 112], 'batch_size': 8, 'flops': 2214805229440}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1898404492032}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1582003754624}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1582003754624}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1582003754624}, {'type': 'perplexity', 'in_batch_dim': [8, 64], 'batch_size': 8, 'flops': 1265603017216}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1582003754624}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1898404492032}, {'type': 'perplexity', 'in_batch_dim': [8, 64], 'batch_size': 8, 'flops': 1265603017216}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1582003754624}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1898404492032}, {'type': 'perplexity', 'in_batch_dim': [8, 64], 'batch_size': 8, 'flops': 1265603017216}, {'type': 'perplexity', 'in_batch_dim': [8, 112], 'batch_size': 8, 'flops': 2214805229440}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1582003754624}, {'type': 'perplexity', 'in_batch_dim': [8, 144], 'batch_size': 8, 'flops': 2847606704256}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1582003754624}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1582003754624}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1582003754624}, {'type': 'perplexity', 'in_batch_dim': [8, 64], 'batch_size': 8, 'flops': 1265603017216}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1898404492032}, {'type': 'perplexity', 'in_batch_dim': [8, 112], 'batch_size': 8, 'flops': 2214805229440}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1898404492032}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1582003754624}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1582003754624}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1582003754624}, {'type': 'perplexity', 'in_batch_dim': [8, 112], 'batch_size': 8, 'flops': 2214805229440}, {'type': 'perplexity', 'in_batch_dim': [8, 64], 'batch_size': 8, 'flops': 1265603017216}, {'type': 'perplexity', 'in_batch_dim': [8, 112], 'batch_size': 8, 'flops': 2214805229440}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1582003754624}, {'type': 'perplexity', 'in_batch_dim': [8, 64], 'batch_size': 8, 'flops': 1265603017216}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1582003754624}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1582003754624}, {'type': 'perplexity', 'in_batch_dim': [8, 64], 'batch_size': 8, 'flops': 1265603017216}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1582003754624}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1582003754624}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1582003754624}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1898404492032}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1582003754624}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1582003754624}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1898404492032}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1898404492032}, {'type': 'perplexity', 'in_batch_dim': [8, 112], 'batch_size': 8, 'flops': 2214805229440}, {'type': 'perplexity', 'in_batch_dim': [8, 64], 'batch_size': 8, 'flops': 1265603017216}, {'type': 'perplexity', 'in_batch_dim': [8, 112], 'batch_size': 8, 'flops': 2214805229440}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1898404492032}, {'type': 'perplexity', 'in_batch_dim': [8, 112], 'batch_size': 8, 'flops': 2214805229440}, {'type': 'perplexity', 'in_batch_dim': [8, 128], 'batch_size': 8, 'flops': 2531205966848}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1582003754624}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1898404492032}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1898404492032}, {'type': 'perplexity', 'in_batch_dim': [8, 192], 'batch_size': 8, 'flops': 3796808916480}, {'type': 'perplexity', 'in_batch_dim': [8, 64], 'batch_size': 8, 'flops': 1265603017216}, {'type': 'perplexity', 'in_batch_dim': [8, 144], 'batch_size': 8, 'flops': 2847606704256}, {'type': 'perplexity', 'in_batch_dim': [8, 64], 'batch_size': 8, 'flops': 1265603017216}, {'type': 'perplexity', 'in_batch_dim': [8, 144], 'batch_size': 8, 'flops': 2847606704256}, {'type': 'perplexity', 'in_batch_dim': [8, 64], 'batch_size': 8, 'flops': 1265603017216}, {'type': 'perplexity', 'in_batch_dim': [8, 64], 'batch_size': 8, 'flops': 1265603017216}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1898404492032}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1898404492032}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1582003754624}, {'type': 'perplexity', 'in_batch_dim': [8, 128], 'batch_size': 8, 'flops': 2531205966848}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1898404492032}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1898404492032}, {'type': 'perplexity', 'in_batch_dim': [8, 112], 'batch_size': 8, 'flops': 2214805229440}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1582003754624}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1582003754624}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1898404492032}, {'type': 'perplexity', 'in_batch_dim': [8, 64], 'batch_size': 8, 'flops': 1265603017216}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1898404492032}, {'type': 'perplexity', 'in_batch_dim': [8, 112], 'batch_size': 8, 'flops': 2214805229440}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1898404492032}, {'type': 'perplexity', 'in_batch_dim': [8, 64], 'batch_size': 8, 'flops': 1265603017216}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1582003754624}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1898404492032}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1898404492032}, {'type': 'perplexity', 'in_batch_dim': [8, 64], 'batch_size': 8, 'flops': 1265603017216}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1898404492032}, {'type': 'perplexity', 'in_batch_dim': [8, 64], 'batch_size': 8, 'flops': 1265603017216}, {'type': 'perplexity', 'in_batch_dim': [8, 112], 'batch_size': 8, 'flops': 2214805229440}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1582003754624}, {'type': 'perplexity', 'in_batch_dim': [8, 128], 'batch_size': 8, 'flops': 2531205966848}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1582003754624}, {'type': 'perplexity', 'in_batch_dim': [8, 112], 'batch_size': 8, 'flops': 2214805229440}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1898404492032}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1582003754624}, {'type': 'perplexity', 'in_batch_dim': [8, 112], 'batch_size': 8, 'flops': 2214805229440}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1582003754624}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1898404492032}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1898404492032}, {'type': 'perplexity', 'in_batch_dim': [8, 128], 'batch_size': 8, 'flops': 2531205966848}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1898404492032}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1898404492032}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1582003754624}, {'type': 'perplexity', 'in_batch_dim': [8, 112], 'batch_size': 8, 'flops': 2214805229440}, {'type': 'perplexity', 'in_batch_dim': [8, 128], 'batch_size': 8, 'flops': 2531205966848}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1898404492032}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1582003754624}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1582003754624}, {'type': 'perplexity', 'in_batch_dim': [8, 128], 'batch_size': 8, 'flops': 2531205966848}, {'type': 'perplexity', 'in_batch_dim': [8, 112], 'batch_size': 8, 'flops': 2214805229440}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1582003754624}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1582003754624}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1898404492032}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1582003754624}, {'type': 'perplexity', 'in_batch_dim': [8, 64], 'batch_size': 8, 'flops': 1265603017216}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1582003754624}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1582003754624}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1582003754624}, {'type': 'perplexity', 'in_batch_dim': [8, 112], 'batch_size': 8, 'flops': 2214805229440}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1582003754624}, {'type': 'perplexity', 'in_batch_dim': [8, 64], 'batch_size': 8, 'flops': 1265603017216}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1582003754624}, {'type': 'perplexity', 'in_batch_dim': [8, 128], 'batch_size': 8, 'flops': 2531205966848}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1898404492032}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1898404492032}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1582003754624}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1898404492032}, {'type': 'perplexity', 'in_batch_dim': [8, 64], 'batch_size': 8, 'flops': 1265603017216}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1898404492032}, {'type': 'perplexity', 'in_batch_dim': [8, 112], 'batch_size': 8, 'flops': 2214805229440}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1898404492032}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1582003754624}, {'type': 'perplexity', 'in_batch_dim': [8, 112], 'batch_size': 8, 'flops': 2214805229440}, {'type': 'perplexity', 'in_batch_dim': [8, 112], 'batch_size': 8, 'flops': 2214805229440}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1898404492032}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1898404492032}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1898404492032}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1898404492032}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1582003754624}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1582003754624}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1582003754624}, {'type': 'perplexity', 'in_batch_dim': [8, 112], 'batch_size': 8, 'flops': 2214805229440}, {'type': 'perplexity', 'in_batch_dim': [8, 64], 'batch_size': 8, 'flops': 1265603017216}, {'type': 'perplexity', 'in_batch_dim': [8, 144], 'batch_size': 8, 'flops': 2847606704256}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1898404492032}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1898404492032}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1898404492032}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1898404492032}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1898404492032}, {'type': 'perplexity', 'in_batch_dim': [8, 112], 'batch_size': 8, 'flops': 2214805229440}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1582003754624}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1582003754624}, {'type': 'perplexity', 'in_batch_dim': [8, 64], 'batch_size': 8, 'flops': 1265603017216}, {'type': 'perplexity', 'in_batch_dim': [8, 112], 'batch_size': 8, 'flops': 2214805229440}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1582003754624}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1898404492032}, {'type': 'perplexity', 'in_batch_dim': [8, 64], 'batch_size': 8, 'flops': 1265603017216}, {'type': 'perplexity', 'in_batch_dim': [8, 112], 'batch_size': 8, 'flops': 2214805229440}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1898404492032}, {'type': 'perplexity', 'in_batch_dim': [8, 64], 'batch_size': 8, 'flops': 1265603017216}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1582003754624}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1582003754624}, {'type': 'perplexity', 'in_batch_dim': [8, 64], 'batch_size': 8, 'flops': 1265603017216}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1898404492032}, {'type': 'perplexity', 'in_batch_dim': [8, 64], 'batch_size': 8, 'flops': 1265603017216}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1582003754624}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1898404492032}, {'type': 'perplexity', 'in_batch_dim': [8, 112], 'batch_size': 8, 'flops': 2214805229440}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1582003754624}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1898404492032}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1582003754624}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1898404492032}, {'type': 'perplexity', 'in_batch_dim': [8, 112], 'batch_size': 8, 'flops': 2214805229440}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1898404492032}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1898404492032}, {'type': 'perplexity', 'in_batch_dim': [8, 64], 'batch_size': 8, 'flops': 1265603017216}, {'type': 'perplexity', 'in_batch_dim': [8, 64], 'batch_size': 8, 'flops': 1265603017216}, {'type': 'perplexity', 'in_batch_dim': [8, 128], 'batch_size': 8, 'flops': 2531205966848}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1582003754624}, {'type': 'perplexity', 'in_batch_dim': [8, 64], 'batch_size': 8, 'flops': 1265603017216}, {'type': 'perplexity', 'in_batch_dim': [8, 112], 'batch_size': 8, 'flops': 2214805229440}, {'type': 'perplexity', 'in_batch_dim': [8, 112], 'batch_size': 8, 'flops': 2214805229440}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1898404492032}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1582003754624}, {'type': 'perplexity', 'in_batch_dim': [8, 128], 'batch_size': 8, 'flops': 2531205966848}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1898404492032}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1582003754624}, {'type': 'perplexity', 'in_batch_dim': [8, 112], 'batch_size': 8, 'flops': 2214805229440}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1898404492032}, {'type': 'perplexity', 'in_batch_dim': [8, 64], 'batch_size': 8, 'flops': 1265603017216}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1582003754624}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1898404492032}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1898404492032}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1582003754624}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1582003754624}, {'type': 'perplexity', 'in_batch_dim': [8, 64], 'batch_size': 8, 'flops': 1265603017216}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1898404492032}, {'type': 'perplexity', 'in_batch_dim': [8, 112], 'batch_size': 8, 'flops': 2214805229440}, {'type': 'perplexity', 'in_batch_dim': [8, 64], 'batch_size': 8, 'flops': 1265603017216}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1898404492032}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1582003754624}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1582003754624}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1898404492032}, {'type': 'perplexity', 'in_batch_dim': [8, 112], 'batch_size': 8, 'flops': 2214805229440}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1898404492032}, {'type': 'perplexity', 'in_batch_dim': [8, 112], 'batch_size': 8, 'flops': 2214805229440}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1582003754624}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1582003754624}, {'type': 'perplexity', 'in_batch_dim': [8, 64], 'batch_size': 8, 'flops': 1265603017216}, {'type': 'perplexity', 'in_batch_dim': [8, 112], 'batch_size': 8, 'flops': 2214805229440}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1582003754624}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1898404492032}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1582003754624}, {'type': 'perplexity', 'in_batch_dim': [8, 112], 'batch_size': 8, 'flops': 2214805229440}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1898404492032}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1898404492032}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1582003754624}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1898404492032}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1582003754624}, {'type': 'perplexity', 'in_batch_dim': [8, 112], 'batch_size': 8, 'flops': 2214805229440}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1582003754624}, {'type': 'perplexity', 'in_batch_dim': [8, 64], 'batch_size': 8, 'flops': 1265603017216}, {'type': 'perplexity', 'in_batch_dim': [8, 64], 'batch_size': 8, 'flops': 1265603017216}, {'type': 'perplexity', 'in_batch_dim': [8, 48], 'batch_size': 8, 'flops': 949202279808}, {'type': 'perplexity', 'in_batch_dim': [8, 112], 'batch_size': 8, 'flops': 2214805229440}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1582003754624}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1898404492032}, {'type': 'perplexity', 'in_batch_dim': [8, 128], 'batch_size': 8, 'flops': 2531205966848}, {'type': 'perplexity', 'in_batch_dim': [8, 112], 'batch_size': 8, 'flops': 2214805229440}, {'type': 'perplexity', 'in_batch_dim': [8, 128], 'batch_size': 8, 'flops': 2531205966848}, {'type': 'perplexity', 'in_batch_dim': [8, 112], 'batch_size': 8, 'flops': 2214805229440}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1582003754624}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1898404492032}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1582003754624}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1582003754624}, {'type': 'perplexity', 'in_batch_dim': [8, 112], 'batch_size': 8, 'flops': 2214805229440}, {'type': 'perplexity', 'in_batch_dim': [8, 112], 'batch_size': 8, 'flops': 2214805229440}, {'type': 'perplexity', 'in_batch_dim': [8, 64], 'batch_size': 8, 'flops': 1265603017216}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1898404492032}, {'type': 'perplexity', 'in_batch_dim': [8, 112], 'batch_size': 8, 'flops': 2214805229440}, {'type': 'perplexity', 'in_batch_dim': [8, 64], 'batch_size': 8, 'flops': 1265603017216}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1898404492032}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1582003754624}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1898404492032}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1898404492032}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1898404492032}, {'type': 'perplexity', 'in_batch_dim': [8, 48], 'batch_size': 8, 'flops': 949202279808}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1898404492032}, {'type': 'perplexity', 'in_batch_dim': [8, 112], 'batch_size': 8, 'flops': 2214805229440}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1582003754624}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1898404492032}, {'type': 'perplexity', 'in_batch_dim': [8, 64], 'batch_size': 8, 'flops': 1265603017216}, {'type': 'perplexity', 'in_batch_dim': [8, 112], 'batch_size': 8, 'flops': 2214805229440}, {'type': 'perplexity', 'in_batch_dim': [8, 64], 'batch_size': 8, 'flops': 1265603017216}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1582003754624}, {'type': 'perplexity', 'in_batch_dim': [8, 64], 'batch_size': 8, 'flops': 1265603017216}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1582003754624}, {'type': 'perplexity', 'in_batch_dim': [8, 112], 'batch_size': 8, 'flops': 2214805229440}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1582003754624}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1582003754624}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1582003754624}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1582003754624}, {'type': 'perplexity', 'in_batch_dim': [8, 112], 'batch_size': 8, 'flops': 2214805229440}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1582003754624}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1898404492032}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1898404492032}, {'type': 'perplexity', 'in_batch_dim': [8, 112], 'batch_size': 8, 'flops': 2214805229440}, {'type': 'perplexity', 'in_batch_dim': [8, 144], 'batch_size': 8, 'flops': 2847606704256}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1582003754624}, {'type': 'perplexity', 'in_batch_dim': [8, 64], 'batch_size': 8, 'flops': 1265603017216}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1898404492032}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1898404492032}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1582003754624}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1898404492032}, {'type': 'perplexity', 'in_batch_dim': [8, 128], 'batch_size': 8, 'flops': 2531205966848}, {'type': 'perplexity', 'in_batch_dim': [8, 112], 'batch_size': 8, 'flops': 2214805229440}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1898404492032}, {'type': 'perplexity', 'in_batch_dim': [8, 64], 'batch_size': 8, 'flops': 1265603017216}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1898404492032}, {'type': 'perplexity', 'in_batch_dim': [8, 160], 'batch_size': 8, 'flops': 3164007441664}, {'type': 'perplexity', 'in_batch_dim': [8, 64], 'batch_size': 8, 'flops': 1265603017216}, {'type': 'perplexity', 'in_batch_dim': [8, 112], 'batch_size': 8, 'flops': 2214805229440}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1898404492032}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1898404492032}, {'type': 'perplexity', 'in_batch_dim': [8, 144], 'batch_size': 8, 'flops': 2847606704256}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1582003754624}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1898404492032}, {'type': 'perplexity', 'in_batch_dim': [8, 64], 'batch_size': 8, 'flops': 1265603017216}, {'type': 'perplexity', 'in_batch_dim': [8, 64], 'batch_size': 8, 'flops': 1265603017216}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1898404492032}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1898404492032}, {'type': 'perplexity', 'in_batch_dim': [8, 112], 'batch_size': 8, 'flops': 2214805229440}, {'type': 'perplexity', 'in_batch_dim': [8, 112], 'batch_size': 8, 'flops': 2214805229440}, {'type': 'perplexity', 'in_batch_dim': [8, 128], 'batch_size': 8, 'flops': 2531205966848}, {'type': 'perplexity', 'in_batch_dim': [8, 112], 'batch_size': 8, 'flops': 2214805229440}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1582003754624}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1582003754624}, {'type': 'perplexity', 'in_batch_dim': [8, 112], 'batch_size': 8, 'flops': 2214805229440}, {'type': 'perplexity', 'in_batch_dim': [8, 128], 'batch_size': 8, 'flops': 2531205966848}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1582003754624}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1582003754624}, {'type': 'perplexity', 'in_batch_dim': [8, 64], 'batch_size': 8, 'flops': 1265603017216}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1898404492032}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1582003754624}, {'type': 'perplexity', 'in_batch_dim': [8, 112], 'batch_size': 8, 'flops': 2214805229440}, {'type': 'perplexity', 'in_batch_dim': [8, 64], 'batch_size': 8, 'flops': 1265603017216}, {'type': 'perplexity', 'in_batch_dim': [8, 64], 'batch_size': 8, 'flops': 1265603017216}, {'type': 'perplexity', 'in_batch_dim': [8, 128], 'batch_size': 8, 'flops': 2531205966848}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1898404492032}, {'type': 'perplexity', 'in_batch_dim': [8, 112], 'batch_size': 8, 'flops': 2214805229440}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1582003754624}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1582003754624}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1582003754624}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1582003754624}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1582003754624}, {'type': 'perplexity', 'in_batch_dim': [8, 112], 'batch_size': 8, 'flops': 2214805229440}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1898404492032}, {'type': 'perplexity', 'in_batch_dim': [8, 112], 'batch_size': 8, 'flops': 2214805229440}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1898404492032}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1582003754624}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1582003754624}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1898404492032}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1898404492032}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1582003754624}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1582003754624}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1582003754624}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1582003754624}, {'type': 'perplexity', 'in_batch_dim': [8, 48], 'batch_size': 8, 'flops': 949202279808}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1582003754624}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1898404492032}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1582003754624}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1898404492032}, {'type': 'perplexity', 'in_batch_dim': [8, 112], 'batch_size': 8, 'flops': 2214805229440}, {'type': 'perplexity', 'in_batch_dim': [8, 128], 'batch_size': 8, 'flops': 2531205966848}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1898404492032}, {'type': 'perplexity', 'in_batch_dim': [8, 64], 'batch_size': 8, 'flops': 1265603017216}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1898404492032}, {'type': 'perplexity', 'in_batch_dim': [8, 64], 'batch_size': 8, 'flops': 1265603017216}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1582003754624}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1582003754624}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1582003754624}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1582003754624}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1898404492032}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1582003754624}, {'type': 'perplexity', 'in_batch_dim': [8, 112], 'batch_size': 8, 'flops': 2214805229440}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1898404492032}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1582003754624}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1898404492032}, {'type': 'perplexity', 'in_batch_dim': [8, 64], 'batch_size': 8, 'flops': 1265603017216}, {'type': 'perplexity', 'in_batch_dim': [8, 64], 'batch_size': 8, 'flops': 1265603017216}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1898404492032}, {'type': 'perplexity', 'in_batch_dim': [8, 64], 'batch_size': 8, 'flops': 1265603017216}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1582003754624}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1582003754624}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1582003754624}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1582003754624}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1582003754624}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1582003754624}, {'type': 'perplexity', 'in_batch_dim': [8, 64], 'batch_size': 8, 'flops': 1265603017216}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1582003754624}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1582003754624}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1898404492032}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1898404492032}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1898404492032}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1898404492032}, {'type': 'perplexity', 'in_batch_dim': [8, 128], 'batch_size': 8, 'flops': 2531205966848}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1582003754624}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1898404492032}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1898404492032}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1898404492032}, {'type': 'perplexity', 'in_batch_dim': [8, 64], 'batch_size': 8, 'flops': 1265603017216}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1898404492032}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1898404492032}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1582003754624}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1898404492032}, {'type': 'perplexity', 'in_batch_dim': [8, 112], 'batch_size': 8, 'flops': 2214805229440}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1898404492032}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1582003754624}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1898404492032}, {'type': 'perplexity', 'in_batch_dim': [8, 64], 'batch_size': 8, 'flops': 1265603017216}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1898404492032}, {'type': 'perplexity', 'in_batch_dim': [8, 112], 'batch_size': 8, 'flops': 2214805229440}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1898404492032}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1898404492032}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1582003754624}, {'type': 'perplexity', 'in_batch_dim': [8, 64], 'batch_size': 8, 'flops': 1265603017216}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1582003754624}, {'type': 'perplexity', 'in_batch_dim': [8, 128], 'batch_size': 8, 'flops': 2531205966848}, {'type': 'perplexity', 'in_batch_dim': [8, 64], 'batch_size': 8, 'flops': 1265603017216}, {'type': 'perplexity', 'in_batch_dim': [8, 128], 'batch_size': 8, 'flops': 2531205966848}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1898404492032}, {'type': 'perplexity', 'in_batch_dim': [8, 112], 'batch_size': 8, 'flops': 2214805229440}, {'type': 'perplexity', 'in_batch_dim': [8, 112], 'batch_size': 8, 'flops': 2214805229440}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1898404492032}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1898404492032}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1582003754624}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1582003754624}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1582003754624}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1898404492032}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1582003754624}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1582003754624}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1898404492032}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1582003754624}, {'type': 'perplexity', 'in_batch_dim': [8, 64], 'batch_size': 8, 'flops': 1265603017216}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1898404492032}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1898404492032}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1898404492032}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1898404492032}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1582003754624}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1898404492032}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1582003754624}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1582003754624}, {'type': 'perplexity', 'in_batch_dim': [8, 112], 'batch_size': 8, 'flops': 2214805229440}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1582003754624}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1898404492032}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1582003754624}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1582003754624}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1582003754624}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1898404492032}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1582003754624}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1898404492032}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1582003754624}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1582003754624}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1898404492032}, {'type': 'perplexity', 'in_batch_dim': [8, 112], 'batch_size': 8, 'flops': 2214805229440}, {'type': 'perplexity', 'in_batch_dim': [8, 128], 'batch_size': 8, 'flops': 2531205966848}, {'type': 'perplexity', 'in_batch_dim': [8, 112], 'batch_size': 8, 'flops': 2214805229440}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1582003754624}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1898404492032}, {'type': 'perplexity', 'in_batch_dim': [8, 64], 'batch_size': 8, 'flops': 1265603017216}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1898404492032}, {'type': 'perplexity', 'in_batch_dim': [8, 64], 'batch_size': 8, 'flops': 1265603017216}, {'type': 'perplexity', 'in_batch_dim': [8, 144], 'batch_size': 8, 'flops': 2847606704256}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1582003754624}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1898404492032}, {'type': 'perplexity', 'in_batch_dim': [8, 112], 'batch_size': 8, 'flops': 2214805229440}, {'type': 'perplexity', 'in_batch_dim': [8, 144], 'batch_size': 8, 'flops': 2847606704256}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1582003754624}, {'type': 'perplexity', 'in_batch_dim': [8, 112], 'batch_size': 8, 'flops': 2214805229440}, {'type': 'perplexity', 'in_batch_dim': [8, 64], 'batch_size': 8, 'flops': 1265603017216}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1582003754624}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1898404492032}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1898404492032}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1582003754624}, {'type': 'perplexity', 'in_batch_dim': [8, 112], 'batch_size': 8, 'flops': 2214805229440}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1582003754624}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1582003754624}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1582003754624}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1582003754624}, {'type': 'perplexity', 'in_batch_dim': [8, 112], 'batch_size': 8, 'flops': 2214805229440}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1898404492032}, {'type': 'perplexity', 'in_batch_dim': [8, 128], 'batch_size': 8, 'flops': 2531205966848}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1582003754624}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1582003754624}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1898404492032}, {'type': 'perplexity', 'in_batch_dim': [8, 64], 'batch_size': 8, 'flops': 1265603017216}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1898404492032}, {'type': 'perplexity', 'in_batch_dim': [8, 64], 'batch_size': 8, 'flops': 1265603017216}, {'type': 'perplexity', 'in_batch_dim': [8, 112], 'batch_size': 8, 'flops': 2214805229440}, {'type': 'perplexity', 'in_batch_dim': [8, 112], 'batch_size': 8, 'flops': 2214805229440}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1582003754624}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1898404492032}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1582003754624}, {'type': 'perplexity', 'in_batch_dim': [8, 64], 'batch_size': 8, 'flops': 1265603017216}, {'type': 'perplexity', 'in_batch_dim': [8, 64], 'batch_size': 8, 'flops': 1265603017216}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1898404492032}, {'type': 'perplexity', 'in_batch_dim': [8, 64], 'batch_size': 8, 'flops': 1265603017216}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1898404492032}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1898404492032}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1582003754624}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1898404492032}, {'type': 'perplexity', 'in_batch_dim': [8, 144], 'batch_size': 8, 'flops': 2847606704256}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1898404492032}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1582003754624}, {'type': 'perplexity', 'in_batch_dim': [8, 128], 'batch_size': 8, 'flops': 2531205966848}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1898404492032}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1898404492032}, {'type': 'perplexity', 'in_batch_dim': [8, 112], 'batch_size': 8, 'flops': 2214805229440}, {'type': 'perplexity', 'in_batch_dim': [8, 112], 'batch_size': 8, 'flops': 2214805229440}, {'type': 'perplexity', 'in_batch_dim': [8, 64], 'batch_size': 8, 'flops': 1265603017216}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1582003754624}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1898404492032}, {'type': 'perplexity', 'in_batch_dim': [8, 128], 'batch_size': 8, 'flops': 2531205966848}, {'type': 'perplexity', 'in_batch_dim': [8, 144], 'batch_size': 8, 'flops': 2847606704256}, {'type': 'perplexity', 'in_batch_dim': [8, 112], 'batch_size': 8, 'flops': 2214805229440}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1582003754624}, {'type': 'perplexity', 'in_batch_dim': [8, 64], 'batch_size': 8, 'flops': 1265603017216}, {'type': 'perplexity', 'in_batch_dim': [8, 64], 'batch_size': 8, 'flops': 1265603017216}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1898404492032}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1582003754624}, {'type': 'perplexity', 'in_batch_dim': [8, 64], 'batch_size': 8, 'flops': 1265603017216}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1898404492032}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1582003754624}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1898404492032}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1898404492032}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1582003754624}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1582003754624}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1582003754624}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1582003754624}, {'type': 'perplexity', 'in_batch_dim': [3, 48], 'batch_size': 8, 'flops': 949202279808}], 'timestamp': '2025-09-10 02:38:38.619506', 'step': 10718, 'epoch': 2} {'type': 'pplx', 'content': 12608.090050331528, 'timestamp': '2025-09-10 02:38:38.622661', 'step': 10718, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 3322488817984}, 'timestamp': '2025-09-10 02:38:38.652970', 'step': 10718, 'epoch': 2} {'type': 'loss', 'content': 0.1504030078649521, 'timestamp': '2025-09-10 02:38:38.655572', 'step': 10719, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 2847885091968}, 'timestamp': '2025-09-10 02:38:38.687305', 'step': 10719, 'epoch': 2} {'type': 'loss', 'content': 0.08307001739740372, 'timestamp': '2025-09-10 02:38:38.711184', 'step': 10720, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 2847885091968}, 'timestamp': '2025-09-10 02:38:38.747508', 'step': 10720, 'epoch': 2} {'type': 'loss', 'content': 0.11917814612388611, 'timestamp': '2025-09-10 02:38:38.749931', 'step': 10721, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 3322488817984}, 'timestamp': '2025-09-10 02:38:38.780595', 'step': 10721, 'epoch': 2} {'type': 'loss', 'content': 0.04152374714612961, 'timestamp': '2025-09-10 02:38:38.783002', 'step': 10722, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 128], 'flops': 3797092544000}, 'timestamp': '2025-09-10 02:38:38.814108', 'step': 10722, 'epoch': 2} {'type': 'loss', 'content': 0.11933304369449615, 'timestamp': '2025-09-10 02:38:38.816267', 'step': 10723, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 176], 'flops': 5220903722048}, 'timestamp': '2025-09-10 02:38:38.846255', 'step': 10723, 'epoch': 2} {'type': 'loss', 'content': 0.10030409693717957, 'timestamp': '2025-09-10 02:38:38.871323', 'step': 10724, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 160], 'flops': 4746299996032}, 'timestamp': '2025-09-10 02:38:38.901738', 'step': 10724, 'epoch': 2} {'type': 'loss', 'content': 0.12879535555839539, 'timestamp': '2025-09-10 02:38:38.904069', 'step': 10725, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 3322488817984}, 'timestamp': '2025-09-10 02:38:38.934004', 'step': 10725, 'epoch': 2} {'type': 'loss', 'content': 0.19670343399047852, 'timestamp': '2025-09-10 02:38:38.936291', 'step': 10726, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 128], 'flops': 3797092544000}, 'timestamp': '2025-09-10 02:38:38.968989', 'step': 10726, 'epoch': 2} {'type': 'loss', 'content': 0.20052169263362885, 'timestamp': '2025-09-10 02:38:38.971371', 'step': 10727, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 128], 'flops': 3797092544000}, 'timestamp': '2025-09-10 02:38:39.001550', 'step': 10727, 'epoch': 2} {'type': 'loss', 'content': 0.10295157134532928, 'timestamp': '2025-09-10 02:38:39.025170', 'step': 10728, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 3322488817984}, 'timestamp': '2025-09-10 02:38:39.054783', 'step': 10728, 'epoch': 2} {'type': 'loss', 'content': 0.14030228555202484, 'timestamp': '2025-09-10 02:38:39.057155', 'step': 10729, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 128], 'flops': 3797092544000}, 'timestamp': '2025-09-10 02:38:39.087458', 'step': 10729, 'epoch': 2} {'type': 'loss', 'content': 0.08777347207069397, 'timestamp': '2025-09-10 02:38:39.090063', 'step': 10730, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 3322488817984}, 'timestamp': '2025-09-10 02:38:39.121767', 'step': 10730, 'epoch': 2} {'type': 'loss', 'content': 0.10767331719398499, 'timestamp': '2025-09-10 02:38:39.124089', 'step': 10731, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 128], 'flops': 3797092544000}, 'timestamp': '2025-09-10 02:38:39.153844', 'step': 10731, 'epoch': 2} {'type': 'loss', 'content': 0.021371014416217804, 'timestamp': '2025-09-10 02:38:39.177142', 'step': 10732, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 3322488817984}, 'timestamp': '2025-09-10 02:38:39.208863', 'step': 10732, 'epoch': 2} {'type': 'loss', 'content': 0.10492338240146637, 'timestamp': '2025-09-10 02:38:39.213489', 'step': 10733, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 2847885091968}, 'timestamp': '2025-09-10 02:38:39.249937', 'step': 10733, 'epoch': 2} {'type': 'loss', 'content': 0.11099454015493393, 'timestamp': '2025-09-10 02:38:39.252807', 'step': 10734, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 2847885091968}, 'timestamp': '2025-09-10 02:38:39.289192', 'step': 10734, 'epoch': 2} {'type': 'loss', 'content': 0.04770304635167122, 'timestamp': '2025-09-10 02:38:39.295394', 'step': 10735, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 3322488817984}, 'timestamp': '2025-09-10 02:38:39.332297', 'step': 10735, 'epoch': 2} {'type': 'loss', 'content': 0.11417355388402939, 'timestamp': '2025-09-10 02:38:39.355668', 'step': 10736, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 2847885091968}, 'timestamp': '2025-09-10 02:38:39.386186', 'step': 10736, 'epoch': 2} {'type': 'loss', 'content': 0.11385740339756012, 'timestamp': '2025-09-10 02:38:39.388760', 'step': 10737, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 3322488817984}, 'timestamp': '2025-09-10 02:38:39.421561', 'step': 10737, 'epoch': 2} {'type': 'loss', 'content': 0.07407310605049133, 'timestamp': '2025-09-10 02:38:39.426597', 'step': 10738, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 128], 'flops': 3797092544000}, 'timestamp': '2025-09-10 02:38:39.461166', 'step': 10738, 'epoch': 2} {'type': 'loss', 'content': 0.18907468020915985, 'timestamp': '2025-09-10 02:38:39.463351', 'step': 10739, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 2847885091968}, 'timestamp': '2025-09-10 02:38:39.494842', 'step': 10739, 'epoch': 2} {'type': 'loss', 'content': 0.12028566747903824, 'timestamp': '2025-09-10 02:38:39.521851', 'step': 10740, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 160], 'flops': 4746299996032}, 'timestamp': '2025-09-10 02:38:39.552693', 'step': 10740, 'epoch': 2} {'type': 'loss', 'content': 0.07959870249032974, 'timestamp': '2025-09-10 02:38:39.556071', 'step': 10741, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 2847885091968}, 'timestamp': '2025-09-10 02:38:39.587376', 'step': 10741, 'epoch': 2} {'type': 'loss', 'content': 0.053767625242471695, 'timestamp': '2025-09-10 02:38:39.589531', 'step': 10742, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 3322488817984}, 'timestamp': '2025-09-10 02:38:39.619438', 'step': 10742, 'epoch': 2} {'type': 'loss', 'content': 0.1560458093881607, 'timestamp': '2025-09-10 02:38:39.624248', 'step': 10743, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 128], 'flops': 3797092544000}, 'timestamp': '2025-09-10 02:38:39.656025', 'step': 10743, 'epoch': 2} {'type': 'loss', 'content': 0.09256544709205627, 'timestamp': '2025-09-10 02:38:39.679995', 'step': 10744, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 128], 'flops': 3797092544000}, 'timestamp': '2025-09-10 02:38:39.715713', 'step': 10744, 'epoch': 2} {'type': 'loss', 'content': 0.20723362267017365, 'timestamp': '2025-09-10 02:38:39.718060', 'step': 10745, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 3322488817984}, 'timestamp': '2025-09-10 02:38:39.748892', 'step': 10745, 'epoch': 2} {'type': 'loss', 'content': 0.08866458386182785, 'timestamp': '2025-09-10 02:38:39.753348', 'step': 10746, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 2847885091968}, 'timestamp': '2025-09-10 02:38:39.785190', 'step': 10746, 'epoch': 2} {'type': 'loss', 'content': 0.10834463685750961, 'timestamp': '2025-09-10 02:38:39.787659', 'step': 10747, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 3322488817984}, 'timestamp': '2025-09-10 02:38:39.817537', 'step': 10747, 'epoch': 2} {'type': 'loss', 'content': 0.15449918806552887, 'timestamp': '2025-09-10 02:38:39.841063', 'step': 10748, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 80], 'flops': 2373281365952}, 'timestamp': '2025-09-10 02:38:39.871457', 'step': 10748, 'epoch': 2} {'type': 'loss', 'content': 0.12642185389995575, 'timestamp': '2025-09-10 02:38:39.873776', 'step': 10749, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 3322488817984}, 'timestamp': '2025-09-10 02:38:39.904527', 'step': 10749, 'epoch': 2} {'type': 'loss', 'content': 0.20995540916919708, 'timestamp': '2025-09-10 02:38:39.906903', 'step': 10750, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 3322488817984}, 'timestamp': '2025-09-10 02:38:39.942174', 'step': 10750, 'epoch': 2} {'type': 'loss', 'content': 0.12448190897703171, 'timestamp': '2025-09-10 02:38:39.944580', 'step': 10751, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 2847885091968}, 'timestamp': '2025-09-10 02:38:39.976376', 'step': 10751, 'epoch': 2} {'type': 'loss', 'content': 0.03637548163533211, 'timestamp': '2025-09-10 02:38:40.000026', 'step': 10752, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 128], 'flops': 3797092544000}, 'timestamp': '2025-09-10 02:38:40.030501', 'step': 10752, 'epoch': 2} {'type': 'loss', 'content': 0.17220786213874817, 'timestamp': '2025-09-10 02:38:40.039070', 'step': 10753, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 2847885091968}, 'timestamp': '2025-09-10 02:38:40.075665', 'step': 10753, 'epoch': 2} {'type': 'loss', 'content': 0.15766845643520355, 'timestamp': '2025-09-10 02:38:40.078234', 'step': 10754, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 128], 'flops': 3797092544000}, 'timestamp': '2025-09-10 02:38:40.108490', 'step': 10754, 'epoch': 2} {'type': 'loss', 'content': 0.08286713808774948, 'timestamp': '2025-09-10 02:38:40.111102', 'step': 10755, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 160], 'flops': 4746299996032}, 'timestamp': '2025-09-10 02:38:40.142394', 'step': 10755, 'epoch': 2} {'type': 'loss', 'content': 0.0991622805595398, 'timestamp': '2025-09-10 02:38:40.165982', 'step': 10756, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 128], 'flops': 3797092544000}, 'timestamp': '2025-09-10 02:38:40.197776', 'step': 10756, 'epoch': 2} {'type': 'loss', 'content': 0.08850714564323425, 'timestamp': '2025-09-10 02:38:40.199983', 'step': 10757, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 128], 'flops': 3797092544000}, 'timestamp': '2025-09-10 02:38:40.231341', 'step': 10757, 'epoch': 2} {'type': 'loss', 'content': 0.08421965688467026, 'timestamp': '2025-09-10 02:38:40.233613', 'step': 10758, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 3322488817984}, 'timestamp': '2025-09-10 02:38:40.264815', 'step': 10758, 'epoch': 2} {'type': 'loss', 'content': 0.0439116433262825, 'timestamp': '2025-09-10 02:38:40.267073', 'step': 10759, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 2847885091968}, 'timestamp': '2025-09-10 02:38:40.297547', 'step': 10759, 'epoch': 2} {'type': 'loss', 'content': 0.10238077491521835, 'timestamp': '2025-09-10 02:38:40.321257', 'step': 10760, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 3322488817984}, 'timestamp': '2025-09-10 02:38:40.351931', 'step': 10760, 'epoch': 2} {'type': 'loss', 'content': 0.053697146475315094, 'timestamp': '2025-09-10 02:38:40.354116', 'step': 10761, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 3322488817984}, 'timestamp': '2025-09-10 02:38:40.384956', 'step': 10761, 'epoch': 2} {'type': 'loss', 'content': 0.19242481887340546, 'timestamp': '2025-09-10 02:38:40.387908', 'step': 10762, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 3322488817984}, 'timestamp': '2025-09-10 02:38:40.417825', 'step': 10762, 'epoch': 2} {'type': 'loss', 'content': 0.09955046325922012, 'timestamp': '2025-09-10 02:38:40.420074', 'step': 10763, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 3322488817984}, 'timestamp': '2025-09-10 02:38:40.449690', 'step': 10763, 'epoch': 2} {'type': 'loss', 'content': 0.05895128846168518, 'timestamp': '2025-09-10 02:38:40.473210', 'step': 10764, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 144], 'flops': 4271696270016}, 'timestamp': '2025-09-10 02:38:40.503870', 'step': 10764, 'epoch': 2} {'type': 'loss', 'content': 0.18512682616710663, 'timestamp': '2025-09-10 02:38:40.506503', 'step': 10765, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 2847885091968}, 'timestamp': '2025-09-10 02:38:40.538557', 'step': 10765, 'epoch': 2} {'type': 'loss', 'content': 0.05603775754570961, 'timestamp': '2025-09-10 02:38:40.540711', 'step': 10766, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 2847885091968}, 'timestamp': '2025-09-10 02:38:40.571249', 'step': 10766, 'epoch': 2} {'type': 'loss', 'content': 0.13791845738887787, 'timestamp': '2025-09-10 02:38:40.573459', 'step': 10767, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 2847885091968}, 'timestamp': '2025-09-10 02:38:40.602744', 'step': 10767, 'epoch': 2} {'type': 'loss', 'content': 0.10183107852935791, 'timestamp': '2025-09-10 02:38:40.626182', 'step': 10768, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 3322488817984}, 'timestamp': '2025-09-10 02:38:40.656348', 'step': 10768, 'epoch': 2} {'type': 'loss', 'content': 0.13194087147712708, 'timestamp': '2025-09-10 02:38:40.658750', 'step': 10769, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 3322488817984}, 'timestamp': '2025-09-10 02:38:40.690495', 'step': 10769, 'epoch': 2} {'type': 'loss', 'content': 0.0845804512500763, 'timestamp': '2025-09-10 02:38:40.692657', 'step': 10770, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 3322488817984}, 'timestamp': '2025-09-10 02:38:40.722245', 'step': 10770, 'epoch': 2} {'type': 'loss', 'content': 0.13686791062355042, 'timestamp': '2025-09-10 02:38:40.724771', 'step': 10771, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 2847885091968}, 'timestamp': '2025-09-10 02:38:40.756423', 'step': 10771, 'epoch': 2} {'type': 'loss', 'content': 0.0836024135351181, 'timestamp': '2025-09-10 02:38:40.779814', 'step': 10772, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 2847885091968}, 'timestamp': '2025-09-10 02:38:40.811264', 'step': 10772, 'epoch': 2} {'type': 'loss', 'content': 0.07152405381202698, 'timestamp': '2025-09-10 02:38:40.813652', 'step': 10773, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 144], 'flops': 4271696270016}, 'timestamp': '2025-09-10 02:38:40.844502', 'step': 10773, 'epoch': 2} {'type': 'loss', 'content': 0.10556580871343613, 'timestamp': '2025-09-10 02:38:40.846935', 'step': 10774, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 144], 'flops': 4271696270016}, 'timestamp': '2025-09-10 02:38:40.877111', 'step': 10774, 'epoch': 2} {'type': 'loss', 'content': 0.10944736003875732, 'timestamp': '2025-09-10 02:38:40.879502', 'step': 10775, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 3322488817984}, 'timestamp': '2025-09-10 02:38:40.909815', 'step': 10775, 'epoch': 2} {'type': 'loss', 'content': 0.07854799181222916, 'timestamp': '2025-09-10 02:38:40.933343', 'step': 10776, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 144], 'flops': 4271696270016}, 'timestamp': '2025-09-10 02:38:40.964382', 'step': 10776, 'epoch': 2} {'type': 'loss', 'content': 0.067728191614151, 'timestamp': '2025-09-10 02:38:40.968318', 'step': 10777, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 128], 'flops': 3797092544000}, 'timestamp': '2025-09-10 02:38:40.998129', 'step': 10777, 'epoch': 2} {'type': 'loss', 'content': 0.11382332444190979, 'timestamp': '2025-09-10 02:38:41.000449', 'step': 10778, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 2847885091968}, 'timestamp': '2025-09-10 02:38:41.030677', 'step': 10778, 'epoch': 2} {'type': 'loss', 'content': 0.12967157363891602, 'timestamp': '2025-09-10 02:38:41.032763', 'step': 10779, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 3322488817984}, 'timestamp': '2025-09-10 02:38:41.062597', 'step': 10779, 'epoch': 2} {'type': 'loss', 'content': 0.062388043850660324, 'timestamp': '2025-09-10 02:38:41.086025', 'step': 10780, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 128], 'flops': 3797092544000}, 'timestamp': '2025-09-10 02:38:41.116866', 'step': 10780, 'epoch': 2} {'type': 'loss', 'content': 0.14971274137496948, 'timestamp': '2025-09-10 02:38:41.119002', 'step': 10781, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 3322488817984}, 'timestamp': '2025-09-10 02:38:41.149730', 'step': 10781, 'epoch': 2} {'type': 'loss', 'content': 0.06661200523376465, 'timestamp': '2025-09-10 02:38:41.152223', 'step': 10782, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 176], 'flops': 5220903722048}, 'timestamp': '2025-09-10 02:38:41.183374', 'step': 10782, 'epoch': 2} {'type': 'loss', 'content': 0.04627980664372444, 'timestamp': '2025-09-10 02:38:41.187732', 'step': 10783, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 128], 'flops': 3797092544000}, 'timestamp': '2025-09-10 02:38:41.218331', 'step': 10783, 'epoch': 2} {'type': 'loss', 'content': 0.06994993984699249, 'timestamp': '2025-09-10 02:38:41.241822', 'step': 10784, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 128], 'flops': 3797092544000}, 'timestamp': '2025-09-10 02:38:41.271859', 'step': 10784, 'epoch': 2} {'type': 'loss', 'content': 0.21431595087051392, 'timestamp': '2025-09-10 02:38:41.274285', 'step': 10785, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 2847885091968}, 'timestamp': '2025-09-10 02:38:41.311740', 'step': 10785, 'epoch': 2} {'type': 'loss', 'content': 0.07631515711545944, 'timestamp': '2025-09-10 02:38:41.315322', 'step': 10786, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 3322488817984}, 'timestamp': '2025-09-10 02:38:41.348963', 'step': 10786, 'epoch': 2} {'type': 'loss', 'content': 0.10430413484573364, 'timestamp': '2025-09-10 02:38:41.353701', 'step': 10787, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 192], 'flops': 5695507448064}, 'timestamp': '2025-09-10 02:38:41.384503', 'step': 10787, 'epoch': 2} {'type': 'loss', 'content': 0.1504850834608078, 'timestamp': '2025-09-10 02:38:41.409789', 'step': 10788, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 3322488817984}, 'timestamp': '2025-09-10 02:38:41.453042', 'step': 10788, 'epoch': 2} {'type': 'loss', 'content': 0.1534152776002884, 'timestamp': '2025-09-10 02:38:41.455969', 'step': 10789, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 144], 'flops': 4271696270016}, 'timestamp': '2025-09-10 02:38:41.488908', 'step': 10789, 'epoch': 2} {'type': 'loss', 'content': 0.09904119372367859, 'timestamp': '2025-09-10 02:38:41.492345', 'step': 10790, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 144], 'flops': 4271696270016}, 'timestamp': '2025-09-10 02:38:41.524186', 'step': 10790, 'epoch': 2} {'type': 'loss', 'content': 0.06291292607784271, 'timestamp': '2025-09-10 02:38:41.526696', 'step': 10791, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 2847885091968}, 'timestamp': '2025-09-10 02:38:41.557206', 'step': 10791, 'epoch': 2} {'type': 'loss', 'content': 0.11449839919805527, 'timestamp': '2025-09-10 02:38:41.584814', 'step': 10792, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 128], 'flops': 3797092544000}, 'timestamp': '2025-09-10 02:38:41.623003', 'step': 10792, 'epoch': 2} {'type': 'loss', 'content': 0.054433006793260574, 'timestamp': '2025-09-10 02:38:41.626080', 'step': 10793, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 2847885091968}, 'timestamp': '2025-09-10 02:38:41.656342', 'step': 10793, 'epoch': 2} {'type': 'loss', 'content': 0.14604125916957855, 'timestamp': '2025-09-10 02:38:41.658825', 'step': 10794, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 80], 'flops': 2373281365952}, 'timestamp': '2025-09-10 02:38:41.693786', 'step': 10794, 'epoch': 2} {'type': 'loss', 'content': 0.055330947041511536, 'timestamp': '2025-09-10 02:38:41.696321', 'step': 10795, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 128], 'flops': 3797092544000}, 'timestamp': '2025-09-10 02:38:41.727909', 'step': 10795, 'epoch': 2} {'type': 'loss', 'content': 0.1280529797077179, 'timestamp': '2025-09-10 02:38:41.756864', 'step': 10796, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 3322488817984}, 'timestamp': '2025-09-10 02:38:41.786613', 'step': 10796, 'epoch': 2} {'type': 'loss', 'content': 0.1030237004160881, 'timestamp': '2025-09-10 02:38:41.788963', 'step': 10797, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 3322488817984}, 'timestamp': '2025-09-10 02:38:41.823295', 'step': 10797, 'epoch': 2} {'type': 'loss', 'content': 0.08330824971199036, 'timestamp': '2025-09-10 02:38:41.825851', 'step': 10798, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 2847885091968}, 'timestamp': '2025-09-10 02:38:41.857510', 'step': 10798, 'epoch': 2} {'type': 'loss', 'content': 0.16896961629390717, 'timestamp': '2025-09-10 02:38:41.861366', 'step': 10799, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 432], 'flops': 12814563338304}, 'timestamp': '2025-09-10 02:38:41.901477', 'step': 10799, 'epoch': 2} {'type': 'loss', 'content': 0.07803796976804733, 'timestamp': '2025-09-10 02:38:41.938370', 'step': 10800, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 3322488817984}, 'timestamp': '2025-09-10 02:38:41.971418', 'step': 10800, 'epoch': 2} {'type': 'loss', 'content': 0.11900510638952255, 'timestamp': '2025-09-10 02:38:41.973668', 'step': 10801, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 128], 'flops': 3797092544000}, 'timestamp': '2025-09-10 02:38:42.016596', 'step': 10801, 'epoch': 2} {'type': 'loss', 'content': 0.20094025135040283, 'timestamp': '2025-09-10 02:38:42.025374', 'step': 10802, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 3322488817984}, 'timestamp': '2025-09-10 02:38:42.062343', 'step': 10802, 'epoch': 2} {'type': 'loss', 'content': 0.1368386149406433, 'timestamp': '2025-09-10 02:38:42.064652', 'step': 10803, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 3322488817984}, 'timestamp': '2025-09-10 02:38:42.095886', 'step': 10803, 'epoch': 2} {'type': 'loss', 'content': 0.18309880793094635, 'timestamp': '2025-09-10 02:38:42.120826', 'step': 10804, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 128], 'flops': 3797092544000}, 'timestamp': '2025-09-10 02:38:42.159307', 'step': 10804, 'epoch': 2} {'type': 'loss', 'content': 0.0737544596195221, 'timestamp': '2025-09-10 02:38:42.161740', 'step': 10805, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 2847885091968}, 'timestamp': '2025-09-10 02:38:42.192442', 'step': 10805, 'epoch': 2} {'type': 'loss', 'content': 0.14855796098709106, 'timestamp': '2025-09-10 02:38:42.194859', 'step': 10806, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 2847885091968}, 'timestamp': '2025-09-10 02:38:42.225217', 'step': 10806, 'epoch': 2} {'type': 'loss', 'content': 0.09362573176622391, 'timestamp': '2025-09-10 02:38:42.227373', 'step': 10807, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 3322488817984}, 'timestamp': '2025-09-10 02:38:42.258702', 'step': 10807, 'epoch': 2} {'type': 'loss', 'content': 0.09576433151960373, 'timestamp': '2025-09-10 02:38:42.283099', 'step': 10808, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 3322488817984}, 'timestamp': '2025-09-10 02:38:42.316725', 'step': 10808, 'epoch': 2} {'type': 'loss', 'content': 0.16088055074214935, 'timestamp': '2025-09-10 02:38:42.319064', 'step': 10809, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 128], 'flops': 3797092544000}, 'timestamp': '2025-09-10 02:38:42.348746', 'step': 10809, 'epoch': 2} {'type': 'loss', 'content': 0.059640076011419296, 'timestamp': '2025-09-10 02:38:42.352159', 'step': 10810, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 3322488817984}, 'timestamp': '2025-09-10 02:38:42.384272', 'step': 10810, 'epoch': 2} {'type': 'loss', 'content': 0.07937993109226227, 'timestamp': '2025-09-10 02:38:42.387648', 'step': 10811, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 3322488817984}, 'timestamp': '2025-09-10 02:38:42.419045', 'step': 10811, 'epoch': 2} {'type': 'loss', 'content': 0.07176409661769867, 'timestamp': '2025-09-10 02:38:42.442526', 'step': 10812, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 80], 'flops': 2373281365952}, 'timestamp': '2025-09-10 02:38:42.473628', 'step': 10812, 'epoch': 2} {'type': 'loss', 'content': 0.06961352378129959, 'timestamp': '2025-09-10 02:38:42.476852', 'step': 10813, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 2847885091968}, 'timestamp': '2025-09-10 02:38:42.511485', 'step': 10813, 'epoch': 2} {'type': 'loss', 'content': 0.13489437103271484, 'timestamp': '2025-09-10 02:38:42.514707', 'step': 10814, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 128], 'flops': 3797092544000}, 'timestamp': '2025-09-10 02:38:42.548989', 'step': 10814, 'epoch': 2} {'type': 'loss', 'content': 0.13991966843605042, 'timestamp': '2025-09-10 02:38:42.551390', 'step': 10815, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 144], 'flops': 4271696270016}, 'timestamp': '2025-09-10 02:38:42.583484', 'step': 10815, 'epoch': 2} {'type': 'loss', 'content': 0.11983123421669006, 'timestamp': '2025-09-10 02:38:42.611653', 'step': 10816, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 3322488817984}, 'timestamp': '2025-09-10 02:38:42.653243', 'step': 10816, 'epoch': 2} {'type': 'loss', 'content': 0.09905463457107544, 'timestamp': '2025-09-10 02:38:42.655791', 'step': 10817, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 144], 'flops': 4271696270016}, 'timestamp': '2025-09-10 02:38:42.687454', 'step': 10817, 'epoch': 2} {'type': 'loss', 'content': 0.07533886283636093, 'timestamp': '2025-09-10 02:38:42.691257', 'step': 10818, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 144], 'flops': 4271696270016}, 'timestamp': '2025-09-10 02:38:42.721530', 'step': 10818, 'epoch': 2} {'type': 'loss', 'content': 0.05469125136733055, 'timestamp': '2025-09-10 02:38:42.724044', 'step': 10819, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 2847885091968}, 'timestamp': '2025-09-10 02:38:42.767320', 'step': 10819, 'epoch': 2} {'type': 'loss', 'content': 0.13071312010288239, 'timestamp': '2025-09-10 02:38:42.791173', 'step': 10820, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 3322488817984}, 'timestamp': '2025-09-10 02:38:42.828312', 'step': 10820, 'epoch': 2} {'type': 'loss', 'content': 0.13895408809185028, 'timestamp': '2025-09-10 02:38:42.830686', 'step': 10821, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 3322488817984}, 'timestamp': '2025-09-10 02:38:42.860671', 'step': 10821, 'epoch': 2} {'type': 'loss', 'content': 0.1292421519756317, 'timestamp': '2025-09-10 02:38:42.863084', 'step': 10822, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 128], 'flops': 3797092544000}, 'timestamp': '2025-09-10 02:38:42.893058', 'step': 10822, 'epoch': 2} {'type': 'loss', 'content': 0.03565492108464241, 'timestamp': '2025-09-10 02:38:42.895144', 'step': 10823, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 3322488817984}, 'timestamp': '2025-09-10 02:38:42.925217', 'step': 10823, 'epoch': 2} {'type': 'loss', 'content': 0.08238178491592407, 'timestamp': '2025-09-10 02:38:42.949200', 'step': 10824, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 3322488817984}, 'timestamp': '2025-09-10 02:38:42.983035', 'step': 10824, 'epoch': 2} {'type': 'loss', 'content': 0.21318070590496063, 'timestamp': '2025-09-10 02:38:42.985386', 'step': 10825, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 128], 'flops': 3797092544000}, 'timestamp': '2025-09-10 02:38:43.015161', 'step': 10825, 'epoch': 2} {'type': 'loss', 'content': 0.05100780352950096, 'timestamp': '2025-09-10 02:38:43.017497', 'step': 10826, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 2847885091968}, 'timestamp': '2025-09-10 02:38:43.053238', 'step': 10826, 'epoch': 2} {'type': 'loss', 'content': 0.12177480012178421, 'timestamp': '2025-09-10 02:38:43.055545', 'step': 10827, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 176], 'flops': 5220903722048}, 'timestamp': '2025-09-10 02:38:43.086181', 'step': 10827, 'epoch': 2} {'type': 'loss', 'content': 0.09349138289690018, 'timestamp': '2025-09-10 02:38:43.110960', 'step': 10828, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 3322488817984}, 'timestamp': '2025-09-10 02:38:43.141993', 'step': 10828, 'epoch': 2} {'type': 'loss', 'content': 0.12937410175800323, 'timestamp': '2025-09-10 02:38:43.144659', 'step': 10829, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 128], 'flops': 3797092544000}, 'timestamp': '2025-09-10 02:38:43.175633', 'step': 10829, 'epoch': 2} {'type': 'loss', 'content': 0.08472034335136414, 'timestamp': '2025-09-10 02:38:43.178294', 'step': 10830, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 2847885091968}, 'timestamp': '2025-09-10 02:38:43.210266', 'step': 10830, 'epoch': 2} {'type': 'loss', 'content': 0.14978012442588806, 'timestamp': '2025-09-10 02:38:43.212770', 'step': 10831, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 2847885091968}, 'timestamp': '2025-09-10 02:38:43.247155', 'step': 10831, 'epoch': 2} {'type': 'loss', 'content': 0.0815686583518982, 'timestamp': '2025-09-10 02:38:43.271317', 'step': 10832, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 2847885091968}, 'timestamp': '2025-09-10 02:38:43.302169', 'step': 10832, 'epoch': 2} {'type': 'loss', 'content': 0.08222740143537521, 'timestamp': '2025-09-10 02:38:43.304551', 'step': 10833, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 160], 'flops': 4746299996032}, 'timestamp': '2025-09-10 02:38:43.336623', 'step': 10833, 'epoch': 2} {'type': 'loss', 'content': 0.1194189265370369, 'timestamp': '2025-09-10 02:38:43.339518', 'step': 10834, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 3322488817984}, 'timestamp': '2025-09-10 02:38:43.370164', 'step': 10834, 'epoch': 2} {'type': 'loss', 'content': 0.06230177357792854, 'timestamp': '2025-09-10 02:38:43.372529', 'step': 10835, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 3322488817984}, 'timestamp': '2025-09-10 02:38:43.403660', 'step': 10835, 'epoch': 2} {'type': 'loss', 'content': 0.17383241653442383, 'timestamp': '2025-09-10 02:38:43.427271', 'step': 10836, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 128], 'flops': 3797092544000}, 'timestamp': '2025-09-10 02:38:43.457556', 'step': 10836, 'epoch': 2} {'type': 'loss', 'content': 0.07507269829511642, 'timestamp': '2025-09-10 02:38:43.460358', 'step': 10837, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 2847885091968}, 'timestamp': '2025-09-10 02:38:43.491908', 'step': 10837, 'epoch': 2} {'type': 'loss', 'content': 0.10839233547449112, 'timestamp': '2025-09-10 02:38:43.494782', 'step': 10838, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 3322488817984}, 'timestamp': '2025-09-10 02:38:43.527030', 'step': 10838, 'epoch': 2} {'type': 'loss', 'content': 0.09648720920085907, 'timestamp': '2025-09-10 02:38:43.529395', 'step': 10839, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 128], 'flops': 3797092544000}, 'timestamp': '2025-09-10 02:38:43.559029', 'step': 10839, 'epoch': 2} {'type': 'loss', 'content': 0.14068551361560822, 'timestamp': '2025-09-10 02:38:43.582549', 'step': 10840, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 128], 'flops': 3797092544000}, 'timestamp': '2025-09-10 02:38:43.613633', 'step': 10840, 'epoch': 2} {'type': 'loss', 'content': 0.10044743865728378, 'timestamp': '2025-09-10 02:38:43.616239', 'step': 10841, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 2847885091968}, 'timestamp': '2025-09-10 02:38:43.647312', 'step': 10841, 'epoch': 2} {'type': 'loss', 'content': 0.17027020454406738, 'timestamp': '2025-09-10 02:38:43.649719', 'step': 10842, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 3322488817984}, 'timestamp': '2025-09-10 02:38:43.681140', 'step': 10842, 'epoch': 2} {'type': 'loss', 'content': 0.14045363664627075, 'timestamp': '2025-09-10 02:38:43.683757', 'step': 10843, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 3322488817984}, 'timestamp': '2025-09-10 02:38:43.715697', 'step': 10843, 'epoch': 2} {'type': 'loss', 'content': 0.054717447608709335, 'timestamp': '2025-09-10 02:38:43.739360', 'step': 10844, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 3322488817984}, 'timestamp': '2025-09-10 02:38:43.771491', 'step': 10844, 'epoch': 2} {'type': 'loss', 'content': 0.11863963305950165, 'timestamp': '2025-09-10 02:38:43.773821', 'step': 10845, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 2847885091968}, 'timestamp': '2025-09-10 02:38:43.804428', 'step': 10845, 'epoch': 2} {'type': 'loss', 'content': 0.12092378735542297, 'timestamp': '2025-09-10 02:38:43.806518', 'step': 10846, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 2847885091968}, 'timestamp': '2025-09-10 02:38:43.836364', 'step': 10846, 'epoch': 2} {'type': 'loss', 'content': 0.08229552954435349, 'timestamp': '2025-09-10 02:38:43.838443', 'step': 10847, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 3322488817984}, 'timestamp': '2025-09-10 02:38:43.868801', 'step': 10847, 'epoch': 2} {'type': 'loss', 'content': 0.0696839764714241, 'timestamp': '2025-09-10 02:38:43.892418', 'step': 10848, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 3322488817984}, 'timestamp': '2025-09-10 02:38:43.923785', 'step': 10848, 'epoch': 2} {'type': 'loss', 'content': 0.1145535558462143, 'timestamp': '2025-09-10 02:38:43.925912', 'step': 10849, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 3322488817984}, 'timestamp': '2025-09-10 02:38:43.955892', 'step': 10849, 'epoch': 2} {'type': 'loss', 'content': 0.1477881520986557, 'timestamp': '2025-09-10 02:38:43.958536', 'step': 10850, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 3322488817984}, 'timestamp': '2025-09-10 02:38:43.989276', 'step': 10850, 'epoch': 2} {'type': 'loss', 'content': 0.14237798750400543, 'timestamp': '2025-09-10 02:38:43.992079', 'step': 10851, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 128], 'flops': 3797092544000}, 'timestamp': '2025-09-10 02:38:44.022927', 'step': 10851, 'epoch': 2} {'type': 'loss', 'content': 0.13602110743522644, 'timestamp': '2025-09-10 02:38:44.046308', 'step': 10852, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 2847885091968}, 'timestamp': '2025-09-10 02:38:44.076903', 'step': 10852, 'epoch': 2} {'type': 'loss', 'content': 0.09550724923610687, 'timestamp': '2025-09-10 02:38:44.079185', 'step': 10853, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 3322488817984}, 'timestamp': '2025-09-10 02:38:44.111019', 'step': 10853, 'epoch': 2} {'type': 'loss', 'content': 0.07347456365823746, 'timestamp': '2025-09-10 02:38:44.113898', 'step': 10854, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 2847885091968}, 'timestamp': '2025-09-10 02:38:44.144735', 'step': 10854, 'epoch': 2} {'type': 'loss', 'content': 0.053467269986867905, 'timestamp': '2025-09-10 02:38:44.146966', 'step': 10855, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 2847885091968}, 'timestamp': '2025-09-10 02:38:44.177063', 'step': 10855, 'epoch': 2} {'type': 'loss', 'content': 0.11506886035203934, 'timestamp': '2025-09-10 02:38:44.200688', 'step': 10856, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 3322488817984}, 'timestamp': '2025-09-10 02:38:44.233697', 'step': 10856, 'epoch': 2} {'type': 'loss', 'content': 0.13955359160900116, 'timestamp': '2025-09-10 02:38:44.235925', 'step': 10857, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 2847885091968}, 'timestamp': '2025-09-10 02:38:44.265875', 'step': 10857, 'epoch': 2} {'type': 'loss', 'content': 0.13178801536560059, 'timestamp': '2025-09-10 02:38:44.268536', 'step': 10858, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 2847885091968}, 'timestamp': '2025-09-10 02:38:44.298977', 'step': 10858, 'epoch': 2} {'type': 'loss', 'content': 0.1534809172153473, 'timestamp': '2025-09-10 02:38:44.301230', 'step': 10859, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 2847885091968}, 'timestamp': '2025-09-10 02:38:44.335083', 'step': 10859, 'epoch': 2} {'type': 'loss', 'content': 0.03623618930578232, 'timestamp': '2025-09-10 02:38:44.358839', 'step': 10860, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 2847885091968}, 'timestamp': '2025-09-10 02:38:44.389994', 'step': 10860, 'epoch': 2} {'type': 'loss', 'content': 0.11741257458925247, 'timestamp': '2025-09-10 02:38:44.392538', 'step': 10861, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 128], 'flops': 3797092544000}, 'timestamp': '2025-09-10 02:38:44.425189', 'step': 10861, 'epoch': 2} {'type': 'loss', 'content': 0.1255916804075241, 'timestamp': '2025-09-10 02:38:44.427435', 'step': 10862, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 144], 'flops': 4271696270016}, 'timestamp': '2025-09-10 02:38:44.458217', 'step': 10862, 'epoch': 2} {'type': 'loss', 'content': 0.15134602785110474, 'timestamp': '2025-09-10 02:38:44.460653', 'step': 10863, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 128], 'flops': 3797092544000}, 'timestamp': '2025-09-10 02:38:44.490573', 'step': 10863, 'epoch': 2} {'type': 'loss', 'content': 0.12407510727643967, 'timestamp': '2025-09-10 02:38:44.514109', 'step': 10864, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 2847885091968}, 'timestamp': '2025-09-10 02:38:44.544463', 'step': 10864, 'epoch': 2} {'type': 'loss', 'content': 0.12306597828865051, 'timestamp': '2025-09-10 02:38:44.548125', 'step': 10865, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 3322488817984}, 'timestamp': '2025-09-10 02:38:44.579582', 'step': 10865, 'epoch': 2} {'type': 'loss', 'content': 0.08803717792034149, 'timestamp': '2025-09-10 02:38:44.581860', 'step': 10866, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 160], 'flops': 4746299996032}, 'timestamp': '2025-09-10 02:38:44.612553', 'step': 10866, 'epoch': 2} {'type': 'loss', 'content': 0.1212489902973175, 'timestamp': '2025-09-10 02:38:44.616226', 'step': 10867, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 128], 'flops': 3797092544000}, 'timestamp': '2025-09-10 02:38:44.646422', 'step': 10867, 'epoch': 2} {'type': 'loss', 'content': 0.13114000856876373, 'timestamp': '2025-09-10 02:38:44.670001', 'step': 10868, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 3322488817984}, 'timestamp': '2025-09-10 02:38:44.700527', 'step': 10868, 'epoch': 2} {'type': 'loss', 'content': 0.07715681195259094, 'timestamp': '2025-09-10 02:38:44.702714', 'step': 10869, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 128], 'flops': 3797092544000}, 'timestamp': '2025-09-10 02:38:44.733221', 'step': 10869, 'epoch': 2} {'type': 'loss', 'content': 0.03950931131839752, 'timestamp': '2025-09-10 02:38:44.737256', 'step': 10870, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 128], 'flops': 3797092544000}, 'timestamp': '2025-09-10 02:38:44.768670', 'step': 10870, 'epoch': 2} {'type': 'loss', 'content': 0.036298565566539764, 'timestamp': '2025-09-10 02:38:44.771073', 'step': 10871, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 128], 'flops': 3797092544000}, 'timestamp': '2025-09-10 02:38:44.800926', 'step': 10871, 'epoch': 2} {'type': 'loss', 'content': 0.15143074095249176, 'timestamp': '2025-09-10 02:38:44.824356', 'step': 10872, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 2847885091968}, 'timestamp': '2025-09-10 02:38:44.854688', 'step': 10872, 'epoch': 2} {'type': 'loss', 'content': 0.13661056756973267, 'timestamp': '2025-09-10 02:38:44.856901', 'step': 10873, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 3322488817984}, 'timestamp': '2025-09-10 02:38:44.887740', 'step': 10873, 'epoch': 2} {'type': 'loss', 'content': 0.06693726032972336, 'timestamp': '2025-09-10 02:38:44.890111', 'step': 10874, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 2847885091968}, 'timestamp': '2025-09-10 02:38:44.920607', 'step': 10874, 'epoch': 2} {'type': 'loss', 'content': 0.11933031678199768, 'timestamp': '2025-09-10 02:38:44.924411', 'step': 10875, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 3322488817984}, 'timestamp': '2025-09-10 02:38:44.954181', 'step': 10875, 'epoch': 2} {'type': 'loss', 'content': 0.11888214200735092, 'timestamp': '2025-09-10 02:38:44.977875', 'step': 10876, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 3322488817984}, 'timestamp': '2025-09-10 02:38:45.008676', 'step': 10876, 'epoch': 2} {'type': 'loss', 'content': 0.04251841828227043, 'timestamp': '2025-09-10 02:38:45.011538', 'step': 10877, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 2847885091968}, 'timestamp': '2025-09-10 02:38:45.044686', 'step': 10877, 'epoch': 2} {'type': 'loss', 'content': 0.07721420377492905, 'timestamp': '2025-09-10 02:38:45.046970', 'step': 10878, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 2847885091968}, 'timestamp': '2025-09-10 02:38:45.076764', 'step': 10878, 'epoch': 2} {'type': 'loss', 'content': 0.10965198278427124, 'timestamp': '2025-09-10 02:38:45.079126', 'step': 10879, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 2847885091968}, 'timestamp': '2025-09-10 02:38:45.109489', 'step': 10879, 'epoch': 2} {'type': 'loss', 'content': 0.05464419722557068, 'timestamp': '2025-09-10 02:38:45.133083', 'step': 10880, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 3322488817984}, 'timestamp': '2025-09-10 02:38:45.163726', 'step': 10880, 'epoch': 2} {'type': 'loss', 'content': 0.139204740524292, 'timestamp': '2025-09-10 02:38:45.166034', 'step': 10881, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 3322488817984}, 'timestamp': '2025-09-10 02:38:45.195964', 'step': 10881, 'epoch': 2} {'type': 'loss', 'content': 0.14104169607162476, 'timestamp': '2025-09-10 02:38:45.198974', 'step': 10882, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 80], 'flops': 2373281365952}, 'timestamp': '2025-09-10 02:38:45.229471', 'step': 10882, 'epoch': 2} {'type': 'loss', 'content': 0.13899250328540802, 'timestamp': '2025-09-10 02:38:45.232045', 'step': 10883, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 2847885091968}, 'timestamp': '2025-09-10 02:38:45.261985', 'step': 10883, 'epoch': 2} {'type': 'loss', 'content': 0.09417587518692017, 'timestamp': '2025-09-10 02:38:45.285480', 'step': 10884, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 3322488817984}, 'timestamp': '2025-09-10 02:38:45.316950', 'step': 10884, 'epoch': 2} {'type': 'loss', 'content': 0.039110444486141205, 'timestamp': '2025-09-10 02:38:45.319285', 'step': 10885, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 3322488817984}, 'timestamp': '2025-09-10 02:38:45.351156', 'step': 10885, 'epoch': 2} {'type': 'loss', 'content': 0.05737626180052757, 'timestamp': '2025-09-10 02:38:45.353183', 'step': 10886, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 128], 'flops': 3797092544000}, 'timestamp': '2025-09-10 02:38:45.383129', 'step': 10886, 'epoch': 2} {'type': 'loss', 'content': 0.21455317735671997, 'timestamp': '2025-09-10 02:38:45.385440', 'step': 10887, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 2847885091968}, 'timestamp': '2025-09-10 02:38:45.415576', 'step': 10887, 'epoch': 2} {'type': 'loss', 'content': 0.2032230645418167, 'timestamp': '2025-09-10 02:38:45.439688', 'step': 10888, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 128], 'flops': 3797092544000}, 'timestamp': '2025-09-10 02:38:45.470458', 'step': 10888, 'epoch': 2} {'type': 'loss', 'content': 0.08065373450517654, 'timestamp': '2025-09-10 02:38:45.472709', 'step': 10889, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 144], 'flops': 4271696270016}, 'timestamp': '2025-09-10 02:38:45.503195', 'step': 10889, 'epoch': 2} {'type': 'loss', 'content': 0.15775613486766815, 'timestamp': '2025-09-10 02:38:45.505771', 'step': 10890, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 3322488817984}, 'timestamp': '2025-09-10 02:38:45.535894', 'step': 10890, 'epoch': 2} {'type': 'loss', 'content': 0.12859992682933807, 'timestamp': '2025-09-10 02:38:45.538124', 'step': 10891, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 3322488817984}, 'timestamp': '2025-09-10 02:38:45.568142', 'step': 10891, 'epoch': 2} {'type': 'loss', 'content': 0.09124790132045746, 'timestamp': '2025-09-10 02:38:45.591608', 'step': 10892, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 3322488817984}, 'timestamp': '2025-09-10 02:38:45.621484', 'step': 10892, 'epoch': 2} {'type': 'loss', 'content': 0.042244184762239456, 'timestamp': '2025-09-10 02:38:45.623819', 'step': 10893, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 3322488817984}, 'timestamp': '2025-09-10 02:38:45.656321', 'step': 10893, 'epoch': 2} {'type': 'loss', 'content': 0.15253135561943054, 'timestamp': '2025-09-10 02:38:45.658803', 'step': 10894, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 3322488817984}, 'timestamp': '2025-09-10 02:38:45.689246', 'step': 10894, 'epoch': 2} {'type': 'loss', 'content': 0.07802658528089523, 'timestamp': '2025-09-10 02:38:45.691458', 'step': 10895, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 3322488817984}, 'timestamp': '2025-09-10 02:38:45.722557', 'step': 10895, 'epoch': 2} {'type': 'loss', 'content': 0.20796628296375275, 'timestamp': '2025-09-10 02:38:45.746377', 'step': 10896, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 80], 'flops': 2373281365952}, 'timestamp': '2025-09-10 02:38:45.777286', 'step': 10896, 'epoch': 2} {'type': 'loss', 'content': 0.08711329102516174, 'timestamp': '2025-09-10 02:38:45.779627', 'step': 10897, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 3322488817984}, 'timestamp': '2025-09-10 02:38:45.812898', 'step': 10897, 'epoch': 2} {'type': 'loss', 'content': 0.20107249915599823, 'timestamp': '2025-09-10 02:38:45.815720', 'step': 10898, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 144], 'flops': 4271696270016}, 'timestamp': '2025-09-10 02:38:45.849172', 'step': 10898, 'epoch': 2} {'type': 'loss', 'content': 0.09538615494966507, 'timestamp': '2025-09-10 02:38:45.852456', 'step': 10899, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 2847885091968}, 'timestamp': '2025-09-10 02:38:45.885402', 'step': 10899, 'epoch': 2} {'type': 'loss', 'content': 0.1054152175784111, 'timestamp': '2025-09-10 02:38:45.909400', 'step': 10900, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 144], 'flops': 4271696270016}, 'timestamp': '2025-09-10 02:38:45.941438', 'step': 10900, 'epoch': 2} {'type': 'loss', 'content': 0.09279603511095047, 'timestamp': '2025-09-10 02:38:45.944089', 'step': 10901, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 2847885091968}, 'timestamp': '2025-09-10 02:38:45.976398', 'step': 10901, 'epoch': 2} {'type': 'loss', 'content': 0.15927496552467346, 'timestamp': '2025-09-10 02:38:45.979486', 'step': 10902, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 3322488817984}, 'timestamp': '2025-09-10 02:38:46.011495', 'step': 10902, 'epoch': 2} {'type': 'loss', 'content': 0.13621461391448975, 'timestamp': '2025-09-10 02:38:46.013832', 'step': 10903, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 80], 'flops': 2373281365952}, 'timestamp': '2025-09-10 02:38:46.043658', 'step': 10903, 'epoch': 2} {'type': 'loss', 'content': 0.07739456743001938, 'timestamp': '2025-09-10 02:38:46.067404', 'step': 10904, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 128], 'flops': 3797092544000}, 'timestamp': '2025-09-10 02:38:46.098825', 'step': 10904, 'epoch': 2} {'type': 'loss', 'content': 0.129852756857872, 'timestamp': '2025-09-10 02:38:46.101465', 'step': 10905, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 128], 'flops': 3797092544000}, 'timestamp': '2025-09-10 02:38:46.131156', 'step': 10905, 'epoch': 2} {'type': 'loss', 'content': 0.10965859889984131, 'timestamp': '2025-09-10 02:38:46.134776', 'step': 10906, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 3322488817984}, 'timestamp': '2025-09-10 02:38:46.165339', 'step': 10906, 'epoch': 2} {'type': 'loss', 'content': 0.07973834127187729, 'timestamp': '2025-09-10 02:38:46.167691', 'step': 10907, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 3322488817984}, 'timestamp': '2025-09-10 02:38:46.198065', 'step': 10907, 'epoch': 2} {'type': 'loss', 'content': 0.15605591237545013, 'timestamp': '2025-09-10 02:38:46.222238', 'step': 10908, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 144], 'flops': 4271696270016}, 'timestamp': '2025-09-10 02:38:46.255043', 'step': 10908, 'epoch': 2} {'type': 'loss', 'content': 0.1238727867603302, 'timestamp': '2025-09-10 02:38:46.257274', 'step': 10909, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 144], 'flops': 4271696270016}, 'timestamp': '2025-09-10 02:38:46.287809', 'step': 10909, 'epoch': 2} {'type': 'loss', 'content': 0.14656145870685577, 'timestamp': '2025-09-10 02:38:46.290335', 'step': 10910, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 2847885091968}, 'timestamp': '2025-09-10 02:38:46.320193', 'step': 10910, 'epoch': 2} {'type': 'loss', 'content': 0.1755509376525879, 'timestamp': '2025-09-10 02:38:46.322680', 'step': 10911, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 144], 'flops': 4271696270016}, 'timestamp': '2025-09-10 02:38:46.353405', 'step': 10911, 'epoch': 2} {'type': 'loss', 'content': 0.14669260382652283, 'timestamp': '2025-09-10 02:38:46.376828', 'step': 10912, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 128], 'flops': 3797092544000}, 'timestamp': '2025-09-10 02:38:46.407974', 'step': 10912, 'epoch': 2} {'type': 'loss', 'content': 0.10455676913261414, 'timestamp': '2025-09-10 02:38:46.410097', 'step': 10913, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 144], 'flops': 4271696270016}, 'timestamp': '2025-09-10 02:38:46.445918', 'step': 10913, 'epoch': 2} {'type': 'loss', 'content': 0.11293576657772064, 'timestamp': '2025-09-10 02:38:46.448394', 'step': 10914, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 3322488817984}, 'timestamp': '2025-09-10 02:38:46.477905', 'step': 10914, 'epoch': 2} {'type': 'loss', 'content': 0.07986656576395035, 'timestamp': '2025-09-10 02:38:46.480351', 'step': 10915, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 2847885091968}, 'timestamp': '2025-09-10 02:38:46.510439', 'step': 10915, 'epoch': 2} {'type': 'loss', 'content': 0.15825344622135162, 'timestamp': '2025-09-10 02:38:46.533981', 'step': 10916, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 3322488817984}, 'timestamp': '2025-09-10 02:38:46.566331', 'step': 10916, 'epoch': 2} {'type': 'loss', 'content': 0.032662030309438705, 'timestamp': '2025-09-10 02:38:46.568675', 'step': 10917, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 3322488817984}, 'timestamp': '2025-09-10 02:38:46.599332', 'step': 10917, 'epoch': 2} {'type': 'loss', 'content': 0.06624340265989304, 'timestamp': '2025-09-10 02:38:46.601982', 'step': 10918, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 3322488817984}, 'timestamp': '2025-09-10 02:38:46.632863', 'step': 10918, 'epoch': 2} {'type': 'loss', 'content': 0.14656415581703186, 'timestamp': '2025-09-10 02:38:46.639652', 'step': 10919, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 128], 'flops': 3797092544000}, 'timestamp': '2025-09-10 02:38:46.674264', 'step': 10919, 'epoch': 2} {'type': 'loss', 'content': 0.13719549775123596, 'timestamp': '2025-09-10 02:38:46.697623', 'step': 10920, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 2847885091968}, 'timestamp': '2025-09-10 02:38:46.728048', 'step': 10920, 'epoch': 2} {'type': 'loss', 'content': 0.08968491107225418, 'timestamp': '2025-09-10 02:38:46.730249', 'step': 10921, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 2847885091968}, 'timestamp': '2025-09-10 02:38:46.760623', 'step': 10921, 'epoch': 2} {'type': 'loss', 'content': 0.03783132880926132, 'timestamp': '2025-09-10 02:38:46.763137', 'step': 10922, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 3322488817984}, 'timestamp': '2025-09-10 02:38:46.793036', 'step': 10922, 'epoch': 2} {'type': 'loss', 'content': 0.14513543248176575, 'timestamp': '2025-09-10 02:38:46.795522', 'step': 10923, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 3322488817984}, 'timestamp': '2025-09-10 02:38:46.825930', 'step': 10923, 'epoch': 2} {'type': 'loss', 'content': 0.09546460211277008, 'timestamp': '2025-09-10 02:38:46.849572', 'step': 10924, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 3322488817984}, 'timestamp': '2025-09-10 02:38:46.880647', 'step': 10924, 'epoch': 2} {'type': 'loss', 'content': 0.14539861679077148, 'timestamp': '2025-09-10 02:38:46.883128', 'step': 10925, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 3322488817984}, 'timestamp': '2025-09-10 02:38:46.913389', 'step': 10925, 'epoch': 2} {'type': 'loss', 'content': 0.09313250333070755, 'timestamp': '2025-09-10 02:38:46.915344', 'step': 10926, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 3322488817984}, 'timestamp': '2025-09-10 02:38:46.945507', 'step': 10926, 'epoch': 2} {'type': 'loss', 'content': 0.15044763684272766, 'timestamp': '2025-09-10 02:38:46.947819', 'step': 10927, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 2847885091968}, 'timestamp': '2025-09-10 02:38:46.978140', 'step': 10927, 'epoch': 2} {'type': 'loss', 'content': 0.06265109032392502, 'timestamp': '2025-09-10 02:38:47.002450', 'step': 10928, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 2847885091968}, 'timestamp': '2025-09-10 02:38:47.033503', 'step': 10928, 'epoch': 2} {'type': 'loss', 'content': 0.10230539739131927, 'timestamp': '2025-09-10 02:38:47.035884', 'step': 10929, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 3322488817984}, 'timestamp': '2025-09-10 02:38:47.067393', 'step': 10929, 'epoch': 2} {'type': 'loss', 'content': 0.150699645280838, 'timestamp': '2025-09-10 02:38:47.069756', 'step': 10930, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 144], 'flops': 4271696270016}, 'timestamp': '2025-09-10 02:38:47.099923', 'step': 10930, 'epoch': 2} {'type': 'loss', 'content': 0.17215177416801453, 'timestamp': '2025-09-10 02:38:47.103260', 'step': 10931, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 2847885091968}, 'timestamp': '2025-09-10 02:38:47.133386', 'step': 10931, 'epoch': 2} {'type': 'loss', 'content': 0.08411935716867447, 'timestamp': '2025-09-10 02:38:47.157370', 'step': 10932, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 128], 'flops': 3797092544000}, 'timestamp': '2025-09-10 02:38:47.188695', 'step': 10932, 'epoch': 2} {'type': 'loss', 'content': 0.07112591713666916, 'timestamp': '2025-09-10 02:38:47.191068', 'step': 10933, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 128], 'flops': 3797092544000}, 'timestamp': '2025-09-10 02:38:47.221544', 'step': 10933, 'epoch': 2} {'type': 'loss', 'content': 0.12088267505168915, 'timestamp': '2025-09-10 02:38:47.223891', 'step': 10934, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 144], 'flops': 4271696270016}, 'timestamp': '2025-09-10 02:38:47.255568', 'step': 10934, 'epoch': 2} {'type': 'loss', 'content': 0.12929770350456238, 'timestamp': '2025-09-10 02:38:47.257986', 'step': 10935, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 2847885091968}, 'timestamp': '2025-09-10 02:38:47.289729', 'step': 10935, 'epoch': 2} {'type': 'loss', 'content': 0.20349904894828796, 'timestamp': '2025-09-10 02:38:47.313458', 'step': 10936, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 3322488817984}, 'timestamp': '2025-09-10 02:38:47.344138', 'step': 10936, 'epoch': 2} {'type': 'loss', 'content': 0.09625498205423355, 'timestamp': '2025-09-10 02:38:47.346351', 'step': 10937, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 3322488817984}, 'timestamp': '2025-09-10 02:38:47.377199', 'step': 10937, 'epoch': 2} {'type': 'loss', 'content': 0.10899247229099274, 'timestamp': '2025-09-10 02:38:47.379497', 'step': 10938, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 2847885091968}, 'timestamp': '2025-09-10 02:38:47.410333', 'step': 10938, 'epoch': 2} {'type': 'loss', 'content': 0.10330784320831299, 'timestamp': '2025-09-10 02:38:47.412726', 'step': 10939, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 2847885091968}, 'timestamp': '2025-09-10 02:38:47.442742', 'step': 10939, 'epoch': 2} {'type': 'loss', 'content': 0.1576939970254898, 'timestamp': '2025-09-10 02:38:47.466301', 'step': 10940, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 128], 'flops': 3797092544000}, 'timestamp': '2025-09-10 02:38:47.498185', 'step': 10940, 'epoch': 2} {'type': 'loss', 'content': 0.12707281112670898, 'timestamp': '2025-09-10 02:38:47.500517', 'step': 10941, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 128], 'flops': 3797092544000}, 'timestamp': '2025-09-10 02:38:47.530575', 'step': 10941, 'epoch': 2} {'type': 'loss', 'content': 0.18336358666419983, 'timestamp': '2025-09-10 02:38:47.532699', 'step': 10942, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 144], 'flops': 4271696270016}, 'timestamp': '2025-09-10 02:38:47.562954', 'step': 10942, 'epoch': 2} {'type': 'loss', 'content': 0.1679593324661255, 'timestamp': '2025-09-10 02:38:47.565224', 'step': 10943, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 2847885091968}, 'timestamp': '2025-09-10 02:38:47.595129', 'step': 10943, 'epoch': 2} {'type': 'loss', 'content': 0.1267540603876114, 'timestamp': '2025-09-10 02:38:47.618606', 'step': 10944, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 3322488817984}, 'timestamp': '2025-09-10 02:38:47.649609', 'step': 10944, 'epoch': 2} {'type': 'loss', 'content': 0.09957575798034668, 'timestamp': '2025-09-10 02:38:47.653243', 'step': 10945, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 3322488817984}, 'timestamp': '2025-09-10 02:38:47.683430', 'step': 10945, 'epoch': 2} {'type': 'loss', 'content': 0.08551806211471558, 'timestamp': '2025-09-10 02:38:47.686019', 'step': 10946, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 2847885091968}, 'timestamp': '2025-09-10 02:38:47.716044', 'step': 10946, 'epoch': 2} {'type': 'loss', 'content': 0.10173238068819046, 'timestamp': '2025-09-10 02:38:47.718671', 'step': 10947, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 144], 'flops': 4271696270016}, 'timestamp': '2025-09-10 02:38:47.749960', 'step': 10947, 'epoch': 2} {'type': 'loss', 'content': 0.09825605899095535, 'timestamp': '2025-09-10 02:38:47.773663', 'step': 10948, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 3322488817984}, 'timestamp': '2025-09-10 02:38:47.803516', 'step': 10948, 'epoch': 2} {'type': 'loss', 'content': 0.1179594025015831, 'timestamp': '2025-09-10 02:38:47.806001', 'step': 10949, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 144], 'flops': 4271696270016}, 'timestamp': '2025-09-10 02:38:47.835953', 'step': 10949, 'epoch': 2} {'type': 'loss', 'content': 0.06295228749513626, 'timestamp': '2025-09-10 02:38:47.838424', 'step': 10950, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 2847885091968}, 'timestamp': '2025-09-10 02:38:47.869139', 'step': 10950, 'epoch': 2} {'type': 'loss', 'content': 0.09335172921419144, 'timestamp': '2025-09-10 02:38:47.871387', 'step': 10951, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 3322488817984}, 'timestamp': '2025-09-10 02:38:47.901437', 'step': 10951, 'epoch': 2} {'type': 'loss', 'content': 0.1087387204170227, 'timestamp': '2025-09-10 02:38:47.924789', 'step': 10952, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 3322488817984}, 'timestamp': '2025-09-10 02:38:47.955294', 'step': 10952, 'epoch': 2} {'type': 'loss', 'content': 0.04850362613797188, 'timestamp': '2025-09-10 02:38:47.958856', 'step': 10953, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 3322488817984}, 'timestamp': '2025-09-10 02:38:47.989017', 'step': 10953, 'epoch': 2} {'type': 'loss', 'content': 0.1297273486852646, 'timestamp': '2025-09-10 02:38:47.991178', 'step': 10954, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 128], 'flops': 3797092544000}, 'timestamp': '2025-09-10 02:38:48.021179', 'step': 10954, 'epoch': 2} {'type': 'loss', 'content': 0.1122966781258583, 'timestamp': '2025-09-10 02:38:48.023915', 'step': 10955, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 128], 'flops': 3797092544000}, 'timestamp': '2025-09-10 02:38:48.053949', 'step': 10955, 'epoch': 2} {'type': 'loss', 'content': 0.26451653242111206, 'timestamp': '2025-09-10 02:38:48.077406', 'step': 10956, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 192], 'flops': 5695507448064}, 'timestamp': '2025-09-10 02:38:48.107878', 'step': 10956, 'epoch': 2} {'type': 'loss', 'content': 0.06363875418901443, 'timestamp': '2025-09-10 02:38:48.110522', 'step': 10957, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 3322488817984}, 'timestamp': '2025-09-10 02:38:48.140416', 'step': 10957, 'epoch': 2} {'type': 'loss', 'content': 0.13930797576904297, 'timestamp': '2025-09-10 02:38:48.143073', 'step': 10958, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 3322488817984}, 'timestamp': '2025-09-10 02:38:48.173690', 'step': 10958, 'epoch': 2} {'type': 'loss', 'content': 0.11164083331823349, 'timestamp': '2025-09-10 02:38:48.176333', 'step': 10959, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 2847885091968}, 'timestamp': '2025-09-10 02:38:48.207287', 'step': 10959, 'epoch': 2} {'type': 'loss', 'content': 0.09437157958745956, 'timestamp': '2025-09-10 02:38:48.230984', 'step': 10960, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 3322488817984}, 'timestamp': '2025-09-10 02:38:48.263872', 'step': 10960, 'epoch': 2} {'type': 'loss', 'content': 0.16623654961585999, 'timestamp': '2025-09-10 02:38:48.266970', 'step': 10961, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 2847885091968}, 'timestamp': '2025-09-10 02:38:48.298036', 'step': 10961, 'epoch': 2} {'type': 'loss', 'content': 0.10317377746105194, 'timestamp': '2025-09-10 02:38:48.300469', 'step': 10962, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 128], 'flops': 3797092544000}, 'timestamp': '2025-09-10 02:38:48.332519', 'step': 10962, 'epoch': 2} {'type': 'loss', 'content': 0.07901224493980408, 'timestamp': '2025-09-10 02:38:48.334931', 'step': 10963, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 128], 'flops': 3797092544000}, 'timestamp': '2025-09-10 02:38:48.364895', 'step': 10963, 'epoch': 2} {'type': 'loss', 'content': 0.13386307656764984, 'timestamp': '2025-09-10 02:38:48.388546', 'step': 10964, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 2847885091968}, 'timestamp': '2025-09-10 02:38:48.419605', 'step': 10964, 'epoch': 2} {'type': 'loss', 'content': 0.0877443253993988, 'timestamp': '2025-09-10 02:38:48.422152', 'step': 10965, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 144], 'flops': 4271696270016}, 'timestamp': '2025-09-10 02:38:48.453899', 'step': 10965, 'epoch': 2} {'type': 'loss', 'content': 0.09113091230392456, 'timestamp': '2025-09-10 02:38:48.456679', 'step': 10966, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 3322488817984}, 'timestamp': '2025-09-10 02:38:48.489321', 'step': 10966, 'epoch': 2} {'type': 'loss', 'content': 0.11785345524549484, 'timestamp': '2025-09-10 02:38:48.491778', 'step': 10967, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 2847885091968}, 'timestamp': '2025-09-10 02:38:48.521597', 'step': 10967, 'epoch': 2} {'type': 'loss', 'content': 0.09877314418554306, 'timestamp': '2025-09-10 02:38:48.545334', 'step': 10968, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 2847885091968}, 'timestamp': '2025-09-10 02:38:48.576272', 'step': 10968, 'epoch': 2} {'type': 'loss', 'content': 0.09630690515041351, 'timestamp': '2025-09-10 02:38:48.578820', 'step': 10969, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 3322488817984}, 'timestamp': '2025-09-10 02:38:48.609306', 'step': 10969, 'epoch': 2} {'type': 'loss', 'content': 0.16859911382198334, 'timestamp': '2025-09-10 02:38:48.611565', 'step': 10970, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 3322488817984}, 'timestamp': '2025-09-10 02:38:48.642853', 'step': 10970, 'epoch': 2} {'type': 'loss', 'content': 0.1445566713809967, 'timestamp': '2025-09-10 02:38:48.645315', 'step': 10971, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 128], 'flops': 3797092544000}, 'timestamp': '2025-09-10 02:38:48.676858', 'step': 10971, 'epoch': 2} {'type': 'loss', 'content': 0.13012494146823883, 'timestamp': '2025-09-10 02:38:48.700848', 'step': 10972, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 2847885091968}, 'timestamp': '2025-09-10 02:38:48.734086', 'step': 10972, 'epoch': 2} {'type': 'loss', 'content': 0.04502089321613312, 'timestamp': '2025-09-10 02:38:48.736554', 'step': 10973, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 128], 'flops': 3797092544000}, 'timestamp': '2025-09-10 02:38:48.766864', 'step': 10973, 'epoch': 2} {'type': 'loss', 'content': 0.1329335868358612, 'timestamp': '2025-09-10 02:38:48.771582', 'step': 10974, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 3322488817984}, 'timestamp': '2025-09-10 02:38:48.802415', 'step': 10974, 'epoch': 2} {'type': 'loss', 'content': 0.16643847525119781, 'timestamp': '2025-09-10 02:38:48.804919', 'step': 10975, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 3322488817984}, 'timestamp': '2025-09-10 02:38:48.836845', 'step': 10975, 'epoch': 2} {'type': 'loss', 'content': 0.10852477699518204, 'timestamp': '2025-09-10 02:38:48.860657', 'step': 10976, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 3322488817984}, 'timestamp': '2025-09-10 02:38:48.892963', 'step': 10976, 'epoch': 2} {'type': 'loss', 'content': 0.13996003568172455, 'timestamp': '2025-09-10 02:38:48.895946', 'step': 10977, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 160], 'flops': 4746299996032}, 'timestamp': '2025-09-10 02:38:48.926909', 'step': 10977, 'epoch': 2} {'type': 'loss', 'content': 0.12103419005870819, 'timestamp': '2025-09-10 02:38:48.929745', 'step': 10978, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 144], 'flops': 4271696270016}, 'timestamp': '2025-09-10 02:38:48.961285', 'step': 10978, 'epoch': 2} {'type': 'loss', 'content': 0.11087903380393982, 'timestamp': '2025-09-10 02:38:48.963791', 'step': 10979, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 2847885091968}, 'timestamp': '2025-09-10 02:38:48.995968', 'step': 10979, 'epoch': 2} {'type': 'loss', 'content': 0.16999328136444092, 'timestamp': '2025-09-10 02:38:49.019698', 'step': 10980, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 128], 'flops': 3797092544000}, 'timestamp': '2025-09-10 02:38:49.050682', 'step': 10980, 'epoch': 2} {'type': 'loss', 'content': 0.08561448752880096, 'timestamp': '2025-09-10 02:38:49.052810', 'step': 10981, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 3322488817984}, 'timestamp': '2025-09-10 02:38:49.083563', 'step': 10981, 'epoch': 2} {'type': 'loss', 'content': 0.10414523631334305, 'timestamp': '2025-09-10 02:38:49.086518', 'step': 10982, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 144], 'flops': 4271696270016}, 'timestamp': '2025-09-10 02:38:49.117383', 'step': 10982, 'epoch': 2} {'type': 'loss', 'content': 0.14785578846931458, 'timestamp': '2025-09-10 02:38:49.119741', 'step': 10983, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 2847885091968}, 'timestamp': '2025-09-10 02:38:49.150227', 'step': 10983, 'epoch': 2} {'type': 'loss', 'content': 0.07495508342981339, 'timestamp': '2025-09-10 02:38:49.173700', 'step': 10984, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 192], 'flops': 5695507448064}, 'timestamp': '2025-09-10 02:38:49.204567', 'step': 10984, 'epoch': 2} {'type': 'loss', 'content': 0.08766796439886093, 'timestamp': '2025-09-10 02:38:49.207686', 'step': 10985, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 3322488817984}, 'timestamp': '2025-09-10 02:38:49.238904', 'step': 10985, 'epoch': 2} {'type': 'loss', 'content': 0.09185656160116196, 'timestamp': '2025-09-10 02:38:49.241222', 'step': 10986, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 2847885091968}, 'timestamp': '2025-09-10 02:38:49.271534', 'step': 10986, 'epoch': 2} {'type': 'loss', 'content': 0.18050645291805267, 'timestamp': '2025-09-10 02:38:49.274579', 'step': 10987, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 160], 'flops': 4746299996032}, 'timestamp': '2025-09-10 02:38:49.306568', 'step': 10987, 'epoch': 2} {'type': 'loss', 'content': 0.07702349126338959, 'timestamp': '2025-09-10 02:38:49.330456', 'step': 10988, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 3322488817984}, 'timestamp': '2025-09-10 02:38:49.361737', 'step': 10988, 'epoch': 2} {'type': 'loss', 'content': 0.1266365647315979, 'timestamp': '2025-09-10 02:38:49.363977', 'step': 10989, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 3322488817984}, 'timestamp': '2025-09-10 02:38:49.394124', 'step': 10989, 'epoch': 2} {'type': 'loss', 'content': 0.1066632941365242, 'timestamp': '2025-09-10 02:38:49.396956', 'step': 10990, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 80], 'flops': 2373281365952}, 'timestamp': '2025-09-10 02:38:49.427010', 'step': 10990, 'epoch': 2} {'type': 'loss', 'content': 0.09655788540840149, 'timestamp': '2025-09-10 02:38:49.429753', 'step': 10991, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 3322488817984}, 'timestamp': '2025-09-10 02:38:49.464588', 'step': 10991, 'epoch': 2} {'type': 'loss', 'content': 0.06630007177591324, 'timestamp': '2025-09-10 02:38:49.488070', 'step': 10992, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 2847885091968}, 'timestamp': '2025-09-10 02:38:49.519648', 'step': 10992, 'epoch': 2} {'type': 'loss', 'content': 0.14636638760566711, 'timestamp': '2025-09-10 02:38:49.521869', 'step': 10993, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 128], 'flops': 3797092544000}, 'timestamp': '2025-09-10 02:38:49.552451', 'step': 10993, 'epoch': 2} {'type': 'loss', 'content': 0.12861518561840057, 'timestamp': '2025-09-10 02:38:49.554900', 'step': 10994, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 2847885091968}, 'timestamp': '2025-09-10 02:38:49.585699', 'step': 10994, 'epoch': 2} {'type': 'loss', 'content': 0.0705333948135376, 'timestamp': '2025-09-10 02:38:49.588329', 'step': 10995, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 2847885091968}, 'timestamp': '2025-09-10 02:38:49.618136', 'step': 10995, 'epoch': 2} {'type': 'loss', 'content': 0.12478118389844894, 'timestamp': '2025-09-10 02:38:49.642246', 'step': 10996, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 2847885091968}, 'timestamp': '2025-09-10 02:38:49.676467', 'step': 10996, 'epoch': 2} {'type': 'loss', 'content': 0.12807968258857727, 'timestamp': '2025-09-10 02:38:49.680704', 'step': 10997, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 128], 'flops': 3797092544000}, 'timestamp': '2025-09-10 02:38:49.711302', 'step': 10997, 'epoch': 2} {'type': 'loss', 'content': 0.048607755452394485, 'timestamp': '2025-09-10 02:38:49.713490', 'step': 10998, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 128], 'flops': 3797092544000}, 'timestamp': '2025-09-10 02:38:49.744044', 'step': 10998, 'epoch': 2} {'type': 'loss', 'content': 0.18874113261699677, 'timestamp': '2025-09-10 02:38:49.746808', 'step': 10999, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 3322488817984}, 'timestamp': '2025-09-10 02:38:49.784678', 'step': 10999, 'epoch': 2} {'type': 'loss', 'content': 0.15461809933185577, 'timestamp': '2025-09-10 02:38:49.808795', 'step': 11000, 'epoch': 2} {'type': 'info', 'content': 'Checkpoint saved at step 11000', 'timestamp': '2025-09-10 02:38:56.167594', 'step': 11000, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 128], 'flops': 3797092544000}, 'timestamp': '2025-09-10 02:38:56.200623', 'step': 11000, 'epoch': 2} {'type': 'loss', 'content': 0.13512536883354187, 'timestamp': '2025-09-10 02:38:56.207100', 'step': 11001, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 2847885091968}, 'timestamp': '2025-09-10 02:38:56.237954', 'step': 11001, 'epoch': 2} {'type': 'loss', 'content': 0.13153892755508423, 'timestamp': '2025-09-10 02:38:56.240201', 'step': 11002, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 80], 'flops': 2373281365952}, 'timestamp': '2025-09-10 02:38:56.270375', 'step': 11002, 'epoch': 2} {'type': 'loss', 'content': 0.1215469166636467, 'timestamp': '2025-09-10 02:38:56.272504', 'step': 11003, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 3322488817984}, 'timestamp': '2025-09-10 02:38:56.302486', 'step': 11003, 'epoch': 2} {'type': 'loss', 'content': 0.07967071235179901, 'timestamp': '2025-09-10 02:38:56.326499', 'step': 11004, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 3322488817984}, 'timestamp': '2025-09-10 02:38:56.362891', 'step': 11004, 'epoch': 2} {'type': 'loss', 'content': 0.12484076619148254, 'timestamp': '2025-09-10 02:38:56.366686', 'step': 11005, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 3322488817984}, 'timestamp': '2025-09-10 02:38:56.399435', 'step': 11005, 'epoch': 2} {'type': 'loss', 'content': 0.21100996434688568, 'timestamp': '2025-09-10 02:38:56.405983', 'step': 11006, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 176], 'flops': 5220903722048}, 'timestamp': '2025-09-10 02:38:56.440131', 'step': 11006, 'epoch': 2} {'type': 'loss', 'content': 0.14711959660053253, 'timestamp': '2025-09-10 02:38:56.444377', 'step': 11007, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 144], 'flops': 4271696270016}, 'timestamp': '2025-09-10 02:38:56.480390', 'step': 11007, 'epoch': 2} {'type': 'loss', 'content': 0.07918740808963776, 'timestamp': '2025-09-10 02:38:56.504139', 'step': 11008, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 128], 'flops': 3797092544000}, 'timestamp': '2025-09-10 02:38:56.541549', 'step': 11008, 'epoch': 2} {'type': 'loss', 'content': 0.09936004132032394, 'timestamp': '2025-09-10 02:38:56.544459', 'step': 11009, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 3322488817984}, 'timestamp': '2025-09-10 02:38:56.580873', 'step': 11009, 'epoch': 2} {'type': 'loss', 'content': 0.11878742277622223, 'timestamp': '2025-09-10 02:38:56.583080', 'step': 11010, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 2847885091968}, 'timestamp': '2025-09-10 02:38:56.616826', 'step': 11010, 'epoch': 2} {'type': 'loss', 'content': 0.09227480739355087, 'timestamp': '2025-09-10 02:38:56.620589', 'step': 11011, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 128], 'flops': 3797092544000}, 'timestamp': '2025-09-10 02:38:56.655399', 'step': 11011, 'epoch': 2} {'type': 'loss', 'content': 0.06673281639814377, 'timestamp': '2025-09-10 02:38:56.679099', 'step': 11012, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 3322488817984}, 'timestamp': '2025-09-10 02:38:56.714341', 'step': 11012, 'epoch': 2} {'type': 'loss', 'content': 0.06799344718456268, 'timestamp': '2025-09-10 02:38:56.716848', 'step': 11013, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 144], 'flops': 4271696270016}, 'timestamp': '2025-09-10 02:38:56.747119', 'step': 11013, 'epoch': 2} {'type': 'loss', 'content': 0.06767483055591583, 'timestamp': '2025-09-10 02:38:56.749591', 'step': 11014, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 2847885091968}, 'timestamp': '2025-09-10 02:38:56.779311', 'step': 11014, 'epoch': 2} {'type': 'loss', 'content': 0.1004638671875, 'timestamp': '2025-09-10 02:38:56.781598', 'step': 11015, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 2847885091968}, 'timestamp': '2025-09-10 02:38:56.814781', 'step': 11015, 'epoch': 2} {'type': 'loss', 'content': 0.08199254423379898, 'timestamp': '2025-09-10 02:38:56.838395', 'step': 11016, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 2847885091968}, 'timestamp': '2025-09-10 02:38:56.875048', 'step': 11016, 'epoch': 2} {'type': 'loss', 'content': 0.10464086383581161, 'timestamp': '2025-09-10 02:38:56.880535', 'step': 11017, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 3322488817984}, 'timestamp': '2025-09-10 02:38:56.911223', 'step': 11017, 'epoch': 2} {'type': 'loss', 'content': 0.1230764240026474, 'timestamp': '2025-09-10 02:38:56.913813', 'step': 11018, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 3322488817984}, 'timestamp': '2025-09-10 02:38:56.946587', 'step': 11018, 'epoch': 2} {'type': 'loss', 'content': 0.16470769047737122, 'timestamp': '2025-09-10 02:38:56.953042', 'step': 11019, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 3322488817984}, 'timestamp': '2025-09-10 02:38:56.985689', 'step': 11019, 'epoch': 2} {'type': 'loss', 'content': 0.038507115095853806, 'timestamp': '2025-09-10 02:38:57.010651', 'step': 11020, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 128], 'flops': 3797092544000}, 'timestamp': '2025-09-10 02:38:57.048069', 'step': 11020, 'epoch': 2} {'type': 'loss', 'content': 0.12316323816776276, 'timestamp': '2025-09-10 02:38:57.050635', 'step': 11021, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 144], 'flops': 4271696270016}, 'timestamp': '2025-09-10 02:38:57.081203', 'step': 11021, 'epoch': 2} {'type': 'loss', 'content': 0.1712682843208313, 'timestamp': '2025-09-10 02:38:57.083935', 'step': 11022, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 80], 'flops': 2373281365952}, 'timestamp': '2025-09-10 02:38:57.115082', 'step': 11022, 'epoch': 2} {'type': 'loss', 'content': 0.10133140534162521, 'timestamp': '2025-09-10 02:38:57.117437', 'step': 11023, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 3322488817984}, 'timestamp': '2025-09-10 02:38:57.148124', 'step': 11023, 'epoch': 2} {'type': 'loss', 'content': 0.08310027420520782, 'timestamp': '2025-09-10 02:38:57.172106', 'step': 11024, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 2847885091968}, 'timestamp': '2025-09-10 02:38:57.205372', 'step': 11024, 'epoch': 2} {'type': 'loss', 'content': 0.07554244250059128, 'timestamp': '2025-09-10 02:38:57.208095', 'step': 11025, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 144], 'flops': 4271696270016}, 'timestamp': '2025-09-10 02:38:57.238342', 'step': 11025, 'epoch': 2} {'type': 'loss', 'content': 0.1630660593509674, 'timestamp': '2025-09-10 02:38:57.240818', 'step': 11026, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 2847885091968}, 'timestamp': '2025-09-10 02:38:57.271321', 'step': 11026, 'epoch': 2} {'type': 'loss', 'content': 0.03541501238942146, 'timestamp': '2025-09-10 02:38:57.273512', 'step': 11027, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 2847885091968}, 'timestamp': '2025-09-10 02:38:57.304395', 'step': 11027, 'epoch': 2} {'type': 'loss', 'content': 0.06380791962146759, 'timestamp': '2025-09-10 02:38:57.328098', 'step': 11028, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 144], 'flops': 4271696270016}, 'timestamp': '2025-09-10 02:38:57.358138', 'step': 11028, 'epoch': 2} {'type': 'loss', 'content': 0.1212359219789505, 'timestamp': '2025-09-10 02:38:57.360473', 'step': 11029, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 2847885091968}, 'timestamp': '2025-09-10 02:38:57.390926', 'step': 11029, 'epoch': 2} {'type': 'loss', 'content': 0.16872742772102356, 'timestamp': '2025-09-10 02:38:57.393426', 'step': 11030, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 160], 'flops': 4746299996032}, 'timestamp': '2025-09-10 02:38:57.423023', 'step': 11030, 'epoch': 2} {'type': 'loss', 'content': 0.10171020030975342, 'timestamp': '2025-09-10 02:38:57.425756', 'step': 11031, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 128], 'flops': 3797092544000}, 'timestamp': '2025-09-10 02:38:57.456419', 'step': 11031, 'epoch': 2} {'type': 'loss', 'content': 0.12960854172706604, 'timestamp': '2025-09-10 02:38:57.480053', 'step': 11032, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 144], 'flops': 4271696270016}, 'timestamp': '2025-09-10 02:38:57.510518', 'step': 11032, 'epoch': 2} {'type': 'loss', 'content': 0.12989045679569244, 'timestamp': '2025-09-10 02:38:57.512779', 'step': 11033, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 3322488817984}, 'timestamp': '2025-09-10 02:38:57.543162', 'step': 11033, 'epoch': 2} {'type': 'loss', 'content': 0.19032299518585205, 'timestamp': '2025-09-10 02:38:57.545479', 'step': 11034, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 80], 'flops': 2373281365952}, 'timestamp': '2025-09-10 02:38:57.574949', 'step': 11034, 'epoch': 2} {'type': 'loss', 'content': 0.0964583307504654, 'timestamp': '2025-09-10 02:38:57.577431', 'step': 11035, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 2847885091968}, 'timestamp': '2025-09-10 02:38:57.608652', 'step': 11035, 'epoch': 2} {'type': 'loss', 'content': 0.13763222098350525, 'timestamp': '2025-09-10 02:38:57.631999', 'step': 11036, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 128], 'flops': 3797092544000}, 'timestamp': '2025-09-10 02:38:57.662190', 'step': 11036, 'epoch': 2} {'type': 'loss', 'content': 0.1613718867301941, 'timestamp': '2025-09-10 02:38:57.664317', 'step': 11037, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 128], 'flops': 3797092544000}, 'timestamp': '2025-09-10 02:38:57.693849', 'step': 11037, 'epoch': 2} {'type': 'loss', 'content': 0.16407442092895508, 'timestamp': '2025-09-10 02:38:57.696122', 'step': 11038, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 128], 'flops': 3797092544000}, 'timestamp': '2025-09-10 02:38:57.725743', 'step': 11038, 'epoch': 2} {'type': 'loss', 'content': 0.06512638926506042, 'timestamp': '2025-09-10 02:38:57.729828', 'step': 11039, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 2847885091968}, 'timestamp': '2025-09-10 02:38:57.761438', 'step': 11039, 'epoch': 2} {'type': 'loss', 'content': 0.06501699984073639, 'timestamp': '2025-09-10 02:38:57.785485', 'step': 11040, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 3322488817984}, 'timestamp': '2025-09-10 02:38:57.820941', 'step': 11040, 'epoch': 2} {'type': 'loss', 'content': 0.11129689961671829, 'timestamp': '2025-09-10 02:38:57.823352', 'step': 11041, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 128], 'flops': 3797092544000}, 'timestamp': '2025-09-10 02:38:57.853490', 'step': 11041, 'epoch': 2} {'type': 'loss', 'content': 0.08402359485626221, 'timestamp': '2025-09-10 02:38:57.856282', 'step': 11042, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 2847885091968}, 'timestamp': '2025-09-10 02:38:57.886691', 'step': 11042, 'epoch': 2} {'type': 'loss', 'content': 0.05910315737128258, 'timestamp': '2025-09-10 02:38:57.889109', 'step': 11043, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 144], 'flops': 4271696270016}, 'timestamp': '2025-09-10 02:38:57.919480', 'step': 11043, 'epoch': 2} {'type': 'loss', 'content': 0.1166464239358902, 'timestamp': '2025-09-10 02:38:57.943449', 'step': 11044, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 2847885091968}, 'timestamp': '2025-09-10 02:38:57.973506', 'step': 11044, 'epoch': 2} {'type': 'loss', 'content': 0.1658928096294403, 'timestamp': '2025-09-10 02:38:57.975924', 'step': 11045, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 3322488817984}, 'timestamp': '2025-09-10 02:38:58.006937', 'step': 11045, 'epoch': 2} {'type': 'loss', 'content': 0.20329751074314117, 'timestamp': '2025-09-10 02:38:58.010886', 'step': 11046, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 2847885091968}, 'timestamp': '2025-09-10 02:38:58.041932', 'step': 11046, 'epoch': 2} {'type': 'loss', 'content': 0.04837295413017273, 'timestamp': '2025-09-10 02:38:58.044373', 'step': 11047, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 2847885091968}, 'timestamp': '2025-09-10 02:38:58.074162', 'step': 11047, 'epoch': 2} {'type': 'loss', 'content': 0.07367464154958725, 'timestamp': '2025-09-10 02:38:58.098198', 'step': 11048, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 3322488817984}, 'timestamp': '2025-09-10 02:38:58.128877', 'step': 11048, 'epoch': 2} {'type': 'loss', 'content': 0.14319942891597748, 'timestamp': '2025-09-10 02:38:58.130895', 'step': 11049, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 3322488817984}, 'timestamp': '2025-09-10 02:38:58.160651', 'step': 11049, 'epoch': 2} {'type': 'loss', 'content': 0.1719738095998764, 'timestamp': '2025-09-10 02:38:58.163028', 'step': 11050, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 3322488817984}, 'timestamp': '2025-09-10 02:38:58.192821', 'step': 11050, 'epoch': 2} {'type': 'loss', 'content': 0.057670753449201584, 'timestamp': '2025-09-10 02:38:58.194953', 'step': 11051, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 2847885091968}, 'timestamp': '2025-09-10 02:38:58.224349', 'step': 11051, 'epoch': 2} {'type': 'loss', 'content': 0.0744752585887909, 'timestamp': '2025-09-10 02:38:58.248120', 'step': 11052, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 144], 'flops': 4271696270016}, 'timestamp': '2025-09-10 02:38:58.278749', 'step': 11052, 'epoch': 2} {'type': 'loss', 'content': 0.06920667737722397, 'timestamp': '2025-09-10 02:38:58.281730', 'step': 11053, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 3322488817984}, 'timestamp': '2025-09-10 02:38:58.314814', 'step': 11053, 'epoch': 2} {'type': 'loss', 'content': 0.13998599350452423, 'timestamp': '2025-09-10 02:38:58.317793', 'step': 11054, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 128], 'flops': 3797092544000}, 'timestamp': '2025-09-10 02:38:58.349212', 'step': 11054, 'epoch': 2} {'type': 'loss', 'content': 0.07150663435459137, 'timestamp': '2025-09-10 02:38:58.351754', 'step': 11055, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 128], 'flops': 3797092544000}, 'timestamp': '2025-09-10 02:38:58.381959', 'step': 11055, 'epoch': 2} {'type': 'loss', 'content': 0.03788388893008232, 'timestamp': '2025-09-10 02:38:58.405858', 'step': 11056, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 144], 'flops': 4271696270016}, 'timestamp': '2025-09-10 02:38:58.437026', 'step': 11056, 'epoch': 2} {'type': 'loss', 'content': 0.1380872130393982, 'timestamp': '2025-09-10 02:38:58.439379', 'step': 11057, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 160], 'flops': 4746299996032}, 'timestamp': '2025-09-10 02:38:58.469619', 'step': 11057, 'epoch': 2} {'type': 'loss', 'content': 0.06921792775392532, 'timestamp': '2025-09-10 02:38:58.472441', 'step': 11058, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 3322488817984}, 'timestamp': '2025-09-10 02:38:58.503473', 'step': 11058, 'epoch': 2} {'type': 'loss', 'content': 0.07256906479597092, 'timestamp': '2025-09-10 02:38:58.506304', 'step': 11059, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 2847885091968}, 'timestamp': '2025-09-10 02:38:58.538426', 'step': 11059, 'epoch': 2} {'type': 'loss', 'content': 0.1573704481124878, 'timestamp': '2025-09-10 02:38:58.562254', 'step': 11060, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 3322488817984}, 'timestamp': '2025-09-10 02:38:58.592904', 'step': 11060, 'epoch': 2} {'type': 'loss', 'content': 0.1347118765115738, 'timestamp': '2025-09-10 02:38:58.596614', 'step': 11061, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 3322488817984}, 'timestamp': '2025-09-10 02:38:58.626903', 'step': 11061, 'epoch': 2} {'type': 'loss', 'content': 0.04041611775755882, 'timestamp': '2025-09-10 02:38:58.629419', 'step': 11062, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 128], 'flops': 3797092544000}, 'timestamp': '2025-09-10 02:38:58.660442', 'step': 11062, 'epoch': 2} {'type': 'loss', 'content': 0.055637139827013016, 'timestamp': '2025-09-10 02:38:58.662912', 'step': 11063, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 128], 'flops': 3797092544000}, 'timestamp': '2025-09-10 02:38:58.695298', 'step': 11063, 'epoch': 2} {'type': 'loss', 'content': 0.11736898124217987, 'timestamp': '2025-09-10 02:38:58.718602', 'step': 11064, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 3322488817984}, 'timestamp': '2025-09-10 02:38:58.749218', 'step': 11064, 'epoch': 2} {'type': 'loss', 'content': 0.05927962437272072, 'timestamp': '2025-09-10 02:38:58.751447', 'step': 11065, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 144], 'flops': 4271696270016}, 'timestamp': '2025-09-10 02:38:58.782004', 'step': 11065, 'epoch': 2} {'type': 'loss', 'content': 0.06519937515258789, 'timestamp': '2025-09-10 02:38:58.784525', 'step': 11066, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 3322488817984}, 'timestamp': '2025-09-10 02:38:58.815822', 'step': 11066, 'epoch': 2} {'type': 'loss', 'content': 0.23952142894268036, 'timestamp': '2025-09-10 02:38:58.818299', 'step': 11067, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 3322488817984}, 'timestamp': '2025-09-10 02:38:58.848026', 'step': 11067, 'epoch': 2} {'type': 'loss', 'content': 0.1310516595840454, 'timestamp': '2025-09-10 02:38:58.871638', 'step': 11068, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 3322488817984}, 'timestamp': '2025-09-10 02:38:58.901431', 'step': 11068, 'epoch': 2} {'type': 'loss', 'content': 0.1048438623547554, 'timestamp': '2025-09-10 02:38:58.903817', 'step': 11069, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 128], 'flops': 3797092544000}, 'timestamp': '2025-09-10 02:38:58.933810', 'step': 11069, 'epoch': 2} {'type': 'loss', 'content': 0.10929397493600845, 'timestamp': '2025-09-10 02:38:58.936112', 'step': 11070, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 3322488817984}, 'timestamp': '2025-09-10 02:38:58.966493', 'step': 11070, 'epoch': 2} {'type': 'loss', 'content': 0.20432089269161224, 'timestamp': '2025-09-10 02:38:58.968753', 'step': 11071, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 2847885091968}, 'timestamp': '2025-09-10 02:38:58.998460', 'step': 11071, 'epoch': 2} {'type': 'loss', 'content': 0.14823532104492188, 'timestamp': '2025-09-10 02:38:59.022939', 'step': 11072, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 80], 'flops': 2373281365952}, 'timestamp': '2025-09-10 02:38:59.054193', 'step': 11072, 'epoch': 2} {'type': 'loss', 'content': 0.10492587834596634, 'timestamp': '2025-09-10 02:38:59.056617', 'step': 11073, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 3322488817984}, 'timestamp': '2025-09-10 02:38:59.086117', 'step': 11073, 'epoch': 2} {'type': 'loss', 'content': 0.17160528898239136, 'timestamp': '2025-09-10 02:38:59.088500', 'step': 11074, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 144], 'flops': 4271696270016}, 'timestamp': '2025-09-10 02:38:59.118562', 'step': 11074, 'epoch': 2} {'type': 'loss', 'content': 0.06294520944356918, 'timestamp': '2025-09-10 02:38:59.120960', 'step': 11075, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 2847885091968}, 'timestamp': '2025-09-10 02:38:59.150963', 'step': 11075, 'epoch': 2} {'type': 'loss', 'content': 0.17768815159797668, 'timestamp': '2025-09-10 02:38:59.174639', 'step': 11076, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 80], 'flops': 2373281365952}, 'timestamp': '2025-09-10 02:38:59.204510', 'step': 11076, 'epoch': 2} {'type': 'loss', 'content': 0.08554252982139587, 'timestamp': '2025-09-10 02:38:59.207079', 'step': 11077, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 128], 'flops': 3797092544000}, 'timestamp': '2025-09-10 02:38:59.237031', 'step': 11077, 'epoch': 2} {'type': 'loss', 'content': 0.10156072676181793, 'timestamp': '2025-09-10 02:38:59.239252', 'step': 11078, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 3322488817984}, 'timestamp': '2025-09-10 02:38:59.268943', 'step': 11078, 'epoch': 2} {'type': 'loss', 'content': 0.0812879204750061, 'timestamp': '2025-09-10 02:38:59.273650', 'step': 11079, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 128], 'flops': 3797092544000}, 'timestamp': '2025-09-10 02:38:59.304515', 'step': 11079, 'epoch': 2} {'type': 'loss', 'content': 0.16428017616271973, 'timestamp': '2025-09-10 02:38:59.328116', 'step': 11080, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 2847885091968}, 'timestamp': '2025-09-10 02:38:59.358188', 'step': 11080, 'epoch': 2} {'type': 'loss', 'content': 0.1530590057373047, 'timestamp': '2025-09-10 02:38:59.360513', 'step': 11081, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 160], 'flops': 4746299996032}, 'timestamp': '2025-09-10 02:38:59.391406', 'step': 11081, 'epoch': 2} {'type': 'loss', 'content': 0.07835515588521957, 'timestamp': '2025-09-10 02:38:59.394293', 'step': 11082, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 128], 'flops': 3797092544000}, 'timestamp': '2025-09-10 02:38:59.424374', 'step': 11082, 'epoch': 2} {'type': 'loss', 'content': 0.11474312841892242, 'timestamp': '2025-09-10 02:38:59.426515', 'step': 11083, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 3322488817984}, 'timestamp': '2025-09-10 02:38:59.457828', 'step': 11083, 'epoch': 2} {'type': 'loss', 'content': 0.1401170939207077, 'timestamp': '2025-09-10 02:38:59.481506', 'step': 11084, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 3322488817984}, 'timestamp': '2025-09-10 02:38:59.513536', 'step': 11084, 'epoch': 2} {'type': 'loss', 'content': 0.12336238473653793, 'timestamp': '2025-09-10 02:38:59.516066', 'step': 11085, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 2847885091968}, 'timestamp': '2025-09-10 02:38:59.547652', 'step': 11085, 'epoch': 2} {'type': 'loss', 'content': 0.03607628121972084, 'timestamp': '2025-09-10 02:38:59.550797', 'step': 11086, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 3322488817984}, 'timestamp': '2025-09-10 02:38:59.581628', 'step': 11086, 'epoch': 2} {'type': 'loss', 'content': 0.06966813653707504, 'timestamp': '2025-09-10 02:38:59.584196', 'step': 11087, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 128], 'flops': 3797092544000}, 'timestamp': '2025-09-10 02:38:59.615521', 'step': 11087, 'epoch': 2} {'type': 'loss', 'content': 0.05033854395151138, 'timestamp': '2025-09-10 02:38:59.639450', 'step': 11088, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 128], 'flops': 3797092544000}, 'timestamp': '2025-09-10 02:38:59.669874', 'step': 11088, 'epoch': 2} {'type': 'loss', 'content': 0.15579020977020264, 'timestamp': '2025-09-10 02:38:59.672234', 'step': 11089, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 2847885091968}, 'timestamp': '2025-09-10 02:38:59.702963', 'step': 11089, 'epoch': 2} {'type': 'loss', 'content': 0.11348848044872284, 'timestamp': '2025-09-10 02:38:59.705980', 'step': 11090, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 128], 'flops': 3797092544000}, 'timestamp': '2025-09-10 02:38:59.738778', 'step': 11090, 'epoch': 2} {'type': 'loss', 'content': 0.13765442371368408, 'timestamp': '2025-09-10 02:38:59.742421', 'step': 11091, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 3322488817984}, 'timestamp': '2025-09-10 02:38:59.772322', 'step': 11091, 'epoch': 2} {'type': 'loss', 'content': 0.165363147854805, 'timestamp': '2025-09-10 02:38:59.795863', 'step': 11092, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 3322488817984}, 'timestamp': '2025-09-10 02:38:59.831124', 'step': 11092, 'epoch': 2} {'type': 'loss', 'content': 0.18443316221237183, 'timestamp': '2025-09-10 02:38:59.833343', 'step': 11093, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 3322488817984}, 'timestamp': '2025-09-10 02:38:59.865661', 'step': 11093, 'epoch': 2} {'type': 'loss', 'content': 0.10389450937509537, 'timestamp': '2025-09-10 02:38:59.867850', 'step': 11094, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 80], 'flops': 2373281365952}, 'timestamp': '2025-09-10 02:38:59.898343', 'step': 11094, 'epoch': 2} {'type': 'loss', 'content': 0.06668689846992493, 'timestamp': '2025-09-10 02:38:59.900534', 'step': 11095, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 2847885091968}, 'timestamp': '2025-09-10 02:38:59.930218', 'step': 11095, 'epoch': 2} {'type': 'loss', 'content': 0.1428200751543045, 'timestamp': '2025-09-10 02:38:59.955098', 'step': 11096, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 160], 'flops': 4746299996032}, 'timestamp': '2025-09-10 02:38:59.985040', 'step': 11096, 'epoch': 2} {'type': 'loss', 'content': 0.1205504760146141, 'timestamp': '2025-09-10 02:38:59.987314', 'step': 11097, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 2847885091968}, 'timestamp': '2025-09-10 02:39:00.016921', 'step': 11097, 'epoch': 2} {'type': 'loss', 'content': 0.1109173446893692, 'timestamp': '2025-09-10 02:39:00.019166', 'step': 11098, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 128], 'flops': 3797092544000}, 'timestamp': '2025-09-10 02:39:00.051020', 'step': 11098, 'epoch': 2} {'type': 'loss', 'content': 0.1613938808441162, 'timestamp': '2025-09-10 02:39:00.053140', 'step': 11099, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 128], 'flops': 3797092544000}, 'timestamp': '2025-09-10 02:39:00.082625', 'step': 11099, 'epoch': 2} {'type': 'loss', 'content': 0.15841247141361237, 'timestamp': '2025-09-10 02:39:00.106364', 'step': 11100, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 144], 'flops': 4271696270016}, 'timestamp': '2025-09-10 02:39:00.139275', 'step': 11100, 'epoch': 2} {'type': 'loss', 'content': 0.10080526769161224, 'timestamp': '2025-09-10 02:39:00.141683', 'step': 11101, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 2847885091968}, 'timestamp': '2025-09-10 02:39:00.171883', 'step': 11101, 'epoch': 2} {'type': 'loss', 'content': 0.10596626251935959, 'timestamp': '2025-09-10 02:39:00.174769', 'step': 11102, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 3322488817984}, 'timestamp': '2025-09-10 02:39:00.204495', 'step': 11102, 'epoch': 2} {'type': 'loss', 'content': 0.10171451419591904, 'timestamp': '2025-09-10 02:39:00.206981', 'step': 11103, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 128], 'flops': 3797092544000}, 'timestamp': '2025-09-10 02:39:00.237999', 'step': 11103, 'epoch': 2} {'type': 'loss', 'content': 0.08415311574935913, 'timestamp': '2025-09-10 02:39:00.261516', 'step': 11104, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 144], 'flops': 4271696270016}, 'timestamp': '2025-09-10 02:39:00.291883', 'step': 11104, 'epoch': 2} {'type': 'loss', 'content': 0.09300129115581512, 'timestamp': '2025-09-10 02:39:00.294976', 'step': 11105, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 144], 'flops': 4271696270016}, 'timestamp': '2025-09-10 02:39:00.325582', 'step': 11105, 'epoch': 2} {'type': 'loss', 'content': 0.09274999797344208, 'timestamp': '2025-09-10 02:39:00.328198', 'step': 11106, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 3322488817984}, 'timestamp': '2025-09-10 02:39:00.358665', 'step': 11106, 'epoch': 2} {'type': 'loss', 'content': 0.06499210000038147, 'timestamp': '2025-09-10 02:39:00.361028', 'step': 11107, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 128], 'flops': 3797092544000}, 'timestamp': '2025-09-10 02:39:00.390816', 'step': 11107, 'epoch': 2} {'type': 'loss', 'content': 0.17834587395191193, 'timestamp': '2025-09-10 02:39:00.414155', 'step': 11108, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 3322488817984}, 'timestamp': '2025-09-10 02:39:00.447019', 'step': 11108, 'epoch': 2} {'type': 'loss', 'content': 0.11016503721475601, 'timestamp': '2025-09-10 02:39:00.448964', 'step': 11109, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 144], 'flops': 4271696270016}, 'timestamp': '2025-09-10 02:39:00.479220', 'step': 11109, 'epoch': 2} {'type': 'loss', 'content': 0.22215530276298523, 'timestamp': '2025-09-10 02:39:00.481501', 'step': 11110, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 2847885091968}, 'timestamp': '2025-09-10 02:39:00.511719', 'step': 11110, 'epoch': 2} {'type': 'loss', 'content': 0.14387726783752441, 'timestamp': '2025-09-10 02:39:00.514440', 'step': 11111, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 352], 'flops': 10441544708224}, 'timestamp': '2025-09-10 02:39:00.549963', 'step': 11111, 'epoch': 2} {'type': 'loss', 'content': 0.08772867918014526, 'timestamp': '2025-09-10 02:39:00.584484', 'step': 11112, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 3322488817984}, 'timestamp': '2025-09-10 02:39:00.614307', 'step': 11112, 'epoch': 2} {'type': 'loss', 'content': 0.07478710263967514, 'timestamp': '2025-09-10 02:39:00.616813', 'step': 11113, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 2847885091968}, 'timestamp': '2025-09-10 02:39:00.648946', 'step': 11113, 'epoch': 2} {'type': 'loss', 'content': 0.12653006613254547, 'timestamp': '2025-09-10 02:39:00.651369', 'step': 11114, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 2847885091968}, 'timestamp': '2025-09-10 02:39:00.682038', 'step': 11114, 'epoch': 2} {'type': 'loss', 'content': 0.07923433184623718, 'timestamp': '2025-09-10 02:39:00.684461', 'step': 11115, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 2847885091968}, 'timestamp': '2025-09-10 02:39:00.714280', 'step': 11115, 'epoch': 2} {'type': 'loss', 'content': 0.11460115760564804, 'timestamp': '2025-09-10 02:39:00.738176', 'step': 11116, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 3322488817984}, 'timestamp': '2025-09-10 02:39:00.769886', 'step': 11116, 'epoch': 2} {'type': 'loss', 'content': 0.1628614217042923, 'timestamp': '2025-09-10 02:39:00.772195', 'step': 11117, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 2847885091968}, 'timestamp': '2025-09-10 02:39:00.802204', 'step': 11117, 'epoch': 2} {'type': 'loss', 'content': 0.07243696600198746, 'timestamp': '2025-09-10 02:39:00.807403', 'step': 11118, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 3322488817984}, 'timestamp': '2025-09-10 02:39:00.837295', 'step': 11118, 'epoch': 2} {'type': 'loss', 'content': 0.06329824030399323, 'timestamp': '2025-09-10 02:39:00.839656', 'step': 11119, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 144], 'flops': 4271696270016}, 'timestamp': '2025-09-10 02:39:00.870213', 'step': 11119, 'epoch': 2} {'type': 'loss', 'content': 0.21738658845424652, 'timestamp': '2025-09-10 02:39:00.893962', 'step': 11120, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 2847885091968}, 'timestamp': '2025-09-10 02:39:00.926200', 'step': 11120, 'epoch': 2} {'type': 'loss', 'content': 0.1308237612247467, 'timestamp': '2025-09-10 02:39:00.928696', 'step': 11121, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 144], 'flops': 4271696270016}, 'timestamp': '2025-09-10 02:39:00.958667', 'step': 11121, 'epoch': 2} {'type': 'loss', 'content': 0.1229695975780487, 'timestamp': '2025-09-10 02:39:00.960938', 'step': 11122, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 2847885091968}, 'timestamp': '2025-09-10 02:39:00.990665', 'step': 11122, 'epoch': 2} {'type': 'loss', 'content': 0.20904390513896942, 'timestamp': '2025-09-10 02:39:00.993092', 'step': 11123, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 2847885091968}, 'timestamp': '2025-09-10 02:39:01.023298', 'step': 11123, 'epoch': 2} {'type': 'loss', 'content': 0.09222917258739471, 'timestamp': '2025-09-10 02:39:01.050072', 'step': 11124, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 176], 'flops': 5220903722048}, 'timestamp': '2025-09-10 02:39:01.081126', 'step': 11124, 'epoch': 2} {'type': 'loss', 'content': 0.09506982564926147, 'timestamp': '2025-09-10 02:39:01.083325', 'step': 11125, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 128], 'flops': 3797092544000}, 'timestamp': '2025-09-10 02:39:01.113940', 'step': 11125, 'epoch': 2} {'type': 'loss', 'content': 0.13534456491470337, 'timestamp': '2025-09-10 02:39:01.116976', 'step': 11126, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 128], 'flops': 3797092544000}, 'timestamp': '2025-09-10 02:39:01.147871', 'step': 11126, 'epoch': 2} {'type': 'loss', 'content': 0.13119861483573914, 'timestamp': '2025-09-10 02:39:01.150323', 'step': 11127, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 2847885091968}, 'timestamp': '2025-09-10 02:39:01.180373', 'step': 11127, 'epoch': 2} {'type': 'loss', 'content': 0.07965166866779327, 'timestamp': '2025-09-10 02:39:01.204106', 'step': 11128, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 144], 'flops': 4271696270016}, 'timestamp': '2025-09-10 02:39:01.235710', 'step': 11128, 'epoch': 2} {'type': 'loss', 'content': 0.05253143981099129, 'timestamp': '2025-09-10 02:39:01.240078', 'step': 11129, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 128], 'flops': 3797092544000}, 'timestamp': '2025-09-10 02:39:01.270019', 'step': 11129, 'epoch': 2} {'type': 'loss', 'content': 0.11110588908195496, 'timestamp': '2025-09-10 02:39:01.272651', 'step': 11130, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 2847885091968}, 'timestamp': '2025-09-10 02:39:01.304586', 'step': 11130, 'epoch': 2} {'type': 'loss', 'content': 0.08842463791370392, 'timestamp': '2025-09-10 02:39:01.307125', 'step': 11131, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 144], 'flops': 4271696270016}, 'timestamp': '2025-09-10 02:39:01.337752', 'step': 11131, 'epoch': 2} {'type': 'loss', 'content': 0.15888887643814087, 'timestamp': '2025-09-10 02:39:01.361341', 'step': 11132, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 3322488817984}, 'timestamp': '2025-09-10 02:39:01.391362', 'step': 11132, 'epoch': 2} {'type': 'loss', 'content': 0.06983912736177444, 'timestamp': '2025-09-10 02:39:01.394002', 'step': 11133, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 128], 'flops': 3797092544000}, 'timestamp': '2025-09-10 02:39:01.424047', 'step': 11133, 'epoch': 2} {'type': 'loss', 'content': 0.11348234117031097, 'timestamp': '2025-09-10 02:39:01.426534', 'step': 11134, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 3322488817984}, 'timestamp': '2025-09-10 02:39:01.458962', 'step': 11134, 'epoch': 2} {'type': 'loss', 'content': 0.18557456135749817, 'timestamp': '2025-09-10 02:39:01.461488', 'step': 11135, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 128], 'flops': 3797092544000}, 'timestamp': '2025-09-10 02:39:01.491604', 'step': 11135, 'epoch': 2} {'type': 'loss', 'content': 0.041986655443906784, 'timestamp': '2025-09-10 02:39:01.515079', 'step': 11136, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 128], 'flops': 3797092544000}, 'timestamp': '2025-09-10 02:39:01.545635', 'step': 11136, 'epoch': 2} {'type': 'loss', 'content': 0.14102138578891754, 'timestamp': '2025-09-10 02:39:01.549382', 'step': 11137, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 2847885091968}, 'timestamp': '2025-09-10 02:39:01.582967', 'step': 11137, 'epoch': 2} {'type': 'loss', 'content': 0.0889408141374588, 'timestamp': '2025-09-10 02:39:01.585536', 'step': 11138, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 3322488817984}, 'timestamp': '2025-09-10 02:39:01.617155', 'step': 11138, 'epoch': 2} {'type': 'loss', 'content': 0.10599356144666672, 'timestamp': '2025-09-10 02:39:01.620103', 'step': 11139, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 3322488817984}, 'timestamp': '2025-09-10 02:39:01.651885', 'step': 11139, 'epoch': 2} {'type': 'loss', 'content': 0.164378359913826, 'timestamp': '2025-09-10 02:39:01.675952', 'step': 11140, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 192], 'flops': 5695507448064}, 'timestamp': '2025-09-10 02:39:01.708148', 'step': 11140, 'epoch': 2} {'type': 'loss', 'content': 0.09857384860515594, 'timestamp': '2025-09-10 02:39:01.710840', 'step': 11141, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 3322488817984}, 'timestamp': '2025-09-10 02:39:01.743885', 'step': 11141, 'epoch': 2} {'type': 'loss', 'content': 0.06867116689682007, 'timestamp': '2025-09-10 02:39:01.746322', 'step': 11142, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 3322488817984}, 'timestamp': '2025-09-10 02:39:01.776674', 'step': 11142, 'epoch': 2} {'type': 'loss', 'content': 0.1384410560131073, 'timestamp': '2025-09-10 02:39:01.778804', 'step': 11143, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 3322488817984}, 'timestamp': '2025-09-10 02:39:01.808363', 'step': 11143, 'epoch': 2} {'type': 'loss', 'content': 0.09009409695863724, 'timestamp': '2025-09-10 02:39:01.833297', 'step': 11144, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 128], 'flops': 3797092544000}, 'timestamp': '2025-09-10 02:39:01.865498', 'step': 11144, 'epoch': 2} {'type': 'loss', 'content': 0.14299660921096802, 'timestamp': '2025-09-10 02:39:01.868218', 'step': 11145, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 3322488817984}, 'timestamp': '2025-09-10 02:39:01.898496', 'step': 11145, 'epoch': 2} {'type': 'loss', 'content': 0.24352985620498657, 'timestamp': '2025-09-10 02:39:01.900759', 'step': 11146, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 2847885091968}, 'timestamp': '2025-09-10 02:39:01.931734', 'step': 11146, 'epoch': 2} {'type': 'loss', 'content': 0.1328471451997757, 'timestamp': '2025-09-10 02:39:01.933942', 'step': 11147, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 3322488817984}, 'timestamp': '2025-09-10 02:39:01.966014', 'step': 11147, 'epoch': 2} {'type': 'loss', 'content': 0.11662932485342026, 'timestamp': '2025-09-10 02:39:01.989438', 'step': 11148, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 144], 'flops': 4271696270016}, 'timestamp': '2025-09-10 02:39:02.020210', 'step': 11148, 'epoch': 2} {'type': 'loss', 'content': 0.12603655457496643, 'timestamp': '2025-09-10 02:39:02.022951', 'step': 11149, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 128], 'flops': 3797092544000}, 'timestamp': '2025-09-10 02:39:02.052859', 'step': 11149, 'epoch': 2} {'type': 'loss', 'content': 0.1465655416250229, 'timestamp': '2025-09-10 02:39:02.055235', 'step': 11150, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 2847885091968}, 'timestamp': '2025-09-10 02:39:02.085024', 'step': 11150, 'epoch': 2} {'type': 'loss', 'content': 0.08539585769176483, 'timestamp': '2025-09-10 02:39:02.087428', 'step': 11151, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 128], 'flops': 3797092544000}, 'timestamp': '2025-09-10 02:39:02.117493', 'step': 11151, 'epoch': 2} {'type': 'loss', 'content': 0.08569565415382385, 'timestamp': '2025-09-10 02:39:02.141385', 'step': 11152, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 2847885091968}, 'timestamp': '2025-09-10 02:39:02.171758', 'step': 11152, 'epoch': 2} {'type': 'loss', 'content': 0.07743208110332489, 'timestamp': '2025-09-10 02:39:02.174254', 'step': 11153, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 3322488817984}, 'timestamp': '2025-09-10 02:39:02.205129', 'step': 11153, 'epoch': 2} {'type': 'loss', 'content': 0.19042131304740906, 'timestamp': '2025-09-10 02:39:02.207318', 'step': 11154, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 3322488817984}, 'timestamp': '2025-09-10 02:39:02.238285', 'step': 11154, 'epoch': 2} {'type': 'loss', 'content': 0.22738943994045258, 'timestamp': '2025-09-10 02:39:02.242380', 'step': 11155, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 3322488817984}, 'timestamp': '2025-09-10 02:39:02.273098', 'step': 11155, 'epoch': 2} {'type': 'loss', 'content': 0.06049346923828125, 'timestamp': '2025-09-10 02:39:02.296795', 'step': 11156, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 176], 'flops': 5220903722048}, 'timestamp': '2025-09-10 02:39:02.327290', 'step': 11156, 'epoch': 2} {'type': 'loss', 'content': 0.03440884128212929, 'timestamp': '2025-09-10 02:39:02.329697', 'step': 11157, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 3322488817984}, 'timestamp': '2025-09-10 02:39:02.359986', 'step': 11157, 'epoch': 2} {'type': 'loss', 'content': 0.08742579817771912, 'timestamp': '2025-09-10 02:39:02.362328', 'step': 11158, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 3322488817984}, 'timestamp': '2025-09-10 02:39:02.392838', 'step': 11158, 'epoch': 2} {'type': 'loss', 'content': 0.050224483013153076, 'timestamp': '2025-09-10 02:39:02.395088', 'step': 11159, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 144], 'flops': 4271696270016}, 'timestamp': '2025-09-10 02:39:02.427939', 'step': 11159, 'epoch': 2} {'type': 'loss', 'content': 0.06468364596366882, 'timestamp': '2025-09-10 02:39:02.451834', 'step': 11160, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 2847885091968}, 'timestamp': '2025-09-10 02:39:02.482913', 'step': 11160, 'epoch': 2} {'type': 'loss', 'content': 0.15851999819278717, 'timestamp': '2025-09-10 02:39:02.485239', 'step': 11161, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 3322488817984}, 'timestamp': '2025-09-10 02:39:02.515590', 'step': 11161, 'epoch': 2} {'type': 'loss', 'content': 0.11130580306053162, 'timestamp': '2025-09-10 02:39:02.518293', 'step': 11162, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 144], 'flops': 4271696270016}, 'timestamp': '2025-09-10 02:39:02.548374', 'step': 11162, 'epoch': 2} {'type': 'loss', 'content': 0.11217379570007324, 'timestamp': '2025-09-10 02:39:02.550828', 'step': 11163, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 2847885091968}, 'timestamp': '2025-09-10 02:39:02.581674', 'step': 11163, 'epoch': 2} {'type': 'loss', 'content': 0.13440293073654175, 'timestamp': '2025-09-10 02:39:02.605677', 'step': 11164, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 128], 'flops': 3797092544000}, 'timestamp': '2025-09-10 02:39:02.635965', 'step': 11164, 'epoch': 2} {'type': 'loss', 'content': 0.12860848009586334, 'timestamp': '2025-09-10 02:39:02.638103', 'step': 11165, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 3322488817984}, 'timestamp': '2025-09-10 02:39:02.671266', 'step': 11165, 'epoch': 2} {'type': 'loss', 'content': 0.0828266441822052, 'timestamp': '2025-09-10 02:39:02.673505', 'step': 11166, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 3322488817984}, 'timestamp': '2025-09-10 02:39:02.704419', 'step': 11166, 'epoch': 2} {'type': 'loss', 'content': 0.18861903250217438, 'timestamp': '2025-09-10 02:39:02.706724', 'step': 11167, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 3322488817984}, 'timestamp': '2025-09-10 02:39:02.736643', 'step': 11167, 'epoch': 2} {'type': 'loss', 'content': 0.14536090195178986, 'timestamp': '2025-09-10 02:39:02.760035', 'step': 11168, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 128], 'flops': 3797092544000}, 'timestamp': '2025-09-10 02:39:02.793514', 'step': 11168, 'epoch': 2} {'type': 'loss', 'content': 0.12984412908554077, 'timestamp': '2025-09-10 02:39:02.795640', 'step': 11169, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 144], 'flops': 4271696270016}, 'timestamp': '2025-09-10 02:39:02.826042', 'step': 11169, 'epoch': 2} {'type': 'loss', 'content': 0.055914122611284256, 'timestamp': '2025-09-10 02:39:02.829889', 'step': 11170, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 2847885091968}, 'timestamp': '2025-09-10 02:39:02.861293', 'step': 11170, 'epoch': 2} {'type': 'loss', 'content': 0.13946399092674255, 'timestamp': '2025-09-10 02:39:02.864189', 'step': 11171, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 128], 'flops': 3797092544000}, 'timestamp': '2025-09-10 02:39:02.894757', 'step': 11171, 'epoch': 2} {'type': 'loss', 'content': 0.07581847161054611, 'timestamp': '2025-09-10 02:39:02.918616', 'step': 11172, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 3322488817984}, 'timestamp': '2025-09-10 02:39:02.949514', 'step': 11172, 'epoch': 2} {'type': 'loss', 'content': 0.14333269000053406, 'timestamp': '2025-09-10 02:39:02.952450', 'step': 11173, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 3322488817984}, 'timestamp': '2025-09-10 02:39:02.982771', 'step': 11173, 'epoch': 2} {'type': 'loss', 'content': 0.19576437771320343, 'timestamp': '2025-09-10 02:39:02.985329', 'step': 11174, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 80], 'flops': 2373281365952}, 'timestamp': '2025-09-10 02:39:03.015452', 'step': 11174, 'epoch': 2} {'type': 'loss', 'content': 0.16947519779205322, 'timestamp': '2025-09-10 02:39:03.017678', 'step': 11175, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 3322488817984}, 'timestamp': '2025-09-10 02:39:03.047542', 'step': 11175, 'epoch': 2} {'type': 'loss', 'content': 0.19338832795619965, 'timestamp': '2025-09-10 02:39:03.071126', 'step': 11176, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 3322488817984}, 'timestamp': '2025-09-10 02:39:03.101920', 'step': 11176, 'epoch': 2} {'type': 'loss', 'content': 0.0720122680068016, 'timestamp': '2025-09-10 02:39:03.104522', 'step': 11177, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 2847885091968}, 'timestamp': '2025-09-10 02:39:03.134840', 'step': 11177, 'epoch': 2} {'type': 'loss', 'content': 0.07652277499437332, 'timestamp': '2025-09-10 02:39:03.137665', 'step': 11178, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 128], 'flops': 3797092544000}, 'timestamp': '2025-09-10 02:39:03.168558', 'step': 11178, 'epoch': 2} {'type': 'loss', 'content': 0.1354832798242569, 'timestamp': '2025-09-10 02:39:03.171342', 'step': 11179, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 3322488817984}, 'timestamp': '2025-09-10 02:39:03.202737', 'step': 11179, 'epoch': 2} {'type': 'loss', 'content': 0.0996273010969162, 'timestamp': '2025-09-10 02:39:03.226298', 'step': 11180, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 160], 'flops': 4746299996032}, 'timestamp': '2025-09-10 02:39:03.257178', 'step': 11180, 'epoch': 2} {'type': 'loss', 'content': 0.14726822078227997, 'timestamp': '2025-09-10 02:39:03.259706', 'step': 11181, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 2847885091968}, 'timestamp': '2025-09-10 02:39:03.290257', 'step': 11181, 'epoch': 2} {'type': 'loss', 'content': 0.056992243975400925, 'timestamp': '2025-09-10 02:39:03.292599', 'step': 11182, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 2847885091968}, 'timestamp': '2025-09-10 02:39:03.322364', 'step': 11182, 'epoch': 2} {'type': 'loss', 'content': 0.169138103723526, 'timestamp': '2025-09-10 02:39:03.324543', 'step': 11183, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 2847885091968}, 'timestamp': '2025-09-10 02:39:03.354555', 'step': 11183, 'epoch': 2} {'type': 'loss', 'content': 0.11003482341766357, 'timestamp': '2025-09-10 02:39:03.378999', 'step': 11184, 'epoch': 2} {'type': 'flops', 'content': [{'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1582003754624}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1898404492032}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1898404492032}, {'type': 'perplexity', 'in_batch_dim': [8, 144], 'batch_size': 8, 'flops': 2847606704256}, {'type': 'perplexity', 'in_batch_dim': [8, 64], 'batch_size': 8, 'flops': 1265603017216}, {'type': 'perplexity', 'in_batch_dim': [8, 112], 'batch_size': 8, 'flops': 2214805229440}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1898404492032}, {'type': 'perplexity', 'in_batch_dim': [8, 112], 'batch_size': 8, 'flops': 2214805229440}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1898404492032}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1898404492032}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1898404492032}, {'type': 'perplexity', 'in_batch_dim': [8, 112], 'batch_size': 8, 'flops': 2214805229440}, {'type': 'perplexity', 'in_batch_dim': [8, 112], 'batch_size': 8, 'flops': 2214805229440}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1582003754624}, {'type': 'perplexity', 'in_batch_dim': [8, 144], 'batch_size': 8, 'flops': 2847606704256}, {'type': 'perplexity', 'in_batch_dim': [8, 112], 'batch_size': 8, 'flops': 2214805229440}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1582003754624}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1898404492032}, {'type': 'perplexity', 'in_batch_dim': [8, 64], 'batch_size': 8, 'flops': 1265603017216}, {'type': 'perplexity', 'in_batch_dim': [8, 64], 'batch_size': 8, 'flops': 1265603017216}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1898404492032}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1582003754624}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1582003754624}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1582003754624}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1582003754624}, {'type': 'perplexity', 'in_batch_dim': [8, 64], 'batch_size': 8, 'flops': 1265603017216}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1898404492032}, {'type': 'perplexity', 'in_batch_dim': [8, 64], 'batch_size': 8, 'flops': 1265603017216}, {'type': 'perplexity', 'in_batch_dim': [8, 64], 'batch_size': 8, 'flops': 1265603017216}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1582003754624}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1582003754624}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1582003754624}, {'type': 'perplexity', 'in_batch_dim': [8, 112], 'batch_size': 8, 'flops': 2214805229440}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1582003754624}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1582003754624}, {'type': 'perplexity', 'in_batch_dim': [8, 160], 'batch_size': 8, 'flops': 3164007441664}, {'type': 'perplexity', 'in_batch_dim': [8, 64], 'batch_size': 8, 'flops': 1265603017216}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1898404492032}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1898404492032}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1898404492032}, {'type': 'perplexity', 'in_batch_dim': [8, 64], 'batch_size': 8, 'flops': 1265603017216}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1582003754624}, {'type': 'perplexity', 'in_batch_dim': [8, 64], 'batch_size': 8, 'flops': 1265603017216}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1582003754624}, {'type': 'perplexity', 'in_batch_dim': [8, 64], 'batch_size': 8, 'flops': 1265603017216}, {'type': 'perplexity', 'in_batch_dim': [8, 112], 'batch_size': 8, 'flops': 2214805229440}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1582003754624}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1898404492032}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1898404492032}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1898404492032}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1898404492032}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1898404492032}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1898404492032}, {'type': 'perplexity', 'in_batch_dim': [8, 64], 'batch_size': 8, 'flops': 1265603017216}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1898404492032}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1582003754624}, {'type': 'perplexity', 'in_batch_dim': [8, 64], 'batch_size': 8, 'flops': 1265603017216}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1898404492032}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1898404492032}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1898404492032}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1898404492032}, {'type': 'perplexity', 'in_batch_dim': [8, 128], 'batch_size': 8, 'flops': 2531205966848}, {'type': 'perplexity', 'in_batch_dim': [8, 112], 'batch_size': 8, 'flops': 2214805229440}, {'type': 'perplexity', 'in_batch_dim': [8, 64], 'batch_size': 8, 'flops': 1265603017216}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1582003754624}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1898404492032}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1582003754624}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1898404492032}, {'type': 'perplexity', 'in_batch_dim': [8, 64], 'batch_size': 8, 'flops': 1265603017216}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1898404492032}, {'type': 'perplexity', 'in_batch_dim': [8, 112], 'batch_size': 8, 'flops': 2214805229440}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1898404492032}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1582003754624}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1582003754624}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1582003754624}, {'type': 'perplexity', 'in_batch_dim': [8, 64], 'batch_size': 8, 'flops': 1265603017216}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1582003754624}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1898404492032}, {'type': 'perplexity', 'in_batch_dim': [8, 64], 'batch_size': 8, 'flops': 1265603017216}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1582003754624}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1898404492032}, {'type': 'perplexity', 'in_batch_dim': [8, 64], 'batch_size': 8, 'flops': 1265603017216}, {'type': 'perplexity', 'in_batch_dim': [8, 112], 'batch_size': 8, 'flops': 2214805229440}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1582003754624}, {'type': 'perplexity', 'in_batch_dim': [8, 144], 'batch_size': 8, 'flops': 2847606704256}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1582003754624}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1582003754624}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1582003754624}, {'type': 'perplexity', 'in_batch_dim': [8, 64], 'batch_size': 8, 'flops': 1265603017216}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1898404492032}, {'type': 'perplexity', 'in_batch_dim': [8, 112], 'batch_size': 8, 'flops': 2214805229440}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1898404492032}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1582003754624}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1582003754624}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1582003754624}, {'type': 'perplexity', 'in_batch_dim': [8, 112], 'batch_size': 8, 'flops': 2214805229440}, {'type': 'perplexity', 'in_batch_dim': [8, 64], 'batch_size': 8, 'flops': 1265603017216}, {'type': 'perplexity', 'in_batch_dim': [8, 112], 'batch_size': 8, 'flops': 2214805229440}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1582003754624}, {'type': 'perplexity', 'in_batch_dim': [8, 64], 'batch_size': 8, 'flops': 1265603017216}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1582003754624}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1582003754624}, {'type': 'perplexity', 'in_batch_dim': [8, 64], 'batch_size': 8, 'flops': 1265603017216}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1582003754624}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1582003754624}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1582003754624}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1898404492032}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1582003754624}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1582003754624}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1898404492032}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1898404492032}, {'type': 'perplexity', 'in_batch_dim': [8, 112], 'batch_size': 8, 'flops': 2214805229440}, {'type': 'perplexity', 'in_batch_dim': [8, 64], 'batch_size': 8, 'flops': 1265603017216}, {'type': 'perplexity', 'in_batch_dim': [8, 112], 'batch_size': 8, 'flops': 2214805229440}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1898404492032}, {'type': 'perplexity', 'in_batch_dim': [8, 112], 'batch_size': 8, 'flops': 2214805229440}, {'type': 'perplexity', 'in_batch_dim': [8, 128], 'batch_size': 8, 'flops': 2531205966848}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1582003754624}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1898404492032}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1898404492032}, {'type': 'perplexity', 'in_batch_dim': [8, 192], 'batch_size': 8, 'flops': 3796808916480}, {'type': 'perplexity', 'in_batch_dim': [8, 64], 'batch_size': 8, 'flops': 1265603017216}, {'type': 'perplexity', 'in_batch_dim': [8, 144], 'batch_size': 8, 'flops': 2847606704256}, {'type': 'perplexity', 'in_batch_dim': [8, 64], 'batch_size': 8, 'flops': 1265603017216}, {'type': 'perplexity', 'in_batch_dim': [8, 144], 'batch_size': 8, 'flops': 2847606704256}, {'type': 'perplexity', 'in_batch_dim': [8, 64], 'batch_size': 8, 'flops': 1265603017216}, {'type': 'perplexity', 'in_batch_dim': [8, 64], 'batch_size': 8, 'flops': 1265603017216}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1898404492032}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1898404492032}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1582003754624}, {'type': 'perplexity', 'in_batch_dim': [8, 128], 'batch_size': 8, 'flops': 2531205966848}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1898404492032}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1898404492032}, {'type': 'perplexity', 'in_batch_dim': [8, 112], 'batch_size': 8, 'flops': 2214805229440}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1582003754624}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1582003754624}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1898404492032}, {'type': 'perplexity', 'in_batch_dim': [8, 64], 'batch_size': 8, 'flops': 1265603017216}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1898404492032}, {'type': 'perplexity', 'in_batch_dim': [8, 112], 'batch_size': 8, 'flops': 2214805229440}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1898404492032}, {'type': 'perplexity', 'in_batch_dim': [8, 64], 'batch_size': 8, 'flops': 1265603017216}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1582003754624}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1898404492032}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1898404492032}, {'type': 'perplexity', 'in_batch_dim': [8, 64], 'batch_size': 8, 'flops': 1265603017216}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1898404492032}, {'type': 'perplexity', 'in_batch_dim': [8, 64], 'batch_size': 8, 'flops': 1265603017216}, {'type': 'perplexity', 'in_batch_dim': [8, 112], 'batch_size': 8, 'flops': 2214805229440}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1582003754624}, {'type': 'perplexity', 'in_batch_dim': [8, 128], 'batch_size': 8, 'flops': 2531205966848}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1582003754624}, {'type': 'perplexity', 'in_batch_dim': [8, 112], 'batch_size': 8, 'flops': 2214805229440}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1898404492032}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1582003754624}, {'type': 'perplexity', 'in_batch_dim': [8, 112], 'batch_size': 8, 'flops': 2214805229440}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1582003754624}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1898404492032}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1898404492032}, {'type': 'perplexity', 'in_batch_dim': [8, 128], 'batch_size': 8, 'flops': 2531205966848}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1898404492032}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1898404492032}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1582003754624}, {'type': 'perplexity', 'in_batch_dim': [8, 112], 'batch_size': 8, 'flops': 2214805229440}, {'type': 'perplexity', 'in_batch_dim': [8, 128], 'batch_size': 8, 'flops': 2531205966848}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1898404492032}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1582003754624}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1582003754624}, {'type': 'perplexity', 'in_batch_dim': [8, 128], 'batch_size': 8, 'flops': 2531205966848}, {'type': 'perplexity', 'in_batch_dim': [8, 112], 'batch_size': 8, 'flops': 2214805229440}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1582003754624}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1582003754624}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1898404492032}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1582003754624}, {'type': 'perplexity', 'in_batch_dim': [8, 64], 'batch_size': 8, 'flops': 1265603017216}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1582003754624}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1582003754624}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1582003754624}, {'type': 'perplexity', 'in_batch_dim': [8, 112], 'batch_size': 8, 'flops': 2214805229440}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1582003754624}, {'type': 'perplexity', 'in_batch_dim': [8, 64], 'batch_size': 8, 'flops': 1265603017216}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1582003754624}, {'type': 'perplexity', 'in_batch_dim': [8, 128], 'batch_size': 8, 'flops': 2531205966848}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1898404492032}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1898404492032}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1582003754624}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1898404492032}, {'type': 'perplexity', 'in_batch_dim': [8, 64], 'batch_size': 8, 'flops': 1265603017216}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1898404492032}, {'type': 'perplexity', 'in_batch_dim': [8, 112], 'batch_size': 8, 'flops': 2214805229440}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1898404492032}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1582003754624}, {'type': 'perplexity', 'in_batch_dim': [8, 112], 'batch_size': 8, 'flops': 2214805229440}, {'type': 'perplexity', 'in_batch_dim': [8, 112], 'batch_size': 8, 'flops': 2214805229440}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1898404492032}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1898404492032}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1898404492032}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1898404492032}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1582003754624}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1582003754624}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1582003754624}, {'type': 'perplexity', 'in_batch_dim': [8, 112], 'batch_size': 8, 'flops': 2214805229440}, {'type': 'perplexity', 'in_batch_dim': [8, 64], 'batch_size': 8, 'flops': 1265603017216}, {'type': 'perplexity', 'in_batch_dim': [8, 144], 'batch_size': 8, 'flops': 2847606704256}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1898404492032}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1898404492032}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1898404492032}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1898404492032}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1898404492032}, {'type': 'perplexity', 'in_batch_dim': [8, 112], 'batch_size': 8, 'flops': 2214805229440}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1582003754624}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1582003754624}, {'type': 'perplexity', 'in_batch_dim': [8, 64], 'batch_size': 8, 'flops': 1265603017216}, {'type': 'perplexity', 'in_batch_dim': [8, 112], 'batch_size': 8, 'flops': 2214805229440}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1582003754624}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1898404492032}, {'type': 'perplexity', 'in_batch_dim': [8, 64], 'batch_size': 8, 'flops': 1265603017216}, {'type': 'perplexity', 'in_batch_dim': [8, 112], 'batch_size': 8, 'flops': 2214805229440}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1898404492032}, {'type': 'perplexity', 'in_batch_dim': [8, 64], 'batch_size': 8, 'flops': 1265603017216}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1582003754624}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1582003754624}, {'type': 'perplexity', 'in_batch_dim': [8, 64], 'batch_size': 8, 'flops': 1265603017216}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1898404492032}, {'type': 'perplexity', 'in_batch_dim': [8, 64], 'batch_size': 8, 'flops': 1265603017216}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1582003754624}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1898404492032}, {'type': 'perplexity', 'in_batch_dim': [8, 112], 'batch_size': 8, 'flops': 2214805229440}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1582003754624}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1898404492032}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1582003754624}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1898404492032}, {'type': 'perplexity', 'in_batch_dim': [8, 112], 'batch_size': 8, 'flops': 2214805229440}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1898404492032}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1898404492032}, {'type': 'perplexity', 'in_batch_dim': [8, 64], 'batch_size': 8, 'flops': 1265603017216}, {'type': 'perplexity', 'in_batch_dim': [8, 64], 'batch_size': 8, 'flops': 1265603017216}, {'type': 'perplexity', 'in_batch_dim': [8, 128], 'batch_size': 8, 'flops': 2531205966848}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1582003754624}, {'type': 'perplexity', 'in_batch_dim': [8, 64], 'batch_size': 8, 'flops': 1265603017216}, {'type': 'perplexity', 'in_batch_dim': [8, 112], 'batch_size': 8, 'flops': 2214805229440}, {'type': 'perplexity', 'in_batch_dim': [8, 112], 'batch_size': 8, 'flops': 2214805229440}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1898404492032}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1582003754624}, {'type': 'perplexity', 'in_batch_dim': [8, 128], 'batch_size': 8, 'flops': 2531205966848}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1898404492032}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1582003754624}, {'type': 'perplexity', 'in_batch_dim': [8, 112], 'batch_size': 8, 'flops': 2214805229440}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1898404492032}, {'type': 'perplexity', 'in_batch_dim': [8, 64], 'batch_size': 8, 'flops': 1265603017216}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1582003754624}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1898404492032}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1898404492032}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1582003754624}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1582003754624}, {'type': 'perplexity', 'in_batch_dim': [8, 64], 'batch_size': 8, 'flops': 1265603017216}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1898404492032}, {'type': 'perplexity', 'in_batch_dim': [8, 112], 'batch_size': 8, 'flops': 2214805229440}, {'type': 'perplexity', 'in_batch_dim': [8, 64], 'batch_size': 8, 'flops': 1265603017216}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1898404492032}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1582003754624}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1582003754624}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1898404492032}, {'type': 'perplexity', 'in_batch_dim': [8, 112], 'batch_size': 8, 'flops': 2214805229440}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1898404492032}, {'type': 'perplexity', 'in_batch_dim': [8, 112], 'batch_size': 8, 'flops': 2214805229440}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1582003754624}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1582003754624}, {'type': 'perplexity', 'in_batch_dim': [8, 64], 'batch_size': 8, 'flops': 1265603017216}, {'type': 'perplexity', 'in_batch_dim': [8, 112], 'batch_size': 8, 'flops': 2214805229440}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1582003754624}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1898404492032}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1582003754624}, {'type': 'perplexity', 'in_batch_dim': [8, 112], 'batch_size': 8, 'flops': 2214805229440}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1898404492032}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1898404492032}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1582003754624}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1898404492032}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1582003754624}, {'type': 'perplexity', 'in_batch_dim': [8, 112], 'batch_size': 8, 'flops': 2214805229440}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1582003754624}, {'type': 'perplexity', 'in_batch_dim': [8, 64], 'batch_size': 8, 'flops': 1265603017216}, {'type': 'perplexity', 'in_batch_dim': [8, 64], 'batch_size': 8, 'flops': 1265603017216}, {'type': 'perplexity', 'in_batch_dim': [8, 48], 'batch_size': 8, 'flops': 949202279808}, {'type': 'perplexity', 'in_batch_dim': [8, 112], 'batch_size': 8, 'flops': 2214805229440}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1582003754624}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1898404492032}, {'type': 'perplexity', 'in_batch_dim': [8, 128], 'batch_size': 8, 'flops': 2531205966848}, {'type': 'perplexity', 'in_batch_dim': [8, 112], 'batch_size': 8, 'flops': 2214805229440}, {'type': 'perplexity', 'in_batch_dim': [8, 128], 'batch_size': 8, 'flops': 2531205966848}, {'type': 'perplexity', 'in_batch_dim': [8, 112], 'batch_size': 8, 'flops': 2214805229440}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1582003754624}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1898404492032}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1582003754624}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1582003754624}, {'type': 'perplexity', 'in_batch_dim': [8, 112], 'batch_size': 8, 'flops': 2214805229440}, {'type': 'perplexity', 'in_batch_dim': [8, 112], 'batch_size': 8, 'flops': 2214805229440}, {'type': 'perplexity', 'in_batch_dim': [8, 64], 'batch_size': 8, 'flops': 1265603017216}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1898404492032}, {'type': 'perplexity', 'in_batch_dim': [8, 112], 'batch_size': 8, 'flops': 2214805229440}, {'type': 'perplexity', 'in_batch_dim': [8, 64], 'batch_size': 8, 'flops': 1265603017216}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1898404492032}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1582003754624}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1898404492032}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1898404492032}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1898404492032}, {'type': 'perplexity', 'in_batch_dim': [8, 48], 'batch_size': 8, 'flops': 949202279808}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1898404492032}, {'type': 'perplexity', 'in_batch_dim': [8, 112], 'batch_size': 8, 'flops': 2214805229440}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1582003754624}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1898404492032}, {'type': 'perplexity', 'in_batch_dim': [8, 64], 'batch_size': 8, 'flops': 1265603017216}, {'type': 'perplexity', 'in_batch_dim': [8, 112], 'batch_size': 8, 'flops': 2214805229440}, {'type': 'perplexity', 'in_batch_dim': [8, 64], 'batch_size': 8, 'flops': 1265603017216}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1582003754624}, {'type': 'perplexity', 'in_batch_dim': [8, 64], 'batch_size': 8, 'flops': 1265603017216}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1582003754624}, {'type': 'perplexity', 'in_batch_dim': [8, 112], 'batch_size': 8, 'flops': 2214805229440}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1582003754624}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1582003754624}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1582003754624}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1582003754624}, {'type': 'perplexity', 'in_batch_dim': [8, 112], 'batch_size': 8, 'flops': 2214805229440}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1582003754624}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1898404492032}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1898404492032}, {'type': 'perplexity', 'in_batch_dim': [8, 112], 'batch_size': 8, 'flops': 2214805229440}, {'type': 'perplexity', 'in_batch_dim': [8, 144], 'batch_size': 8, 'flops': 2847606704256}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1582003754624}, {'type': 'perplexity', 'in_batch_dim': [8, 64], 'batch_size': 8, 'flops': 1265603017216}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1898404492032}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1898404492032}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1582003754624}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1898404492032}, {'type': 'perplexity', 'in_batch_dim': [8, 128], 'batch_size': 8, 'flops': 2531205966848}, {'type': 'perplexity', 'in_batch_dim': [8, 112], 'batch_size': 8, 'flops': 2214805229440}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1898404492032}, {'type': 'perplexity', 'in_batch_dim': [8, 64], 'batch_size': 8, 'flops': 1265603017216}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1898404492032}, {'type': 'perplexity', 'in_batch_dim': [8, 160], 'batch_size': 8, 'flops': 3164007441664}, {'type': 'perplexity', 'in_batch_dim': [8, 64], 'batch_size': 8, 'flops': 1265603017216}, {'type': 'perplexity', 'in_batch_dim': [8, 112], 'batch_size': 8, 'flops': 2214805229440}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1898404492032}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1898404492032}, {'type': 'perplexity', 'in_batch_dim': [8, 144], 'batch_size': 8, 'flops': 2847606704256}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1582003754624}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1898404492032}, {'type': 'perplexity', 'in_batch_dim': [8, 64], 'batch_size': 8, 'flops': 1265603017216}, {'type': 'perplexity', 'in_batch_dim': [8, 64], 'batch_size': 8, 'flops': 1265603017216}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1898404492032}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1898404492032}, {'type': 'perplexity', 'in_batch_dim': [8, 112], 'batch_size': 8, 'flops': 2214805229440}, {'type': 'perplexity', 'in_batch_dim': [8, 112], 'batch_size': 8, 'flops': 2214805229440}, {'type': 'perplexity', 'in_batch_dim': [8, 128], 'batch_size': 8, 'flops': 2531205966848}, {'type': 'perplexity', 'in_batch_dim': [8, 112], 'batch_size': 8, 'flops': 2214805229440}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1582003754624}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1582003754624}, {'type': 'perplexity', 'in_batch_dim': [8, 112], 'batch_size': 8, 'flops': 2214805229440}, {'type': 'perplexity', 'in_batch_dim': [8, 128], 'batch_size': 8, 'flops': 2531205966848}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1582003754624}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1582003754624}, {'type': 'perplexity', 'in_batch_dim': [8, 64], 'batch_size': 8, 'flops': 1265603017216}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1898404492032}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1582003754624}, {'type': 'perplexity', 'in_batch_dim': [8, 112], 'batch_size': 8, 'flops': 2214805229440}, {'type': 'perplexity', 'in_batch_dim': [8, 64], 'batch_size': 8, 'flops': 1265603017216}, {'type': 'perplexity', 'in_batch_dim': [8, 64], 'batch_size': 8, 'flops': 1265603017216}, {'type': 'perplexity', 'in_batch_dim': [8, 128], 'batch_size': 8, 'flops': 2531205966848}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1898404492032}, {'type': 'perplexity', 'in_batch_dim': [8, 112], 'batch_size': 8, 'flops': 2214805229440}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1582003754624}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1582003754624}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1582003754624}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1582003754624}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1582003754624}, {'type': 'perplexity', 'in_batch_dim': [8, 112], 'batch_size': 8, 'flops': 2214805229440}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1898404492032}, {'type': 'perplexity', 'in_batch_dim': [8, 112], 'batch_size': 8, 'flops': 2214805229440}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1898404492032}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1582003754624}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1582003754624}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1898404492032}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1898404492032}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1582003754624}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1582003754624}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1582003754624}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1582003754624}, {'type': 'perplexity', 'in_batch_dim': [8, 48], 'batch_size': 8, 'flops': 949202279808}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1582003754624}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1898404492032}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1582003754624}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1898404492032}, {'type': 'perplexity', 'in_batch_dim': [8, 112], 'batch_size': 8, 'flops': 2214805229440}, {'type': 'perplexity', 'in_batch_dim': [8, 128], 'batch_size': 8, 'flops': 2531205966848}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1898404492032}, {'type': 'perplexity', 'in_batch_dim': [8, 64], 'batch_size': 8, 'flops': 1265603017216}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1898404492032}, {'type': 'perplexity', 'in_batch_dim': [8, 64], 'batch_size': 8, 'flops': 1265603017216}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1582003754624}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1582003754624}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1582003754624}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1582003754624}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1898404492032}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1582003754624}, {'type': 'perplexity', 'in_batch_dim': [8, 112], 'batch_size': 8, 'flops': 2214805229440}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1898404492032}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1582003754624}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1898404492032}, {'type': 'perplexity', 'in_batch_dim': [8, 64], 'batch_size': 8, 'flops': 1265603017216}, {'type': 'perplexity', 'in_batch_dim': [8, 64], 'batch_size': 8, 'flops': 1265603017216}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1898404492032}, {'type': 'perplexity', 'in_batch_dim': [8, 64], 'batch_size': 8, 'flops': 1265603017216}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1582003754624}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1582003754624}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1582003754624}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1582003754624}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1582003754624}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1582003754624}, {'type': 'perplexity', 'in_batch_dim': [8, 64], 'batch_size': 8, 'flops': 1265603017216}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1582003754624}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1582003754624}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1898404492032}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1898404492032}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1898404492032}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1898404492032}, {'type': 'perplexity', 'in_batch_dim': [8, 128], 'batch_size': 8, 'flops': 2531205966848}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1582003754624}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1898404492032}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1898404492032}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1898404492032}, {'type': 'perplexity', 'in_batch_dim': [8, 64], 'batch_size': 8, 'flops': 1265603017216}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1898404492032}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1898404492032}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1582003754624}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1898404492032}, {'type': 'perplexity', 'in_batch_dim': [8, 112], 'batch_size': 8, 'flops': 2214805229440}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1898404492032}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1582003754624}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1898404492032}, {'type': 'perplexity', 'in_batch_dim': [8, 64], 'batch_size': 8, 'flops': 1265603017216}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1898404492032}, {'type': 'perplexity', 'in_batch_dim': [8, 112], 'batch_size': 8, 'flops': 2214805229440}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1898404492032}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1898404492032}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1582003754624}, {'type': 'perplexity', 'in_batch_dim': [8, 64], 'batch_size': 8, 'flops': 1265603017216}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1582003754624}, {'type': 'perplexity', 'in_batch_dim': [8, 128], 'batch_size': 8, 'flops': 2531205966848}, {'type': 'perplexity', 'in_batch_dim': [8, 64], 'batch_size': 8, 'flops': 1265603017216}, {'type': 'perplexity', 'in_batch_dim': [8, 128], 'batch_size': 8, 'flops': 2531205966848}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1898404492032}, {'type': 'perplexity', 'in_batch_dim': [8, 112], 'batch_size': 8, 'flops': 2214805229440}, {'type': 'perplexity', 'in_batch_dim': [8, 112], 'batch_size': 8, 'flops': 2214805229440}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1898404492032}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1898404492032}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1582003754624}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1582003754624}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1582003754624}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1898404492032}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1582003754624}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1582003754624}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1898404492032}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1582003754624}, {'type': 'perplexity', 'in_batch_dim': [8, 64], 'batch_size': 8, 'flops': 1265603017216}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1898404492032}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1898404492032}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1898404492032}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1898404492032}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1582003754624}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1898404492032}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1582003754624}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1582003754624}, {'type': 'perplexity', 'in_batch_dim': [8, 112], 'batch_size': 8, 'flops': 2214805229440}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1582003754624}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1898404492032}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1582003754624}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1582003754624}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1582003754624}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1898404492032}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1582003754624}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1898404492032}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1582003754624}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1582003754624}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1898404492032}, {'type': 'perplexity', 'in_batch_dim': [8, 112], 'batch_size': 8, 'flops': 2214805229440}, {'type': 'perplexity', 'in_batch_dim': [8, 128], 'batch_size': 8, 'flops': 2531205966848}, {'type': 'perplexity', 'in_batch_dim': [8, 112], 'batch_size': 8, 'flops': 2214805229440}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1582003754624}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1898404492032}, {'type': 'perplexity', 'in_batch_dim': [8, 64], 'batch_size': 8, 'flops': 1265603017216}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1898404492032}, {'type': 'perplexity', 'in_batch_dim': [8, 64], 'batch_size': 8, 'flops': 1265603017216}, {'type': 'perplexity', 'in_batch_dim': [8, 144], 'batch_size': 8, 'flops': 2847606704256}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1582003754624}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1898404492032}, {'type': 'perplexity', 'in_batch_dim': [8, 112], 'batch_size': 8, 'flops': 2214805229440}, {'type': 'perplexity', 'in_batch_dim': [8, 144], 'batch_size': 8, 'flops': 2847606704256}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1582003754624}, {'type': 'perplexity', 'in_batch_dim': [8, 112], 'batch_size': 8, 'flops': 2214805229440}, {'type': 'perplexity', 'in_batch_dim': [8, 64], 'batch_size': 8, 'flops': 1265603017216}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1582003754624}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1898404492032}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1898404492032}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1582003754624}, {'type': 'perplexity', 'in_batch_dim': [8, 112], 'batch_size': 8, 'flops': 2214805229440}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1582003754624}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1582003754624}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1582003754624}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1582003754624}, {'type': 'perplexity', 'in_batch_dim': [8, 112], 'batch_size': 8, 'flops': 2214805229440}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1898404492032}, {'type': 'perplexity', 'in_batch_dim': [8, 128], 'batch_size': 8, 'flops': 2531205966848}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1582003754624}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1582003754624}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1898404492032}, {'type': 'perplexity', 'in_batch_dim': [8, 64], 'batch_size': 8, 'flops': 1265603017216}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1898404492032}, {'type': 'perplexity', 'in_batch_dim': [8, 64], 'batch_size': 8, 'flops': 1265603017216}, {'type': 'perplexity', 'in_batch_dim': [8, 112], 'batch_size': 8, 'flops': 2214805229440}, {'type': 'perplexity', 'in_batch_dim': [8, 112], 'batch_size': 8, 'flops': 2214805229440}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1582003754624}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1898404492032}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1582003754624}, {'type': 'perplexity', 'in_batch_dim': [8, 64], 'batch_size': 8, 'flops': 1265603017216}, {'type': 'perplexity', 'in_batch_dim': [8, 64], 'batch_size': 8, 'flops': 1265603017216}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1898404492032}, {'type': 'perplexity', 'in_batch_dim': [8, 64], 'batch_size': 8, 'flops': 1265603017216}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1898404492032}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1898404492032}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1582003754624}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1898404492032}, {'type': 'perplexity', 'in_batch_dim': [8, 144], 'batch_size': 8, 'flops': 2847606704256}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1898404492032}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1582003754624}, {'type': 'perplexity', 'in_batch_dim': [8, 128], 'batch_size': 8, 'flops': 2531205966848}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1898404492032}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1898404492032}, {'type': 'perplexity', 'in_batch_dim': [8, 112], 'batch_size': 8, 'flops': 2214805229440}, {'type': 'perplexity', 'in_batch_dim': [8, 112], 'batch_size': 8, 'flops': 2214805229440}, {'type': 'perplexity', 'in_batch_dim': [8, 64], 'batch_size': 8, 'flops': 1265603017216}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1582003754624}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1898404492032}, {'type': 'perplexity', 'in_batch_dim': [8, 128], 'batch_size': 8, 'flops': 2531205966848}, {'type': 'perplexity', 'in_batch_dim': [8, 144], 'batch_size': 8, 'flops': 2847606704256}, {'type': 'perplexity', 'in_batch_dim': [8, 112], 'batch_size': 8, 'flops': 2214805229440}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1582003754624}, {'type': 'perplexity', 'in_batch_dim': [8, 64], 'batch_size': 8, 'flops': 1265603017216}, {'type': 'perplexity', 'in_batch_dim': [8, 64], 'batch_size': 8, 'flops': 1265603017216}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1898404492032}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1582003754624}, {'type': 'perplexity', 'in_batch_dim': [8, 64], 'batch_size': 8, 'flops': 1265603017216}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1898404492032}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1582003754624}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1898404492032}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1898404492032}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1582003754624}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1582003754624}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1582003754624}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1582003754624}, {'type': 'perplexity', 'in_batch_dim': [3, 48], 'batch_size': 8, 'flops': 949202279808}], 'timestamp': '2025-09-10 02:39:11.442569', 'step': 11184, 'epoch': 2} {'type': 'pplx', 'content': 11980.647793339398, 'timestamp': '2025-09-10 02:39:11.446133', 'step': 11184, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 2847885091968}, 'timestamp': '2025-09-10 02:39:11.475251', 'step': 11184, 'epoch': 2} {'type': 'loss', 'content': 0.12694276869297028, 'timestamp': '2025-09-10 02:39:11.480479', 'step': 11185, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 128], 'flops': 3797092544000}, 'timestamp': '2025-09-10 02:39:11.512557', 'step': 11185, 'epoch': 2} {'type': 'loss', 'content': 0.11384519189596176, 'timestamp': '2025-09-10 02:39:11.515129', 'step': 11186, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 2847885091968}, 'timestamp': '2025-09-10 02:39:11.546649', 'step': 11186, 'epoch': 2} {'type': 'loss', 'content': 0.12164638936519623, 'timestamp': '2025-09-10 02:39:11.550886', 'step': 11187, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 3322488817984}, 'timestamp': '2025-09-10 02:39:11.580915', 'step': 11187, 'epoch': 2} {'type': 'loss', 'content': 0.10293520987033844, 'timestamp': '2025-09-10 02:39:11.604527', 'step': 11188, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 144], 'flops': 4271696270016}, 'timestamp': '2025-09-10 02:39:11.635577', 'step': 11188, 'epoch': 2} {'type': 'loss', 'content': 0.05956357344985008, 'timestamp': '2025-09-10 02:39:11.637859', 'step': 11189, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 128], 'flops': 3797092544000}, 'timestamp': '2025-09-10 02:39:11.667811', 'step': 11189, 'epoch': 2} {'type': 'loss', 'content': 0.15066686272621155, 'timestamp': '2025-09-10 02:39:11.670499', 'step': 11190, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 2847885091968}, 'timestamp': '2025-09-10 02:39:11.704047', 'step': 11190, 'epoch': 2} {'type': 'loss', 'content': 0.11339664459228516, 'timestamp': '2025-09-10 02:39:11.706562', 'step': 11191, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 2847885091968}, 'timestamp': '2025-09-10 02:39:11.736933', 'step': 11191, 'epoch': 2} {'type': 'loss', 'content': 0.19084565341472626, 'timestamp': '2025-09-10 02:39:11.760603', 'step': 11192, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 3322488817984}, 'timestamp': '2025-09-10 02:39:11.793675', 'step': 11192, 'epoch': 2} {'type': 'loss', 'content': 0.14328445494174957, 'timestamp': '2025-09-10 02:39:11.795827', 'step': 11193, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 3322488817984}, 'timestamp': '2025-09-10 02:39:11.826528', 'step': 11193, 'epoch': 2} {'type': 'loss', 'content': 0.09955854713916779, 'timestamp': '2025-09-10 02:39:11.829522', 'step': 11194, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 80], 'flops': 2373281365952}, 'timestamp': '2025-09-10 02:39:11.860399', 'step': 11194, 'epoch': 2} {'type': 'loss', 'content': 0.08937768638134003, 'timestamp': '2025-09-10 02:39:11.865514', 'step': 11195, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 3322488817984}, 'timestamp': '2025-09-10 02:39:11.895414', 'step': 11195, 'epoch': 2} {'type': 'loss', 'content': 0.13609573245048523, 'timestamp': '2025-09-10 02:39:11.918972', 'step': 11196, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 3322488817984}, 'timestamp': '2025-09-10 02:39:11.949322', 'step': 11196, 'epoch': 2} {'type': 'loss', 'content': 0.0982532724738121, 'timestamp': '2025-09-10 02:39:11.951429', 'step': 11197, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 2847885091968}, 'timestamp': '2025-09-10 02:39:11.980727', 'step': 11197, 'epoch': 2} {'type': 'loss', 'content': 0.031921762973070145, 'timestamp': '2025-09-10 02:39:11.983250', 'step': 11198, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 144], 'flops': 4271696270016}, 'timestamp': '2025-09-10 02:39:12.013185', 'step': 11198, 'epoch': 2} {'type': 'loss', 'content': 0.1079600378870964, 'timestamp': '2025-09-10 02:39:12.016313', 'step': 11199, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 128], 'flops': 3797092544000}, 'timestamp': '2025-09-10 02:39:12.047263', 'step': 11199, 'epoch': 2} {'type': 'loss', 'content': 0.11406958848237991, 'timestamp': '2025-09-10 02:39:12.071212', 'step': 11200, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 128], 'flops': 3797092544000}, 'timestamp': '2025-09-10 02:39:12.103231', 'step': 11200, 'epoch': 2} {'type': 'loss', 'content': 0.1643446534872055, 'timestamp': '2025-09-10 02:39:12.105639', 'step': 11201, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 128], 'flops': 3797092544000}, 'timestamp': '2025-09-10 02:39:12.136425', 'step': 11201, 'epoch': 2} {'type': 'loss', 'content': 0.055550944060087204, 'timestamp': '2025-09-10 02:39:12.138757', 'step': 11202, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 2847885091968}, 'timestamp': '2025-09-10 02:39:12.168995', 'step': 11202, 'epoch': 2} {'type': 'loss', 'content': 0.11832182854413986, 'timestamp': '2025-09-10 02:39:12.171511', 'step': 11203, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 128], 'flops': 3797092544000}, 'timestamp': '2025-09-10 02:39:12.201866', 'step': 11203, 'epoch': 2} {'type': 'loss', 'content': 0.15805292129516602, 'timestamp': '2025-09-10 02:39:12.225599', 'step': 11204, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 2847885091968}, 'timestamp': '2025-09-10 02:39:12.256793', 'step': 11204, 'epoch': 2} {'type': 'loss', 'content': 0.11370185762643814, 'timestamp': '2025-09-10 02:39:12.259257', 'step': 11205, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 3322488817984}, 'timestamp': '2025-09-10 02:39:12.289604', 'step': 11205, 'epoch': 2} {'type': 'loss', 'content': 0.08711107820272446, 'timestamp': '2025-09-10 02:39:12.292160', 'step': 11206, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 3322488817984}, 'timestamp': '2025-09-10 02:39:12.323892', 'step': 11206, 'epoch': 2} {'type': 'loss', 'content': 0.11415796726942062, 'timestamp': '2025-09-10 02:39:12.326268', 'step': 11207, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 2847885091968}, 'timestamp': '2025-09-10 02:39:12.357686', 'step': 11207, 'epoch': 2} {'type': 'loss', 'content': 0.11413317918777466, 'timestamp': '2025-09-10 02:39:12.381742', 'step': 11208, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 128], 'flops': 3797092544000}, 'timestamp': '2025-09-10 02:39:12.412498', 'step': 11208, 'epoch': 2} {'type': 'loss', 'content': 0.13562357425689697, 'timestamp': '2025-09-10 02:39:12.414997', 'step': 11209, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 3322488817984}, 'timestamp': '2025-09-10 02:39:12.445515', 'step': 11209, 'epoch': 2} {'type': 'loss', 'content': 0.12046787142753601, 'timestamp': '2025-09-10 02:39:12.448417', 'step': 11210, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 128], 'flops': 3797092544000}, 'timestamp': '2025-09-10 02:39:12.478737', 'step': 11210, 'epoch': 2} {'type': 'loss', 'content': 0.1500834822654724, 'timestamp': '2025-09-10 02:39:12.481312', 'step': 11211, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 3322488817984}, 'timestamp': '2025-09-10 02:39:12.511651', 'step': 11211, 'epoch': 2} {'type': 'loss', 'content': 0.052958693355321884, 'timestamp': '2025-09-10 02:39:12.535356', 'step': 11212, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 128], 'flops': 3797092544000}, 'timestamp': '2025-09-10 02:39:12.566559', 'step': 11212, 'epoch': 2} {'type': 'loss', 'content': 0.10258331149816513, 'timestamp': '2025-09-10 02:39:12.568501', 'step': 11213, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 128], 'flops': 3797092544000}, 'timestamp': '2025-09-10 02:39:12.598783', 'step': 11213, 'epoch': 2} {'type': 'loss', 'content': 0.08154408633708954, 'timestamp': '2025-09-10 02:39:12.600806', 'step': 11214, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 144], 'flops': 4271696270016}, 'timestamp': '2025-09-10 02:39:12.631459', 'step': 11214, 'epoch': 2} {'type': 'loss', 'content': 0.09177903085947037, 'timestamp': '2025-09-10 02:39:12.633798', 'step': 11215, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 2847885091968}, 'timestamp': '2025-09-10 02:39:12.663749', 'step': 11215, 'epoch': 2} {'type': 'loss', 'content': 0.08536958694458008, 'timestamp': '2025-09-10 02:39:12.687802', 'step': 11216, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 128], 'flops': 3797092544000}, 'timestamp': '2025-09-10 02:39:12.717910', 'step': 11216, 'epoch': 2} {'type': 'loss', 'content': 0.09156087785959244, 'timestamp': '2025-09-10 02:39:12.720005', 'step': 11217, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 192], 'flops': 5695507448064}, 'timestamp': '2025-09-10 02:39:12.750098', 'step': 11217, 'epoch': 2} {'type': 'loss', 'content': 0.17333972454071045, 'timestamp': '2025-09-10 02:39:12.754775', 'step': 11218, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 3322488817984}, 'timestamp': '2025-09-10 02:39:12.784580', 'step': 11218, 'epoch': 2} {'type': 'loss', 'content': 0.06953806430101395, 'timestamp': '2025-09-10 02:39:12.787323', 'step': 11219, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 2847885091968}, 'timestamp': '2025-09-10 02:39:12.817990', 'step': 11219, 'epoch': 2} {'type': 'loss', 'content': 0.057106439024209976, 'timestamp': '2025-09-10 02:39:12.841424', 'step': 11220, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 2847885091968}, 'timestamp': '2025-09-10 02:39:12.872434', 'step': 11220, 'epoch': 2} {'type': 'loss', 'content': 0.054162364453077316, 'timestamp': '2025-09-10 02:39:12.879616', 'step': 11221, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 160], 'flops': 4746299996032}, 'timestamp': '2025-09-10 02:39:12.911881', 'step': 11221, 'epoch': 2} {'type': 'loss', 'content': 0.07602155953645706, 'timestamp': '2025-09-10 02:39:12.914644', 'step': 11222, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 3322488817984}, 'timestamp': '2025-09-10 02:39:12.944740', 'step': 11222, 'epoch': 2} {'type': 'loss', 'content': 0.11039146035909653, 'timestamp': '2025-09-10 02:39:12.947104', 'step': 11223, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 128], 'flops': 3797092544000}, 'timestamp': '2025-09-10 02:39:12.977365', 'step': 11223, 'epoch': 2} {'type': 'loss', 'content': 0.09834606200456619, 'timestamp': '2025-09-10 02:39:13.002519', 'step': 11224, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 80], 'flops': 2373281365952}, 'timestamp': '2025-09-10 02:39:13.032920', 'step': 11224, 'epoch': 2} {'type': 'loss', 'content': 0.03798230364918709, 'timestamp': '2025-09-10 02:39:13.035416', 'step': 11225, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 128], 'flops': 3797092544000}, 'timestamp': '2025-09-10 02:39:13.065967', 'step': 11225, 'epoch': 2} {'type': 'loss', 'content': 0.053067583590745926, 'timestamp': '2025-09-10 02:39:13.068905', 'step': 11226, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 3322488817984}, 'timestamp': '2025-09-10 02:39:13.099575', 'step': 11226, 'epoch': 2} {'type': 'loss', 'content': 0.054665908217430115, 'timestamp': '2025-09-10 02:39:13.101859', 'step': 11227, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 2847885091968}, 'timestamp': '2025-09-10 02:39:13.133159', 'step': 11227, 'epoch': 2} {'type': 'loss', 'content': 0.12490794062614441, 'timestamp': '2025-09-10 02:39:13.158152', 'step': 11228, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 2847885091968}, 'timestamp': '2025-09-10 02:39:13.192261', 'step': 11228, 'epoch': 2} {'type': 'loss', 'content': 0.1274873912334442, 'timestamp': '2025-09-10 02:39:13.197055', 'step': 11229, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 160], 'flops': 4746299996032}, 'timestamp': '2025-09-10 02:39:13.235769', 'step': 11229, 'epoch': 2} {'type': 'loss', 'content': 0.09201053529977798, 'timestamp': '2025-09-10 02:39:13.239485', 'step': 11230, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 3322488817984}, 'timestamp': '2025-09-10 02:39:13.274736', 'step': 11230, 'epoch': 2} {'type': 'loss', 'content': 0.04956432431936264, 'timestamp': '2025-09-10 02:39:13.277371', 'step': 11231, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 3322488817984}, 'timestamp': '2025-09-10 02:39:13.309574', 'step': 11231, 'epoch': 2} {'type': 'loss', 'content': 0.10201194137334824, 'timestamp': '2025-09-10 02:39:13.333773', 'step': 11232, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 3322488817984}, 'timestamp': '2025-09-10 02:39:13.363779', 'step': 11232, 'epoch': 2} {'type': 'loss', 'content': 0.09531770646572113, 'timestamp': '2025-09-10 02:39:13.365912', 'step': 11233, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 128], 'flops': 3797092544000}, 'timestamp': '2025-09-10 02:39:13.413219', 'step': 11233, 'epoch': 2} {'type': 'loss', 'content': 0.1925182044506073, 'timestamp': '2025-09-10 02:39:13.415564', 'step': 11234, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 3322488817984}, 'timestamp': '2025-09-10 02:39:13.458510', 'step': 11234, 'epoch': 2} {'type': 'loss', 'content': 0.04170965775847435, 'timestamp': '2025-09-10 02:39:13.479633', 'step': 11235, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 2847885091968}, 'timestamp': '2025-09-10 02:39:13.518635', 'step': 11235, 'epoch': 2} {'type': 'loss', 'content': 0.03141285851597786, 'timestamp': '2025-09-10 02:39:13.542452', 'step': 11236, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 128], 'flops': 3797092544000}, 'timestamp': '2025-09-10 02:39:13.587387', 'step': 11236, 'epoch': 2} {'type': 'loss', 'content': 0.09811875969171524, 'timestamp': '2025-09-10 02:39:13.605534', 'step': 11237, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 3322488817984}, 'timestamp': '2025-09-10 02:39:13.647189', 'step': 11237, 'epoch': 2} {'type': 'loss', 'content': 0.11668667197227478, 'timestamp': '2025-09-10 02:39:13.651327', 'step': 11238, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 128], 'flops': 3797092544000}, 'timestamp': '2025-09-10 02:39:13.685541', 'step': 11238, 'epoch': 2} {'type': 'loss', 'content': 0.05788548290729523, 'timestamp': '2025-09-10 02:39:13.689165', 'step': 11239, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 128], 'flops': 3797092544000}, 'timestamp': '2025-09-10 02:39:13.719374', 'step': 11239, 'epoch': 2} {'type': 'loss', 'content': 0.11083436757326126, 'timestamp': '2025-09-10 02:39:13.745265', 'step': 11240, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 128], 'flops': 3797092544000}, 'timestamp': '2025-09-10 02:39:13.776265', 'step': 11240, 'epoch': 2} {'type': 'loss', 'content': 0.1049439087510109, 'timestamp': '2025-09-10 02:39:13.778839', 'step': 11241, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 2847885091968}, 'timestamp': '2025-09-10 02:39:13.813672', 'step': 11241, 'epoch': 2} {'type': 'loss', 'content': 0.058529675006866455, 'timestamp': '2025-09-10 02:39:13.817378', 'step': 11242, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 3322488817984}, 'timestamp': '2025-09-10 02:39:13.851744', 'step': 11242, 'epoch': 2} {'type': 'loss', 'content': 0.10358639806509018, 'timestamp': '2025-09-10 02:39:13.860640', 'step': 11243, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 3322488817984}, 'timestamp': '2025-09-10 02:39:13.904140', 'step': 11243, 'epoch': 2} {'type': 'loss', 'content': 0.08011669665575027, 'timestamp': '2025-09-10 02:39:13.930404', 'step': 11244, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 128], 'flops': 3797092544000}, 'timestamp': '2025-09-10 02:39:13.964202', 'step': 11244, 'epoch': 2} {'type': 'loss', 'content': 0.1223188266158104, 'timestamp': '2025-09-10 02:39:13.967443', 'step': 11245, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 3322488817984}, 'timestamp': '2025-09-10 02:39:14.004135', 'step': 11245, 'epoch': 2} {'type': 'loss', 'content': 0.059690769761800766, 'timestamp': '2025-09-10 02:39:14.011398', 'step': 11246, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 3322488817984}, 'timestamp': '2025-09-10 02:39:14.053199', 'step': 11246, 'epoch': 2} {'type': 'loss', 'content': 0.10470768064260483, 'timestamp': '2025-09-10 02:39:14.055631', 'step': 11247, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 176], 'flops': 5220903722048}, 'timestamp': '2025-09-10 02:39:14.088031', 'step': 11247, 'epoch': 2} {'type': 'loss', 'content': 0.1510375738143921, 'timestamp': '2025-09-10 02:39:14.113104', 'step': 11248, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 80], 'flops': 2373281365952}, 'timestamp': '2025-09-10 02:39:14.151349', 'step': 11248, 'epoch': 2} {'type': 'loss', 'content': 0.12789888679981232, 'timestamp': '2025-09-10 02:39:14.155109', 'step': 11249, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 144], 'flops': 4271696270016}, 'timestamp': '2025-09-10 02:39:14.190385', 'step': 11249, 'epoch': 2} {'type': 'loss', 'content': 0.0940122976899147, 'timestamp': '2025-09-10 02:39:14.199489', 'step': 11250, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 144], 'flops': 4271696270016}, 'timestamp': '2025-09-10 02:39:14.254246', 'step': 11250, 'epoch': 2} {'type': 'loss', 'content': 0.10004588216543198, 'timestamp': '2025-09-10 02:39:14.256490', 'step': 11251, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 2847885091968}, 'timestamp': '2025-09-10 02:39:14.286465', 'step': 11251, 'epoch': 2} {'type': 'loss', 'content': 0.06844747066497803, 'timestamp': '2025-09-10 02:39:14.311926', 'step': 11252, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 128], 'flops': 3797092544000}, 'timestamp': '2025-09-10 02:39:14.341918', 'step': 11252, 'epoch': 2} {'type': 'loss', 'content': 0.1563599556684494, 'timestamp': '2025-09-10 02:39:14.344141', 'step': 11253, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 128], 'flops': 3797092544000}, 'timestamp': '2025-09-10 02:39:14.390821', 'step': 11253, 'epoch': 2} {'type': 'loss', 'content': 0.16120673716068268, 'timestamp': '2025-09-10 02:39:14.393503', 'step': 11254, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 3322488817984}, 'timestamp': '2025-09-10 02:39:14.431784', 'step': 11254, 'epoch': 2} {'type': 'loss', 'content': 0.11915506422519684, 'timestamp': '2025-09-10 02:39:14.434297', 'step': 11255, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 2847885091968}, 'timestamp': '2025-09-10 02:39:14.464920', 'step': 11255, 'epoch': 2} {'type': 'loss', 'content': 0.1023636907339096, 'timestamp': '2025-09-10 02:39:14.489725', 'step': 11256, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 3322488817984}, 'timestamp': '2025-09-10 02:39:14.526652', 'step': 11256, 'epoch': 2} {'type': 'loss', 'content': 0.03860205411911011, 'timestamp': '2025-09-10 02:39:14.530532', 'step': 11257, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 128], 'flops': 3797092544000}, 'timestamp': '2025-09-10 02:39:14.560581', 'step': 11257, 'epoch': 2} {'type': 'loss', 'content': 0.07418365031480789, 'timestamp': '2025-09-10 02:39:14.563205', 'step': 11258, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 2847885091968}, 'timestamp': '2025-09-10 02:39:14.593898', 'step': 11258, 'epoch': 2} {'type': 'loss', 'content': 0.18613161146640778, 'timestamp': '2025-09-10 02:39:14.596210', 'step': 11259, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 2847885091968}, 'timestamp': '2025-09-10 02:39:14.627863', 'step': 11259, 'epoch': 2} {'type': 'loss', 'content': 0.07865136116743088, 'timestamp': '2025-09-10 02:39:14.651722', 'step': 11260, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 3322488817984}, 'timestamp': '2025-09-10 02:39:14.684463', 'step': 11260, 'epoch': 2} {'type': 'loss', 'content': 0.059669386595487595, 'timestamp': '2025-09-10 02:39:14.687152', 'step': 11261, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 3322488817984}, 'timestamp': '2025-09-10 02:39:14.716784', 'step': 11261, 'epoch': 2} {'type': 'loss', 'content': 0.06852187216281891, 'timestamp': '2025-09-10 02:39:14.719160', 'step': 11262, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 3322488817984}, 'timestamp': '2025-09-10 02:39:14.748865', 'step': 11262, 'epoch': 2} {'type': 'loss', 'content': 0.05478978902101517, 'timestamp': '2025-09-10 02:39:14.751030', 'step': 11263, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 2847885091968}, 'timestamp': '2025-09-10 02:39:14.780898', 'step': 11263, 'epoch': 2} {'type': 'loss', 'content': 0.1550917774438858, 'timestamp': '2025-09-10 02:39:14.804443', 'step': 11264, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 2847885091968}, 'timestamp': '2025-09-10 02:39:14.833816', 'step': 11264, 'epoch': 2} {'type': 'loss', 'content': 0.06266571581363678, 'timestamp': '2025-09-10 02:39:14.836130', 'step': 11265, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 2847885091968}, 'timestamp': '2025-09-10 02:39:14.865922', 'step': 11265, 'epoch': 2} {'type': 'loss', 'content': 0.07688049972057343, 'timestamp': '2025-09-10 02:39:14.873230', 'step': 11266, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 2847885091968}, 'timestamp': '2025-09-10 02:39:14.905480', 'step': 11266, 'epoch': 2} {'type': 'loss', 'content': 0.1077091246843338, 'timestamp': '2025-09-10 02:39:14.907874', 'step': 11267, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 128], 'flops': 3797092544000}, 'timestamp': '2025-09-10 02:39:14.938194', 'step': 11267, 'epoch': 2} {'type': 'loss', 'content': 0.02374953031539917, 'timestamp': '2025-09-10 02:39:14.961973', 'step': 11268, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 3322488817984}, 'timestamp': '2025-09-10 02:39:14.992177', 'step': 11268, 'epoch': 2} {'type': 'loss', 'content': 0.14009946584701538, 'timestamp': '2025-09-10 02:39:14.994418', 'step': 11269, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 128], 'flops': 3797092544000}, 'timestamp': '2025-09-10 02:39:15.024673', 'step': 11269, 'epoch': 2} {'type': 'loss', 'content': 0.09003682434558868, 'timestamp': '2025-09-10 02:39:15.026859', 'step': 11270, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 128], 'flops': 3797092544000}, 'timestamp': '2025-09-10 02:39:15.056651', 'step': 11270, 'epoch': 2} {'type': 'loss', 'content': 0.0899331271648407, 'timestamp': '2025-09-10 02:39:15.059250', 'step': 11271, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 3322488817984}, 'timestamp': '2025-09-10 02:39:15.089391', 'step': 11271, 'epoch': 2} {'type': 'loss', 'content': 0.13742172718048096, 'timestamp': '2025-09-10 02:39:15.113133', 'step': 11272, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 2847885091968}, 'timestamp': '2025-09-10 02:39:15.143546', 'step': 11272, 'epoch': 2} {'type': 'loss', 'content': 0.11624723672866821, 'timestamp': '2025-09-10 02:39:15.146151', 'step': 11273, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 3322488817984}, 'timestamp': '2025-09-10 02:39:15.176293', 'step': 11273, 'epoch': 2} {'type': 'loss', 'content': 0.02887064591050148, 'timestamp': '2025-09-10 02:39:15.178825', 'step': 11274, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 3322488817984}, 'timestamp': '2025-09-10 02:39:15.208948', 'step': 11274, 'epoch': 2} {'type': 'loss', 'content': 0.10762978345155716, 'timestamp': '2025-09-10 02:39:15.211386', 'step': 11275, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 144], 'flops': 4271696270016}, 'timestamp': '2025-09-10 02:39:15.241026', 'step': 11275, 'epoch': 2} {'type': 'loss', 'content': 0.12474015355110168, 'timestamp': '2025-09-10 02:39:15.264484', 'step': 11276, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 2847885091968}, 'timestamp': '2025-09-10 02:39:15.294487', 'step': 11276, 'epoch': 2} {'type': 'loss', 'content': 0.12731511890888214, 'timestamp': '2025-09-10 02:39:15.296875', 'step': 11277, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 144], 'flops': 4271696270016}, 'timestamp': '2025-09-10 02:39:15.327389', 'step': 11277, 'epoch': 2} {'type': 'loss', 'content': 0.04611217603087425, 'timestamp': '2025-09-10 02:39:15.330095', 'step': 11278, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 128], 'flops': 3797092544000}, 'timestamp': '2025-09-10 02:39:15.360333', 'step': 11278, 'epoch': 2} {'type': 'loss', 'content': 0.19353386759757996, 'timestamp': '2025-09-10 02:39:15.362372', 'step': 11279, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 3322488817984}, 'timestamp': '2025-09-10 02:39:15.391970', 'step': 11279, 'epoch': 2} {'type': 'loss', 'content': 0.1019708439707756, 'timestamp': '2025-09-10 02:39:15.415882', 'step': 11280, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 3322488817984}, 'timestamp': '2025-09-10 02:39:15.446029', 'step': 11280, 'epoch': 2} {'type': 'loss', 'content': 0.08629018068313599, 'timestamp': '2025-09-10 02:39:15.448582', 'step': 11281, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 3322488817984}, 'timestamp': '2025-09-10 02:39:15.478698', 'step': 11281, 'epoch': 2} {'type': 'loss', 'content': 0.13686782121658325, 'timestamp': '2025-09-10 02:39:15.481113', 'step': 11282, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 3322488817984}, 'timestamp': '2025-09-10 02:39:15.510956', 'step': 11282, 'epoch': 2} {'type': 'loss', 'content': 0.04756197705864906, 'timestamp': '2025-09-10 02:39:15.513233', 'step': 11283, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 3322488817984}, 'timestamp': '2025-09-10 02:39:15.543259', 'step': 11283, 'epoch': 2} {'type': 'loss', 'content': 0.13257640600204468, 'timestamp': '2025-09-10 02:39:15.567084', 'step': 11284, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 2847885091968}, 'timestamp': '2025-09-10 02:39:15.597238', 'step': 11284, 'epoch': 2} {'type': 'loss', 'content': 0.07517160475254059, 'timestamp': '2025-09-10 02:39:15.599568', 'step': 11285, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 160], 'flops': 4746299996032}, 'timestamp': '2025-09-10 02:39:15.629760', 'step': 11285, 'epoch': 2} {'type': 'loss', 'content': 0.1668582260608673, 'timestamp': '2025-09-10 02:39:15.632640', 'step': 11286, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 3322488817984}, 'timestamp': '2025-09-10 02:39:15.662548', 'step': 11286, 'epoch': 2} {'type': 'loss', 'content': 0.05196280777454376, 'timestamp': '2025-09-10 02:39:15.664877', 'step': 11287, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 2847885091968}, 'timestamp': '2025-09-10 02:39:15.694551', 'step': 11287, 'epoch': 2} {'type': 'loss', 'content': 0.18407098948955536, 'timestamp': '2025-09-10 02:39:15.718445', 'step': 11288, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 128], 'flops': 3797092544000}, 'timestamp': '2025-09-10 02:39:15.748795', 'step': 11288, 'epoch': 2} {'type': 'loss', 'content': 0.08361190557479858, 'timestamp': '2025-09-10 02:39:15.751582', 'step': 11289, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 2847885091968}, 'timestamp': '2025-09-10 02:39:15.782495', 'step': 11289, 'epoch': 2} {'type': 'loss', 'content': 0.07253439724445343, 'timestamp': '2025-09-10 02:39:15.784886', 'step': 11290, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 2847885091968}, 'timestamp': '2025-09-10 02:39:15.815621', 'step': 11290, 'epoch': 2} {'type': 'loss', 'content': 0.07966654002666473, 'timestamp': '2025-09-10 02:39:15.818984', 'step': 11291, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 128], 'flops': 3797092544000}, 'timestamp': '2025-09-10 02:39:15.851038', 'step': 11291, 'epoch': 2} {'type': 'loss', 'content': 0.1330135613679886, 'timestamp': '2025-09-10 02:39:15.874765', 'step': 11292, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 2847885091968}, 'timestamp': '2025-09-10 02:39:15.906482', 'step': 11292, 'epoch': 2} {'type': 'loss', 'content': 0.14632341265678406, 'timestamp': '2025-09-10 02:39:15.908632', 'step': 11293, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 2847885091968}, 'timestamp': '2025-09-10 02:39:15.938490', 'step': 11293, 'epoch': 2} {'type': 'loss', 'content': 0.1365361362695694, 'timestamp': '2025-09-10 02:39:15.941075', 'step': 11294, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 3322488817984}, 'timestamp': '2025-09-10 02:39:15.971458', 'step': 11294, 'epoch': 2} {'type': 'loss', 'content': 0.1411263346672058, 'timestamp': '2025-09-10 02:39:15.974752', 'step': 11295, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 3322488817984}, 'timestamp': '2025-09-10 02:39:16.004868', 'step': 11295, 'epoch': 2} {'type': 'loss', 'content': 0.12186674773693085, 'timestamp': '2025-09-10 02:39:16.028535', 'step': 11296, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 2847885091968}, 'timestamp': '2025-09-10 02:39:16.058246', 'step': 11296, 'epoch': 2} {'type': 'loss', 'content': 0.1251906007528305, 'timestamp': '2025-09-10 02:39:16.060250', 'step': 11297, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 3322488817984}, 'timestamp': '2025-09-10 02:39:16.089421', 'step': 11297, 'epoch': 2} {'type': 'loss', 'content': 0.09049082547426224, 'timestamp': '2025-09-10 02:39:16.091489', 'step': 11298, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 2847885091968}, 'timestamp': '2025-09-10 02:39:16.121389', 'step': 11298, 'epoch': 2} {'type': 'loss', 'content': 0.09877530485391617, 'timestamp': '2025-09-10 02:39:16.123700', 'step': 11299, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 2847885091968}, 'timestamp': '2025-09-10 02:39:16.153959', 'step': 11299, 'epoch': 2} {'type': 'loss', 'content': 0.12923111021518707, 'timestamp': '2025-09-10 02:39:16.177439', 'step': 11300, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 144], 'flops': 4271696270016}, 'timestamp': '2025-09-10 02:39:16.207153', 'step': 11300, 'epoch': 2} {'type': 'loss', 'content': 0.1873217225074768, 'timestamp': '2025-09-10 02:39:16.210470', 'step': 11301, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 3322488817984}, 'timestamp': '2025-09-10 02:39:16.244342', 'step': 11301, 'epoch': 2} {'type': 'loss', 'content': 0.07100050896406174, 'timestamp': '2025-09-10 02:39:16.247271', 'step': 11302, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 2847885091968}, 'timestamp': '2025-09-10 02:39:16.279627', 'step': 11302, 'epoch': 2} {'type': 'loss', 'content': 0.15548014640808105, 'timestamp': '2025-09-10 02:39:16.283196', 'step': 11303, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 128], 'flops': 3797092544000}, 'timestamp': '2025-09-10 02:39:16.314890', 'step': 11303, 'epoch': 2} {'type': 'loss', 'content': 0.1672293245792389, 'timestamp': '2025-09-10 02:39:16.339160', 'step': 11304, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 3322488817984}, 'timestamp': '2025-09-10 02:39:16.371021', 'step': 11304, 'epoch': 2} {'type': 'loss', 'content': 0.19699877500534058, 'timestamp': '2025-09-10 02:39:16.373692', 'step': 11305, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 128], 'flops': 3797092544000}, 'timestamp': '2025-09-10 02:39:16.404942', 'step': 11305, 'epoch': 2} {'type': 'loss', 'content': 0.10435573011636734, 'timestamp': '2025-09-10 02:39:16.407398', 'step': 11306, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 2847885091968}, 'timestamp': '2025-09-10 02:39:16.437865', 'step': 11306, 'epoch': 2} {'type': 'loss', 'content': 0.1763063222169876, 'timestamp': '2025-09-10 02:39:16.440560', 'step': 11307, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 3322488817984}, 'timestamp': '2025-09-10 02:39:16.471081', 'step': 11307, 'epoch': 2} {'type': 'loss', 'content': 0.21027657389640808, 'timestamp': '2025-09-10 02:39:16.495363', 'step': 11308, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 3322488817984}, 'timestamp': '2025-09-10 02:39:16.525426', 'step': 11308, 'epoch': 2} {'type': 'loss', 'content': 0.15290485322475433, 'timestamp': '2025-09-10 02:39:16.527979', 'step': 11309, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 3322488817984}, 'timestamp': '2025-09-10 02:39:16.557681', 'step': 11309, 'epoch': 2} {'type': 'loss', 'content': 0.11299571394920349, 'timestamp': '2025-09-10 02:39:16.559792', 'step': 11310, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 2847885091968}, 'timestamp': '2025-09-10 02:39:16.589837', 'step': 11310, 'epoch': 2} {'type': 'loss', 'content': 0.10038689523935318, 'timestamp': '2025-09-10 02:39:16.592291', 'step': 11311, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 3322488817984}, 'timestamp': '2025-09-10 02:39:16.622723', 'step': 11311, 'epoch': 2} {'type': 'loss', 'content': 0.07818365097045898, 'timestamp': '2025-09-10 02:39:16.647779', 'step': 11312, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 2847885091968}, 'timestamp': '2025-09-10 02:39:16.677973', 'step': 11312, 'epoch': 2} {'type': 'loss', 'content': 0.15849822759628296, 'timestamp': '2025-09-10 02:39:16.680079', 'step': 11313, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 144], 'flops': 4271696270016}, 'timestamp': '2025-09-10 02:39:16.710131', 'step': 11313, 'epoch': 2} {'type': 'loss', 'content': 0.13039250671863556, 'timestamp': '2025-09-10 02:39:16.712660', 'step': 11314, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 128], 'flops': 3797092544000}, 'timestamp': '2025-09-10 02:39:16.742804', 'step': 11314, 'epoch': 2} {'type': 'loss', 'content': 0.16149352490901947, 'timestamp': '2025-09-10 02:39:16.745272', 'step': 11315, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 3322488817984}, 'timestamp': '2025-09-10 02:39:16.775348', 'step': 11315, 'epoch': 2} {'type': 'loss', 'content': 0.12906716763973236, 'timestamp': '2025-09-10 02:39:16.798771', 'step': 11316, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 3322488817984}, 'timestamp': '2025-09-10 02:39:16.830627', 'step': 11316, 'epoch': 2} {'type': 'loss', 'content': 0.13090474903583527, 'timestamp': '2025-09-10 02:39:16.832799', 'step': 11317, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 3322488817984}, 'timestamp': '2025-09-10 02:39:16.862846', 'step': 11317, 'epoch': 2} {'type': 'loss', 'content': 0.1044730544090271, 'timestamp': '2025-09-10 02:39:16.865110', 'step': 11318, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 2847885091968}, 'timestamp': '2025-09-10 02:39:16.894872', 'step': 11318, 'epoch': 2} {'type': 'loss', 'content': 0.06339384615421295, 'timestamp': '2025-09-10 02:39:16.897117', 'step': 11319, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 144], 'flops': 4271696270016}, 'timestamp': '2025-09-10 02:39:16.926619', 'step': 11319, 'epoch': 2} {'type': 'loss', 'content': 0.06274492293596268, 'timestamp': '2025-09-10 02:39:16.951356', 'step': 11320, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 3322488817984}, 'timestamp': '2025-09-10 02:39:16.980969', 'step': 11320, 'epoch': 2} {'type': 'loss', 'content': 0.19794456660747528, 'timestamp': '2025-09-10 02:39:16.983086', 'step': 11321, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 3322488817984}, 'timestamp': '2025-09-10 02:39:17.013007', 'step': 11321, 'epoch': 2} {'type': 'loss', 'content': 0.13528841733932495, 'timestamp': '2025-09-10 02:39:17.015248', 'step': 11322, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 144], 'flops': 4271696270016}, 'timestamp': '2025-09-10 02:39:17.045236', 'step': 11322, 'epoch': 2} {'type': 'loss', 'content': 0.09733191132545471, 'timestamp': '2025-09-10 02:39:17.047538', 'step': 11323, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 80], 'flops': 2373281365952}, 'timestamp': '2025-09-10 02:39:17.076971', 'step': 11323, 'epoch': 2} {'type': 'loss', 'content': 0.04635852947831154, 'timestamp': '2025-09-10 02:39:17.100476', 'step': 11324, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 144], 'flops': 4271696270016}, 'timestamp': '2025-09-10 02:39:17.130191', 'step': 11324, 'epoch': 2} {'type': 'loss', 'content': 0.1826406568288803, 'timestamp': '2025-09-10 02:39:17.133771', 'step': 11325, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 128], 'flops': 3797092544000}, 'timestamp': '2025-09-10 02:39:17.164626', 'step': 11325, 'epoch': 2} {'type': 'loss', 'content': 0.20404702425003052, 'timestamp': '2025-09-10 02:39:17.167011', 'step': 11326, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 3322488817984}, 'timestamp': '2025-09-10 02:39:17.196885', 'step': 11326, 'epoch': 2} {'type': 'loss', 'content': 0.05462918058037758, 'timestamp': '2025-09-10 02:39:17.199152', 'step': 11327, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 144], 'flops': 4271696270016}, 'timestamp': '2025-09-10 02:39:17.229031', 'step': 11327, 'epoch': 2} {'type': 'loss', 'content': 0.044067975133657455, 'timestamp': '2025-09-10 02:39:17.252666', 'step': 11328, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 3322488817984}, 'timestamp': '2025-09-10 02:39:17.282684', 'step': 11328, 'epoch': 2} {'type': 'loss', 'content': 0.09754295647144318, 'timestamp': '2025-09-10 02:39:17.284656', 'step': 11329, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 2847885091968}, 'timestamp': '2025-09-10 02:39:17.314389', 'step': 11329, 'epoch': 2} {'type': 'loss', 'content': 0.14355823397636414, 'timestamp': '2025-09-10 02:39:17.316736', 'step': 11330, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 3322488817984}, 'timestamp': '2025-09-10 02:39:17.346861', 'step': 11330, 'epoch': 2} {'type': 'loss', 'content': 0.05754406377673149, 'timestamp': '2025-09-10 02:39:17.349356', 'step': 11331, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 80], 'flops': 2373281365952}, 'timestamp': '2025-09-10 02:39:17.379125', 'step': 11331, 'epoch': 2} {'type': 'loss', 'content': 0.20190058648586273, 'timestamp': '2025-09-10 02:39:17.402942', 'step': 11332, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 128], 'flops': 3797092544000}, 'timestamp': '2025-09-10 02:39:17.433598', 'step': 11332, 'epoch': 2} {'type': 'loss', 'content': 0.20108066499233246, 'timestamp': '2025-09-10 02:39:17.437998', 'step': 11333, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 128], 'flops': 3797092544000}, 'timestamp': '2025-09-10 02:39:17.469088', 'step': 11333, 'epoch': 2} {'type': 'loss', 'content': 0.11293870955705643, 'timestamp': '2025-09-10 02:39:17.471467', 'step': 11334, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 128], 'flops': 3797092544000}, 'timestamp': '2025-09-10 02:39:17.501741', 'step': 11334, 'epoch': 2} {'type': 'loss', 'content': 0.05864877253770828, 'timestamp': '2025-09-10 02:39:17.504459', 'step': 11335, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 128], 'flops': 3797092544000}, 'timestamp': '2025-09-10 02:39:17.534289', 'step': 11335, 'epoch': 2} {'type': 'loss', 'content': 0.05280476063489914, 'timestamp': '2025-09-10 02:39:17.559586', 'step': 11336, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 2847885091968}, 'timestamp': '2025-09-10 02:39:17.590215', 'step': 11336, 'epoch': 2} {'type': 'loss', 'content': 0.06265981495380402, 'timestamp': '2025-09-10 02:39:17.592630', 'step': 11337, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 3322488817984}, 'timestamp': '2025-09-10 02:39:17.623176', 'step': 11337, 'epoch': 2} {'type': 'loss', 'content': 0.05999966710805893, 'timestamp': '2025-09-10 02:39:17.625742', 'step': 11338, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 2847885091968}, 'timestamp': '2025-09-10 02:39:17.655864', 'step': 11338, 'epoch': 2} {'type': 'loss', 'content': 0.1563323736190796, 'timestamp': '2025-09-10 02:39:17.658322', 'step': 11339, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 2847885091968}, 'timestamp': '2025-09-10 02:39:17.688031', 'step': 11339, 'epoch': 2} {'type': 'loss', 'content': 0.06960774213075638, 'timestamp': '2025-09-10 02:39:17.711587', 'step': 11340, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 2847885091968}, 'timestamp': '2025-09-10 02:39:17.743023', 'step': 11340, 'epoch': 2} {'type': 'loss', 'content': 0.14784258604049683, 'timestamp': '2025-09-10 02:39:17.745288', 'step': 11341, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 128], 'flops': 3797092544000}, 'timestamp': '2025-09-10 02:39:17.775185', 'step': 11341, 'epoch': 2} {'type': 'loss', 'content': 0.12055287510156631, 'timestamp': '2025-09-10 02:39:17.778079', 'step': 11342, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 3322488817984}, 'timestamp': '2025-09-10 02:39:17.808359', 'step': 11342, 'epoch': 2} {'type': 'loss', 'content': 0.06555214524269104, 'timestamp': '2025-09-10 02:39:17.810868', 'step': 11343, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 3322488817984}, 'timestamp': '2025-09-10 02:39:17.841863', 'step': 11343, 'epoch': 2} {'type': 'loss', 'content': 0.14748996496200562, 'timestamp': '2025-09-10 02:39:17.865475', 'step': 11344, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 128], 'flops': 3797092544000}, 'timestamp': '2025-09-10 02:39:17.896327', 'step': 11344, 'epoch': 2} {'type': 'loss', 'content': 0.15378692746162415, 'timestamp': '2025-09-10 02:39:17.899152', 'step': 11345, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 128], 'flops': 3797092544000}, 'timestamp': '2025-09-10 02:39:17.928981', 'step': 11345, 'epoch': 2} {'type': 'loss', 'content': 0.06968989223241806, 'timestamp': '2025-09-10 02:39:17.931611', 'step': 11346, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 2847885091968}, 'timestamp': '2025-09-10 02:39:17.961878', 'step': 11346, 'epoch': 2} {'type': 'loss', 'content': 0.11793911457061768, 'timestamp': '2025-09-10 02:39:17.964370', 'step': 11347, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 128], 'flops': 3797092544000}, 'timestamp': '2025-09-10 02:39:17.993959', 'step': 11347, 'epoch': 2} {'type': 'loss', 'content': 0.1455007940530777, 'timestamp': '2025-09-10 02:39:18.018282', 'step': 11348, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 3322488817984}, 'timestamp': '2025-09-10 02:39:18.048674', 'step': 11348, 'epoch': 2} {'type': 'loss', 'content': 0.1360730081796646, 'timestamp': '2025-09-10 02:39:18.051005', 'step': 11349, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 3322488817984}, 'timestamp': '2025-09-10 02:39:18.081077', 'step': 11349, 'epoch': 2} {'type': 'loss', 'content': 0.13710972666740417, 'timestamp': '2025-09-10 02:39:18.083450', 'step': 11350, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 128], 'flops': 3797092544000}, 'timestamp': '2025-09-10 02:39:18.114100', 'step': 11350, 'epoch': 2} {'type': 'loss', 'content': 0.10528358072042465, 'timestamp': '2025-09-10 02:39:18.115989', 'step': 11351, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 160], 'flops': 4746299996032}, 'timestamp': '2025-09-10 02:39:18.145696', 'step': 11351, 'epoch': 2} {'type': 'loss', 'content': 0.14933443069458008, 'timestamp': '2025-09-10 02:39:18.169208', 'step': 11352, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 128], 'flops': 3797092544000}, 'timestamp': '2025-09-10 02:39:18.199957', 'step': 11352, 'epoch': 2} {'type': 'loss', 'content': 0.06015344709157944, 'timestamp': '2025-09-10 02:39:18.202263', 'step': 11353, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 144], 'flops': 4271696270016}, 'timestamp': '2025-09-10 02:39:18.232636', 'step': 11353, 'epoch': 2} {'type': 'loss', 'content': 0.11271452903747559, 'timestamp': '2025-09-10 02:39:18.235050', 'step': 11354, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 144], 'flops': 4271696270016}, 'timestamp': '2025-09-10 02:39:18.265275', 'step': 11354, 'epoch': 2} {'type': 'loss', 'content': 0.1209479570388794, 'timestamp': '2025-09-10 02:39:18.267783', 'step': 11355, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 3322488817984}, 'timestamp': '2025-09-10 02:39:18.297837', 'step': 11355, 'epoch': 2} {'type': 'loss', 'content': 0.08091720938682556, 'timestamp': '2025-09-10 02:39:18.321560', 'step': 11356, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 3322488817984}, 'timestamp': '2025-09-10 02:39:18.351934', 'step': 11356, 'epoch': 2} {'type': 'loss', 'content': 0.11973396688699722, 'timestamp': '2025-09-10 02:39:18.354297', 'step': 11357, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 128], 'flops': 3797092544000}, 'timestamp': '2025-09-10 02:39:18.385292', 'step': 11357, 'epoch': 2} {'type': 'loss', 'content': 0.10024836659431458, 'timestamp': '2025-09-10 02:39:18.387744', 'step': 11358, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 2847885091968}, 'timestamp': '2025-09-10 02:39:18.418148', 'step': 11358, 'epoch': 2} {'type': 'loss', 'content': 0.09283272922039032, 'timestamp': '2025-09-10 02:39:18.420371', 'step': 11359, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 80], 'flops': 2373281365952}, 'timestamp': '2025-09-10 02:39:18.450127', 'step': 11359, 'epoch': 2} {'type': 'loss', 'content': 0.15920928120613098, 'timestamp': '2025-09-10 02:39:18.473872', 'step': 11360, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 128], 'flops': 3797092544000}, 'timestamp': '2025-09-10 02:39:18.504748', 'step': 11360, 'epoch': 2} {'type': 'loss', 'content': 0.14236724376678467, 'timestamp': '2025-09-10 02:39:18.507106', 'step': 11361, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 128], 'flops': 3797092544000}, 'timestamp': '2025-09-10 02:39:18.537446', 'step': 11361, 'epoch': 2} {'type': 'loss', 'content': 0.0667824074625969, 'timestamp': '2025-09-10 02:39:18.540269', 'step': 11362, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 128], 'flops': 3797092544000}, 'timestamp': '2025-09-10 02:39:18.570177', 'step': 11362, 'epoch': 2} {'type': 'loss', 'content': 0.08678777515888214, 'timestamp': '2025-09-10 02:39:18.572370', 'step': 11363, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 128], 'flops': 3797092544000}, 'timestamp': '2025-09-10 02:39:18.602247', 'step': 11363, 'epoch': 2} {'type': 'loss', 'content': 0.07067412883043289, 'timestamp': '2025-09-10 02:39:18.625873', 'step': 11364, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 3322488817984}, 'timestamp': '2025-09-10 02:39:18.655928', 'step': 11364, 'epoch': 2} {'type': 'loss', 'content': 0.11388351768255234, 'timestamp': '2025-09-10 02:39:18.658034', 'step': 11365, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 2847885091968}, 'timestamp': '2025-09-10 02:39:18.687375', 'step': 11365, 'epoch': 2} {'type': 'loss', 'content': 0.10057341307401657, 'timestamp': '2025-09-10 02:39:18.689728', 'step': 11366, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 144], 'flops': 4271696270016}, 'timestamp': '2025-09-10 02:39:18.719928', 'step': 11366, 'epoch': 2} {'type': 'loss', 'content': 0.12087554484605789, 'timestamp': '2025-09-10 02:39:18.722436', 'step': 11367, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 144], 'flops': 4271696270016}, 'timestamp': '2025-09-10 02:39:18.752734', 'step': 11367, 'epoch': 2} {'type': 'loss', 'content': 0.0532357320189476, 'timestamp': '2025-09-10 02:39:18.776784', 'step': 11368, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 2847885091968}, 'timestamp': '2025-09-10 02:39:18.806961', 'step': 11368, 'epoch': 2} {'type': 'loss', 'content': 0.14196635782718658, 'timestamp': '2025-09-10 02:39:18.809415', 'step': 11369, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 3322488817984}, 'timestamp': '2025-09-10 02:39:18.839656', 'step': 11369, 'epoch': 2} {'type': 'loss', 'content': 0.13874149322509766, 'timestamp': '2025-09-10 02:39:18.843561', 'step': 11370, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 3322488817984}, 'timestamp': '2025-09-10 02:39:18.874376', 'step': 11370, 'epoch': 2} {'type': 'loss', 'content': 0.09729001671075821, 'timestamp': '2025-09-10 02:39:18.876624', 'step': 11371, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 144], 'flops': 4271696270016}, 'timestamp': '2025-09-10 02:39:18.907171', 'step': 11371, 'epoch': 2} {'type': 'loss', 'content': 0.10356344282627106, 'timestamp': '2025-09-10 02:39:18.930702', 'step': 11372, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 3322488817984}, 'timestamp': '2025-09-10 02:39:18.961780', 'step': 11372, 'epoch': 2} {'type': 'loss', 'content': 0.09100135415792465, 'timestamp': '2025-09-10 02:39:18.964405', 'step': 11373, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 128], 'flops': 3797092544000}, 'timestamp': '2025-09-10 02:39:18.995842', 'step': 11373, 'epoch': 2} {'type': 'loss', 'content': 0.0700109452009201, 'timestamp': '2025-09-10 02:39:18.998247', 'step': 11374, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 3322488817984}, 'timestamp': '2025-09-10 02:39:19.028199', 'step': 11374, 'epoch': 2} {'type': 'loss', 'content': 0.1440647542476654, 'timestamp': '2025-09-10 02:39:19.030927', 'step': 11375, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 2847885091968}, 'timestamp': '2025-09-10 02:39:19.060727', 'step': 11375, 'epoch': 2} {'type': 'loss', 'content': 0.04923379421234131, 'timestamp': '2025-09-10 02:39:19.084379', 'step': 11376, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 144], 'flops': 4271696270016}, 'timestamp': '2025-09-10 02:39:19.115342', 'step': 11376, 'epoch': 2} {'type': 'loss', 'content': 0.1449984908103943, 'timestamp': '2025-09-10 02:39:19.117719', 'step': 11377, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 144], 'flops': 4271696270016}, 'timestamp': '2025-09-10 02:39:19.147902', 'step': 11377, 'epoch': 2} {'type': 'loss', 'content': 0.1917530596256256, 'timestamp': '2025-09-10 02:39:19.150715', 'step': 11378, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 3322488817984}, 'timestamp': '2025-09-10 02:39:19.182491', 'step': 11378, 'epoch': 2} {'type': 'loss', 'content': 0.21293841302394867, 'timestamp': '2025-09-10 02:39:19.185098', 'step': 11379, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 2847885091968}, 'timestamp': '2025-09-10 02:39:19.215208', 'step': 11379, 'epoch': 2} {'type': 'loss', 'content': 0.12107425928115845, 'timestamp': '2025-09-10 02:39:19.238965', 'step': 11380, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 144], 'flops': 4271696270016}, 'timestamp': '2025-09-10 02:39:19.269135', 'step': 11380, 'epoch': 2} {'type': 'loss', 'content': 0.11083365231752396, 'timestamp': '2025-09-10 02:39:19.271374', 'step': 11381, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 3322488817984}, 'timestamp': '2025-09-10 02:39:19.301307', 'step': 11381, 'epoch': 2} {'type': 'loss', 'content': 0.14222276210784912, 'timestamp': '2025-09-10 02:39:19.303993', 'step': 11382, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 160], 'flops': 4746299996032}, 'timestamp': '2025-09-10 02:39:19.334225', 'step': 11382, 'epoch': 2} {'type': 'loss', 'content': 0.09681914001703262, 'timestamp': '2025-09-10 02:39:19.337190', 'step': 11383, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 2847885091968}, 'timestamp': '2025-09-10 02:39:19.370483', 'step': 11383, 'epoch': 2} {'type': 'loss', 'content': 0.10680294781923294, 'timestamp': '2025-09-10 02:39:19.394366', 'step': 11384, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 3322488817984}, 'timestamp': '2025-09-10 02:39:19.424788', 'step': 11384, 'epoch': 2} {'type': 'loss', 'content': 0.06681261211633682, 'timestamp': '2025-09-10 02:39:19.427196', 'step': 11385, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 3322488817984}, 'timestamp': '2025-09-10 02:39:19.457279', 'step': 11385, 'epoch': 2} {'type': 'loss', 'content': 0.1428898572921753, 'timestamp': '2025-09-10 02:39:19.460555', 'step': 11386, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 2847885091968}, 'timestamp': '2025-09-10 02:39:19.491969', 'step': 11386, 'epoch': 2} {'type': 'loss', 'content': 0.06596126407384872, 'timestamp': '2025-09-10 02:39:19.494248', 'step': 11387, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 3322488817984}, 'timestamp': '2025-09-10 02:39:19.524704', 'step': 11387, 'epoch': 2} {'type': 'loss', 'content': 0.08595813810825348, 'timestamp': '2025-09-10 02:39:19.548363', 'step': 11388, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 3322488817984}, 'timestamp': '2025-09-10 02:39:19.579044', 'step': 11388, 'epoch': 2} {'type': 'loss', 'content': 0.08210945874452591, 'timestamp': '2025-09-10 02:39:19.582354', 'step': 11389, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 128], 'flops': 3797092544000}, 'timestamp': '2025-09-10 02:39:19.612843', 'step': 11389, 'epoch': 2} {'type': 'loss', 'content': 0.08522156625986099, 'timestamp': '2025-09-10 02:39:19.615652', 'step': 11390, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 2847885091968}, 'timestamp': '2025-09-10 02:39:19.646805', 'step': 11390, 'epoch': 2} {'type': 'loss', 'content': 0.1828772872686386, 'timestamp': '2025-09-10 02:39:19.649206', 'step': 11391, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 3322488817984}, 'timestamp': '2025-09-10 02:39:19.679010', 'step': 11391, 'epoch': 2} {'type': 'loss', 'content': 0.10364707559347153, 'timestamp': '2025-09-10 02:39:19.702896', 'step': 11392, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 128], 'flops': 3797092544000}, 'timestamp': '2025-09-10 02:39:19.733130', 'step': 11392, 'epoch': 2} {'type': 'loss', 'content': 0.10967672616243362, 'timestamp': '2025-09-10 02:39:19.735851', 'step': 11393, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 2847885091968}, 'timestamp': '2025-09-10 02:39:19.765893', 'step': 11393, 'epoch': 2} {'type': 'loss', 'content': 0.09835851937532425, 'timestamp': '2025-09-10 02:39:19.768081', 'step': 11394, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 2847885091968}, 'timestamp': '2025-09-10 02:39:19.797849', 'step': 11394, 'epoch': 2} {'type': 'loss', 'content': 0.1656665951013565, 'timestamp': '2025-09-10 02:39:19.800396', 'step': 11395, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 2847885091968}, 'timestamp': '2025-09-10 02:39:19.829821', 'step': 11395, 'epoch': 2} {'type': 'loss', 'content': 0.0441964715719223, 'timestamp': '2025-09-10 02:39:19.853437', 'step': 11396, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 3322488817984}, 'timestamp': '2025-09-10 02:39:19.883440', 'step': 11396, 'epoch': 2} {'type': 'loss', 'content': 0.1248592883348465, 'timestamp': '2025-09-10 02:39:19.885782', 'step': 11397, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 2847885091968}, 'timestamp': '2025-09-10 02:39:19.919967', 'step': 11397, 'epoch': 2} {'type': 'loss', 'content': 0.11546111106872559, 'timestamp': '2025-09-10 02:39:19.922284', 'step': 11398, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 128], 'flops': 3797092544000}, 'timestamp': '2025-09-10 02:39:19.951865', 'step': 11398, 'epoch': 2} {'type': 'loss', 'content': 0.12832890450954437, 'timestamp': '2025-09-10 02:39:19.955991', 'step': 11399, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 80], 'flops': 2373281365952}, 'timestamp': '2025-09-10 02:39:19.986153', 'step': 11399, 'epoch': 2} {'type': 'loss', 'content': 0.14031504094600677, 'timestamp': '2025-09-10 02:39:20.009866', 'step': 11400, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 128], 'flops': 3797092544000}, 'timestamp': '2025-09-10 02:39:20.040666', 'step': 11400, 'epoch': 2} {'type': 'loss', 'content': 0.11921954900026321, 'timestamp': '2025-09-10 02:39:20.043524', 'step': 11401, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 3322488817984}, 'timestamp': '2025-09-10 02:39:20.074340', 'step': 11401, 'epoch': 2} {'type': 'loss', 'content': 0.16961681842803955, 'timestamp': '2025-09-10 02:39:20.076811', 'step': 11402, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 128], 'flops': 3797092544000}, 'timestamp': '2025-09-10 02:39:20.107352', 'step': 11402, 'epoch': 2} {'type': 'loss', 'content': 0.12511107325553894, 'timestamp': '2025-09-10 02:39:20.109907', 'step': 11403, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 3322488817984}, 'timestamp': '2025-09-10 02:39:20.139749', 'step': 11403, 'epoch': 2} {'type': 'loss', 'content': 0.1596328318119049, 'timestamp': '2025-09-10 02:39:20.163490', 'step': 11404, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 2847885091968}, 'timestamp': '2025-09-10 02:39:20.195965', 'step': 11404, 'epoch': 2} {'type': 'loss', 'content': 0.14913955330848694, 'timestamp': '2025-09-10 02:39:20.198329', 'step': 11405, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 128], 'flops': 3797092544000}, 'timestamp': '2025-09-10 02:39:20.228245', 'step': 11405, 'epoch': 2} {'type': 'loss', 'content': 0.15809790790081024, 'timestamp': '2025-09-10 02:39:20.230727', 'step': 11406, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 160], 'flops': 4746299996032}, 'timestamp': '2025-09-10 02:39:20.261295', 'step': 11406, 'epoch': 2} {'type': 'loss', 'content': 0.10844207555055618, 'timestamp': '2025-09-10 02:39:20.264048', 'step': 11407, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 3322488817984}, 'timestamp': '2025-09-10 02:39:20.293875', 'step': 11407, 'epoch': 2} {'type': 'loss', 'content': 0.16137006878852844, 'timestamp': '2025-09-10 02:39:20.317873', 'step': 11408, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 160], 'flops': 4746299996032}, 'timestamp': '2025-09-10 02:39:20.347543', 'step': 11408, 'epoch': 2} {'type': 'loss', 'content': 0.08850660175085068, 'timestamp': '2025-09-10 02:39:20.350170', 'step': 11409, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 3322488817984}, 'timestamp': '2025-09-10 02:39:20.379777', 'step': 11409, 'epoch': 2} {'type': 'loss', 'content': 0.10578741878271103, 'timestamp': '2025-09-10 02:39:20.384592', 'step': 11410, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 3322488817984}, 'timestamp': '2025-09-10 02:39:20.415129', 'step': 11410, 'epoch': 2} {'type': 'loss', 'content': 0.22128166258335114, 'timestamp': '2025-09-10 02:39:20.417744', 'step': 11411, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 128], 'flops': 3797092544000}, 'timestamp': '2025-09-10 02:39:20.447291', 'step': 11411, 'epoch': 2} {'type': 'loss', 'content': 0.10344097763299942, 'timestamp': '2025-09-10 02:39:20.470817', 'step': 11412, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 3322488817984}, 'timestamp': '2025-09-10 02:39:20.502864', 'step': 11412, 'epoch': 2} {'type': 'loss', 'content': 0.13524284958839417, 'timestamp': '2025-09-10 02:39:20.505238', 'step': 11413, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 144], 'flops': 4271696270016}, 'timestamp': '2025-09-10 02:39:20.535506', 'step': 11413, 'epoch': 2} {'type': 'loss', 'content': 0.042075611650943756, 'timestamp': '2025-09-10 02:39:20.537834', 'step': 11414, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 128], 'flops': 3797092544000}, 'timestamp': '2025-09-10 02:39:20.567683', 'step': 11414, 'epoch': 2} {'type': 'loss', 'content': 0.06546396762132645, 'timestamp': '2025-09-10 02:39:20.570123', 'step': 11415, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 3322488817984}, 'timestamp': '2025-09-10 02:39:20.600925', 'step': 11415, 'epoch': 2} {'type': 'loss', 'content': 0.058281708508729935, 'timestamp': '2025-09-10 02:39:20.624857', 'step': 11416, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 128], 'flops': 3797092544000}, 'timestamp': '2025-09-10 02:39:20.655032', 'step': 11416, 'epoch': 2} {'type': 'loss', 'content': 0.07203735411167145, 'timestamp': '2025-09-10 02:39:20.657549', 'step': 11417, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 2847885091968}, 'timestamp': '2025-09-10 02:39:20.689089', 'step': 11417, 'epoch': 2} {'type': 'loss', 'content': 0.1478119045495987, 'timestamp': '2025-09-10 02:39:20.691551', 'step': 11418, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 3322488817984}, 'timestamp': '2025-09-10 02:39:20.721360', 'step': 11418, 'epoch': 2} {'type': 'loss', 'content': 0.11059702932834625, 'timestamp': '2025-09-10 02:39:20.723599', 'step': 11419, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 2847885091968}, 'timestamp': '2025-09-10 02:39:20.752755', 'step': 11419, 'epoch': 2} {'type': 'loss', 'content': 0.05365508794784546, 'timestamp': '2025-09-10 02:39:20.776244', 'step': 11420, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 2847885091968}, 'timestamp': '2025-09-10 02:39:20.807542', 'step': 11420, 'epoch': 2} {'type': 'loss', 'content': 0.1460508555173874, 'timestamp': '2025-09-10 02:39:20.809987', 'step': 11421, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 3322488817984}, 'timestamp': '2025-09-10 02:39:20.840781', 'step': 11421, 'epoch': 2} {'type': 'loss', 'content': 0.08536084741353989, 'timestamp': '2025-09-10 02:39:20.843149', 'step': 11422, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 144], 'flops': 4271696270016}, 'timestamp': '2025-09-10 02:39:20.872957', 'step': 11422, 'epoch': 2} {'type': 'loss', 'content': 0.11486192047595978, 'timestamp': '2025-09-10 02:39:20.875217', 'step': 11423, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 128], 'flops': 3797092544000}, 'timestamp': '2025-09-10 02:39:20.906619', 'step': 11423, 'epoch': 2} {'type': 'loss', 'content': 0.08613421767950058, 'timestamp': '2025-09-10 02:39:20.930191', 'step': 11424, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 2847885091968}, 'timestamp': '2025-09-10 02:39:20.960384', 'step': 11424, 'epoch': 2} {'type': 'loss', 'content': 0.10659812390804291, 'timestamp': '2025-09-10 02:39:20.962631', 'step': 11425, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 2847885091968}, 'timestamp': '2025-09-10 02:39:20.992951', 'step': 11425, 'epoch': 2} {'type': 'loss', 'content': 0.20951059460639954, 'timestamp': '2025-09-10 02:39:20.995353', 'step': 11426, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 3322488817984}, 'timestamp': '2025-09-10 02:39:21.024816', 'step': 11426, 'epoch': 2} {'type': 'loss', 'content': 0.07437271624803543, 'timestamp': '2025-09-10 02:39:21.027079', 'step': 11427, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 128], 'flops': 3797092544000}, 'timestamp': '2025-09-10 02:39:21.057270', 'step': 11427, 'epoch': 2} {'type': 'loss', 'content': 0.06979480385780334, 'timestamp': '2025-09-10 02:39:21.080913', 'step': 11428, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 3322488817984}, 'timestamp': '2025-09-10 02:39:21.113131', 'step': 11428, 'epoch': 2} {'type': 'loss', 'content': 0.17829012870788574, 'timestamp': '2025-09-10 02:39:21.115218', 'step': 11429, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 2847885091968}, 'timestamp': '2025-09-10 02:39:21.144853', 'step': 11429, 'epoch': 2} {'type': 'loss', 'content': 0.16073088347911835, 'timestamp': '2025-09-10 02:39:21.147282', 'step': 11430, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 160], 'flops': 4746299996032}, 'timestamp': '2025-09-10 02:39:21.181221', 'step': 11430, 'epoch': 2} {'type': 'loss', 'content': 0.06264995038509369, 'timestamp': '2025-09-10 02:39:21.185105', 'step': 11431, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 144], 'flops': 4271696270016}, 'timestamp': '2025-09-10 02:39:21.216703', 'step': 11431, 'epoch': 2} {'type': 'loss', 'content': 0.10844220966100693, 'timestamp': '2025-09-10 02:39:21.240647', 'step': 11432, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 144], 'flops': 4271696270016}, 'timestamp': '2025-09-10 02:39:21.271685', 'step': 11432, 'epoch': 2} {'type': 'loss', 'content': 0.14094848930835724, 'timestamp': '2025-09-10 02:39:21.274876', 'step': 11433, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 3322488817984}, 'timestamp': '2025-09-10 02:39:21.310031', 'step': 11433, 'epoch': 2} {'type': 'loss', 'content': 0.11146625876426697, 'timestamp': '2025-09-10 02:39:21.312573', 'step': 11434, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 144], 'flops': 4271696270016}, 'timestamp': '2025-09-10 02:39:21.342853', 'step': 11434, 'epoch': 2} {'type': 'loss', 'content': 0.10242271423339844, 'timestamp': '2025-09-10 02:39:21.345655', 'step': 11435, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 144], 'flops': 4271696270016}, 'timestamp': '2025-09-10 02:39:21.377393', 'step': 11435, 'epoch': 2} {'type': 'loss', 'content': 0.08319384604692459, 'timestamp': '2025-09-10 02:39:21.401063', 'step': 11436, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 2847885091968}, 'timestamp': '2025-09-10 02:39:21.431963', 'step': 11436, 'epoch': 2} {'type': 'loss', 'content': 0.1268283575773239, 'timestamp': '2025-09-10 02:39:21.434554', 'step': 11437, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 2847885091968}, 'timestamp': '2025-09-10 02:39:21.465358', 'step': 11437, 'epoch': 2} {'type': 'loss', 'content': 0.10838226228952408, 'timestamp': '2025-09-10 02:39:21.468467', 'step': 11438, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 128], 'flops': 3797092544000}, 'timestamp': '2025-09-10 02:39:21.498683', 'step': 11438, 'epoch': 2} {'type': 'loss', 'content': 0.17435911297798157, 'timestamp': '2025-09-10 02:39:21.500994', 'step': 11439, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 128], 'flops': 3797092544000}, 'timestamp': '2025-09-10 02:39:21.530965', 'step': 11439, 'epoch': 2} {'type': 'loss', 'content': 0.095843605697155, 'timestamp': '2025-09-10 02:39:21.554356', 'step': 11440, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 128], 'flops': 3797092544000}, 'timestamp': '2025-09-10 02:39:21.585067', 'step': 11440, 'epoch': 2} {'type': 'loss', 'content': 0.06568687409162521, 'timestamp': '2025-09-10 02:39:21.589421', 'step': 11441, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 192], 'flops': 5695507448064}, 'timestamp': '2025-09-10 02:39:21.621750', 'step': 11441, 'epoch': 2} {'type': 'loss', 'content': 0.10348285734653473, 'timestamp': '2025-09-10 02:39:21.626301', 'step': 11442, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 3322488817984}, 'timestamp': '2025-09-10 02:39:21.656317', 'step': 11442, 'epoch': 2} {'type': 'loss', 'content': 0.08235479891300201, 'timestamp': '2025-09-10 02:39:21.658707', 'step': 11443, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 3322488817984}, 'timestamp': '2025-09-10 02:39:21.689032', 'step': 11443, 'epoch': 2} {'type': 'loss', 'content': 0.05034426972270012, 'timestamp': '2025-09-10 02:39:21.713223', 'step': 11444, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 3322488817984}, 'timestamp': '2025-09-10 02:39:21.745131', 'step': 11444, 'epoch': 2} {'type': 'loss', 'content': 0.10113601386547089, 'timestamp': '2025-09-10 02:39:21.747575', 'step': 11445, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 144], 'flops': 4271696270016}, 'timestamp': '2025-09-10 02:39:21.778236', 'step': 11445, 'epoch': 2} {'type': 'loss', 'content': 0.04943202808499336, 'timestamp': '2025-09-10 02:39:21.780556', 'step': 11446, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 3322488817984}, 'timestamp': '2025-09-10 02:39:21.812781', 'step': 11446, 'epoch': 2} {'type': 'loss', 'content': 0.08330804854631424, 'timestamp': '2025-09-10 02:39:21.815703', 'step': 11447, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 2847885091968}, 'timestamp': '2025-09-10 02:39:21.845904', 'step': 11447, 'epoch': 2} {'type': 'loss', 'content': 0.07500766962766647, 'timestamp': '2025-09-10 02:39:21.869605', 'step': 11448, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 3322488817984}, 'timestamp': '2025-09-10 02:39:21.901216', 'step': 11448, 'epoch': 2} {'type': 'loss', 'content': 0.1021827682852745, 'timestamp': '2025-09-10 02:39:21.904387', 'step': 11449, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 176], 'flops': 5220903722048}, 'timestamp': '2025-09-10 02:39:21.935860', 'step': 11449, 'epoch': 2} {'type': 'loss', 'content': 0.07536790519952774, 'timestamp': '2025-09-10 02:39:21.940138', 'step': 11450, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 2847885091968}, 'timestamp': '2025-09-10 02:39:21.970131', 'step': 11450, 'epoch': 2} {'type': 'loss', 'content': 0.11636493355035782, 'timestamp': '2025-09-10 02:39:21.972484', 'step': 11451, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 128], 'flops': 3797092544000}, 'timestamp': '2025-09-10 02:39:22.002314', 'step': 11451, 'epoch': 2} {'type': 'loss', 'content': 0.0494806170463562, 'timestamp': '2025-09-10 02:39:22.026261', 'step': 11452, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 2847885091968}, 'timestamp': '2025-09-10 02:39:22.055962', 'step': 11452, 'epoch': 2} {'type': 'loss', 'content': 0.06426264345645905, 'timestamp': '2025-09-10 02:39:22.058474', 'step': 11453, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 128], 'flops': 3797092544000}, 'timestamp': '2025-09-10 02:39:22.087969', 'step': 11453, 'epoch': 2} {'type': 'loss', 'content': 0.06233292818069458, 'timestamp': '2025-09-10 02:39:22.090256', 'step': 11454, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 128], 'flops': 3797092544000}, 'timestamp': '2025-09-10 02:39:22.123684', 'step': 11454, 'epoch': 2} {'type': 'loss', 'content': 0.03910579904913902, 'timestamp': '2025-09-10 02:39:22.125901', 'step': 11455, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 3322488817984}, 'timestamp': '2025-09-10 02:39:22.155738', 'step': 11455, 'epoch': 2} {'type': 'loss', 'content': 0.12534967064857483, 'timestamp': '2025-09-10 02:39:22.179480', 'step': 11456, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 2847885091968}, 'timestamp': '2025-09-10 02:39:22.210055', 'step': 11456, 'epoch': 2} {'type': 'loss', 'content': 0.06613211333751678, 'timestamp': '2025-09-10 02:39:22.212844', 'step': 11457, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 128], 'flops': 3797092544000}, 'timestamp': '2025-09-10 02:39:22.243142', 'step': 11457, 'epoch': 2} {'type': 'loss', 'content': 0.19649754464626312, 'timestamp': '2025-09-10 02:39:22.245531', 'step': 11458, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 3322488817984}, 'timestamp': '2025-09-10 02:39:22.276083', 'step': 11458, 'epoch': 2} {'type': 'loss', 'content': 0.09814368933439255, 'timestamp': '2025-09-10 02:39:22.278474', 'step': 11459, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 80], 'flops': 2373281365952}, 'timestamp': '2025-09-10 02:39:22.308422', 'step': 11459, 'epoch': 2} {'type': 'loss', 'content': 0.15032248198986053, 'timestamp': '2025-09-10 02:39:22.332180', 'step': 11460, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 128], 'flops': 3797092544000}, 'timestamp': '2025-09-10 02:39:22.363406', 'step': 11460, 'epoch': 2} {'type': 'loss', 'content': 0.11668137460947037, 'timestamp': '2025-09-10 02:39:22.365705', 'step': 11461, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 2847885091968}, 'timestamp': '2025-09-10 02:39:22.396072', 'step': 11461, 'epoch': 2} {'type': 'loss', 'content': 0.06129055470228195, 'timestamp': '2025-09-10 02:39:22.399875', 'step': 11462, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 3322488817984}, 'timestamp': '2025-09-10 02:39:22.429551', 'step': 11462, 'epoch': 2} {'type': 'loss', 'content': 0.10676514357328415, 'timestamp': '2025-09-10 02:39:22.435168', 'step': 11463, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 144], 'flops': 4271696270016}, 'timestamp': '2025-09-10 02:39:22.466188', 'step': 11463, 'epoch': 2} {'type': 'loss', 'content': 0.0842587873339653, 'timestamp': '2025-09-10 02:39:22.490039', 'step': 11464, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 144], 'flops': 4271696270016}, 'timestamp': '2025-09-10 02:39:22.525072', 'step': 11464, 'epoch': 2} {'type': 'loss', 'content': 0.19338113069534302, 'timestamp': '2025-09-10 02:39:22.527346', 'step': 11465, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 128], 'flops': 3797092544000}, 'timestamp': '2025-09-10 02:39:22.557378', 'step': 11465, 'epoch': 2} {'type': 'loss', 'content': 0.17809617519378662, 'timestamp': '2025-09-10 02:39:22.560098', 'step': 11466, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 2847885091968}, 'timestamp': '2025-09-10 02:39:22.591351', 'step': 11466, 'epoch': 2} {'type': 'loss', 'content': 0.11073774099349976, 'timestamp': '2025-09-10 02:39:22.594146', 'step': 11467, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 3322488817984}, 'timestamp': '2025-09-10 02:39:22.626268', 'step': 11467, 'epoch': 2} {'type': 'loss', 'content': 0.08582817018032074, 'timestamp': '2025-09-10 02:39:22.652033', 'step': 11468, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 2847885091968}, 'timestamp': '2025-09-10 02:39:22.686987', 'step': 11468, 'epoch': 2} {'type': 'loss', 'content': 0.17336155474185944, 'timestamp': '2025-09-10 02:39:22.692516', 'step': 11469, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 128], 'flops': 3797092544000}, 'timestamp': '2025-09-10 02:39:22.729293', 'step': 11469, 'epoch': 2} {'type': 'loss', 'content': 0.08556536585092545, 'timestamp': '2025-09-10 02:39:22.731662', 'step': 11470, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 80], 'flops': 2373281365952}, 'timestamp': '2025-09-10 02:39:22.761298', 'step': 11470, 'epoch': 2} {'type': 'loss', 'content': 0.11045458167791367, 'timestamp': '2025-09-10 02:39:22.763844', 'step': 11471, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 2847885091968}, 'timestamp': '2025-09-10 02:39:22.798389', 'step': 11471, 'epoch': 2} {'type': 'loss', 'content': 0.10534889996051788, 'timestamp': '2025-09-10 02:39:22.821924', 'step': 11472, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 2847885091968}, 'timestamp': '2025-09-10 02:39:22.855162', 'step': 11472, 'epoch': 2} {'type': 'loss', 'content': 0.19510634243488312, 'timestamp': '2025-09-10 02:39:22.857783', 'step': 11473, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 128], 'flops': 3797092544000}, 'timestamp': '2025-09-10 02:39:22.889495', 'step': 11473, 'epoch': 2} {'type': 'loss', 'content': 0.08508729189634323, 'timestamp': '2025-09-10 02:39:22.894451', 'step': 11474, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 144], 'flops': 4271696270016}, 'timestamp': '2025-09-10 02:39:22.925435', 'step': 11474, 'epoch': 2} {'type': 'loss', 'content': 0.11859346181154251, 'timestamp': '2025-09-10 02:39:22.930054', 'step': 11475, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 144], 'flops': 4271696270016}, 'timestamp': '2025-09-10 02:39:22.961454', 'step': 11475, 'epoch': 2} {'type': 'loss', 'content': 0.12152083963155746, 'timestamp': '2025-09-10 02:39:22.984962', 'step': 11476, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 3322488817984}, 'timestamp': '2025-09-10 02:39:23.023276', 'step': 11476, 'epoch': 2} {'type': 'loss', 'content': 0.04763327166438103, 'timestamp': '2025-09-10 02:39:23.025325', 'step': 11477, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 128], 'flops': 3797092544000}, 'timestamp': '2025-09-10 02:39:23.055517', 'step': 11477, 'epoch': 2} {'type': 'loss', 'content': 0.13083194196224213, 'timestamp': '2025-09-10 02:39:23.057892', 'step': 11478, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 3322488817984}, 'timestamp': '2025-09-10 02:39:23.087764', 'step': 11478, 'epoch': 2} {'type': 'loss', 'content': 0.11644774675369263, 'timestamp': '2025-09-10 02:39:23.090316', 'step': 11479, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 3322488817984}, 'timestamp': '2025-09-10 02:39:23.120322', 'step': 11479, 'epoch': 2} {'type': 'loss', 'content': 0.052140846848487854, 'timestamp': '2025-09-10 02:39:23.145850', 'step': 11480, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 2847885091968}, 'timestamp': '2025-09-10 02:39:23.176883', 'step': 11480, 'epoch': 2} {'type': 'loss', 'content': 0.10183385014533997, 'timestamp': '2025-09-10 02:39:23.179429', 'step': 11481, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 128], 'flops': 3797092544000}, 'timestamp': '2025-09-10 02:39:23.212318', 'step': 11481, 'epoch': 2} {'type': 'loss', 'content': 0.2311655879020691, 'timestamp': '2025-09-10 02:39:23.214850', 'step': 11482, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 3322488817984}, 'timestamp': '2025-09-10 02:39:23.244734', 'step': 11482, 'epoch': 2} {'type': 'loss', 'content': 0.11155179142951965, 'timestamp': '2025-09-10 02:39:23.247015', 'step': 11483, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 128], 'flops': 3797092544000}, 'timestamp': '2025-09-10 02:39:23.279627', 'step': 11483, 'epoch': 2} {'type': 'loss', 'content': 0.11649128049612045, 'timestamp': '2025-09-10 02:39:23.303440', 'step': 11484, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 3322488817984}, 'timestamp': '2025-09-10 02:39:23.335186', 'step': 11484, 'epoch': 2} {'type': 'loss', 'content': 0.04309339076280594, 'timestamp': '2025-09-10 02:39:23.337681', 'step': 11485, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 2847885091968}, 'timestamp': '2025-09-10 02:39:23.367812', 'step': 11485, 'epoch': 2} {'type': 'loss', 'content': 0.10545117408037186, 'timestamp': '2025-09-10 02:39:23.370713', 'step': 11486, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 3322488817984}, 'timestamp': '2025-09-10 02:39:23.400584', 'step': 11486, 'epoch': 2} {'type': 'loss', 'content': 0.05867745354771614, 'timestamp': '2025-09-10 02:39:23.402918', 'step': 11487, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 2847885091968}, 'timestamp': '2025-09-10 02:39:23.432755', 'step': 11487, 'epoch': 2} {'type': 'loss', 'content': 0.06731107831001282, 'timestamp': '2025-09-10 02:39:23.456204', 'step': 11488, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 3322488817984}, 'timestamp': '2025-09-10 02:39:23.485871', 'step': 11488, 'epoch': 2} {'type': 'loss', 'content': 0.1542012095451355, 'timestamp': '2025-09-10 02:39:23.489573', 'step': 11489, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 128], 'flops': 3797092544000}, 'timestamp': '2025-09-10 02:39:23.520229', 'step': 11489, 'epoch': 2} {'type': 'loss', 'content': 0.1133645698428154, 'timestamp': '2025-09-10 02:39:23.522607', 'step': 11490, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 144], 'flops': 4271696270016}, 'timestamp': '2025-09-10 02:39:23.552797', 'step': 11490, 'epoch': 2} {'type': 'loss', 'content': 0.09147858619689941, 'timestamp': '2025-09-10 02:39:23.555028', 'step': 11491, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 128], 'flops': 3797092544000}, 'timestamp': '2025-09-10 02:39:23.584614', 'step': 11491, 'epoch': 2} {'type': 'loss', 'content': 0.10685458779335022, 'timestamp': '2025-09-10 02:39:23.608295', 'step': 11492, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 128], 'flops': 3797092544000}, 'timestamp': '2025-09-10 02:39:23.640639', 'step': 11492, 'epoch': 2} {'type': 'loss', 'content': 0.12117008864879608, 'timestamp': '2025-09-10 02:39:23.643290', 'step': 11493, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 144], 'flops': 4271696270016}, 'timestamp': '2025-09-10 02:39:23.674133', 'step': 11493, 'epoch': 2} {'type': 'loss', 'content': 0.14511191844940186, 'timestamp': '2025-09-10 02:39:23.676686', 'step': 11494, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 3322488817984}, 'timestamp': '2025-09-10 02:39:23.707724', 'step': 11494, 'epoch': 2} {'type': 'loss', 'content': 0.09396029263734818, 'timestamp': '2025-09-10 02:39:23.717536', 'step': 11495, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 80], 'flops': 2373281365952}, 'timestamp': '2025-09-10 02:39:23.751150', 'step': 11495, 'epoch': 2} {'type': 'loss', 'content': 0.1224064826965332, 'timestamp': '2025-09-10 02:39:23.775364', 'step': 11496, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 3322488817984}, 'timestamp': '2025-09-10 02:39:23.807954', 'step': 11496, 'epoch': 2} {'type': 'loss', 'content': 0.10273049026727676, 'timestamp': '2025-09-10 02:39:23.811290', 'step': 11497, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 3322488817984}, 'timestamp': '2025-09-10 02:39:23.845569', 'step': 11497, 'epoch': 2} {'type': 'loss', 'content': 0.07609505206346512, 'timestamp': '2025-09-10 02:39:23.849206', 'step': 11498, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 3322488817984}, 'timestamp': '2025-09-10 02:39:23.882477', 'step': 11498, 'epoch': 2} {'type': 'loss', 'content': 0.119806207716465, 'timestamp': '2025-09-10 02:39:23.886414', 'step': 11499, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 2847885091968}, 'timestamp': '2025-09-10 02:39:23.918828', 'step': 11499, 'epoch': 2} {'type': 'loss', 'content': 0.04681767150759697, 'timestamp': '2025-09-10 02:39:23.946882', 'step': 11500, 'epoch': 2} {'type': 'info', 'content': 'Checkpoint saved at step 11500', 'timestamp': '2025-09-10 02:39:30.238009', 'step': 11500, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 3322488817984}, 'timestamp': '2025-09-10 02:39:30.284024', 'step': 11500, 'epoch': 2} {'type': 'loss', 'content': 0.12860684096813202, 'timestamp': '2025-09-10 02:39:30.286659', 'step': 11501, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 3322488817984}, 'timestamp': '2025-09-10 02:39:30.317883', 'step': 11501, 'epoch': 2} {'type': 'loss', 'content': 0.11932823807001114, 'timestamp': '2025-09-10 02:39:30.320415', 'step': 11502, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 3322488817984}, 'timestamp': '2025-09-10 02:39:30.351388', 'step': 11502, 'epoch': 2} {'type': 'loss', 'content': 0.16064873337745667, 'timestamp': '2025-09-10 02:39:30.353938', 'step': 11503, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 128], 'flops': 3797092544000}, 'timestamp': '2025-09-10 02:39:30.383955', 'step': 11503, 'epoch': 2} {'type': 'loss', 'content': 0.07785316556692123, 'timestamp': '2025-09-10 02:39:30.408116', 'step': 11504, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 144], 'flops': 4271696270016}, 'timestamp': '2025-09-10 02:39:30.440038', 'step': 11504, 'epoch': 2} {'type': 'loss', 'content': 0.08846587687730789, 'timestamp': '2025-09-10 02:39:30.442409', 'step': 11505, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 3322488817984}, 'timestamp': '2025-09-10 02:39:30.471759', 'step': 11505, 'epoch': 2} {'type': 'loss', 'content': 0.04834824427962303, 'timestamp': '2025-09-10 02:39:30.474177', 'step': 11506, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 2847885091968}, 'timestamp': '2025-09-10 02:39:30.504205', 'step': 11506, 'epoch': 2} {'type': 'loss', 'content': 0.07127583026885986, 'timestamp': '2025-09-10 02:39:30.507442', 'step': 11507, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 2847885091968}, 'timestamp': '2025-09-10 02:39:30.537769', 'step': 11507, 'epoch': 2} {'type': 'loss', 'content': 0.12279833853244781, 'timestamp': '2025-09-10 02:39:30.561468', 'step': 11508, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 3322488817984}, 'timestamp': '2025-09-10 02:39:30.592418', 'step': 11508, 'epoch': 2} {'type': 'loss', 'content': 0.06822855025529861, 'timestamp': '2025-09-10 02:39:30.594741', 'step': 11509, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 3322488817984}, 'timestamp': '2025-09-10 02:39:30.624661', 'step': 11509, 'epoch': 2} {'type': 'loss', 'content': 0.1743166446685791, 'timestamp': '2025-09-10 02:39:30.627253', 'step': 11510, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 3322488817984}, 'timestamp': '2025-09-10 02:39:30.657270', 'step': 11510, 'epoch': 2} {'type': 'loss', 'content': 0.04920234531164169, 'timestamp': '2025-09-10 02:39:30.659636', 'step': 11511, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 128], 'flops': 3797092544000}, 'timestamp': '2025-09-10 02:39:30.690702', 'step': 11511, 'epoch': 2} {'type': 'loss', 'content': 0.12106461822986603, 'timestamp': '2025-09-10 02:39:30.714764', 'step': 11512, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 2847885091968}, 'timestamp': '2025-09-10 02:39:30.745377', 'step': 11512, 'epoch': 2} {'type': 'loss', 'content': 0.04018621891736984, 'timestamp': '2025-09-10 02:39:30.748339', 'step': 11513, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 3322488817984}, 'timestamp': '2025-09-10 02:39:30.778973', 'step': 11513, 'epoch': 2} {'type': 'loss', 'content': 0.1235135868191719, 'timestamp': '2025-09-10 02:39:30.781348', 'step': 11514, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 80], 'flops': 2373281365952}, 'timestamp': '2025-09-10 02:39:30.811644', 'step': 11514, 'epoch': 2} {'type': 'loss', 'content': 0.08687026053667068, 'timestamp': '2025-09-10 02:39:30.814228', 'step': 11515, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 3322488817984}, 'timestamp': '2025-09-10 02:39:30.845149', 'step': 11515, 'epoch': 2} {'type': 'loss', 'content': 0.0726722702383995, 'timestamp': '2025-09-10 02:39:30.869658', 'step': 11516, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 3322488817984}, 'timestamp': '2025-09-10 02:39:30.900973', 'step': 11516, 'epoch': 2} {'type': 'loss', 'content': 0.13963046669960022, 'timestamp': '2025-09-10 02:39:30.903368', 'step': 11517, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 3322488817984}, 'timestamp': '2025-09-10 02:39:30.935119', 'step': 11517, 'epoch': 2} {'type': 'loss', 'content': 0.11415334790945053, 'timestamp': '2025-09-10 02:39:30.937821', 'step': 11518, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 144], 'flops': 4271696270016}, 'timestamp': '2025-09-10 02:39:30.972124', 'step': 11518, 'epoch': 2} {'type': 'loss', 'content': 0.1363629698753357, 'timestamp': '2025-09-10 02:39:30.974807', 'step': 11519, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 3322488817984}, 'timestamp': '2025-09-10 02:39:31.005339', 'step': 11519, 'epoch': 2} {'type': 'loss', 'content': 0.14539197087287903, 'timestamp': '2025-09-10 02:39:31.031010', 'step': 11520, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 2847885091968}, 'timestamp': '2025-09-10 02:39:31.061440', 'step': 11520, 'epoch': 2} {'type': 'loss', 'content': 0.16443012654781342, 'timestamp': '2025-09-10 02:39:31.063704', 'step': 11521, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 128], 'flops': 3797092544000}, 'timestamp': '2025-09-10 02:39:31.094717', 'step': 11521, 'epoch': 2} {'type': 'loss', 'content': 0.1279342770576477, 'timestamp': '2025-09-10 02:39:31.097332', 'step': 11522, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 3322488817984}, 'timestamp': '2025-09-10 02:39:31.127637', 'step': 11522, 'epoch': 2} {'type': 'loss', 'content': 0.1102912425994873, 'timestamp': '2025-09-10 02:39:31.130730', 'step': 11523, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 128], 'flops': 3797092544000}, 'timestamp': '2025-09-10 02:39:31.161600', 'step': 11523, 'epoch': 2} {'type': 'loss', 'content': 0.1260727196931839, 'timestamp': '2025-09-10 02:39:31.185495', 'step': 11524, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 3322488817984}, 'timestamp': '2025-09-10 02:39:31.216623', 'step': 11524, 'epoch': 2} {'type': 'loss', 'content': 0.07025374472141266, 'timestamp': '2025-09-10 02:39:31.219305', 'step': 11525, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 3322488817984}, 'timestamp': '2025-09-10 02:39:31.250444', 'step': 11525, 'epoch': 2} {'type': 'loss', 'content': 0.09790852665901184, 'timestamp': '2025-09-10 02:39:31.253439', 'step': 11526, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 128], 'flops': 3797092544000}, 'timestamp': '2025-09-10 02:39:31.284970', 'step': 11526, 'epoch': 2} {'type': 'loss', 'content': 0.07951090484857559, 'timestamp': '2025-09-10 02:39:31.287706', 'step': 11527, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 3322488817984}, 'timestamp': '2025-09-10 02:39:31.318739', 'step': 11527, 'epoch': 2} {'type': 'loss', 'content': 0.06580943614244461, 'timestamp': '2025-09-10 02:39:31.342511', 'step': 11528, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 128], 'flops': 3797092544000}, 'timestamp': '2025-09-10 02:39:31.374049', 'step': 11528, 'epoch': 2} {'type': 'loss', 'content': 0.12802325189113617, 'timestamp': '2025-09-10 02:39:31.376429', 'step': 11529, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 160], 'flops': 4746299996032}, 'timestamp': '2025-09-10 02:39:31.409529', 'step': 11529, 'epoch': 2} {'type': 'loss', 'content': 0.0626668930053711, 'timestamp': '2025-09-10 02:39:31.412227', 'step': 11530, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 3322488817984}, 'timestamp': '2025-09-10 02:39:31.444320', 'step': 11530, 'epoch': 2} {'type': 'loss', 'content': 0.06768280267715454, 'timestamp': '2025-09-10 02:39:31.447034', 'step': 11531, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 3322488817984}, 'timestamp': '2025-09-10 02:39:31.477321', 'step': 11531, 'epoch': 2} {'type': 'loss', 'content': 0.1086689755320549, 'timestamp': '2025-09-10 02:39:31.501633', 'step': 11532, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 3322488817984}, 'timestamp': '2025-09-10 02:39:31.536390', 'step': 11532, 'epoch': 2} {'type': 'loss', 'content': 0.09743177145719528, 'timestamp': '2025-09-10 02:39:31.539444', 'step': 11533, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 2847885091968}, 'timestamp': '2025-09-10 02:39:31.571645', 'step': 11533, 'epoch': 2} {'type': 'loss', 'content': 0.04137105494737625, 'timestamp': '2025-09-10 02:39:31.574469', 'step': 11534, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 80], 'flops': 2373281365952}, 'timestamp': '2025-09-10 02:39:31.606131', 'step': 11534, 'epoch': 2} {'type': 'loss', 'content': 0.1313774585723877, 'timestamp': '2025-09-10 02:39:31.609644', 'step': 11535, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 144], 'flops': 4271696270016}, 'timestamp': '2025-09-10 02:39:31.643295', 'step': 11535, 'epoch': 2} {'type': 'loss', 'content': 0.13469842076301575, 'timestamp': '2025-09-10 02:39:31.667587', 'step': 11536, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 128], 'flops': 3797092544000}, 'timestamp': '2025-09-10 02:39:31.698825', 'step': 11536, 'epoch': 2} {'type': 'loss', 'content': 0.128584623336792, 'timestamp': '2025-09-10 02:39:31.701281', 'step': 11537, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 3322488817984}, 'timestamp': '2025-09-10 02:39:31.731408', 'step': 11537, 'epoch': 2} {'type': 'loss', 'content': 0.08634231984615326, 'timestamp': '2025-09-10 02:39:31.733975', 'step': 11538, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 3322488817984}, 'timestamp': '2025-09-10 02:39:31.764264', 'step': 11538, 'epoch': 2} {'type': 'loss', 'content': 0.03983798623085022, 'timestamp': '2025-09-10 02:39:31.766812', 'step': 11539, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 3322488817984}, 'timestamp': '2025-09-10 02:39:31.796794', 'step': 11539, 'epoch': 2} {'type': 'loss', 'content': 0.17743542790412903, 'timestamp': '2025-09-10 02:39:31.820549', 'step': 11540, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 3322488817984}, 'timestamp': '2025-09-10 02:39:31.850890', 'step': 11540, 'epoch': 2} {'type': 'loss', 'content': 0.0794786885380745, 'timestamp': '2025-09-10 02:39:31.853913', 'step': 11541, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 3322488817984}, 'timestamp': '2025-09-10 02:39:31.883069', 'step': 11541, 'epoch': 2} {'type': 'loss', 'content': 0.13590896129608154, 'timestamp': '2025-09-10 02:39:31.885770', 'step': 11542, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 3322488817984}, 'timestamp': '2025-09-10 02:39:31.915977', 'step': 11542, 'epoch': 2} {'type': 'loss', 'content': 0.06092078611254692, 'timestamp': '2025-09-10 02:39:31.918477', 'step': 11543, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 144], 'flops': 4271696270016}, 'timestamp': '2025-09-10 02:39:31.949905', 'step': 11543, 'epoch': 2} {'type': 'loss', 'content': 0.0981331393122673, 'timestamp': '2025-09-10 02:39:31.973665', 'step': 11544, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 128], 'flops': 3797092544000}, 'timestamp': '2025-09-10 02:39:32.003686', 'step': 11544, 'epoch': 2} {'type': 'loss', 'content': 0.12700717151165009, 'timestamp': '2025-09-10 02:39:32.006100', 'step': 11545, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 3322488817984}, 'timestamp': '2025-09-10 02:39:32.037891', 'step': 11545, 'epoch': 2} {'type': 'loss', 'content': 0.0988873615860939, 'timestamp': '2025-09-10 02:39:32.040667', 'step': 11546, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 128], 'flops': 3797092544000}, 'timestamp': '2025-09-10 02:39:32.071806', 'step': 11546, 'epoch': 2} {'type': 'loss', 'content': 0.13924016058444977, 'timestamp': '2025-09-10 02:39:32.074345', 'step': 11547, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 128], 'flops': 3797092544000}, 'timestamp': '2025-09-10 02:39:32.103892', 'step': 11547, 'epoch': 2} {'type': 'loss', 'content': 0.11636266857385635, 'timestamp': '2025-09-10 02:39:32.127485', 'step': 11548, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 3322488817984}, 'timestamp': '2025-09-10 02:39:32.158429', 'step': 11548, 'epoch': 2} {'type': 'loss', 'content': 0.0635974258184433, 'timestamp': '2025-09-10 02:39:32.160895', 'step': 11549, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 2847885091968}, 'timestamp': '2025-09-10 02:39:32.191369', 'step': 11549, 'epoch': 2} {'type': 'loss', 'content': 0.0383136048913002, 'timestamp': '2025-09-10 02:39:32.193775', 'step': 11550, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 2847885091968}, 'timestamp': '2025-09-10 02:39:32.223766', 'step': 11550, 'epoch': 2} {'type': 'loss', 'content': 0.09314191341400146, 'timestamp': '2025-09-10 02:39:32.226324', 'step': 11551, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 160], 'flops': 4746299996032}, 'timestamp': '2025-09-10 02:39:32.257577', 'step': 11551, 'epoch': 2} {'type': 'loss', 'content': 0.0625043660402298, 'timestamp': '2025-09-10 02:39:32.281514', 'step': 11552, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 2847885091968}, 'timestamp': '2025-09-10 02:39:32.311525', 'step': 11552, 'epoch': 2} {'type': 'loss', 'content': 0.07627186924219131, 'timestamp': '2025-09-10 02:39:32.313865', 'step': 11553, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 3322488817984}, 'timestamp': '2025-09-10 02:39:32.343887', 'step': 11553, 'epoch': 2} {'type': 'loss', 'content': 0.06176498159766197, 'timestamp': '2025-09-10 02:39:32.346504', 'step': 11554, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 2847885091968}, 'timestamp': '2025-09-10 02:39:32.376420', 'step': 11554, 'epoch': 2} {'type': 'loss', 'content': 0.1126522347331047, 'timestamp': '2025-09-10 02:39:32.379842', 'step': 11555, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 3322488817984}, 'timestamp': '2025-09-10 02:39:32.413394', 'step': 11555, 'epoch': 2} {'type': 'loss', 'content': 0.13839861750602722, 'timestamp': '2025-09-10 02:39:32.437153', 'step': 11556, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 3322488817984}, 'timestamp': '2025-09-10 02:39:32.467783', 'step': 11556, 'epoch': 2} {'type': 'loss', 'content': 0.08585472404956818, 'timestamp': '2025-09-10 02:39:32.470362', 'step': 11557, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 160], 'flops': 4746299996032}, 'timestamp': '2025-09-10 02:39:32.500326', 'step': 11557, 'epoch': 2} {'type': 'loss', 'content': 0.12037771195173264, 'timestamp': '2025-09-10 02:39:32.503240', 'step': 11558, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 3322488817984}, 'timestamp': '2025-09-10 02:39:32.533508', 'step': 11558, 'epoch': 2} {'type': 'loss', 'content': 0.1358605921268463, 'timestamp': '2025-09-10 02:39:32.535890', 'step': 11559, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 3322488817984}, 'timestamp': '2025-09-10 02:39:32.565803', 'step': 11559, 'epoch': 2} {'type': 'loss', 'content': 0.07747258991003036, 'timestamp': '2025-09-10 02:39:32.589596', 'step': 11560, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 3322488817984}, 'timestamp': '2025-09-10 02:39:32.619968', 'step': 11560, 'epoch': 2} {'type': 'loss', 'content': 0.06269359588623047, 'timestamp': '2025-09-10 02:39:32.622335', 'step': 11561, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 144], 'flops': 4271696270016}, 'timestamp': '2025-09-10 02:39:32.653967', 'step': 11561, 'epoch': 2} {'type': 'loss', 'content': 0.10055362433195114, 'timestamp': '2025-09-10 02:39:32.656462', 'step': 11562, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 3322488817984}, 'timestamp': '2025-09-10 02:39:32.687199', 'step': 11562, 'epoch': 2} {'type': 'loss', 'content': 0.12268444150686264, 'timestamp': '2025-09-10 02:39:32.689818', 'step': 11563, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 2847885091968}, 'timestamp': '2025-09-10 02:39:32.720567', 'step': 11563, 'epoch': 2} {'type': 'loss', 'content': 0.10474711656570435, 'timestamp': '2025-09-10 02:39:32.744411', 'step': 11564, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 144], 'flops': 4271696270016}, 'timestamp': '2025-09-10 02:39:32.775165', 'step': 11564, 'epoch': 2} {'type': 'loss', 'content': 0.05745808407664299, 'timestamp': '2025-09-10 02:39:32.777490', 'step': 11565, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 128], 'flops': 3797092544000}, 'timestamp': '2025-09-10 02:39:32.807693', 'step': 11565, 'epoch': 2} {'type': 'loss', 'content': 0.12056014686822891, 'timestamp': '2025-09-10 02:39:32.810592', 'step': 11566, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 2847885091968}, 'timestamp': '2025-09-10 02:39:32.841359', 'step': 11566, 'epoch': 2} {'type': 'loss', 'content': 0.1590339094400406, 'timestamp': '2025-09-10 02:39:32.843643', 'step': 11567, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 3322488817984}, 'timestamp': '2025-09-10 02:39:32.874082', 'step': 11567, 'epoch': 2} {'type': 'loss', 'content': 0.09372055530548096, 'timestamp': '2025-09-10 02:39:32.898098', 'step': 11568, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 128], 'flops': 3797092544000}, 'timestamp': '2025-09-10 02:39:32.928442', 'step': 11568, 'epoch': 2} {'type': 'loss', 'content': 0.09558282047510147, 'timestamp': '2025-09-10 02:39:32.930982', 'step': 11569, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 2847885091968}, 'timestamp': '2025-09-10 02:39:32.962580', 'step': 11569, 'epoch': 2} {'type': 'loss', 'content': 0.08919430524110794, 'timestamp': '2025-09-10 02:39:32.965500', 'step': 11570, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 2847885091968}, 'timestamp': '2025-09-10 02:39:32.996368', 'step': 11570, 'epoch': 2} {'type': 'loss', 'content': 0.18647991120815277, 'timestamp': '2025-09-10 02:39:32.998713', 'step': 11571, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 128], 'flops': 3797092544000}, 'timestamp': '2025-09-10 02:39:33.028786', 'step': 11571, 'epoch': 2} {'type': 'loss', 'content': 0.059138260781764984, 'timestamp': '2025-09-10 02:39:33.052317', 'step': 11572, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 3322488817984}, 'timestamp': '2025-09-10 02:39:33.082039', 'step': 11572, 'epoch': 2} {'type': 'loss', 'content': 0.06271608173847198, 'timestamp': '2025-09-10 02:39:33.084272', 'step': 11573, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 160], 'flops': 4746299996032}, 'timestamp': '2025-09-10 02:39:33.114166', 'step': 11573, 'epoch': 2} {'type': 'loss', 'content': 0.07636809349060059, 'timestamp': '2025-09-10 02:39:33.117153', 'step': 11574, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 2847885091968}, 'timestamp': '2025-09-10 02:39:33.146950', 'step': 11574, 'epoch': 2} {'type': 'loss', 'content': 0.1588343381881714, 'timestamp': '2025-09-10 02:39:33.149752', 'step': 11575, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 80], 'flops': 2373281365952}, 'timestamp': '2025-09-10 02:39:33.180121', 'step': 11575, 'epoch': 2} {'type': 'loss', 'content': 0.02212025783956051, 'timestamp': '2025-09-10 02:39:33.203641', 'step': 11576, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 2847885091968}, 'timestamp': '2025-09-10 02:39:33.234958', 'step': 11576, 'epoch': 2} {'type': 'loss', 'content': 0.12459207326173782, 'timestamp': '2025-09-10 02:39:33.239189', 'step': 11577, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 144], 'flops': 4271696270016}, 'timestamp': '2025-09-10 02:39:33.269673', 'step': 11577, 'epoch': 2} {'type': 'loss', 'content': 0.05632761865854263, 'timestamp': '2025-09-10 02:39:33.272603', 'step': 11578, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 3322488817984}, 'timestamp': '2025-09-10 02:39:33.303113', 'step': 11578, 'epoch': 2} {'type': 'loss', 'content': 0.15902569890022278, 'timestamp': '2025-09-10 02:39:33.305649', 'step': 11579, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 3322488817984}, 'timestamp': '2025-09-10 02:39:33.335570', 'step': 11579, 'epoch': 2} {'type': 'loss', 'content': 0.034430913627147675, 'timestamp': '2025-09-10 02:39:33.360621', 'step': 11580, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 3322488817984}, 'timestamp': '2025-09-10 02:39:33.390616', 'step': 11580, 'epoch': 2} {'type': 'loss', 'content': 0.13668349385261536, 'timestamp': '2025-09-10 02:39:33.393288', 'step': 11581, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 128], 'flops': 3797092544000}, 'timestamp': '2025-09-10 02:39:33.423536', 'step': 11581, 'epoch': 2} {'type': 'loss', 'content': 0.09478713572025299, 'timestamp': '2025-09-10 02:39:33.426405', 'step': 11582, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 3322488817984}, 'timestamp': '2025-09-10 02:39:33.457428', 'step': 11582, 'epoch': 2} {'type': 'loss', 'content': 0.15850332379341125, 'timestamp': '2025-09-10 02:39:33.460008', 'step': 11583, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 3322488817984}, 'timestamp': '2025-09-10 02:39:33.490234', 'step': 11583, 'epoch': 2} {'type': 'loss', 'content': 0.13466928899288177, 'timestamp': '2025-09-10 02:39:33.513827', 'step': 11584, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 128], 'flops': 3797092544000}, 'timestamp': '2025-09-10 02:39:33.545123', 'step': 11584, 'epoch': 2} {'type': 'loss', 'content': 0.13989488780498505, 'timestamp': '2025-09-10 02:39:33.547644', 'step': 11585, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 3322488817984}, 'timestamp': '2025-09-10 02:39:33.577809', 'step': 11585, 'epoch': 2} {'type': 'loss', 'content': 0.10913311690092087, 'timestamp': '2025-09-10 02:39:33.580357', 'step': 11586, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 2847885091968}, 'timestamp': '2025-09-10 02:39:33.610368', 'step': 11586, 'epoch': 2} {'type': 'loss', 'content': 0.08315674215555191, 'timestamp': '2025-09-10 02:39:33.612904', 'step': 11587, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 80], 'flops': 2373281365952}, 'timestamp': '2025-09-10 02:39:33.642855', 'step': 11587, 'epoch': 2} {'type': 'loss', 'content': 0.06484442204236984, 'timestamp': '2025-09-10 02:39:33.666441', 'step': 11588, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 128], 'flops': 3797092544000}, 'timestamp': '2025-09-10 02:39:33.696955', 'step': 11588, 'epoch': 2} {'type': 'loss', 'content': 0.04195648804306984, 'timestamp': '2025-09-10 02:39:33.699560', 'step': 11589, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 2847885091968}, 'timestamp': '2025-09-10 02:39:33.730395', 'step': 11589, 'epoch': 2} {'type': 'loss', 'content': 0.14876404404640198, 'timestamp': '2025-09-10 02:39:33.732779', 'step': 11590, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 3322488817984}, 'timestamp': '2025-09-10 02:39:33.763610', 'step': 11590, 'epoch': 2} {'type': 'loss', 'content': 0.12147697061300278, 'timestamp': '2025-09-10 02:39:33.766015', 'step': 11591, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 128], 'flops': 3797092544000}, 'timestamp': '2025-09-10 02:39:33.795969', 'step': 11591, 'epoch': 2} {'type': 'loss', 'content': 0.09581024199724197, 'timestamp': '2025-09-10 02:39:33.819779', 'step': 11592, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 128], 'flops': 3797092544000}, 'timestamp': '2025-09-10 02:39:33.849687', 'step': 11592, 'epoch': 2} {'type': 'loss', 'content': 0.12072864919900894, 'timestamp': '2025-09-10 02:39:33.852177', 'step': 11593, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 144], 'flops': 4271696270016}, 'timestamp': '2025-09-10 02:39:33.883356', 'step': 11593, 'epoch': 2} {'type': 'loss', 'content': 0.14213888347148895, 'timestamp': '2025-09-10 02:39:33.886399', 'step': 11594, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 3322488817984}, 'timestamp': '2025-09-10 02:39:33.917020', 'step': 11594, 'epoch': 2} {'type': 'loss', 'content': 0.07770300656557083, 'timestamp': '2025-09-10 02:39:33.919980', 'step': 11595, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 3322488817984}, 'timestamp': '2025-09-10 02:39:33.950388', 'step': 11595, 'epoch': 2} {'type': 'loss', 'content': 0.15587462484836578, 'timestamp': '2025-09-10 02:39:33.977850', 'step': 11596, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 3322488817984}, 'timestamp': '2025-09-10 02:39:34.008118', 'step': 11596, 'epoch': 2} {'type': 'loss', 'content': 0.10367938876152039, 'timestamp': '2025-09-10 02:39:34.010495', 'step': 11597, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 144], 'flops': 4271696270016}, 'timestamp': '2025-09-10 02:39:34.040824', 'step': 11597, 'epoch': 2} {'type': 'loss', 'content': 0.1335873156785965, 'timestamp': '2025-09-10 02:39:34.043506', 'step': 11598, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 2847885091968}, 'timestamp': '2025-09-10 02:39:34.075041', 'step': 11598, 'epoch': 2} {'type': 'loss', 'content': 0.13588131964206696, 'timestamp': '2025-09-10 02:39:34.078247', 'step': 11599, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 3322488817984}, 'timestamp': '2025-09-10 02:39:34.108951', 'step': 11599, 'epoch': 2} {'type': 'loss', 'content': 0.06809080392122269, 'timestamp': '2025-09-10 02:39:34.133042', 'step': 11600, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 2847885091968}, 'timestamp': '2025-09-10 02:39:34.163933', 'step': 11600, 'epoch': 2} {'type': 'loss', 'content': 0.04320738464593887, 'timestamp': '2025-09-10 02:39:34.166572', 'step': 11601, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 3322488817984}, 'timestamp': '2025-09-10 02:39:34.196140', 'step': 11601, 'epoch': 2} {'type': 'loss', 'content': 0.0640038326382637, 'timestamp': '2025-09-10 02:39:34.198556', 'step': 11602, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 2847885091968}, 'timestamp': '2025-09-10 02:39:34.228719', 'step': 11602, 'epoch': 2} {'type': 'loss', 'content': 0.08968870341777802, 'timestamp': '2025-09-10 02:39:34.231229', 'step': 11603, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 3322488817984}, 'timestamp': '2025-09-10 02:39:34.261138', 'step': 11603, 'epoch': 2} {'type': 'loss', 'content': 0.1330275982618332, 'timestamp': '2025-09-10 02:39:34.285227', 'step': 11604, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 2847885091968}, 'timestamp': '2025-09-10 02:39:34.316932', 'step': 11604, 'epoch': 2} {'type': 'loss', 'content': 0.0851370319724083, 'timestamp': '2025-09-10 02:39:34.319532', 'step': 11605, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 3322488817984}, 'timestamp': '2025-09-10 02:39:34.349857', 'step': 11605, 'epoch': 2} {'type': 'loss', 'content': 0.10708736628293991, 'timestamp': '2025-09-10 02:39:34.353427', 'step': 11606, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 160], 'flops': 4746299996032}, 'timestamp': '2025-09-10 02:39:34.384077', 'step': 11606, 'epoch': 2} {'type': 'loss', 'content': 0.11171110719442368, 'timestamp': '2025-09-10 02:39:34.386864', 'step': 11607, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 3322488817984}, 'timestamp': '2025-09-10 02:39:34.419659', 'step': 11607, 'epoch': 2} {'type': 'loss', 'content': 0.07274025678634644, 'timestamp': '2025-09-10 02:39:34.443483', 'step': 11608, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 80], 'flops': 2373281365952}, 'timestamp': '2025-09-10 02:39:34.474612', 'step': 11608, 'epoch': 2} {'type': 'loss', 'content': 0.08025875687599182, 'timestamp': '2025-09-10 02:39:34.477236', 'step': 11609, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 144], 'flops': 4271696270016}, 'timestamp': '2025-09-10 02:39:34.508361', 'step': 11609, 'epoch': 2} {'type': 'loss', 'content': 0.08501487970352173, 'timestamp': '2025-09-10 02:39:34.511118', 'step': 11610, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 2847885091968}, 'timestamp': '2025-09-10 02:39:34.541658', 'step': 11610, 'epoch': 2} {'type': 'loss', 'content': 0.12043815106153488, 'timestamp': '2025-09-10 02:39:34.544188', 'step': 11611, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 2847885091968}, 'timestamp': '2025-09-10 02:39:34.573949', 'step': 11611, 'epoch': 2} {'type': 'loss', 'content': 0.08994660526514053, 'timestamp': '2025-09-10 02:39:34.597756', 'step': 11612, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 128], 'flops': 3797092544000}, 'timestamp': '2025-09-10 02:39:34.628691', 'step': 11612, 'epoch': 2} {'type': 'loss', 'content': 0.11648702621459961, 'timestamp': '2025-09-10 02:39:34.631355', 'step': 11613, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 128], 'flops': 3797092544000}, 'timestamp': '2025-09-10 02:39:34.660975', 'step': 11613, 'epoch': 2} {'type': 'loss', 'content': 0.1262955367565155, 'timestamp': '2025-09-10 02:39:34.663092', 'step': 11614, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 3322488817984}, 'timestamp': '2025-09-10 02:39:34.692847', 'step': 11614, 'epoch': 2} {'type': 'loss', 'content': 0.04198800399899483, 'timestamp': '2025-09-10 02:39:34.695740', 'step': 11615, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 3322488817984}, 'timestamp': '2025-09-10 02:39:34.726434', 'step': 11615, 'epoch': 2} {'type': 'loss', 'content': 0.08509594202041626, 'timestamp': '2025-09-10 02:39:34.750766', 'step': 11616, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 128], 'flops': 3797092544000}, 'timestamp': '2025-09-10 02:39:34.782523', 'step': 11616, 'epoch': 2} {'type': 'loss', 'content': 0.15241318941116333, 'timestamp': '2025-09-10 02:39:34.785098', 'step': 11617, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 160], 'flops': 4746299996032}, 'timestamp': '2025-09-10 02:39:34.815402', 'step': 11617, 'epoch': 2} {'type': 'loss', 'content': 0.09383931010961533, 'timestamp': '2025-09-10 02:39:34.818240', 'step': 11618, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 3322488817984}, 'timestamp': '2025-09-10 02:39:34.848754', 'step': 11618, 'epoch': 2} {'type': 'loss', 'content': 0.12472827732563019, 'timestamp': '2025-09-10 02:39:34.851343', 'step': 11619, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 3322488817984}, 'timestamp': '2025-09-10 02:39:34.883043', 'step': 11619, 'epoch': 2} {'type': 'loss', 'content': 0.14431840181350708, 'timestamp': '2025-09-10 02:39:34.906906', 'step': 11620, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 3322488817984}, 'timestamp': '2025-09-10 02:39:34.941117', 'step': 11620, 'epoch': 2} {'type': 'loss', 'content': 0.06818165630102158, 'timestamp': '2025-09-10 02:39:34.943900', 'step': 11621, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 144], 'flops': 4271696270016}, 'timestamp': '2025-09-10 02:39:34.974893', 'step': 11621, 'epoch': 2} {'type': 'loss', 'content': 0.1258399337530136, 'timestamp': '2025-09-10 02:39:34.977329', 'step': 11622, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 3322488817984}, 'timestamp': '2025-09-10 02:39:35.007079', 'step': 11622, 'epoch': 2} {'type': 'loss', 'content': 0.1633017510175705, 'timestamp': '2025-09-10 02:39:35.009670', 'step': 11623, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 80], 'flops': 2373281365952}, 'timestamp': '2025-09-10 02:39:35.040304', 'step': 11623, 'epoch': 2} {'type': 'loss', 'content': 0.03562565892934799, 'timestamp': '2025-09-10 02:39:35.064276', 'step': 11624, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 3322488817984}, 'timestamp': '2025-09-10 02:39:35.095227', 'step': 11624, 'epoch': 2} {'type': 'loss', 'content': 0.11531950533390045, 'timestamp': '2025-09-10 02:39:35.097780', 'step': 11625, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 3322488817984}, 'timestamp': '2025-09-10 02:39:35.127612', 'step': 11625, 'epoch': 2} {'type': 'loss', 'content': 0.10202071070671082, 'timestamp': '2025-09-10 02:39:35.130268', 'step': 11626, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 3322488817984}, 'timestamp': '2025-09-10 02:39:35.161391', 'step': 11626, 'epoch': 2} {'type': 'loss', 'content': 0.08100076764822006, 'timestamp': '2025-09-10 02:39:35.164302', 'step': 11627, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 3322488817984}, 'timestamp': '2025-09-10 02:39:35.194298', 'step': 11627, 'epoch': 2} {'type': 'loss', 'content': 0.05807121843099594, 'timestamp': '2025-09-10 02:39:35.218308', 'step': 11628, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 80], 'flops': 2373281365952}, 'timestamp': '2025-09-10 02:39:35.248762', 'step': 11628, 'epoch': 2} {'type': 'loss', 'content': 0.05729405954480171, 'timestamp': '2025-09-10 02:39:35.252167', 'step': 11629, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 2847885091968}, 'timestamp': '2025-09-10 02:39:35.283669', 'step': 11629, 'epoch': 2} {'type': 'loss', 'content': 0.08171619474887848, 'timestamp': '2025-09-10 02:39:35.286466', 'step': 11630, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 2847885091968}, 'timestamp': '2025-09-10 02:39:35.317363', 'step': 11630, 'epoch': 2} {'type': 'loss', 'content': 0.029621316120028496, 'timestamp': '2025-09-10 02:39:35.319996', 'step': 11631, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 128], 'flops': 3797092544000}, 'timestamp': '2025-09-10 02:39:35.350258', 'step': 11631, 'epoch': 2} {'type': 'loss', 'content': 0.09649084508419037, 'timestamp': '2025-09-10 02:39:35.374503', 'step': 11632, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 2847885091968}, 'timestamp': '2025-09-10 02:39:35.405626', 'step': 11632, 'epoch': 2} {'type': 'loss', 'content': 0.08769211173057556, 'timestamp': '2025-09-10 02:39:35.408067', 'step': 11633, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 2847885091968}, 'timestamp': '2025-09-10 02:39:35.437614', 'step': 11633, 'epoch': 2} {'type': 'loss', 'content': 0.06893043220043182, 'timestamp': '2025-09-10 02:39:35.440067', 'step': 11634, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 3322488817984}, 'timestamp': '2025-09-10 02:39:35.470615', 'step': 11634, 'epoch': 2} {'type': 'loss', 'content': 0.076673723757267, 'timestamp': '2025-09-10 02:39:35.473579', 'step': 11635, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 144], 'flops': 4271696270016}, 'timestamp': '2025-09-10 02:39:35.503790', 'step': 11635, 'epoch': 2} {'type': 'loss', 'content': 0.08122923970222473, 'timestamp': '2025-09-10 02:39:35.527469', 'step': 11636, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 3322488817984}, 'timestamp': '2025-09-10 02:39:35.557940', 'step': 11636, 'epoch': 2} {'type': 'loss', 'content': 0.09054379910230637, 'timestamp': '2025-09-10 02:39:35.560425', 'step': 11637, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 3322488817984}, 'timestamp': '2025-09-10 02:39:35.592419', 'step': 11637, 'epoch': 2} {'type': 'loss', 'content': 0.11675065010786057, 'timestamp': '2025-09-10 02:39:35.595124', 'step': 11638, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 128], 'flops': 3797092544000}, 'timestamp': '2025-09-10 02:39:35.625634', 'step': 11638, 'epoch': 2} {'type': 'loss', 'content': 0.1367638111114502, 'timestamp': '2025-09-10 02:39:35.628001', 'step': 11639, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 128], 'flops': 3797092544000}, 'timestamp': '2025-09-10 02:39:35.658235', 'step': 11639, 'epoch': 2} {'type': 'loss', 'content': 0.10179581493139267, 'timestamp': '2025-09-10 02:39:35.681839', 'step': 11640, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 3322488817984}, 'timestamp': '2025-09-10 02:39:35.713087', 'step': 11640, 'epoch': 2} {'type': 'loss', 'content': 0.03263905644416809, 'timestamp': '2025-09-10 02:39:35.715630', 'step': 11641, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 3322488817984}, 'timestamp': '2025-09-10 02:39:35.746047', 'step': 11641, 'epoch': 2} {'type': 'loss', 'content': 0.08164830505847931, 'timestamp': '2025-09-10 02:39:35.748569', 'step': 11642, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 3322488817984}, 'timestamp': '2025-09-10 02:39:35.779212', 'step': 11642, 'epoch': 2} {'type': 'loss', 'content': 0.15751856565475464, 'timestamp': '2025-09-10 02:39:35.781721', 'step': 11643, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 128], 'flops': 3797092544000}, 'timestamp': '2025-09-10 02:39:35.811902', 'step': 11643, 'epoch': 2} {'type': 'loss', 'content': 0.07649718970060349, 'timestamp': '2025-09-10 02:39:35.835679', 'step': 11644, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 2847885091968}, 'timestamp': '2025-09-10 02:39:35.865850', 'step': 11644, 'epoch': 2} {'type': 'loss', 'content': 0.1220388412475586, 'timestamp': '2025-09-10 02:39:35.868427', 'step': 11645, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 2847885091968}, 'timestamp': '2025-09-10 02:39:35.899774', 'step': 11645, 'epoch': 2} {'type': 'loss', 'content': 0.08355174213647842, 'timestamp': '2025-09-10 02:39:35.902394', 'step': 11646, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 3322488817984}, 'timestamp': '2025-09-10 02:39:35.933389', 'step': 11646, 'epoch': 2} {'type': 'loss', 'content': 0.06728709489107132, 'timestamp': '2025-09-10 02:39:35.935900', 'step': 11647, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 3322488817984}, 'timestamp': '2025-09-10 02:39:35.965992', 'step': 11647, 'epoch': 2} {'type': 'loss', 'content': 0.07712554186582565, 'timestamp': '2025-09-10 02:39:35.990010', 'step': 11648, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 128], 'flops': 3797092544000}, 'timestamp': '2025-09-10 02:39:36.020055', 'step': 11648, 'epoch': 2} {'type': 'loss', 'content': 0.03230169042944908, 'timestamp': '2025-09-10 02:39:36.022668', 'step': 11649, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 144], 'flops': 4271696270016}, 'timestamp': '2025-09-10 02:39:36.053262', 'step': 11649, 'epoch': 2} {'type': 'loss', 'content': 0.13246910274028778, 'timestamp': '2025-09-10 02:39:36.055696', 'step': 11650, 'epoch': 2} {'type': 'flops', 'content': [{'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1582003754624}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1898404492032}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1898404492032}, {'type': 'perplexity', 'in_batch_dim': [8, 144], 'batch_size': 8, 'flops': 2847606704256}, {'type': 'perplexity', 'in_batch_dim': [8, 64], 'batch_size': 8, 'flops': 1265603017216}, {'type': 'perplexity', 'in_batch_dim': [8, 112], 'batch_size': 8, 'flops': 2214805229440}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1898404492032}, {'type': 'perplexity', 'in_batch_dim': [8, 112], 'batch_size': 8, 'flops': 2214805229440}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1898404492032}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1898404492032}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1898404492032}, {'type': 'perplexity', 'in_batch_dim': [8, 112], 'batch_size': 8, 'flops': 2214805229440}, {'type': 'perplexity', 'in_batch_dim': [8, 112], 'batch_size': 8, 'flops': 2214805229440}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1582003754624}, {'type': 'perplexity', 'in_batch_dim': [8, 144], 'batch_size': 8, 'flops': 2847606704256}, {'type': 'perplexity', 'in_batch_dim': [8, 112], 'batch_size': 8, 'flops': 2214805229440}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1582003754624}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1898404492032}, {'type': 'perplexity', 'in_batch_dim': [8, 64], 'batch_size': 8, 'flops': 1265603017216}, {'type': 'perplexity', 'in_batch_dim': [8, 64], 'batch_size': 8, 'flops': 1265603017216}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1898404492032}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1582003754624}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1582003754624}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1582003754624}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1582003754624}, {'type': 'perplexity', 'in_batch_dim': [8, 64], 'batch_size': 8, 'flops': 1265603017216}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1898404492032}, {'type': 'perplexity', 'in_batch_dim': [8, 64], 'batch_size': 8, 'flops': 1265603017216}, {'type': 'perplexity', 'in_batch_dim': [8, 64], 'batch_size': 8, 'flops': 1265603017216}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1582003754624}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1582003754624}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1582003754624}, {'type': 'perplexity', 'in_batch_dim': [8, 112], 'batch_size': 8, 'flops': 2214805229440}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1582003754624}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1582003754624}, {'type': 'perplexity', 'in_batch_dim': [8, 160], 'batch_size': 8, 'flops': 3164007441664}, {'type': 'perplexity', 'in_batch_dim': [8, 64], 'batch_size': 8, 'flops': 1265603017216}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1898404492032}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1898404492032}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1898404492032}, {'type': 'perplexity', 'in_batch_dim': [8, 64], 'batch_size': 8, 'flops': 1265603017216}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1582003754624}, {'type': 'perplexity', 'in_batch_dim': [8, 64], 'batch_size': 8, 'flops': 1265603017216}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1582003754624}, {'type': 'perplexity', 'in_batch_dim': [8, 64], 'batch_size': 8, 'flops': 1265603017216}, {'type': 'perplexity', 'in_batch_dim': [8, 112], 'batch_size': 8, 'flops': 2214805229440}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1582003754624}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1898404492032}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1898404492032}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1898404492032}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1898404492032}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1898404492032}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1898404492032}, {'type': 'perplexity', 'in_batch_dim': [8, 64], 'batch_size': 8, 'flops': 1265603017216}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1898404492032}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1582003754624}, {'type': 'perplexity', 'in_batch_dim': [8, 64], 'batch_size': 8, 'flops': 1265603017216}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1898404492032}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1898404492032}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1898404492032}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1898404492032}, {'type': 'perplexity', 'in_batch_dim': [8, 128], 'batch_size': 8, 'flops': 2531205966848}, {'type': 'perplexity', 'in_batch_dim': [8, 112], 'batch_size': 8, 'flops': 2214805229440}, {'type': 'perplexity', 'in_batch_dim': [8, 64], 'batch_size': 8, 'flops': 1265603017216}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1582003754624}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1898404492032}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1582003754624}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1898404492032}, {'type': 'perplexity', 'in_batch_dim': [8, 64], 'batch_size': 8, 'flops': 1265603017216}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1898404492032}, {'type': 'perplexity', 'in_batch_dim': [8, 112], 'batch_size': 8, 'flops': 2214805229440}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1898404492032}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1582003754624}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1582003754624}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1582003754624}, {'type': 'perplexity', 'in_batch_dim': [8, 64], 'batch_size': 8, 'flops': 1265603017216}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1582003754624}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1898404492032}, {'type': 'perplexity', 'in_batch_dim': [8, 64], 'batch_size': 8, 'flops': 1265603017216}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1582003754624}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1898404492032}, {'type': 'perplexity', 'in_batch_dim': [8, 64], 'batch_size': 8, 'flops': 1265603017216}, {'type': 'perplexity', 'in_batch_dim': [8, 112], 'batch_size': 8, 'flops': 2214805229440}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1582003754624}, {'type': 'perplexity', 'in_batch_dim': [8, 144], 'batch_size': 8, 'flops': 2847606704256}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1582003754624}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1582003754624}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1582003754624}, {'type': 'perplexity', 'in_batch_dim': [8, 64], 'batch_size': 8, 'flops': 1265603017216}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1898404492032}, {'type': 'perplexity', 'in_batch_dim': [8, 112], 'batch_size': 8, 'flops': 2214805229440}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1898404492032}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1582003754624}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1582003754624}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1582003754624}, {'type': 'perplexity', 'in_batch_dim': [8, 112], 'batch_size': 8, 'flops': 2214805229440}, {'type': 'perplexity', 'in_batch_dim': [8, 64], 'batch_size': 8, 'flops': 1265603017216}, {'type': 'perplexity', 'in_batch_dim': [8, 112], 'batch_size': 8, 'flops': 2214805229440}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1582003754624}, {'type': 'perplexity', 'in_batch_dim': [8, 64], 'batch_size': 8, 'flops': 1265603017216}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1582003754624}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1582003754624}, {'type': 'perplexity', 'in_batch_dim': [8, 64], 'batch_size': 8, 'flops': 1265603017216}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1582003754624}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1582003754624}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1582003754624}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1898404492032}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1582003754624}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1582003754624}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1898404492032}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1898404492032}, {'type': 'perplexity', 'in_batch_dim': [8, 112], 'batch_size': 8, 'flops': 2214805229440}, {'type': 'perplexity', 'in_batch_dim': [8, 64], 'batch_size': 8, 'flops': 1265603017216}, {'type': 'perplexity', 'in_batch_dim': [8, 112], 'batch_size': 8, 'flops': 2214805229440}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1898404492032}, {'type': 'perplexity', 'in_batch_dim': [8, 112], 'batch_size': 8, 'flops': 2214805229440}, {'type': 'perplexity', 'in_batch_dim': [8, 128], 'batch_size': 8, 'flops': 2531205966848}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1582003754624}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1898404492032}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1898404492032}, {'type': 'perplexity', 'in_batch_dim': [8, 192], 'batch_size': 8, 'flops': 3796808916480}, {'type': 'perplexity', 'in_batch_dim': [8, 64], 'batch_size': 8, 'flops': 1265603017216}, {'type': 'perplexity', 'in_batch_dim': [8, 144], 'batch_size': 8, 'flops': 2847606704256}, {'type': 'perplexity', 'in_batch_dim': [8, 64], 'batch_size': 8, 'flops': 1265603017216}, {'type': 'perplexity', 'in_batch_dim': [8, 144], 'batch_size': 8, 'flops': 2847606704256}, {'type': 'perplexity', 'in_batch_dim': [8, 64], 'batch_size': 8, 'flops': 1265603017216}, {'type': 'perplexity', 'in_batch_dim': [8, 64], 'batch_size': 8, 'flops': 1265603017216}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1898404492032}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1898404492032}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1582003754624}, {'type': 'perplexity', 'in_batch_dim': [8, 128], 'batch_size': 8, 'flops': 2531205966848}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1898404492032}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1898404492032}, {'type': 'perplexity', 'in_batch_dim': [8, 112], 'batch_size': 8, 'flops': 2214805229440}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1582003754624}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1582003754624}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1898404492032}, {'type': 'perplexity', 'in_batch_dim': [8, 64], 'batch_size': 8, 'flops': 1265603017216}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1898404492032}, {'type': 'perplexity', 'in_batch_dim': [8, 112], 'batch_size': 8, 'flops': 2214805229440}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1898404492032}, {'type': 'perplexity', 'in_batch_dim': [8, 64], 'batch_size': 8, 'flops': 1265603017216}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1582003754624}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1898404492032}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1898404492032}, {'type': 'perplexity', 'in_batch_dim': [8, 64], 'batch_size': 8, 'flops': 1265603017216}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1898404492032}, {'type': 'perplexity', 'in_batch_dim': [8, 64], 'batch_size': 8, 'flops': 1265603017216}, {'type': 'perplexity', 'in_batch_dim': [8, 112], 'batch_size': 8, 'flops': 2214805229440}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1582003754624}, {'type': 'perplexity', 'in_batch_dim': [8, 128], 'batch_size': 8, 'flops': 2531205966848}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1582003754624}, {'type': 'perplexity', 'in_batch_dim': [8, 112], 'batch_size': 8, 'flops': 2214805229440}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1898404492032}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1582003754624}, {'type': 'perplexity', 'in_batch_dim': [8, 112], 'batch_size': 8, 'flops': 2214805229440}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1582003754624}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1898404492032}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1898404492032}, {'type': 'perplexity', 'in_batch_dim': [8, 128], 'batch_size': 8, 'flops': 2531205966848}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1898404492032}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1898404492032}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1582003754624}, {'type': 'perplexity', 'in_batch_dim': [8, 112], 'batch_size': 8, 'flops': 2214805229440}, {'type': 'perplexity', 'in_batch_dim': [8, 128], 'batch_size': 8, 'flops': 2531205966848}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1898404492032}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1582003754624}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1582003754624}, {'type': 'perplexity', 'in_batch_dim': [8, 128], 'batch_size': 8, 'flops': 2531205966848}, {'type': 'perplexity', 'in_batch_dim': [8, 112], 'batch_size': 8, 'flops': 2214805229440}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1582003754624}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1582003754624}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1898404492032}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1582003754624}, {'type': 'perplexity', 'in_batch_dim': [8, 64], 'batch_size': 8, 'flops': 1265603017216}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1582003754624}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1582003754624}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1582003754624}, {'type': 'perplexity', 'in_batch_dim': [8, 112], 'batch_size': 8, 'flops': 2214805229440}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1582003754624}, {'type': 'perplexity', 'in_batch_dim': [8, 64], 'batch_size': 8, 'flops': 1265603017216}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1582003754624}, {'type': 'perplexity', 'in_batch_dim': [8, 128], 'batch_size': 8, 'flops': 2531205966848}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1898404492032}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1898404492032}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1582003754624}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1898404492032}, {'type': 'perplexity', 'in_batch_dim': [8, 64], 'batch_size': 8, 'flops': 1265603017216}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1898404492032}, {'type': 'perplexity', 'in_batch_dim': [8, 112], 'batch_size': 8, 'flops': 2214805229440}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1898404492032}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1582003754624}, {'type': 'perplexity', 'in_batch_dim': [8, 112], 'batch_size': 8, 'flops': 2214805229440}, {'type': 'perplexity', 'in_batch_dim': [8, 112], 'batch_size': 8, 'flops': 2214805229440}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1898404492032}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1898404492032}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1898404492032}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1898404492032}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1582003754624}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1582003754624}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1582003754624}, {'type': 'perplexity', 'in_batch_dim': [8, 112], 'batch_size': 8, 'flops': 2214805229440}, {'type': 'perplexity', 'in_batch_dim': [8, 64], 'batch_size': 8, 'flops': 1265603017216}, {'type': 'perplexity', 'in_batch_dim': [8, 144], 'batch_size': 8, 'flops': 2847606704256}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1898404492032}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1898404492032}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1898404492032}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1898404492032}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1898404492032}, {'type': 'perplexity', 'in_batch_dim': [8, 112], 'batch_size': 8, 'flops': 2214805229440}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1582003754624}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1582003754624}, {'type': 'perplexity', 'in_batch_dim': [8, 64], 'batch_size': 8, 'flops': 1265603017216}, {'type': 'perplexity', 'in_batch_dim': [8, 112], 'batch_size': 8, 'flops': 2214805229440}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1582003754624}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1898404492032}, {'type': 'perplexity', 'in_batch_dim': [8, 64], 'batch_size': 8, 'flops': 1265603017216}, {'type': 'perplexity', 'in_batch_dim': [8, 112], 'batch_size': 8, 'flops': 2214805229440}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1898404492032}, {'type': 'perplexity', 'in_batch_dim': [8, 64], 'batch_size': 8, 'flops': 1265603017216}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1582003754624}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1582003754624}, {'type': 'perplexity', 'in_batch_dim': [8, 64], 'batch_size': 8, 'flops': 1265603017216}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1898404492032}, {'type': 'perplexity', 'in_batch_dim': [8, 64], 'batch_size': 8, 'flops': 1265603017216}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1582003754624}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1898404492032}, {'type': 'perplexity', 'in_batch_dim': [8, 112], 'batch_size': 8, 'flops': 2214805229440}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1582003754624}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1898404492032}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1582003754624}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1898404492032}, {'type': 'perplexity', 'in_batch_dim': [8, 112], 'batch_size': 8, 'flops': 2214805229440}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1898404492032}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1898404492032}, {'type': 'perplexity', 'in_batch_dim': [8, 64], 'batch_size': 8, 'flops': 1265603017216}, {'type': 'perplexity', 'in_batch_dim': [8, 64], 'batch_size': 8, 'flops': 1265603017216}, {'type': 'perplexity', 'in_batch_dim': [8, 128], 'batch_size': 8, 'flops': 2531205966848}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1582003754624}, {'type': 'perplexity', 'in_batch_dim': [8, 64], 'batch_size': 8, 'flops': 1265603017216}, {'type': 'perplexity', 'in_batch_dim': [8, 112], 'batch_size': 8, 'flops': 2214805229440}, {'type': 'perplexity', 'in_batch_dim': [8, 112], 'batch_size': 8, 'flops': 2214805229440}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1898404492032}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1582003754624}, {'type': 'perplexity', 'in_batch_dim': [8, 128], 'batch_size': 8, 'flops': 2531205966848}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1898404492032}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1582003754624}, {'type': 'perplexity', 'in_batch_dim': [8, 112], 'batch_size': 8, 'flops': 2214805229440}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1898404492032}, {'type': 'perplexity', 'in_batch_dim': [8, 64], 'batch_size': 8, 'flops': 1265603017216}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1582003754624}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1898404492032}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1898404492032}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1582003754624}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1582003754624}, {'type': 'perplexity', 'in_batch_dim': [8, 64], 'batch_size': 8, 'flops': 1265603017216}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1898404492032}, {'type': 'perplexity', 'in_batch_dim': [8, 112], 'batch_size': 8, 'flops': 2214805229440}, {'type': 'perplexity', 'in_batch_dim': [8, 64], 'batch_size': 8, 'flops': 1265603017216}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1898404492032}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1582003754624}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1582003754624}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1898404492032}, {'type': 'perplexity', 'in_batch_dim': [8, 112], 'batch_size': 8, 'flops': 2214805229440}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1898404492032}, {'type': 'perplexity', 'in_batch_dim': [8, 112], 'batch_size': 8, 'flops': 2214805229440}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1582003754624}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1582003754624}, {'type': 'perplexity', 'in_batch_dim': [8, 64], 'batch_size': 8, 'flops': 1265603017216}, {'type': 'perplexity', 'in_batch_dim': [8, 112], 'batch_size': 8, 'flops': 2214805229440}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1582003754624}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1898404492032}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1582003754624}, {'type': 'perplexity', 'in_batch_dim': [8, 112], 'batch_size': 8, 'flops': 2214805229440}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1898404492032}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1898404492032}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1582003754624}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1898404492032}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1582003754624}, {'type': 'perplexity', 'in_batch_dim': [8, 112], 'batch_size': 8, 'flops': 2214805229440}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1582003754624}, {'type': 'perplexity', 'in_batch_dim': [8, 64], 'batch_size': 8, 'flops': 1265603017216}, {'type': 'perplexity', 'in_batch_dim': [8, 64], 'batch_size': 8, 'flops': 1265603017216}, {'type': 'perplexity', 'in_batch_dim': [8, 48], 'batch_size': 8, 'flops': 949202279808}, {'type': 'perplexity', 'in_batch_dim': [8, 112], 'batch_size': 8, 'flops': 2214805229440}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1582003754624}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1898404492032}, {'type': 'perplexity', 'in_batch_dim': [8, 128], 'batch_size': 8, 'flops': 2531205966848}, {'type': 'perplexity', 'in_batch_dim': [8, 112], 'batch_size': 8, 'flops': 2214805229440}, {'type': 'perplexity', 'in_batch_dim': [8, 128], 'batch_size': 8, 'flops': 2531205966848}, {'type': 'perplexity', 'in_batch_dim': [8, 112], 'batch_size': 8, 'flops': 2214805229440}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1582003754624}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1898404492032}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1582003754624}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1582003754624}, {'type': 'perplexity', 'in_batch_dim': [8, 112], 'batch_size': 8, 'flops': 2214805229440}, {'type': 'perplexity', 'in_batch_dim': [8, 112], 'batch_size': 8, 'flops': 2214805229440}, {'type': 'perplexity', 'in_batch_dim': [8, 64], 'batch_size': 8, 'flops': 1265603017216}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1898404492032}, {'type': 'perplexity', 'in_batch_dim': [8, 112], 'batch_size': 8, 'flops': 2214805229440}, {'type': 'perplexity', 'in_batch_dim': [8, 64], 'batch_size': 8, 'flops': 1265603017216}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1898404492032}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1582003754624}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1898404492032}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1898404492032}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1898404492032}, {'type': 'perplexity', 'in_batch_dim': [8, 48], 'batch_size': 8, 'flops': 949202279808}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1898404492032}, {'type': 'perplexity', 'in_batch_dim': [8, 112], 'batch_size': 8, 'flops': 2214805229440}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1582003754624}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1898404492032}, {'type': 'perplexity', 'in_batch_dim': [8, 64], 'batch_size': 8, 'flops': 1265603017216}, {'type': 'perplexity', 'in_batch_dim': [8, 112], 'batch_size': 8, 'flops': 2214805229440}, {'type': 'perplexity', 'in_batch_dim': [8, 64], 'batch_size': 8, 'flops': 1265603017216}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1582003754624}, {'type': 'perplexity', 'in_batch_dim': [8, 64], 'batch_size': 8, 'flops': 1265603017216}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1582003754624}, {'type': 'perplexity', 'in_batch_dim': [8, 112], 'batch_size': 8, 'flops': 2214805229440}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1582003754624}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1582003754624}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1582003754624}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1582003754624}, {'type': 'perplexity', 'in_batch_dim': [8, 112], 'batch_size': 8, 'flops': 2214805229440}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1582003754624}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1898404492032}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1898404492032}, {'type': 'perplexity', 'in_batch_dim': [8, 112], 'batch_size': 8, 'flops': 2214805229440}, {'type': 'perplexity', 'in_batch_dim': [8, 144], 'batch_size': 8, 'flops': 2847606704256}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1582003754624}, {'type': 'perplexity', 'in_batch_dim': [8, 64], 'batch_size': 8, 'flops': 1265603017216}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1898404492032}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1898404492032}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1582003754624}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1898404492032}, {'type': 'perplexity', 'in_batch_dim': [8, 128], 'batch_size': 8, 'flops': 2531205966848}, {'type': 'perplexity', 'in_batch_dim': [8, 112], 'batch_size': 8, 'flops': 2214805229440}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1898404492032}, {'type': 'perplexity', 'in_batch_dim': [8, 64], 'batch_size': 8, 'flops': 1265603017216}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1898404492032}, {'type': 'perplexity', 'in_batch_dim': [8, 160], 'batch_size': 8, 'flops': 3164007441664}, {'type': 'perplexity', 'in_batch_dim': [8, 64], 'batch_size': 8, 'flops': 1265603017216}, {'type': 'perplexity', 'in_batch_dim': [8, 112], 'batch_size': 8, 'flops': 2214805229440}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1898404492032}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1898404492032}, {'type': 'perplexity', 'in_batch_dim': [8, 144], 'batch_size': 8, 'flops': 2847606704256}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1582003754624}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1898404492032}, {'type': 'perplexity', 'in_batch_dim': [8, 64], 'batch_size': 8, 'flops': 1265603017216}, {'type': 'perplexity', 'in_batch_dim': [8, 64], 'batch_size': 8, 'flops': 1265603017216}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1898404492032}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1898404492032}, {'type': 'perplexity', 'in_batch_dim': [8, 112], 'batch_size': 8, 'flops': 2214805229440}, {'type': 'perplexity', 'in_batch_dim': [8, 112], 'batch_size': 8, 'flops': 2214805229440}, {'type': 'perplexity', 'in_batch_dim': [8, 128], 'batch_size': 8, 'flops': 2531205966848}, {'type': 'perplexity', 'in_batch_dim': [8, 112], 'batch_size': 8, 'flops': 2214805229440}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1582003754624}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1582003754624}, {'type': 'perplexity', 'in_batch_dim': [8, 112], 'batch_size': 8, 'flops': 2214805229440}, {'type': 'perplexity', 'in_batch_dim': [8, 128], 'batch_size': 8, 'flops': 2531205966848}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1582003754624}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1582003754624}, {'type': 'perplexity', 'in_batch_dim': [8, 64], 'batch_size': 8, 'flops': 1265603017216}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1898404492032}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1582003754624}, {'type': 'perplexity', 'in_batch_dim': [8, 112], 'batch_size': 8, 'flops': 2214805229440}, {'type': 'perplexity', 'in_batch_dim': [8, 64], 'batch_size': 8, 'flops': 1265603017216}, {'type': 'perplexity', 'in_batch_dim': [8, 64], 'batch_size': 8, 'flops': 1265603017216}, {'type': 'perplexity', 'in_batch_dim': [8, 128], 'batch_size': 8, 'flops': 2531205966848}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1898404492032}, {'type': 'perplexity', 'in_batch_dim': [8, 112], 'batch_size': 8, 'flops': 2214805229440}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1582003754624}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1582003754624}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1582003754624}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1582003754624}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1582003754624}, {'type': 'perplexity', 'in_batch_dim': [8, 112], 'batch_size': 8, 'flops': 2214805229440}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1898404492032}, {'type': 'perplexity', 'in_batch_dim': [8, 112], 'batch_size': 8, 'flops': 2214805229440}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1898404492032}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1582003754624}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1582003754624}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1898404492032}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1898404492032}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1582003754624}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1582003754624}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1582003754624}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1582003754624}, {'type': 'perplexity', 'in_batch_dim': [8, 48], 'batch_size': 8, 'flops': 949202279808}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1582003754624}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1898404492032}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1582003754624}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1898404492032}, {'type': 'perplexity', 'in_batch_dim': [8, 112], 'batch_size': 8, 'flops': 2214805229440}, {'type': 'perplexity', 'in_batch_dim': [8, 128], 'batch_size': 8, 'flops': 2531205966848}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1898404492032}, {'type': 'perplexity', 'in_batch_dim': [8, 64], 'batch_size': 8, 'flops': 1265603017216}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1898404492032}, {'type': 'perplexity', 'in_batch_dim': [8, 64], 'batch_size': 8, 'flops': 1265603017216}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1582003754624}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1582003754624}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1582003754624}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1582003754624}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1898404492032}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1582003754624}, {'type': 'perplexity', 'in_batch_dim': [8, 112], 'batch_size': 8, 'flops': 2214805229440}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1898404492032}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1582003754624}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1898404492032}, {'type': 'perplexity', 'in_batch_dim': [8, 64], 'batch_size': 8, 'flops': 1265603017216}, {'type': 'perplexity', 'in_batch_dim': [8, 64], 'batch_size': 8, 'flops': 1265603017216}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1898404492032}, {'type': 'perplexity', 'in_batch_dim': [8, 64], 'batch_size': 8, 'flops': 1265603017216}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1582003754624}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1582003754624}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1582003754624}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1582003754624}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1582003754624}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1582003754624}, {'type': 'perplexity', 'in_batch_dim': [8, 64], 'batch_size': 8, 'flops': 1265603017216}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1582003754624}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1582003754624}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1898404492032}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1898404492032}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1898404492032}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1898404492032}, {'type': 'perplexity', 'in_batch_dim': [8, 128], 'batch_size': 8, 'flops': 2531205966848}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1582003754624}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1898404492032}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1898404492032}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1898404492032}, {'type': 'perplexity', 'in_batch_dim': [8, 64], 'batch_size': 8, 'flops': 1265603017216}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1898404492032}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1898404492032}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1582003754624}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1898404492032}, {'type': 'perplexity', 'in_batch_dim': [8, 112], 'batch_size': 8, 'flops': 2214805229440}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1898404492032}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1582003754624}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1898404492032}, {'type': 'perplexity', 'in_batch_dim': [8, 64], 'batch_size': 8, 'flops': 1265603017216}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1898404492032}, {'type': 'perplexity', 'in_batch_dim': [8, 112], 'batch_size': 8, 'flops': 2214805229440}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1898404492032}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1898404492032}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1582003754624}, {'type': 'perplexity', 'in_batch_dim': [8, 64], 'batch_size': 8, 'flops': 1265603017216}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1582003754624}, {'type': 'perplexity', 'in_batch_dim': [8, 128], 'batch_size': 8, 'flops': 2531205966848}, {'type': 'perplexity', 'in_batch_dim': [8, 64], 'batch_size': 8, 'flops': 1265603017216}, {'type': 'perplexity', 'in_batch_dim': [8, 128], 'batch_size': 8, 'flops': 2531205966848}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1898404492032}, {'type': 'perplexity', 'in_batch_dim': [8, 112], 'batch_size': 8, 'flops': 2214805229440}, {'type': 'perplexity', 'in_batch_dim': [8, 112], 'batch_size': 8, 'flops': 2214805229440}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1898404492032}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1898404492032}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1582003754624}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1582003754624}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1582003754624}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1898404492032}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1582003754624}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1582003754624}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1898404492032}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1582003754624}, {'type': 'perplexity', 'in_batch_dim': [8, 64], 'batch_size': 8, 'flops': 1265603017216}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1898404492032}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1898404492032}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1898404492032}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1898404492032}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1582003754624}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1898404492032}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1582003754624}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1582003754624}, {'type': 'perplexity', 'in_batch_dim': [8, 112], 'batch_size': 8, 'flops': 2214805229440}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1582003754624}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1898404492032}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1582003754624}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1582003754624}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1582003754624}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1898404492032}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1582003754624}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1898404492032}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1582003754624}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1582003754624}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1898404492032}, {'type': 'perplexity', 'in_batch_dim': [8, 112], 'batch_size': 8, 'flops': 2214805229440}, {'type': 'perplexity', 'in_batch_dim': [8, 128], 'batch_size': 8, 'flops': 2531205966848}, {'type': 'perplexity', 'in_batch_dim': [8, 112], 'batch_size': 8, 'flops': 2214805229440}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1582003754624}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1898404492032}, {'type': 'perplexity', 'in_batch_dim': [8, 64], 'batch_size': 8, 'flops': 1265603017216}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1898404492032}, {'type': 'perplexity', 'in_batch_dim': [8, 64], 'batch_size': 8, 'flops': 1265603017216}, {'type': 'perplexity', 'in_batch_dim': [8, 144], 'batch_size': 8, 'flops': 2847606704256}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1582003754624}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1898404492032}, {'type': 'perplexity', 'in_batch_dim': [8, 112], 'batch_size': 8, 'flops': 2214805229440}, {'type': 'perplexity', 'in_batch_dim': [8, 144], 'batch_size': 8, 'flops': 2847606704256}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1582003754624}, {'type': 'perplexity', 'in_batch_dim': [8, 112], 'batch_size': 8, 'flops': 2214805229440}, {'type': 'perplexity', 'in_batch_dim': [8, 64], 'batch_size': 8, 'flops': 1265603017216}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1582003754624}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1898404492032}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1898404492032}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1582003754624}, {'type': 'perplexity', 'in_batch_dim': [8, 112], 'batch_size': 8, 'flops': 2214805229440}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1582003754624}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1582003754624}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1582003754624}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1582003754624}, {'type': 'perplexity', 'in_batch_dim': [8, 112], 'batch_size': 8, 'flops': 2214805229440}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1898404492032}, {'type': 'perplexity', 'in_batch_dim': [8, 128], 'batch_size': 8, 'flops': 2531205966848}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1582003754624}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1582003754624}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1898404492032}, {'type': 'perplexity', 'in_batch_dim': [8, 64], 'batch_size': 8, 'flops': 1265603017216}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1898404492032}, {'type': 'perplexity', 'in_batch_dim': [8, 64], 'batch_size': 8, 'flops': 1265603017216}, {'type': 'perplexity', 'in_batch_dim': [8, 112], 'batch_size': 8, 'flops': 2214805229440}, {'type': 'perplexity', 'in_batch_dim': [8, 112], 'batch_size': 8, 'flops': 2214805229440}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1582003754624}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1898404492032}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1582003754624}, {'type': 'perplexity', 'in_batch_dim': [8, 64], 'batch_size': 8, 'flops': 1265603017216}, {'type': 'perplexity', 'in_batch_dim': [8, 64], 'batch_size': 8, 'flops': 1265603017216}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1898404492032}, {'type': 'perplexity', 'in_batch_dim': [8, 64], 'batch_size': 8, 'flops': 1265603017216}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1898404492032}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1898404492032}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1582003754624}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1898404492032}, {'type': 'perplexity', 'in_batch_dim': [8, 144], 'batch_size': 8, 'flops': 2847606704256}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1898404492032}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1582003754624}, {'type': 'perplexity', 'in_batch_dim': [8, 128], 'batch_size': 8, 'flops': 2531205966848}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1898404492032}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1898404492032}, {'type': 'perplexity', 'in_batch_dim': [8, 112], 'batch_size': 8, 'flops': 2214805229440}, {'type': 'perplexity', 'in_batch_dim': [8, 112], 'batch_size': 8, 'flops': 2214805229440}, {'type': 'perplexity', 'in_batch_dim': [8, 64], 'batch_size': 8, 'flops': 1265603017216}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1582003754624}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1898404492032}, {'type': 'perplexity', 'in_batch_dim': [8, 128], 'batch_size': 8, 'flops': 2531205966848}, {'type': 'perplexity', 'in_batch_dim': [8, 144], 'batch_size': 8, 'flops': 2847606704256}, {'type': 'perplexity', 'in_batch_dim': [8, 112], 'batch_size': 8, 'flops': 2214805229440}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1582003754624}, {'type': 'perplexity', 'in_batch_dim': [8, 64], 'batch_size': 8, 'flops': 1265603017216}, {'type': 'perplexity', 'in_batch_dim': [8, 64], 'batch_size': 8, 'flops': 1265603017216}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1898404492032}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1582003754624}, {'type': 'perplexity', 'in_batch_dim': [8, 64], 'batch_size': 8, 'flops': 1265603017216}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1898404492032}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1582003754624}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1898404492032}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1898404492032}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1582003754624}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1582003754624}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1582003754624}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1582003754624}, {'type': 'perplexity', 'in_batch_dim': [3, 48], 'batch_size': 8, 'flops': 949202279808}], 'timestamp': '2025-09-10 02:39:43.700872', 'step': 11650, 'epoch': 2} {'type': 'pplx', 'content': 12620.084464616872, 'timestamp': '2025-09-10 02:39:43.704123', 'step': 11650, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 3322488817984}, 'timestamp': '2025-09-10 02:39:43.733815', 'step': 11650, 'epoch': 2} {'type': 'loss', 'content': 0.07924049347639084, 'timestamp': '2025-09-10 02:39:43.736270', 'step': 11651, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 3322488817984}, 'timestamp': '2025-09-10 02:39:43.766908', 'step': 11651, 'epoch': 2} {'type': 'loss', 'content': 0.11434510350227356, 'timestamp': '2025-09-10 02:39:43.790674', 'step': 11652, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 2847885091968}, 'timestamp': '2025-09-10 02:39:43.820844', 'step': 11652, 'epoch': 2} {'type': 'loss', 'content': 0.10219720005989075, 'timestamp': '2025-09-10 02:39:43.822969', 'step': 11653, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 80], 'flops': 2373281365952}, 'timestamp': '2025-09-10 02:39:43.852804', 'step': 11653, 'epoch': 2} {'type': 'loss', 'content': 0.1854802519083023, 'timestamp': '2025-09-10 02:39:43.855115', 'step': 11654, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 128], 'flops': 3797092544000}, 'timestamp': '2025-09-10 02:39:43.885612', 'step': 11654, 'epoch': 2} {'type': 'loss', 'content': 0.09347514808177948, 'timestamp': '2025-09-10 02:39:43.888017', 'step': 11655, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 128], 'flops': 3797092544000}, 'timestamp': '2025-09-10 02:39:43.919198', 'step': 11655, 'epoch': 2} {'type': 'loss', 'content': 0.083209328353405, 'timestamp': '2025-09-10 02:39:43.942847', 'step': 11656, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 3322488817984}, 'timestamp': '2025-09-10 02:39:43.973351', 'step': 11656, 'epoch': 2} {'type': 'loss', 'content': 0.08289723843336105, 'timestamp': '2025-09-10 02:39:43.975702', 'step': 11657, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 3322488817984}, 'timestamp': '2025-09-10 02:39:44.005850', 'step': 11657, 'epoch': 2} {'type': 'loss', 'content': 0.07100610435009003, 'timestamp': '2025-09-10 02:39:44.012484', 'step': 11658, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 2847885091968}, 'timestamp': '2025-09-10 02:39:44.045434', 'step': 11658, 'epoch': 2} {'type': 'loss', 'content': 0.08699563890695572, 'timestamp': '2025-09-10 02:39:44.047823', 'step': 11659, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 3322488817984}, 'timestamp': '2025-09-10 02:39:44.077785', 'step': 11659, 'epoch': 2} {'type': 'loss', 'content': 0.08786755800247192, 'timestamp': '2025-09-10 02:39:44.101286', 'step': 11660, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 3322488817984}, 'timestamp': '2025-09-10 02:39:44.131891', 'step': 11660, 'epoch': 2} {'type': 'loss', 'content': 0.0794597715139389, 'timestamp': '2025-09-10 02:39:44.134044', 'step': 11661, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 3322488817984}, 'timestamp': '2025-09-10 02:39:44.164547', 'step': 11661, 'epoch': 2} {'type': 'loss', 'content': 0.17293646931648254, 'timestamp': '2025-09-10 02:39:44.167208', 'step': 11662, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 3322488817984}, 'timestamp': '2025-09-10 02:39:44.200138', 'step': 11662, 'epoch': 2} {'type': 'loss', 'content': 0.100436270236969, 'timestamp': '2025-09-10 02:39:44.202401', 'step': 11663, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 144], 'flops': 4271696270016}, 'timestamp': '2025-09-10 02:39:44.232382', 'step': 11663, 'epoch': 2} {'type': 'loss', 'content': 0.15097789466381073, 'timestamp': '2025-09-10 02:39:44.255972', 'step': 11664, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 2847885091968}, 'timestamp': '2025-09-10 02:39:44.286309', 'step': 11664, 'epoch': 2} {'type': 'loss', 'content': 0.05606459453701973, 'timestamp': '2025-09-10 02:39:44.288458', 'step': 11665, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 2847885091968}, 'timestamp': '2025-09-10 02:39:44.319555', 'step': 11665, 'epoch': 2} {'type': 'loss', 'content': 0.11779657006263733, 'timestamp': '2025-09-10 02:39:44.321855', 'step': 11666, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 2847885091968}, 'timestamp': '2025-09-10 02:39:44.351959', 'step': 11666, 'epoch': 2} {'type': 'loss', 'content': 0.08513541519641876, 'timestamp': '2025-09-10 02:39:44.354419', 'step': 11667, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 2847885091968}, 'timestamp': '2025-09-10 02:39:44.383926', 'step': 11667, 'epoch': 2} {'type': 'loss', 'content': 0.07386225461959839, 'timestamp': '2025-09-10 02:39:44.407657', 'step': 11668, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 3322488817984}, 'timestamp': '2025-09-10 02:39:44.438003', 'step': 11668, 'epoch': 2} {'type': 'loss', 'content': 0.14416052401065826, 'timestamp': '2025-09-10 02:39:44.440463', 'step': 11669, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 3322488817984}, 'timestamp': '2025-09-10 02:39:44.470564', 'step': 11669, 'epoch': 2} {'type': 'loss', 'content': 0.12110665440559387, 'timestamp': '2025-09-10 02:39:44.472852', 'step': 11670, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 2847885091968}, 'timestamp': '2025-09-10 02:39:44.505132', 'step': 11670, 'epoch': 2} {'type': 'loss', 'content': 0.062492385506629944, 'timestamp': '2025-09-10 02:39:44.507851', 'step': 11671, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 2847885091968}, 'timestamp': '2025-09-10 02:39:44.538008', 'step': 11671, 'epoch': 2} {'type': 'loss', 'content': 0.23683850467205048, 'timestamp': '2025-09-10 02:39:44.562172', 'step': 11672, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 2847885091968}, 'timestamp': '2025-09-10 02:39:44.593107', 'step': 11672, 'epoch': 2} {'type': 'loss', 'content': 0.07007596641778946, 'timestamp': '2025-09-10 02:39:44.595388', 'step': 11673, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 3322488817984}, 'timestamp': '2025-09-10 02:39:44.625698', 'step': 11673, 'epoch': 2} {'type': 'loss', 'content': 0.06209949776530266, 'timestamp': '2025-09-10 02:39:44.628211', 'step': 11674, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 2847885091968}, 'timestamp': '2025-09-10 02:39:44.657973', 'step': 11674, 'epoch': 2} {'type': 'loss', 'content': 0.1373402625322342, 'timestamp': '2025-09-10 02:39:44.660308', 'step': 11675, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 3322488817984}, 'timestamp': '2025-09-10 02:39:44.690633', 'step': 11675, 'epoch': 2} {'type': 'loss', 'content': 0.12595374882221222, 'timestamp': '2025-09-10 02:39:44.714252', 'step': 11676, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 2847885091968}, 'timestamp': '2025-09-10 02:39:44.744297', 'step': 11676, 'epoch': 2} {'type': 'loss', 'content': 0.120645672082901, 'timestamp': '2025-09-10 02:39:44.746429', 'step': 11677, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 3322488817984}, 'timestamp': '2025-09-10 02:39:44.775997', 'step': 11677, 'epoch': 2} {'type': 'loss', 'content': 0.1067095696926117, 'timestamp': '2025-09-10 02:39:44.778507', 'step': 11678, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 2847885091968}, 'timestamp': '2025-09-10 02:39:44.809089', 'step': 11678, 'epoch': 2} {'type': 'loss', 'content': 0.13803301751613617, 'timestamp': '2025-09-10 02:39:44.811551', 'step': 11679, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 160], 'flops': 4746299996032}, 'timestamp': '2025-09-10 02:39:44.841685', 'step': 11679, 'epoch': 2} {'type': 'loss', 'content': 0.1008620634675026, 'timestamp': '2025-09-10 02:39:44.865390', 'step': 11680, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 3322488817984}, 'timestamp': '2025-09-10 02:39:44.895765', 'step': 11680, 'epoch': 2} {'type': 'loss', 'content': 0.060138534754514694, 'timestamp': '2025-09-10 02:39:44.898370', 'step': 11681, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 2847885091968}, 'timestamp': '2025-09-10 02:39:44.928416', 'step': 11681, 'epoch': 2} {'type': 'loss', 'content': 0.061440516263246536, 'timestamp': '2025-09-10 02:39:44.930930', 'step': 11682, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 3322488817984}, 'timestamp': '2025-09-10 02:39:44.960978', 'step': 11682, 'epoch': 2} {'type': 'loss', 'content': 0.13101135194301605, 'timestamp': '2025-09-10 02:39:44.963349', 'step': 11683, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 3322488817984}, 'timestamp': '2025-09-10 02:39:44.993046', 'step': 11683, 'epoch': 2} {'type': 'loss', 'content': 0.13302737474441528, 'timestamp': '2025-09-10 02:39:45.016707', 'step': 11684, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 3322488817984}, 'timestamp': '2025-09-10 02:39:45.047810', 'step': 11684, 'epoch': 2} {'type': 'loss', 'content': 0.0866454616189003, 'timestamp': '2025-09-10 02:39:45.050713', 'step': 11685, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 144], 'flops': 4271696270016}, 'timestamp': '2025-09-10 02:39:45.082299', 'step': 11685, 'epoch': 2} {'type': 'loss', 'content': 0.13491056859493256, 'timestamp': '2025-09-10 02:39:45.085101', 'step': 11686, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 2847885091968}, 'timestamp': '2025-09-10 02:39:45.117203', 'step': 11686, 'epoch': 2} {'type': 'loss', 'content': 0.17748256027698517, 'timestamp': '2025-09-10 02:39:45.119597', 'step': 11687, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 3322488817984}, 'timestamp': '2025-09-10 02:39:45.150253', 'step': 11687, 'epoch': 2} {'type': 'loss', 'content': 0.1394626796245575, 'timestamp': '2025-09-10 02:39:45.173980', 'step': 11688, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 144], 'flops': 4271696270016}, 'timestamp': '2025-09-10 02:39:45.206125', 'step': 11688, 'epoch': 2} {'type': 'loss', 'content': 0.06172440946102142, 'timestamp': '2025-09-10 02:39:45.208842', 'step': 11689, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 128], 'flops': 3797092544000}, 'timestamp': '2025-09-10 02:39:45.239021', 'step': 11689, 'epoch': 2} {'type': 'loss', 'content': 0.07226542383432388, 'timestamp': '2025-09-10 02:39:45.241864', 'step': 11690, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 128], 'flops': 3797092544000}, 'timestamp': '2025-09-10 02:39:45.271686', 'step': 11690, 'epoch': 2} {'type': 'loss', 'content': 0.05736822634935379, 'timestamp': '2025-09-10 02:39:45.274432', 'step': 11691, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 3322488817984}, 'timestamp': '2025-09-10 02:39:45.304613', 'step': 11691, 'epoch': 2} {'type': 'loss', 'content': 0.10655364394187927, 'timestamp': '2025-09-10 02:39:45.330192', 'step': 11692, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 2847885091968}, 'timestamp': '2025-09-10 02:39:45.360027', 'step': 11692, 'epoch': 2} {'type': 'loss', 'content': 0.1526741236448288, 'timestamp': '2025-09-10 02:39:45.362449', 'step': 11693, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 3322488817984}, 'timestamp': '2025-09-10 02:39:45.392546', 'step': 11693, 'epoch': 2} {'type': 'loss', 'content': 0.1336509883403778, 'timestamp': '2025-09-10 02:39:45.394888', 'step': 11694, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 128], 'flops': 3797092544000}, 'timestamp': '2025-09-10 02:39:45.424624', 'step': 11694, 'epoch': 2} {'type': 'loss', 'content': 0.24040134251117706, 'timestamp': '2025-09-10 02:39:45.430100', 'step': 11695, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 2847885091968}, 'timestamp': '2025-09-10 02:39:45.461024', 'step': 11695, 'epoch': 2} {'type': 'loss', 'content': 0.19822269678115845, 'timestamp': '2025-09-10 02:39:45.484734', 'step': 11696, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 2847885091968}, 'timestamp': '2025-09-10 02:39:45.514789', 'step': 11696, 'epoch': 2} {'type': 'loss', 'content': 0.10551141947507858, 'timestamp': '2025-09-10 02:39:45.517099', 'step': 11697, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 2847885091968}, 'timestamp': '2025-09-10 02:39:45.546788', 'step': 11697, 'epoch': 2} {'type': 'loss', 'content': 0.08053778856992722, 'timestamp': '2025-09-10 02:39:45.549332', 'step': 11698, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 2847885091968}, 'timestamp': '2025-09-10 02:39:45.579746', 'step': 11698, 'epoch': 2} {'type': 'loss', 'content': 0.04635276272892952, 'timestamp': '2025-09-10 02:39:45.582197', 'step': 11699, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 2847885091968}, 'timestamp': '2025-09-10 02:39:45.611541', 'step': 11699, 'epoch': 2} {'type': 'loss', 'content': 0.024860329926013947, 'timestamp': '2025-09-10 02:39:45.635160', 'step': 11700, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 128], 'flops': 3797092544000}, 'timestamp': '2025-09-10 02:39:45.666052', 'step': 11700, 'epoch': 2} {'type': 'loss', 'content': 0.05407353490591049, 'timestamp': '2025-09-10 02:39:45.668453', 'step': 11701, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 3322488817984}, 'timestamp': '2025-09-10 02:39:45.698440', 'step': 11701, 'epoch': 2} {'type': 'loss', 'content': 0.048131052404642105, 'timestamp': '2025-09-10 02:39:45.700730', 'step': 11702, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 144], 'flops': 4271696270016}, 'timestamp': '2025-09-10 02:39:45.732146', 'step': 11702, 'epoch': 2} {'type': 'loss', 'content': 0.16200155019760132, 'timestamp': '2025-09-10 02:39:45.734425', 'step': 11703, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 2847885091968}, 'timestamp': '2025-09-10 02:39:45.764259', 'step': 11703, 'epoch': 2} {'type': 'loss', 'content': 0.13498753309249878, 'timestamp': '2025-09-10 02:39:45.788026', 'step': 11704, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 2847885091968}, 'timestamp': '2025-09-10 02:39:45.818123', 'step': 11704, 'epoch': 2} {'type': 'loss', 'content': 0.07413001358509064, 'timestamp': '2025-09-10 02:39:45.820608', 'step': 11705, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 2847885091968}, 'timestamp': '2025-09-10 02:39:45.851066', 'step': 11705, 'epoch': 2} {'type': 'loss', 'content': 0.09634938091039658, 'timestamp': '2025-09-10 02:39:45.853547', 'step': 11706, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 2847885091968}, 'timestamp': '2025-09-10 02:39:45.883643', 'step': 11706, 'epoch': 2} {'type': 'loss', 'content': 0.2173893004655838, 'timestamp': '2025-09-10 02:39:45.885934', 'step': 11707, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 128], 'flops': 3797092544000}, 'timestamp': '2025-09-10 02:39:45.915550', 'step': 11707, 'epoch': 2} {'type': 'loss', 'content': 0.12209748476743698, 'timestamp': '2025-09-10 02:39:45.939476', 'step': 11708, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 2847885091968}, 'timestamp': '2025-09-10 02:39:45.969725', 'step': 11708, 'epoch': 2} {'type': 'loss', 'content': 0.18322871625423431, 'timestamp': '2025-09-10 02:39:45.971943', 'step': 11709, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 144], 'flops': 4271696270016}, 'timestamp': '2025-09-10 02:39:46.003137', 'step': 11709, 'epoch': 2} {'type': 'loss', 'content': 0.15309558808803558, 'timestamp': '2025-09-10 02:39:46.005605', 'step': 11710, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 2847885091968}, 'timestamp': '2025-09-10 02:39:46.040580', 'step': 11710, 'epoch': 2} {'type': 'loss', 'content': 0.11881320178508759, 'timestamp': '2025-09-10 02:39:46.045255', 'step': 11711, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 3322488817984}, 'timestamp': '2025-09-10 02:39:46.075280', 'step': 11711, 'epoch': 2} {'type': 'loss', 'content': 0.057248275727033615, 'timestamp': '2025-09-10 02:39:46.099018', 'step': 11712, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 128], 'flops': 3797092544000}, 'timestamp': '2025-09-10 02:39:46.130349', 'step': 11712, 'epoch': 2} {'type': 'loss', 'content': 0.10575222969055176, 'timestamp': '2025-09-10 02:39:46.132737', 'step': 11713, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 3322488817984}, 'timestamp': '2025-09-10 02:39:46.163288', 'step': 11713, 'epoch': 2} {'type': 'loss', 'content': 0.1558140367269516, 'timestamp': '2025-09-10 02:39:46.165717', 'step': 11714, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 128], 'flops': 3797092544000}, 'timestamp': '2025-09-10 02:39:46.195866', 'step': 11714, 'epoch': 2} {'type': 'loss', 'content': 0.11704880744218826, 'timestamp': '2025-09-10 02:39:46.198341', 'step': 11715, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 176], 'flops': 5220903722048}, 'timestamp': '2025-09-10 02:39:46.230327', 'step': 11715, 'epoch': 2} {'type': 'loss', 'content': 0.05858144909143448, 'timestamp': '2025-09-10 02:39:46.255071', 'step': 11716, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 160], 'flops': 4746299996032}, 'timestamp': '2025-09-10 02:39:46.285483', 'step': 11716, 'epoch': 2} {'type': 'loss', 'content': 0.18697823584079742, 'timestamp': '2025-09-10 02:39:46.287743', 'step': 11717, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 2847885091968}, 'timestamp': '2025-09-10 02:39:46.317477', 'step': 11717, 'epoch': 2} {'type': 'loss', 'content': 0.21602630615234375, 'timestamp': '2025-09-10 02:39:46.319819', 'step': 11718, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 128], 'flops': 3797092544000}, 'timestamp': '2025-09-10 02:39:46.351615', 'step': 11718, 'epoch': 2} {'type': 'loss', 'content': 0.09469327330589294, 'timestamp': '2025-09-10 02:39:46.353974', 'step': 11719, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 128], 'flops': 3797092544000}, 'timestamp': '2025-09-10 02:39:46.385190', 'step': 11719, 'epoch': 2} {'type': 'loss', 'content': 0.08573845773935318, 'timestamp': '2025-09-10 02:39:46.408655', 'step': 11720, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 144], 'flops': 4271696270016}, 'timestamp': '2025-09-10 02:39:46.439485', 'step': 11720, 'epoch': 2} {'type': 'loss', 'content': 0.1448660045862198, 'timestamp': '2025-09-10 02:39:46.442277', 'step': 11721, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 2847885091968}, 'timestamp': '2025-09-10 02:39:46.473062', 'step': 11721, 'epoch': 2} {'type': 'loss', 'content': 0.09271396696567535, 'timestamp': '2025-09-10 02:39:46.475349', 'step': 11722, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 3322488817984}, 'timestamp': '2025-09-10 02:39:46.505047', 'step': 11722, 'epoch': 2} {'type': 'loss', 'content': 0.11378210037946701, 'timestamp': '2025-09-10 02:39:46.507793', 'step': 11723, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 2847885091968}, 'timestamp': '2025-09-10 02:39:46.538132', 'step': 11723, 'epoch': 2} {'type': 'loss', 'content': 0.07716291397809982, 'timestamp': '2025-09-10 02:39:46.561659', 'step': 11724, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 2847885091968}, 'timestamp': '2025-09-10 02:39:46.593820', 'step': 11724, 'epoch': 2} {'type': 'loss', 'content': 0.08452466130256653, 'timestamp': '2025-09-10 02:39:46.596692', 'step': 11725, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 3322488817984}, 'timestamp': '2025-09-10 02:39:46.627692', 'step': 11725, 'epoch': 2} {'type': 'loss', 'content': 0.1887851357460022, 'timestamp': '2025-09-10 02:39:46.630789', 'step': 11726, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 160], 'flops': 4746299996032}, 'timestamp': '2025-09-10 02:39:46.662125', 'step': 11726, 'epoch': 2} {'type': 'loss', 'content': 0.0699881836771965, 'timestamp': '2025-09-10 02:39:46.664957', 'step': 11727, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 144], 'flops': 4271696270016}, 'timestamp': '2025-09-10 02:39:46.695879', 'step': 11727, 'epoch': 2} {'type': 'loss', 'content': 0.1231875866651535, 'timestamp': '2025-09-10 02:39:46.719663', 'step': 11728, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 144], 'flops': 4271696270016}, 'timestamp': '2025-09-10 02:39:46.752844', 'step': 11728, 'epoch': 2} {'type': 'loss', 'content': 0.13967303931713104, 'timestamp': '2025-09-10 02:39:46.755427', 'step': 11729, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 2847885091968}, 'timestamp': '2025-09-10 02:39:46.787088', 'step': 11729, 'epoch': 2} {'type': 'loss', 'content': 0.052452560514211655, 'timestamp': '2025-09-10 02:39:46.794101', 'step': 11730, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 3322488817984}, 'timestamp': '2025-09-10 02:39:46.825745', 'step': 11730, 'epoch': 2} {'type': 'loss', 'content': 0.07672091573476791, 'timestamp': '2025-09-10 02:39:46.828119', 'step': 11731, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 2847885091968}, 'timestamp': '2025-09-10 02:39:46.865702', 'step': 11731, 'epoch': 2} {'type': 'loss', 'content': 0.11785200238227844, 'timestamp': '2025-09-10 02:39:46.889203', 'step': 11732, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 3322488817984}, 'timestamp': '2025-09-10 02:39:46.920095', 'step': 11732, 'epoch': 2} {'type': 'loss', 'content': 0.1205717995762825, 'timestamp': '2025-09-10 02:39:46.922522', 'step': 11733, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 2847885091968}, 'timestamp': '2025-09-10 02:39:46.954538', 'step': 11733, 'epoch': 2} {'type': 'loss', 'content': 0.1853601634502411, 'timestamp': '2025-09-10 02:39:46.958994', 'step': 11734, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 144], 'flops': 4271696270016}, 'timestamp': '2025-09-10 02:39:46.993423', 'step': 11734, 'epoch': 2} {'type': 'loss', 'content': 0.10874228924512863, 'timestamp': '2025-09-10 02:39:46.995879', 'step': 11735, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 2847885091968}, 'timestamp': '2025-09-10 02:39:47.025575', 'step': 11735, 'epoch': 2} {'type': 'loss', 'content': 0.07598267495632172, 'timestamp': '2025-09-10 02:39:47.049457', 'step': 11736, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 3322488817984}, 'timestamp': '2025-09-10 02:39:47.081499', 'step': 11736, 'epoch': 2} {'type': 'loss', 'content': 0.07872036844491959, 'timestamp': '2025-09-10 02:39:47.083874', 'step': 11737, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 144], 'flops': 4271696270016}, 'timestamp': '2025-09-10 02:39:47.122615', 'step': 11737, 'epoch': 2} {'type': 'loss', 'content': 0.03763706609606743, 'timestamp': '2025-09-10 02:39:47.125022', 'step': 11738, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 144], 'flops': 4271696270016}, 'timestamp': '2025-09-10 02:39:47.154901', 'step': 11738, 'epoch': 2} {'type': 'loss', 'content': 0.13515402376651764, 'timestamp': '2025-09-10 02:39:47.157814', 'step': 11739, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 2847885091968}, 'timestamp': '2025-09-10 02:39:47.188419', 'step': 11739, 'epoch': 2} {'type': 'loss', 'content': 0.1836474984884262, 'timestamp': '2025-09-10 02:39:47.213077', 'step': 11740, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 3322488817984}, 'timestamp': '2025-09-10 02:39:47.245009', 'step': 11740, 'epoch': 2} {'type': 'loss', 'content': 0.10421179234981537, 'timestamp': '2025-09-10 02:39:47.247336', 'step': 11741, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 2847885091968}, 'timestamp': '2025-09-10 02:39:47.277870', 'step': 11741, 'epoch': 2} {'type': 'loss', 'content': 0.04618711769580841, 'timestamp': '2025-09-10 02:39:47.280398', 'step': 11742, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 144], 'flops': 4271696270016}, 'timestamp': '2025-09-10 02:39:47.311811', 'step': 11742, 'epoch': 2} {'type': 'loss', 'content': 0.10876286774873734, 'timestamp': '2025-09-10 02:39:47.314375', 'step': 11743, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 128], 'flops': 3797092544000}, 'timestamp': '2025-09-10 02:39:47.347869', 'step': 11743, 'epoch': 2} {'type': 'loss', 'content': 0.1272541880607605, 'timestamp': '2025-09-10 02:39:47.373294', 'step': 11744, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 3322488817984}, 'timestamp': '2025-09-10 02:39:47.403856', 'step': 11744, 'epoch': 2} {'type': 'loss', 'content': 0.1033649668097496, 'timestamp': '2025-09-10 02:39:47.406577', 'step': 11745, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 3322488817984}, 'timestamp': '2025-09-10 02:39:47.436995', 'step': 11745, 'epoch': 2} {'type': 'loss', 'content': 0.10646561533212662, 'timestamp': '2025-09-10 02:39:47.439614', 'step': 11746, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 80], 'flops': 2373281365952}, 'timestamp': '2025-09-10 02:39:47.473628', 'step': 11746, 'epoch': 2} {'type': 'loss', 'content': 0.15401192009449005, 'timestamp': '2025-09-10 02:39:47.476229', 'step': 11747, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 3322488817984}, 'timestamp': '2025-09-10 02:39:47.506662', 'step': 11747, 'epoch': 2} {'type': 'loss', 'content': 0.07484287023544312, 'timestamp': '2025-09-10 02:39:47.530307', 'step': 11748, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 2847885091968}, 'timestamp': '2025-09-10 02:39:47.562136', 'step': 11748, 'epoch': 2} {'type': 'loss', 'content': 0.08882420510053635, 'timestamp': '2025-09-10 02:39:47.564459', 'step': 11749, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 3322488817984}, 'timestamp': '2025-09-10 02:39:47.596183', 'step': 11749, 'epoch': 2} {'type': 'loss', 'content': 0.2115228772163391, 'timestamp': '2025-09-10 02:39:47.600904', 'step': 11750, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 2847885091968}, 'timestamp': '2025-09-10 02:39:47.631086', 'step': 11750, 'epoch': 2} {'type': 'loss', 'content': 0.10543682426214218, 'timestamp': '2025-09-10 02:39:47.633309', 'step': 11751, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 3322488817984}, 'timestamp': '2025-09-10 02:39:47.663401', 'step': 11751, 'epoch': 2} {'type': 'loss', 'content': 0.07136673480272293, 'timestamp': '2025-09-10 02:39:47.687113', 'step': 11752, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 128], 'flops': 3797092544000}, 'timestamp': '2025-09-10 02:39:47.717525', 'step': 11752, 'epoch': 2} {'type': 'loss', 'content': 0.23302416503429413, 'timestamp': '2025-09-10 02:39:47.721090', 'step': 11753, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 128], 'flops': 3797092544000}, 'timestamp': '2025-09-10 02:39:47.751406', 'step': 11753, 'epoch': 2} {'type': 'loss', 'content': 0.08016610145568848, 'timestamp': '2025-09-10 02:39:47.753709', 'step': 11754, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 160], 'flops': 4746299996032}, 'timestamp': '2025-09-10 02:39:47.784220', 'step': 11754, 'epoch': 2} {'type': 'loss', 'content': 0.09732300043106079, 'timestamp': '2025-09-10 02:39:47.787113', 'step': 11755, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 128], 'flops': 3797092544000}, 'timestamp': '2025-09-10 02:39:47.817188', 'step': 11755, 'epoch': 2} {'type': 'loss', 'content': 0.14814192056655884, 'timestamp': '2025-09-10 02:39:47.841006', 'step': 11756, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 3322488817984}, 'timestamp': '2025-09-10 02:39:47.872007', 'step': 11756, 'epoch': 2} {'type': 'loss', 'content': 0.1497865617275238, 'timestamp': '2025-09-10 02:39:47.874596', 'step': 11757, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 144], 'flops': 4271696270016}, 'timestamp': '2025-09-10 02:39:47.905742', 'step': 11757, 'epoch': 2} {'type': 'loss', 'content': 0.08535978943109512, 'timestamp': '2025-09-10 02:39:47.908596', 'step': 11758, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 3322488817984}, 'timestamp': '2025-09-10 02:39:47.938870', 'step': 11758, 'epoch': 2} {'type': 'loss', 'content': 0.2056356966495514, 'timestamp': '2025-09-10 02:39:47.941412', 'step': 11759, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 3322488817984}, 'timestamp': '2025-09-10 02:39:47.971784', 'step': 11759, 'epoch': 2} {'type': 'loss', 'content': 0.15722769498825073, 'timestamp': '2025-09-10 02:39:47.995679', 'step': 11760, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 176], 'flops': 5220903722048}, 'timestamp': '2025-09-10 02:39:48.026225', 'step': 11760, 'epoch': 2} {'type': 'loss', 'content': 0.16930541396141052, 'timestamp': '2025-09-10 02:39:48.033611', 'step': 11761, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 80], 'flops': 2373281365952}, 'timestamp': '2025-09-10 02:39:48.066783', 'step': 11761, 'epoch': 2} {'type': 'loss', 'content': 0.06822717934846878, 'timestamp': '2025-09-10 02:39:48.069426', 'step': 11762, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 3322488817984}, 'timestamp': '2025-09-10 02:39:48.100200', 'step': 11762, 'epoch': 2} {'type': 'loss', 'content': 0.11175722628831863, 'timestamp': '2025-09-10 02:39:48.102638', 'step': 11763, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 3322488817984}, 'timestamp': '2025-09-10 02:39:48.132466', 'step': 11763, 'epoch': 2} {'type': 'loss', 'content': 0.09443162381649017, 'timestamp': '2025-09-10 02:39:48.156177', 'step': 11764, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 3322488817984}, 'timestamp': '2025-09-10 02:39:48.186649', 'step': 11764, 'epoch': 2} {'type': 'loss', 'content': 0.11781468242406845, 'timestamp': '2025-09-10 02:39:48.190311', 'step': 11765, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 128], 'flops': 3797092544000}, 'timestamp': '2025-09-10 02:39:48.221068', 'step': 11765, 'epoch': 2} {'type': 'loss', 'content': 0.056493766605854034, 'timestamp': '2025-09-10 02:39:48.223355', 'step': 11766, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 160], 'flops': 4746299996032}, 'timestamp': '2025-09-10 02:39:48.255314', 'step': 11766, 'epoch': 2} {'type': 'loss', 'content': 0.09308745712041855, 'timestamp': '2025-09-10 02:39:48.258328', 'step': 11767, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 160], 'flops': 4746299996032}, 'timestamp': '2025-09-10 02:39:48.288009', 'step': 11767, 'epoch': 2} {'type': 'loss', 'content': 0.17867664992809296, 'timestamp': '2025-09-10 02:39:48.311764', 'step': 11768, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 3322488817984}, 'timestamp': '2025-09-10 02:39:48.342588', 'step': 11768, 'epoch': 2} {'type': 'loss', 'content': 0.11903047561645508, 'timestamp': '2025-09-10 02:39:48.345280', 'step': 11769, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 2847885091968}, 'timestamp': '2025-09-10 02:39:48.376363', 'step': 11769, 'epoch': 2} {'type': 'loss', 'content': 0.14284732937812805, 'timestamp': '2025-09-10 02:39:48.378668', 'step': 11770, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 2847885091968}, 'timestamp': '2025-09-10 02:39:48.409200', 'step': 11770, 'epoch': 2} {'type': 'loss', 'content': 0.034092169255018234, 'timestamp': '2025-09-10 02:39:48.411254', 'step': 11771, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 3322488817984}, 'timestamp': '2025-09-10 02:39:48.441474', 'step': 11771, 'epoch': 2} {'type': 'loss', 'content': 0.14813534915447235, 'timestamp': '2025-09-10 02:39:48.465086', 'step': 11772, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 3322488817984}, 'timestamp': '2025-09-10 02:39:48.495187', 'step': 11772, 'epoch': 2} {'type': 'loss', 'content': 0.062306419014930725, 'timestamp': '2025-09-10 02:39:48.497646', 'step': 11773, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 3322488817984}, 'timestamp': '2025-09-10 02:39:48.527965', 'step': 11773, 'epoch': 2} {'type': 'loss', 'content': 0.14904344081878662, 'timestamp': '2025-09-10 02:39:48.532184', 'step': 11774, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 128], 'flops': 3797092544000}, 'timestamp': '2025-09-10 02:39:48.561959', 'step': 11774, 'epoch': 2} {'type': 'loss', 'content': 0.15174807608127594, 'timestamp': '2025-09-10 02:39:48.564395', 'step': 11775, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 2847885091968}, 'timestamp': '2025-09-10 02:39:48.594332', 'step': 11775, 'epoch': 2} {'type': 'loss', 'content': 0.07891271263360977, 'timestamp': '2025-09-10 02:39:48.618045', 'step': 11776, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 144], 'flops': 4271696270016}, 'timestamp': '2025-09-10 02:39:48.649428', 'step': 11776, 'epoch': 2} {'type': 'loss', 'content': 0.21545597910881042, 'timestamp': '2025-09-10 02:39:48.652094', 'step': 11777, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 80], 'flops': 2373281365952}, 'timestamp': '2025-09-10 02:39:48.682122', 'step': 11777, 'epoch': 2} {'type': 'loss', 'content': 0.06291419267654419, 'timestamp': '2025-09-10 02:39:48.684442', 'step': 11778, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 3322488817984}, 'timestamp': '2025-09-10 02:39:48.715174', 'step': 11778, 'epoch': 2} {'type': 'loss', 'content': 0.11189811676740646, 'timestamp': '2025-09-10 02:39:48.717723', 'step': 11779, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 2847885091968}, 'timestamp': '2025-09-10 02:39:48.748938', 'step': 11779, 'epoch': 2} {'type': 'loss', 'content': 0.14600297808647156, 'timestamp': '2025-09-10 02:39:48.772884', 'step': 11780, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 2847885091968}, 'timestamp': '2025-09-10 02:39:48.803548', 'step': 11780, 'epoch': 2} {'type': 'loss', 'content': 0.0873604342341423, 'timestamp': '2025-09-10 02:39:48.806521', 'step': 11781, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 2847885091968}, 'timestamp': '2025-09-10 02:39:48.837184', 'step': 11781, 'epoch': 2} {'type': 'loss', 'content': 0.11528222262859344, 'timestamp': '2025-09-10 02:39:48.839688', 'step': 11782, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 3322488817984}, 'timestamp': '2025-09-10 02:39:48.870038', 'step': 11782, 'epoch': 2} {'type': 'loss', 'content': 0.14160197973251343, 'timestamp': '2025-09-10 02:39:48.872339', 'step': 11783, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 160], 'flops': 4746299996032}, 'timestamp': '2025-09-10 02:39:48.902264', 'step': 11783, 'epoch': 2} {'type': 'loss', 'content': 0.14294877648353577, 'timestamp': '2025-09-10 02:39:48.925865', 'step': 11784, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 144], 'flops': 4271696270016}, 'timestamp': '2025-09-10 02:39:48.956955', 'step': 11784, 'epoch': 2} {'type': 'loss', 'content': 0.08411136269569397, 'timestamp': '2025-09-10 02:39:48.959253', 'step': 11785, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 3322488817984}, 'timestamp': '2025-09-10 02:39:48.989965', 'step': 11785, 'epoch': 2} {'type': 'loss', 'content': 0.14512720704078674, 'timestamp': '2025-09-10 02:39:48.992472', 'step': 11786, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 2847885091968}, 'timestamp': '2025-09-10 02:39:49.022481', 'step': 11786, 'epoch': 2} {'type': 'loss', 'content': 0.13659873604774475, 'timestamp': '2025-09-10 02:39:49.030903', 'step': 11787, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 3322488817984}, 'timestamp': '2025-09-10 02:39:49.069418', 'step': 11787, 'epoch': 2} {'type': 'loss', 'content': 0.07950358092784882, 'timestamp': '2025-09-10 02:39:49.093436', 'step': 11788, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 3322488817984}, 'timestamp': '2025-09-10 02:39:49.124156', 'step': 11788, 'epoch': 2} {'type': 'loss', 'content': 0.10962416976690292, 'timestamp': '2025-09-10 02:39:49.126748', 'step': 11789, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 128], 'flops': 3797092544000}, 'timestamp': '2025-09-10 02:39:49.157311', 'step': 11789, 'epoch': 2} {'type': 'loss', 'content': 0.18426138162612915, 'timestamp': '2025-09-10 02:39:49.160506', 'step': 11790, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 2847885091968}, 'timestamp': '2025-09-10 02:39:49.191641', 'step': 11790, 'epoch': 2} {'type': 'loss', 'content': 0.12468327581882477, 'timestamp': '2025-09-10 02:39:49.193885', 'step': 11791, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 3322488817984}, 'timestamp': '2025-09-10 02:39:49.223467', 'step': 11791, 'epoch': 2} {'type': 'loss', 'content': 0.08827821165323257, 'timestamp': '2025-09-10 02:39:49.247124', 'step': 11792, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 3322488817984}, 'timestamp': '2025-09-10 02:39:49.277854', 'step': 11792, 'epoch': 2} {'type': 'loss', 'content': 0.12545709311962128, 'timestamp': '2025-09-10 02:39:49.280120', 'step': 11793, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 2847885091968}, 'timestamp': '2025-09-10 02:39:49.310631', 'step': 11793, 'epoch': 2} {'type': 'loss', 'content': 0.05587627366185188, 'timestamp': '2025-09-10 02:39:49.312967', 'step': 11794, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 3322488817984}, 'timestamp': '2025-09-10 02:39:49.342451', 'step': 11794, 'epoch': 2} {'type': 'loss', 'content': 0.10681827366352081, 'timestamp': '2025-09-10 02:39:49.345416', 'step': 11795, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 128], 'flops': 3797092544000}, 'timestamp': '2025-09-10 02:39:49.378403', 'step': 11795, 'epoch': 2} {'type': 'loss', 'content': 0.14409630000591278, 'timestamp': '2025-09-10 02:39:49.402287', 'step': 11796, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 2847885091968}, 'timestamp': '2025-09-10 02:39:49.432540', 'step': 11796, 'epoch': 2} {'type': 'loss', 'content': 0.08467599004507065, 'timestamp': '2025-09-10 02:39:49.434868', 'step': 11797, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 3322488817984}, 'timestamp': '2025-09-10 02:39:49.464628', 'step': 11797, 'epoch': 2} {'type': 'loss', 'content': 0.08173337578773499, 'timestamp': '2025-09-10 02:39:49.467105', 'step': 11798, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 3322488817984}, 'timestamp': '2025-09-10 02:39:49.496958', 'step': 11798, 'epoch': 2} {'type': 'loss', 'content': 0.12517264485359192, 'timestamp': '2025-09-10 02:39:49.499503', 'step': 11799, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 3322488817984}, 'timestamp': '2025-09-10 02:39:49.530978', 'step': 11799, 'epoch': 2} {'type': 'loss', 'content': 0.0639193132519722, 'timestamp': '2025-09-10 02:39:49.554540', 'step': 11800, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 144], 'flops': 4271696270016}, 'timestamp': '2025-09-10 02:39:49.585161', 'step': 11800, 'epoch': 2} {'type': 'loss', 'content': 0.10182704031467438, 'timestamp': '2025-09-10 02:39:49.588091', 'step': 11801, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 144], 'flops': 4271696270016}, 'timestamp': '2025-09-10 02:39:49.620386', 'step': 11801, 'epoch': 2} {'type': 'loss', 'content': 0.09885375201702118, 'timestamp': '2025-09-10 02:39:49.622821', 'step': 11802, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 3322488817984}, 'timestamp': '2025-09-10 02:39:49.652821', 'step': 11802, 'epoch': 2} {'type': 'loss', 'content': 0.09469403326511383, 'timestamp': '2025-09-10 02:39:49.655107', 'step': 11803, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 2847885091968}, 'timestamp': '2025-09-10 02:39:49.685005', 'step': 11803, 'epoch': 2} {'type': 'loss', 'content': 0.1654348373413086, 'timestamp': '2025-09-10 02:39:49.708631', 'step': 11804, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 3322488817984}, 'timestamp': '2025-09-10 02:39:49.740131', 'step': 11804, 'epoch': 2} {'type': 'loss', 'content': 0.08110513538122177, 'timestamp': '2025-09-10 02:39:49.743000', 'step': 11805, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 2847885091968}, 'timestamp': '2025-09-10 02:39:49.774783', 'step': 11805, 'epoch': 2} {'type': 'loss', 'content': 0.16011296212673187, 'timestamp': '2025-09-10 02:39:49.777423', 'step': 11806, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 144], 'flops': 4271696270016}, 'timestamp': '2025-09-10 02:39:49.807473', 'step': 11806, 'epoch': 2} {'type': 'loss', 'content': 0.17236657440662384, 'timestamp': '2025-09-10 02:39:49.810077', 'step': 11807, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 2847885091968}, 'timestamp': '2025-09-10 02:39:49.840671', 'step': 11807, 'epoch': 2} {'type': 'loss', 'content': 0.1707107126712799, 'timestamp': '2025-09-10 02:39:49.864351', 'step': 11808, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 128], 'flops': 3797092544000}, 'timestamp': '2025-09-10 02:39:49.895029', 'step': 11808, 'epoch': 2} {'type': 'loss', 'content': 0.10035788267850876, 'timestamp': '2025-09-10 02:39:49.897092', 'step': 11809, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 3322488817984}, 'timestamp': '2025-09-10 02:39:49.927689', 'step': 11809, 'epoch': 2} {'type': 'loss', 'content': 0.16371096670627594, 'timestamp': '2025-09-10 02:39:49.930102', 'step': 11810, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 144], 'flops': 4271696270016}, 'timestamp': '2025-09-10 02:39:49.962027', 'step': 11810, 'epoch': 2} {'type': 'loss', 'content': 0.07829233258962631, 'timestamp': '2025-09-10 02:39:49.964290', 'step': 11811, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 2847885091968}, 'timestamp': '2025-09-10 02:39:49.994249', 'step': 11811, 'epoch': 2} {'type': 'loss', 'content': 0.11574066430330276, 'timestamp': '2025-09-10 02:39:50.018183', 'step': 11812, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 2847885091968}, 'timestamp': '2025-09-10 02:39:50.048697', 'step': 11812, 'epoch': 2} {'type': 'loss', 'content': 0.09840912371873856, 'timestamp': '2025-09-10 02:39:50.052799', 'step': 11813, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 3322488817984}, 'timestamp': '2025-09-10 02:39:50.083317', 'step': 11813, 'epoch': 2} {'type': 'loss', 'content': 0.09612604230642319, 'timestamp': '2025-09-10 02:39:50.085942', 'step': 11814, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 2847885091968}, 'timestamp': '2025-09-10 02:39:50.115703', 'step': 11814, 'epoch': 2} {'type': 'loss', 'content': 0.18779700994491577, 'timestamp': '2025-09-10 02:39:50.118241', 'step': 11815, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 128], 'flops': 3797092544000}, 'timestamp': '2025-09-10 02:39:50.148683', 'step': 11815, 'epoch': 2} {'type': 'loss', 'content': 0.04289665445685387, 'timestamp': '2025-09-10 02:39:50.174726', 'step': 11816, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 3322488817984}, 'timestamp': '2025-09-10 02:39:50.205917', 'step': 11816, 'epoch': 2} {'type': 'loss', 'content': 0.15248815715312958, 'timestamp': '2025-09-10 02:39:50.209958', 'step': 11817, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 3322488817984}, 'timestamp': '2025-09-10 02:39:50.240906', 'step': 11817, 'epoch': 2} {'type': 'loss', 'content': 0.1722419559955597, 'timestamp': '2025-09-10 02:39:50.243363', 'step': 11818, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 3322488817984}, 'timestamp': '2025-09-10 02:39:50.273252', 'step': 11818, 'epoch': 2} {'type': 'loss', 'content': 0.18449212610721588, 'timestamp': '2025-09-10 02:39:50.276256', 'step': 11819, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 128], 'flops': 3797092544000}, 'timestamp': '2025-09-10 02:39:50.306040', 'step': 11819, 'epoch': 2} {'type': 'loss', 'content': 0.03308643400669098, 'timestamp': '2025-09-10 02:39:50.329485', 'step': 11820, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 144], 'flops': 4271696270016}, 'timestamp': '2025-09-10 02:39:50.359953', 'step': 11820, 'epoch': 2} {'type': 'loss', 'content': 0.02690030075609684, 'timestamp': '2025-09-10 02:39:50.362414', 'step': 11821, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 128], 'flops': 3797092544000}, 'timestamp': '2025-09-10 02:39:50.393020', 'step': 11821, 'epoch': 2} {'type': 'loss', 'content': 0.16303686797618866, 'timestamp': '2025-09-10 02:39:50.395149', 'step': 11822, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 2847885091968}, 'timestamp': '2025-09-10 02:39:50.424411', 'step': 11822, 'epoch': 2} {'type': 'loss', 'content': 0.04859809949994087, 'timestamp': '2025-09-10 02:39:50.426823', 'step': 11823, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 160], 'flops': 4746299996032}, 'timestamp': '2025-09-10 02:39:50.457641', 'step': 11823, 'epoch': 2} {'type': 'loss', 'content': 0.10659000277519226, 'timestamp': '2025-09-10 02:39:50.481539', 'step': 11824, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 2847885091968}, 'timestamp': '2025-09-10 02:39:50.511963', 'step': 11824, 'epoch': 2} {'type': 'loss', 'content': 0.13634608685970306, 'timestamp': '2025-09-10 02:39:50.514489', 'step': 11825, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 3322488817984}, 'timestamp': '2025-09-10 02:39:50.545336', 'step': 11825, 'epoch': 2} {'type': 'loss', 'content': 0.10516639053821564, 'timestamp': '2025-09-10 02:39:50.547889', 'step': 11826, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 3322488817984}, 'timestamp': '2025-09-10 02:39:50.578438', 'step': 11826, 'epoch': 2} {'type': 'loss', 'content': 0.0760781392455101, 'timestamp': '2025-09-10 02:39:50.580880', 'step': 11827, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 144], 'flops': 4271696270016}, 'timestamp': '2025-09-10 02:39:50.611564', 'step': 11827, 'epoch': 2} {'type': 'loss', 'content': 0.1881784051656723, 'timestamp': '2025-09-10 02:39:50.635224', 'step': 11828, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 3322488817984}, 'timestamp': '2025-09-10 02:39:50.665328', 'step': 11828, 'epoch': 2} {'type': 'loss', 'content': 0.09303057193756104, 'timestamp': '2025-09-10 02:39:50.667856', 'step': 11829, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 144], 'flops': 4271696270016}, 'timestamp': '2025-09-10 02:39:50.697958', 'step': 11829, 'epoch': 2} {'type': 'loss', 'content': 0.23665478825569153, 'timestamp': '2025-09-10 02:39:50.700853', 'step': 11830, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 3322488817984}, 'timestamp': '2025-09-10 02:39:50.730920', 'step': 11830, 'epoch': 2} {'type': 'loss', 'content': 0.12957321107387543, 'timestamp': '2025-09-10 02:39:50.733400', 'step': 11831, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 128], 'flops': 3797092544000}, 'timestamp': '2025-09-10 02:39:50.763164', 'step': 11831, 'epoch': 2} {'type': 'loss', 'content': 0.18315434455871582, 'timestamp': '2025-09-10 02:39:50.788088', 'step': 11832, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 3322488817984}, 'timestamp': '2025-09-10 02:39:50.818466', 'step': 11832, 'epoch': 2} {'type': 'loss', 'content': 0.11202001571655273, 'timestamp': '2025-09-10 02:39:50.820987', 'step': 11833, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 128], 'flops': 3797092544000}, 'timestamp': '2025-09-10 02:39:50.850781', 'step': 11833, 'epoch': 2} {'type': 'loss', 'content': 0.19932016730308533, 'timestamp': '2025-09-10 02:39:50.853620', 'step': 11834, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 128], 'flops': 3797092544000}, 'timestamp': '2025-09-10 02:39:50.883668', 'step': 11834, 'epoch': 2} {'type': 'loss', 'content': 0.15725477039813995, 'timestamp': '2025-09-10 02:39:50.886241', 'step': 11835, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 2847885091968}, 'timestamp': '2025-09-10 02:39:50.916248', 'step': 11835, 'epoch': 2} {'type': 'loss', 'content': 0.10659562051296234, 'timestamp': '2025-09-10 02:39:50.940529', 'step': 11836, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 160], 'flops': 4746299996032}, 'timestamp': '2025-09-10 02:39:50.978339', 'step': 11836, 'epoch': 2} {'type': 'loss', 'content': 0.053278982639312744, 'timestamp': '2025-09-10 02:39:50.980857', 'step': 11837, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 3322488817984}, 'timestamp': '2025-09-10 02:39:51.012567', 'step': 11837, 'epoch': 2} {'type': 'loss', 'content': 0.0950065404176712, 'timestamp': '2025-09-10 02:39:51.015786', 'step': 11838, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 144], 'flops': 4271696270016}, 'timestamp': '2025-09-10 02:39:51.049948', 'step': 11838, 'epoch': 2} {'type': 'loss', 'content': 0.13212208449840546, 'timestamp': '2025-09-10 02:39:51.053211', 'step': 11839, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 2847885091968}, 'timestamp': '2025-09-10 02:39:51.092681', 'step': 11839, 'epoch': 2} {'type': 'loss', 'content': 0.10422077029943466, 'timestamp': '2025-09-10 02:39:51.117101', 'step': 11840, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 128], 'flops': 3797092544000}, 'timestamp': '2025-09-10 02:39:51.148175', 'step': 11840, 'epoch': 2} {'type': 'loss', 'content': 0.15659326314926147, 'timestamp': '2025-09-10 02:39:51.150727', 'step': 11841, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 3322488817984}, 'timestamp': '2025-09-10 02:39:51.180819', 'step': 11841, 'epoch': 2} {'type': 'loss', 'content': 0.1416698843240738, 'timestamp': '2025-09-10 02:39:51.183311', 'step': 11842, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 3322488817984}, 'timestamp': '2025-09-10 02:39:51.213666', 'step': 11842, 'epoch': 2} {'type': 'loss', 'content': 0.10496242344379425, 'timestamp': '2025-09-10 02:39:51.216260', 'step': 11843, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 3322488817984}, 'timestamp': '2025-09-10 02:39:51.245655', 'step': 11843, 'epoch': 2} {'type': 'loss', 'content': 0.04116739332675934, 'timestamp': '2025-09-10 02:39:51.269528', 'step': 11844, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 2847885091968}, 'timestamp': '2025-09-10 02:39:51.299919', 'step': 11844, 'epoch': 2} {'type': 'loss', 'content': 0.1381227672100067, 'timestamp': '2025-09-10 02:39:51.302268', 'step': 11845, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 3322488817984}, 'timestamp': '2025-09-10 02:39:51.332184', 'step': 11845, 'epoch': 2} {'type': 'loss', 'content': 0.11646692454814911, 'timestamp': '2025-09-10 02:39:51.334827', 'step': 11846, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 2847885091968}, 'timestamp': '2025-09-10 02:39:51.364903', 'step': 11846, 'epoch': 2} {'type': 'loss', 'content': 0.1273338794708252, 'timestamp': '2025-09-10 02:39:51.368979', 'step': 11847, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 2847885091968}, 'timestamp': '2025-09-10 02:39:51.399373', 'step': 11847, 'epoch': 2} {'type': 'loss', 'content': 0.02290821634232998, 'timestamp': '2025-09-10 02:39:51.422925', 'step': 11848, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 3322488817984}, 'timestamp': '2025-09-10 02:39:51.453076', 'step': 11848, 'epoch': 2} {'type': 'loss', 'content': 0.12694375216960907, 'timestamp': '2025-09-10 02:39:51.456234', 'step': 11849, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 2847885091968}, 'timestamp': '2025-09-10 02:39:51.486713', 'step': 11849, 'epoch': 2} {'type': 'loss', 'content': 0.05521061643958092, 'timestamp': '2025-09-10 02:39:51.489217', 'step': 11850, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 128], 'flops': 3797092544000}, 'timestamp': '2025-09-10 02:39:51.519543', 'step': 11850, 'epoch': 2} {'type': 'loss', 'content': 0.09174388647079468, 'timestamp': '2025-09-10 02:39:51.521981', 'step': 11851, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 128], 'flops': 3797092544000}, 'timestamp': '2025-09-10 02:39:51.551825', 'step': 11851, 'epoch': 2} {'type': 'loss', 'content': 0.12050771713256836, 'timestamp': '2025-09-10 02:39:51.575936', 'step': 11852, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 3322488817984}, 'timestamp': '2025-09-10 02:39:51.606368', 'step': 11852, 'epoch': 2} {'type': 'loss', 'content': 0.04763723909854889, 'timestamp': '2025-09-10 02:39:51.608509', 'step': 11853, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 2847885091968}, 'timestamp': '2025-09-10 02:39:51.638406', 'step': 11853, 'epoch': 2} {'type': 'loss', 'content': 0.14931152760982513, 'timestamp': '2025-09-10 02:39:51.640711', 'step': 11854, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 3322488817984}, 'timestamp': '2025-09-10 02:39:51.671211', 'step': 11854, 'epoch': 2} {'type': 'loss', 'content': 0.14059008657932281, 'timestamp': '2025-09-10 02:39:51.674716', 'step': 11855, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 3322488817984}, 'timestamp': '2025-09-10 02:39:51.705401', 'step': 11855, 'epoch': 2} {'type': 'loss', 'content': 0.19798342883586884, 'timestamp': '2025-09-10 02:39:51.729046', 'step': 11856, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 128], 'flops': 3797092544000}, 'timestamp': '2025-09-10 02:39:51.759480', 'step': 11856, 'epoch': 2} {'type': 'loss', 'content': 0.1286136507987976, 'timestamp': '2025-09-10 02:39:51.761834', 'step': 11857, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 3322488817984}, 'timestamp': '2025-09-10 02:39:51.791717', 'step': 11857, 'epoch': 2} {'type': 'loss', 'content': 0.11483961343765259, 'timestamp': '2025-09-10 02:39:51.794105', 'step': 11858, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 2847885091968}, 'timestamp': '2025-09-10 02:39:51.824512', 'step': 11858, 'epoch': 2} {'type': 'loss', 'content': 0.09259379655122757, 'timestamp': '2025-09-10 02:39:51.827001', 'step': 11859, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 3322488817984}, 'timestamp': '2025-09-10 02:39:51.857072', 'step': 11859, 'epoch': 2} {'type': 'loss', 'content': 0.15024296939373016, 'timestamp': '2025-09-10 02:39:51.880965', 'step': 11860, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 3322488817984}, 'timestamp': '2025-09-10 02:39:51.911369', 'step': 11860, 'epoch': 2} {'type': 'loss', 'content': 0.1081901416182518, 'timestamp': '2025-09-10 02:39:51.913983', 'step': 11861, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 128], 'flops': 3797092544000}, 'timestamp': '2025-09-10 02:39:51.944207', 'step': 11861, 'epoch': 2} {'type': 'loss', 'content': 0.20889584720134735, 'timestamp': '2025-09-10 02:39:51.958424', 'step': 11862, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 80], 'flops': 2373281365952}, 'timestamp': '2025-09-10 02:39:51.988887', 'step': 11862, 'epoch': 2} {'type': 'loss', 'content': 0.13391819596290588, 'timestamp': '2025-09-10 02:39:51.991422', 'step': 11863, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 3322488817984}, 'timestamp': '2025-09-10 02:39:52.022313', 'step': 11863, 'epoch': 2} {'type': 'loss', 'content': 0.09228412806987762, 'timestamp': '2025-09-10 02:39:52.046084', 'step': 11864, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 2847885091968}, 'timestamp': '2025-09-10 02:39:52.076479', 'step': 11864, 'epoch': 2} {'type': 'loss', 'content': 0.05339088290929794, 'timestamp': '2025-09-10 02:39:52.079725', 'step': 11865, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 128], 'flops': 3797092544000}, 'timestamp': '2025-09-10 02:39:52.110441', 'step': 11865, 'epoch': 2} {'type': 'loss', 'content': 0.1334245204925537, 'timestamp': '2025-09-10 02:39:52.113207', 'step': 11866, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 3322488817984}, 'timestamp': '2025-09-10 02:39:52.143920', 'step': 11866, 'epoch': 2} {'type': 'loss', 'content': 0.06203112006187439, 'timestamp': '2025-09-10 02:39:52.146165', 'step': 11867, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 144], 'flops': 4271696270016}, 'timestamp': '2025-09-10 02:39:52.176135', 'step': 11867, 'epoch': 2} {'type': 'loss', 'content': 0.12751927971839905, 'timestamp': '2025-09-10 02:39:52.201140', 'step': 11868, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 128], 'flops': 3797092544000}, 'timestamp': '2025-09-10 02:39:52.231815', 'step': 11868, 'epoch': 2} {'type': 'loss', 'content': 0.06903765350580215, 'timestamp': '2025-09-10 02:39:52.234142', 'step': 11869, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 2847885091968}, 'timestamp': '2025-09-10 02:39:52.264376', 'step': 11869, 'epoch': 2} {'type': 'loss', 'content': 0.09028120338916779, 'timestamp': '2025-09-10 02:39:52.266719', 'step': 11870, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 160], 'flops': 4746299996032}, 'timestamp': '2025-09-10 02:39:52.296943', 'step': 11870, 'epoch': 2} {'type': 'loss', 'content': 0.15080486238002777, 'timestamp': '2025-09-10 02:39:52.299740', 'step': 11871, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 80], 'flops': 2373281365952}, 'timestamp': '2025-09-10 02:39:52.329913', 'step': 11871, 'epoch': 2} {'type': 'loss', 'content': 0.09674253314733505, 'timestamp': '2025-09-10 02:39:52.353691', 'step': 11872, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 80], 'flops': 2373281365952}, 'timestamp': '2025-09-10 02:39:52.383967', 'step': 11872, 'epoch': 2} {'type': 'loss', 'content': 0.1303333342075348, 'timestamp': '2025-09-10 02:39:52.386639', 'step': 11873, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 3322488817984}, 'timestamp': '2025-09-10 02:39:52.416763', 'step': 11873, 'epoch': 2} {'type': 'loss', 'content': 0.13787059485912323, 'timestamp': '2025-09-10 02:39:52.419510', 'step': 11874, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 2847885091968}, 'timestamp': '2025-09-10 02:39:52.449120', 'step': 11874, 'epoch': 2} {'type': 'loss', 'content': 0.03225287050008774, 'timestamp': '2025-09-10 02:39:52.453780', 'step': 11875, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 3322488817984}, 'timestamp': '2025-09-10 02:39:52.485273', 'step': 11875, 'epoch': 2} {'type': 'loss', 'content': 0.09143437445163727, 'timestamp': '2025-09-10 02:39:52.508692', 'step': 11876, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 3322488817984}, 'timestamp': '2025-09-10 02:39:52.539059', 'step': 11876, 'epoch': 2} {'type': 'loss', 'content': 0.08593641221523285, 'timestamp': '2025-09-10 02:39:52.541403', 'step': 11877, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 3322488817984}, 'timestamp': '2025-09-10 02:39:52.572194', 'step': 11877, 'epoch': 2} {'type': 'loss', 'content': 0.13248729705810547, 'timestamp': '2025-09-10 02:39:52.574397', 'step': 11878, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 2847885091968}, 'timestamp': '2025-09-10 02:39:52.605584', 'step': 11878, 'epoch': 2} {'type': 'loss', 'content': 0.11743023991584778, 'timestamp': '2025-09-10 02:39:52.608108', 'step': 11879, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 3322488817984}, 'timestamp': '2025-09-10 02:39:52.638849', 'step': 11879, 'epoch': 2} {'type': 'loss', 'content': 0.1341569423675537, 'timestamp': '2025-09-10 02:39:52.662587', 'step': 11880, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 3322488817984}, 'timestamp': '2025-09-10 02:39:52.694195', 'step': 11880, 'epoch': 2} {'type': 'loss', 'content': 0.09687932580709457, 'timestamp': '2025-09-10 02:39:52.698281', 'step': 11881, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 160], 'flops': 4746299996032}, 'timestamp': '2025-09-10 02:39:52.733429', 'step': 11881, 'epoch': 2} {'type': 'loss', 'content': 0.12100506573915482, 'timestamp': '2025-09-10 02:39:52.736041', 'step': 11882, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 3322488817984}, 'timestamp': '2025-09-10 02:39:52.766054', 'step': 11882, 'epoch': 2} {'type': 'loss', 'content': 0.16563169658184052, 'timestamp': '2025-09-10 02:39:52.768552', 'step': 11883, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 144], 'flops': 4271696270016}, 'timestamp': '2025-09-10 02:39:52.799158', 'step': 11883, 'epoch': 2} {'type': 'loss', 'content': 0.10742789506912231, 'timestamp': '2025-09-10 02:39:52.822721', 'step': 11884, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 2847885091968}, 'timestamp': '2025-09-10 02:39:52.853257', 'step': 11884, 'epoch': 2} {'type': 'loss', 'content': 0.09744879603385925, 'timestamp': '2025-09-10 02:39:52.855726', 'step': 11885, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 3322488817984}, 'timestamp': '2025-09-10 02:39:52.886004', 'step': 11885, 'epoch': 2} {'type': 'loss', 'content': 0.16384422779083252, 'timestamp': '2025-09-10 02:39:52.889352', 'step': 11886, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 128], 'flops': 3797092544000}, 'timestamp': '2025-09-10 02:39:52.920773', 'step': 11886, 'epoch': 2} {'type': 'loss', 'content': 0.08938515931367874, 'timestamp': '2025-09-10 02:39:52.924516', 'step': 11887, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 3322488817984}, 'timestamp': '2025-09-10 02:39:52.955271', 'step': 11887, 'epoch': 2} {'type': 'loss', 'content': 0.06573034077882767, 'timestamp': '2025-09-10 02:39:52.978988', 'step': 11888, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 2847885091968}, 'timestamp': '2025-09-10 02:39:53.012946', 'step': 11888, 'epoch': 2} {'type': 'loss', 'content': 0.1450648158788681, 'timestamp': '2025-09-10 02:39:53.015467', 'step': 11889, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 2847885091968}, 'timestamp': '2025-09-10 02:39:53.045325', 'step': 11889, 'epoch': 2} {'type': 'loss', 'content': 0.10341044515371323, 'timestamp': '2025-09-10 02:39:53.052017', 'step': 11890, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 128], 'flops': 3797092544000}, 'timestamp': '2025-09-10 02:39:53.083812', 'step': 11890, 'epoch': 2} {'type': 'loss', 'content': 0.09328719228506088, 'timestamp': '2025-09-10 02:39:53.087684', 'step': 11891, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 2847885091968}, 'timestamp': '2025-09-10 02:39:53.122414', 'step': 11891, 'epoch': 2} {'type': 'loss', 'content': 0.14997152984142303, 'timestamp': '2025-09-10 02:39:53.145970', 'step': 11892, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 3322488817984}, 'timestamp': '2025-09-10 02:39:53.176378', 'step': 11892, 'epoch': 2} {'type': 'loss', 'content': 0.11263635009527206, 'timestamp': '2025-09-10 02:39:53.178640', 'step': 11893, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 2847885091968}, 'timestamp': '2025-09-10 02:39:53.208446', 'step': 11893, 'epoch': 2} {'type': 'loss', 'content': 0.11843886226415634, 'timestamp': '2025-09-10 02:39:53.210718', 'step': 11894, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 3322488817984}, 'timestamp': '2025-09-10 02:39:53.240948', 'step': 11894, 'epoch': 2} {'type': 'loss', 'content': 0.24257346987724304, 'timestamp': '2025-09-10 02:39:53.243237', 'step': 11895, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 2847885091968}, 'timestamp': '2025-09-10 02:39:53.272685', 'step': 11895, 'epoch': 2} {'type': 'loss', 'content': 0.045115403831005096, 'timestamp': '2025-09-10 02:39:53.296316', 'step': 11896, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 2847885091968}, 'timestamp': '2025-09-10 02:39:53.327012', 'step': 11896, 'epoch': 2} {'type': 'loss', 'content': 0.1682790368795395, 'timestamp': '2025-09-10 02:39:53.329380', 'step': 11897, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 2847885091968}, 'timestamp': '2025-09-10 02:39:53.359707', 'step': 11897, 'epoch': 2} {'type': 'loss', 'content': 0.1033775806427002, 'timestamp': '2025-09-10 02:39:53.361960', 'step': 11898, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 128], 'flops': 3797092544000}, 'timestamp': '2025-09-10 02:39:53.392180', 'step': 11898, 'epoch': 2} {'type': 'loss', 'content': 0.11169056594371796, 'timestamp': '2025-09-10 02:39:53.396142', 'step': 11899, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 3322488817984}, 'timestamp': '2025-09-10 02:39:53.426341', 'step': 11899, 'epoch': 2} {'type': 'loss', 'content': 0.09816931933164597, 'timestamp': '2025-09-10 02:39:53.450125', 'step': 11900, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 144], 'flops': 4271696270016}, 'timestamp': '2025-09-10 02:39:53.481539', 'step': 11900, 'epoch': 2} {'type': 'loss', 'content': 0.19317351281642914, 'timestamp': '2025-09-10 02:39:53.483882', 'step': 11901, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 3322488817984}, 'timestamp': '2025-09-10 02:39:53.513874', 'step': 11901, 'epoch': 2} {'type': 'loss', 'content': 0.13242675364017487, 'timestamp': '2025-09-10 02:39:53.516163', 'step': 11902, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 3322488817984}, 'timestamp': '2025-09-10 02:39:53.546312', 'step': 11902, 'epoch': 2} {'type': 'loss', 'content': 0.13739484548568726, 'timestamp': '2025-09-10 02:39:53.548692', 'step': 11903, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 3322488817984}, 'timestamp': '2025-09-10 02:39:53.579493', 'step': 11903, 'epoch': 2} {'type': 'loss', 'content': 0.1633649468421936, 'timestamp': '2025-09-10 02:39:53.603855', 'step': 11904, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 128], 'flops': 3797092544000}, 'timestamp': '2025-09-10 02:39:53.640928', 'step': 11904, 'epoch': 2} {'type': 'loss', 'content': 0.13517600297927856, 'timestamp': '2025-09-10 02:39:53.643608', 'step': 11905, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 3322488817984}, 'timestamp': '2025-09-10 02:39:53.683930', 'step': 11905, 'epoch': 2} {'type': 'loss', 'content': 0.0758550688624382, 'timestamp': '2025-09-10 02:39:53.686891', 'step': 11906, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 3322488817984}, 'timestamp': '2025-09-10 02:39:53.720501', 'step': 11906, 'epoch': 2} {'type': 'loss', 'content': 0.07315869629383087, 'timestamp': '2025-09-10 02:39:53.723218', 'step': 11907, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 2847885091968}, 'timestamp': '2025-09-10 02:39:53.759443', 'step': 11907, 'epoch': 2} {'type': 'loss', 'content': 0.1346740424633026, 'timestamp': '2025-09-10 02:39:53.783319', 'step': 11908, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 128], 'flops': 3797092544000}, 'timestamp': '2025-09-10 02:39:53.821072', 'step': 11908, 'epoch': 2} {'type': 'loss', 'content': 0.06106353551149368, 'timestamp': '2025-09-10 02:39:53.824423', 'step': 11909, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 80], 'flops': 2373281365952}, 'timestamp': '2025-09-10 02:39:53.856016', 'step': 11909, 'epoch': 2} {'type': 'loss', 'content': 0.19559499621391296, 'timestamp': '2025-09-10 02:39:53.859377', 'step': 11910, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 2847885091968}, 'timestamp': '2025-09-10 02:39:53.889915', 'step': 11910, 'epoch': 2} {'type': 'loss', 'content': 0.12555363774299622, 'timestamp': '2025-09-10 02:39:53.892481', 'step': 11911, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 160], 'flops': 4746299996032}, 'timestamp': '2025-09-10 02:39:53.925593', 'step': 11911, 'epoch': 2} {'type': 'loss', 'content': 0.09698715806007385, 'timestamp': '2025-09-10 02:39:53.951866', 'step': 11912, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 3322488817984}, 'timestamp': '2025-09-10 02:39:53.982777', 'step': 11912, 'epoch': 2} {'type': 'loss', 'content': 0.09974286705255508, 'timestamp': '2025-09-10 02:39:53.988114', 'step': 11913, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 3322488817984}, 'timestamp': '2025-09-10 02:39:54.029943', 'step': 11913, 'epoch': 2} {'type': 'loss', 'content': 0.12711402773857117, 'timestamp': '2025-09-10 02:39:54.033483', 'step': 11914, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 128], 'flops': 3797092544000}, 'timestamp': '2025-09-10 02:39:54.072032', 'step': 11914, 'epoch': 2} {'type': 'loss', 'content': 0.1920367330312729, 'timestamp': '2025-09-10 02:39:54.077021', 'step': 11915, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 3322488817984}, 'timestamp': '2025-09-10 02:39:54.118427', 'step': 11915, 'epoch': 2} {'type': 'loss', 'content': 0.07994091510772705, 'timestamp': '2025-09-10 02:39:54.143186', 'step': 11916, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 3322488817984}, 'timestamp': '2025-09-10 02:39:54.175968', 'step': 11916, 'epoch': 2} {'type': 'loss', 'content': 0.06107867881655693, 'timestamp': '2025-09-10 02:39:54.179069', 'step': 11917, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 2847885091968}, 'timestamp': '2025-09-10 02:39:54.213042', 'step': 11917, 'epoch': 2} {'type': 'loss', 'content': 0.09260957688093185, 'timestamp': '2025-09-10 02:39:54.216913', 'step': 11918, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 2847885091968}, 'timestamp': '2025-09-10 02:39:54.250437', 'step': 11918, 'epoch': 2} {'type': 'loss', 'content': 0.12392786890268326, 'timestamp': '2025-09-10 02:39:54.253932', 'step': 11919, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 3322488817984}, 'timestamp': '2025-09-10 02:39:54.289422', 'step': 11919, 'epoch': 2} {'type': 'loss', 'content': 0.12741822004318237, 'timestamp': '2025-09-10 02:39:54.313799', 'step': 11920, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 3322488817984}, 'timestamp': '2025-09-10 02:39:54.346296', 'step': 11920, 'epoch': 2} {'type': 'loss', 'content': 0.10804712772369385, 'timestamp': '2025-09-10 02:39:54.348522', 'step': 11921, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 3322488817984}, 'timestamp': '2025-09-10 02:39:54.379888', 'step': 11921, 'epoch': 2} {'type': 'loss', 'content': 0.07983759790658951, 'timestamp': '2025-09-10 02:39:54.383426', 'step': 11922, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 3322488817984}, 'timestamp': '2025-09-10 02:39:54.422659', 'step': 11922, 'epoch': 2} {'type': 'loss', 'content': 0.14981773495674133, 'timestamp': '2025-09-10 02:39:54.425591', 'step': 11923, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 3322488817984}, 'timestamp': '2025-09-10 02:39:54.460556', 'step': 11923, 'epoch': 2} {'type': 'loss', 'content': 0.1425386220216751, 'timestamp': '2025-09-10 02:39:54.485666', 'step': 11924, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 3322488817984}, 'timestamp': '2025-09-10 02:39:54.516000', 'step': 11924, 'epoch': 2} {'type': 'loss', 'content': 0.06268345564603806, 'timestamp': '2025-09-10 02:39:54.518429', 'step': 11925, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 3322488817984}, 'timestamp': '2025-09-10 02:39:54.548458', 'step': 11925, 'epoch': 2} {'type': 'loss', 'content': 0.12960226833820343, 'timestamp': '2025-09-10 02:39:54.550990', 'step': 11926, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 128], 'flops': 3797092544000}, 'timestamp': '2025-09-10 02:39:54.580432', 'step': 11926, 'epoch': 2} {'type': 'loss', 'content': 0.11553184688091278, 'timestamp': '2025-09-10 02:39:54.582781', 'step': 11927, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 208], 'flops': 6170111174080}, 'timestamp': '2025-09-10 02:39:54.613140', 'step': 11927, 'epoch': 2} {'type': 'loss', 'content': 0.09726301580667496, 'timestamp': '2025-09-10 02:39:54.640940', 'step': 11928, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 2847885091968}, 'timestamp': '2025-09-10 02:39:54.671898', 'step': 11928, 'epoch': 2} {'type': 'loss', 'content': 0.041895728558301926, 'timestamp': '2025-09-10 02:39:54.674280', 'step': 11929, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 128], 'flops': 3797092544000}, 'timestamp': '2025-09-10 02:39:54.704135', 'step': 11929, 'epoch': 2} {'type': 'loss', 'content': 0.09796465188264847, 'timestamp': '2025-09-10 02:39:54.706459', 'step': 11930, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 2847885091968}, 'timestamp': '2025-09-10 02:39:54.742637', 'step': 11930, 'epoch': 2} {'type': 'loss', 'content': 0.17245543003082275, 'timestamp': '2025-09-10 02:39:54.749701', 'step': 11931, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 2847885091968}, 'timestamp': '2025-09-10 02:39:54.793933', 'step': 11931, 'epoch': 2} {'type': 'loss', 'content': 0.09766857326030731, 'timestamp': '2025-09-10 02:39:54.817782', 'step': 11932, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 3322488817984}, 'timestamp': '2025-09-10 02:39:54.848026', 'step': 11932, 'epoch': 2} {'type': 'loss', 'content': 0.2108839601278305, 'timestamp': '2025-09-10 02:39:54.850383', 'step': 11933, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 80], 'flops': 2373281365952}, 'timestamp': '2025-09-10 02:39:54.881581', 'step': 11933, 'epoch': 2} {'type': 'loss', 'content': 0.08150913566350937, 'timestamp': '2025-09-10 02:39:54.884184', 'step': 11934, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 2847885091968}, 'timestamp': '2025-09-10 02:39:54.914641', 'step': 11934, 'epoch': 2} {'type': 'loss', 'content': 0.16269630193710327, 'timestamp': '2025-09-10 02:39:54.917316', 'step': 11935, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 144], 'flops': 4271696270016}, 'timestamp': '2025-09-10 02:39:54.947784', 'step': 11935, 'epoch': 2} {'type': 'loss', 'content': 0.08461128920316696, 'timestamp': '2025-09-10 02:39:54.971600', 'step': 11936, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 3322488817984}, 'timestamp': '2025-09-10 02:39:55.001477', 'step': 11936, 'epoch': 2} {'type': 'loss', 'content': 0.07888025045394897, 'timestamp': '2025-09-10 02:39:55.004019', 'step': 11937, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 3322488817984}, 'timestamp': '2025-09-10 02:39:55.034213', 'step': 11937, 'epoch': 2} {'type': 'loss', 'content': 0.07270816713571548, 'timestamp': '2025-09-10 02:39:55.036604', 'step': 11938, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 2847885091968}, 'timestamp': '2025-09-10 02:39:55.066340', 'step': 11938, 'epoch': 2} {'type': 'loss', 'content': 0.06291481852531433, 'timestamp': '2025-09-10 02:39:55.068662', 'step': 11939, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 128], 'flops': 3797092544000}, 'timestamp': '2025-09-10 02:39:55.099037', 'step': 11939, 'epoch': 2} {'type': 'loss', 'content': 0.1423388123512268, 'timestamp': '2025-09-10 02:39:55.122874', 'step': 11940, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 3322488817984}, 'timestamp': '2025-09-10 02:39:55.153006', 'step': 11940, 'epoch': 2} {'type': 'loss', 'content': 0.15159626305103302, 'timestamp': '2025-09-10 02:39:55.155681', 'step': 11941, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 3322488817984}, 'timestamp': '2025-09-10 02:39:55.185527', 'step': 11941, 'epoch': 2} {'type': 'loss', 'content': 0.0767383947968483, 'timestamp': '2025-09-10 02:39:55.188252', 'step': 11942, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 3322488817984}, 'timestamp': '2025-09-10 02:39:55.218921', 'step': 11942, 'epoch': 2} {'type': 'loss', 'content': 0.07261834293603897, 'timestamp': '2025-09-10 02:39:55.221634', 'step': 11943, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 3322488817984}, 'timestamp': '2025-09-10 02:39:55.251561', 'step': 11943, 'epoch': 2} {'type': 'loss', 'content': 0.11947733163833618, 'timestamp': '2025-09-10 02:39:55.275180', 'step': 11944, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 2847885091968}, 'timestamp': '2025-09-10 02:39:55.307126', 'step': 11944, 'epoch': 2} {'type': 'loss', 'content': 0.025957001373171806, 'timestamp': '2025-09-10 02:39:55.309537', 'step': 11945, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 2847885091968}, 'timestamp': '2025-09-10 02:39:55.339361', 'step': 11945, 'epoch': 2} {'type': 'loss', 'content': 0.1425553411245346, 'timestamp': '2025-09-10 02:39:55.341869', 'step': 11946, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 128], 'flops': 3797092544000}, 'timestamp': '2025-09-10 02:39:55.371706', 'step': 11946, 'epoch': 2} {'type': 'loss', 'content': 0.1270049661397934, 'timestamp': '2025-09-10 02:39:55.374123', 'step': 11947, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 144], 'flops': 4271696270016}, 'timestamp': '2025-09-10 02:39:55.404328', 'step': 11947, 'epoch': 2} {'type': 'loss', 'content': 0.05707379803061485, 'timestamp': '2025-09-10 02:39:55.427777', 'step': 11948, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 2847885091968}, 'timestamp': '2025-09-10 02:39:55.458316', 'step': 11948, 'epoch': 2} {'type': 'loss', 'content': 0.06887887418270111, 'timestamp': '2025-09-10 02:39:55.460956', 'step': 11949, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 160], 'flops': 4746299996032}, 'timestamp': '2025-09-10 02:39:55.495568', 'step': 11949, 'epoch': 2} {'type': 'loss', 'content': 0.08428336679935455, 'timestamp': '2025-09-10 02:39:55.498447', 'step': 11950, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 2847885091968}, 'timestamp': '2025-09-10 02:39:55.530676', 'step': 11950, 'epoch': 2} {'type': 'loss', 'content': 0.10946983098983765, 'timestamp': '2025-09-10 02:39:55.533518', 'step': 11951, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 2847885091968}, 'timestamp': '2025-09-10 02:39:55.563880', 'step': 11951, 'epoch': 2} {'type': 'loss', 'content': 0.052706122398376465, 'timestamp': '2025-09-10 02:39:55.587478', 'step': 11952, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 3322488817984}, 'timestamp': '2025-09-10 02:39:55.620820', 'step': 11952, 'epoch': 2} {'type': 'loss', 'content': 0.10990381240844727, 'timestamp': '2025-09-10 02:39:55.624897', 'step': 11953, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 128], 'flops': 3797092544000}, 'timestamp': '2025-09-10 02:39:55.656168', 'step': 11953, 'epoch': 2} {'type': 'loss', 'content': 0.08332201093435287, 'timestamp': '2025-09-10 02:39:55.658506', 'step': 11954, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 3322488817984}, 'timestamp': '2025-09-10 02:39:55.690428', 'step': 11954, 'epoch': 2} {'type': 'loss', 'content': 0.06862954795360565, 'timestamp': '2025-09-10 02:39:55.692765', 'step': 11955, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 2847885091968}, 'timestamp': '2025-09-10 02:39:55.722663', 'step': 11955, 'epoch': 2} {'type': 'loss', 'content': 0.08962781727313995, 'timestamp': '2025-09-10 02:39:55.747210', 'step': 11956, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 128], 'flops': 3797092544000}, 'timestamp': '2025-09-10 02:39:55.778066', 'step': 11956, 'epoch': 2} {'type': 'loss', 'content': 0.17790839076042175, 'timestamp': '2025-09-10 02:39:55.780443', 'step': 11957, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 2847885091968}, 'timestamp': '2025-09-10 02:39:55.811488', 'step': 11957, 'epoch': 2} {'type': 'loss', 'content': 0.14608658850193024, 'timestamp': '2025-09-10 02:39:55.813999', 'step': 11958, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 3322488817984}, 'timestamp': '2025-09-10 02:39:55.844107', 'step': 11958, 'epoch': 2} {'type': 'loss', 'content': 0.15796208381652832, 'timestamp': '2025-09-10 02:39:55.846689', 'step': 11959, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 128], 'flops': 3797092544000}, 'timestamp': '2025-09-10 02:39:55.877445', 'step': 11959, 'epoch': 2} {'type': 'loss', 'content': 0.06582240760326385, 'timestamp': '2025-09-10 02:39:55.901017', 'step': 11960, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 2847885091968}, 'timestamp': '2025-09-10 02:39:55.931120', 'step': 11960, 'epoch': 2} {'type': 'loss', 'content': 0.14417150616645813, 'timestamp': '2025-09-10 02:39:55.934163', 'step': 11961, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 128], 'flops': 3797092544000}, 'timestamp': '2025-09-10 02:39:55.963913', 'step': 11961, 'epoch': 2} {'type': 'loss', 'content': 0.08478867262601852, 'timestamp': '2025-09-10 02:39:55.967666', 'step': 11962, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 3322488817984}, 'timestamp': '2025-09-10 02:39:55.998717', 'step': 11962, 'epoch': 2} {'type': 'loss', 'content': 0.24288807809352875, 'timestamp': '2025-09-10 02:39:56.001191', 'step': 11963, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 2847885091968}, 'timestamp': '2025-09-10 02:39:56.032059', 'step': 11963, 'epoch': 2} {'type': 'loss', 'content': 0.08755523711442947, 'timestamp': '2025-09-10 02:39:56.057076', 'step': 11964, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 3322488817984}, 'timestamp': '2025-09-10 02:39:56.088229', 'step': 11964, 'epoch': 2} {'type': 'loss', 'content': 0.1339198350906372, 'timestamp': '2025-09-10 02:39:56.090295', 'step': 11965, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 3322488817984}, 'timestamp': '2025-09-10 02:39:56.120372', 'step': 11965, 'epoch': 2} {'type': 'loss', 'content': 0.08089419454336166, 'timestamp': '2025-09-10 02:39:56.123147', 'step': 11966, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 3322488817984}, 'timestamp': '2025-09-10 02:39:56.156391', 'step': 11966, 'epoch': 2} {'type': 'loss', 'content': 0.15025795996189117, 'timestamp': '2025-09-10 02:39:56.159250', 'step': 11967, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 3322488817984}, 'timestamp': '2025-09-10 02:39:56.189524', 'step': 11967, 'epoch': 2} {'type': 'loss', 'content': 0.16324515640735626, 'timestamp': '2025-09-10 02:39:56.213620', 'step': 11968, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 128], 'flops': 3797092544000}, 'timestamp': '2025-09-10 02:39:56.243969', 'step': 11968, 'epoch': 2} {'type': 'loss', 'content': 0.08671305328607559, 'timestamp': '2025-09-10 02:39:56.246387', 'step': 11969, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 2847885091968}, 'timestamp': '2025-09-10 02:39:56.277367', 'step': 11969, 'epoch': 2} {'type': 'loss', 'content': 0.14795763790607452, 'timestamp': '2025-09-10 02:39:56.280245', 'step': 11970, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 128], 'flops': 3797092544000}, 'timestamp': '2025-09-10 02:39:56.310584', 'step': 11970, 'epoch': 2} {'type': 'loss', 'content': 0.09487402439117432, 'timestamp': '2025-09-10 02:39:56.312861', 'step': 11971, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 80], 'flops': 2373281365952}, 'timestamp': '2025-09-10 02:39:56.342666', 'step': 11971, 'epoch': 2} {'type': 'loss', 'content': 0.14138488471508026, 'timestamp': '2025-09-10 02:39:56.366275', 'step': 11972, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 3322488817984}, 'timestamp': '2025-09-10 02:39:56.396815', 'step': 11972, 'epoch': 2} {'type': 'loss', 'content': 0.14259010553359985, 'timestamp': '2025-09-10 02:39:56.399573', 'step': 11973, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 2847885091968}, 'timestamp': '2025-09-10 02:39:56.431325', 'step': 11973, 'epoch': 2} {'type': 'loss', 'content': 0.15051567554473877, 'timestamp': '2025-09-10 02:39:56.434991', 'step': 11974, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 2847885091968}, 'timestamp': '2025-09-10 02:39:56.465370', 'step': 11974, 'epoch': 2} {'type': 'loss', 'content': 0.12139400094747543, 'timestamp': '2025-09-10 02:39:56.472078', 'step': 11975, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 2847885091968}, 'timestamp': '2025-09-10 02:39:56.502620', 'step': 11975, 'epoch': 2} {'type': 'loss', 'content': 0.10701961070299149, 'timestamp': '2025-09-10 02:39:56.526679', 'step': 11976, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 3322488817984}, 'timestamp': '2025-09-10 02:39:56.558544', 'step': 11976, 'epoch': 2} {'type': 'loss', 'content': 0.08456171303987503, 'timestamp': '2025-09-10 02:39:56.560945', 'step': 11977, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 2847885091968}, 'timestamp': '2025-09-10 02:39:56.591112', 'step': 11977, 'epoch': 2} {'type': 'loss', 'content': 0.14111366868019104, 'timestamp': '2025-09-10 02:39:56.593557', 'step': 11978, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 2847885091968}, 'timestamp': '2025-09-10 02:39:56.624212', 'step': 11978, 'epoch': 2} {'type': 'loss', 'content': 0.1274668276309967, 'timestamp': '2025-09-10 02:39:56.626666', 'step': 11979, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 128], 'flops': 3797092544000}, 'timestamp': '2025-09-10 02:39:56.659509', 'step': 11979, 'epoch': 2} {'type': 'loss', 'content': 0.12454691529273987, 'timestamp': '2025-09-10 02:39:56.683088', 'step': 11980, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 128], 'flops': 3797092544000}, 'timestamp': '2025-09-10 02:39:56.714453', 'step': 11980, 'epoch': 2} {'type': 'loss', 'content': 0.11478906124830246, 'timestamp': '2025-09-10 02:39:56.717111', 'step': 11981, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 3322488817984}, 'timestamp': '2025-09-10 02:39:56.747548', 'step': 11981, 'epoch': 2} {'type': 'loss', 'content': 0.1709924042224884, 'timestamp': '2025-09-10 02:39:56.749853', 'step': 11982, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 3322488817984}, 'timestamp': '2025-09-10 02:39:56.780827', 'step': 11982, 'epoch': 2} {'type': 'loss', 'content': 0.11094867438077927, 'timestamp': '2025-09-10 02:39:56.783095', 'step': 11983, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 3322488817984}, 'timestamp': '2025-09-10 02:39:56.812144', 'step': 11983, 'epoch': 2} {'type': 'loss', 'content': 0.08000136911869049, 'timestamp': '2025-09-10 02:39:56.835742', 'step': 11984, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 80], 'flops': 2373281365952}, 'timestamp': '2025-09-10 02:39:56.867896', 'step': 11984, 'epoch': 2} {'type': 'loss', 'content': 0.06491657346487045, 'timestamp': '2025-09-10 02:39:56.869944', 'step': 11985, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 2847885091968}, 'timestamp': '2025-09-10 02:39:56.900589', 'step': 11985, 'epoch': 2} {'type': 'loss', 'content': 0.10778447985649109, 'timestamp': '2025-09-10 02:39:56.902977', 'step': 11986, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 128], 'flops': 3797092544000}, 'timestamp': '2025-09-10 02:39:56.934181', 'step': 11986, 'epoch': 2} {'type': 'loss', 'content': 0.11936753243207932, 'timestamp': '2025-09-10 02:39:56.937295', 'step': 11987, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 160], 'flops': 4746299996032}, 'timestamp': '2025-09-10 02:39:56.969551', 'step': 11987, 'epoch': 2} {'type': 'loss', 'content': 0.05209360271692276, 'timestamp': '2025-09-10 02:39:56.993130', 'step': 11988, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 144], 'flops': 4271696270016}, 'timestamp': '2025-09-10 02:39:57.023914', 'step': 11988, 'epoch': 2} {'type': 'loss', 'content': 0.12901058793067932, 'timestamp': '2025-09-10 02:39:57.026482', 'step': 11989, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 128], 'flops': 3797092544000}, 'timestamp': '2025-09-10 02:39:57.056827', 'step': 11989, 'epoch': 2} {'type': 'loss', 'content': 0.10512837767601013, 'timestamp': '2025-09-10 02:39:57.062214', 'step': 11990, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 144], 'flops': 4271696270016}, 'timestamp': '2025-09-10 02:39:57.093553', 'step': 11990, 'epoch': 2} {'type': 'loss', 'content': 0.11081649363040924, 'timestamp': '2025-09-10 02:39:57.096271', 'step': 11991, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 80], 'flops': 2373281365952}, 'timestamp': '2025-09-10 02:39:57.125917', 'step': 11991, 'epoch': 2} {'type': 'loss', 'content': 0.10133107006549835, 'timestamp': '2025-09-10 02:39:57.149841', 'step': 11992, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 144], 'flops': 4271696270016}, 'timestamp': '2025-09-10 02:39:57.179755', 'step': 11992, 'epoch': 2} {'type': 'loss', 'content': 0.05163588374853134, 'timestamp': '2025-09-10 02:39:57.182408', 'step': 11993, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 2847885091968}, 'timestamp': '2025-09-10 02:39:57.213283', 'step': 11993, 'epoch': 2} {'type': 'loss', 'content': 0.23373647034168243, 'timestamp': '2025-09-10 02:39:57.215515', 'step': 11994, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 3322488817984}, 'timestamp': '2025-09-10 02:39:57.246841', 'step': 11994, 'epoch': 2} {'type': 'loss', 'content': 0.10003270953893661, 'timestamp': '2025-09-10 02:39:57.249132', 'step': 11995, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 2847885091968}, 'timestamp': '2025-09-10 02:39:57.278968', 'step': 11995, 'epoch': 2} {'type': 'loss', 'content': 0.11726240813732147, 'timestamp': '2025-09-10 02:39:57.302667', 'step': 11996, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 3322488817984}, 'timestamp': '2025-09-10 02:39:57.332750', 'step': 11996, 'epoch': 2} {'type': 'loss', 'content': 0.09752414375543594, 'timestamp': '2025-09-10 02:39:57.335335', 'step': 11997, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 128], 'flops': 3797092544000}, 'timestamp': '2025-09-10 02:39:57.365886', 'step': 11997, 'epoch': 2} {'type': 'loss', 'content': 0.17456085979938507, 'timestamp': '2025-09-10 02:39:57.368205', 'step': 11998, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 3322488817984}, 'timestamp': '2025-09-10 02:39:57.398861', 'step': 11998, 'epoch': 2} {'type': 'loss', 'content': 0.08189249783754349, 'timestamp': '2025-09-10 02:39:57.401273', 'step': 11999, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 2847885091968}, 'timestamp': '2025-09-10 02:39:57.431448', 'step': 11999, 'epoch': 2} {'type': 'loss', 'content': 0.09515967220067978, 'timestamp': '2025-09-10 02:39:57.455219', 'step': 12000, 'epoch': 2} {'type': 'info', 'content': 'Checkpoint saved at step 12000', 'timestamp': '2025-09-10 02:40:03.609315', 'step': 12000, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 2847885091968}, 'timestamp': '2025-09-10 02:40:03.656303', 'step': 12000, 'epoch': 2} {'type': 'loss', 'content': 0.13528689742088318, 'timestamp': '2025-09-10 02:40:03.658908', 'step': 12001, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 176], 'flops': 5220903722048}, 'timestamp': '2025-09-10 02:40:03.690331', 'step': 12001, 'epoch': 2} {'type': 'loss', 'content': 0.06144142895936966, 'timestamp': '2025-09-10 02:40:03.694281', 'step': 12002, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 3322488817984}, 'timestamp': '2025-09-10 02:40:03.726244', 'step': 12002, 'epoch': 2} {'type': 'loss', 'content': 0.1714266985654831, 'timestamp': '2025-09-10 02:40:03.729015', 'step': 12003, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 3322488817984}, 'timestamp': '2025-09-10 02:40:03.760141', 'step': 12003, 'epoch': 2} {'type': 'loss', 'content': 0.04198583960533142, 'timestamp': '2025-09-10 02:40:03.784460', 'step': 12004, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 3322488817984}, 'timestamp': '2025-09-10 02:40:03.814844', 'step': 12004, 'epoch': 2} {'type': 'loss', 'content': 0.1769891083240509, 'timestamp': '2025-09-10 02:40:03.817141', 'step': 12005, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 3322488817984}, 'timestamp': '2025-09-10 02:40:03.846683', 'step': 12005, 'epoch': 2} {'type': 'loss', 'content': 0.05683446675539017, 'timestamp': '2025-09-10 02:40:03.849012', 'step': 12006, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 3322488817984}, 'timestamp': '2025-09-10 02:40:03.878747', 'step': 12006, 'epoch': 2} {'type': 'loss', 'content': 0.0922747254371643, 'timestamp': '2025-09-10 02:40:03.881091', 'step': 12007, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 3322488817984}, 'timestamp': '2025-09-10 02:40:03.911443', 'step': 12007, 'epoch': 2} {'type': 'loss', 'content': 0.12306153029203415, 'timestamp': '2025-09-10 02:40:03.935527', 'step': 12008, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 128], 'flops': 3797092544000}, 'timestamp': '2025-09-10 02:40:03.965514', 'step': 12008, 'epoch': 2} {'type': 'loss', 'content': 0.08854777365922928, 'timestamp': '2025-09-10 02:40:03.967827', 'step': 12009, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 128], 'flops': 3797092544000}, 'timestamp': '2025-09-10 02:40:03.998037', 'step': 12009, 'epoch': 2} {'type': 'loss', 'content': 0.1443355530500412, 'timestamp': '2025-09-10 02:40:04.000356', 'step': 12010, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 2847885091968}, 'timestamp': '2025-09-10 02:40:04.030677', 'step': 12010, 'epoch': 2} {'type': 'loss', 'content': 0.0375589057803154, 'timestamp': '2025-09-10 02:40:04.033129', 'step': 12011, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 128], 'flops': 3797092544000}, 'timestamp': '2025-09-10 02:40:04.063023', 'step': 12011, 'epoch': 2} {'type': 'loss', 'content': 0.07860933989286423, 'timestamp': '2025-09-10 02:40:04.086630', 'step': 12012, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 2847885091968}, 'timestamp': '2025-09-10 02:40:04.127445', 'step': 12012, 'epoch': 2} {'type': 'loss', 'content': 0.09907995164394379, 'timestamp': '2025-09-10 02:40:04.130576', 'step': 12013, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 2847885091968}, 'timestamp': '2025-09-10 02:40:04.162025', 'step': 12013, 'epoch': 2} {'type': 'loss', 'content': 0.10908125340938568, 'timestamp': '2025-09-10 02:40:04.164356', 'step': 12014, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 3322488817984}, 'timestamp': '2025-09-10 02:40:04.194042', 'step': 12014, 'epoch': 2} {'type': 'loss', 'content': 0.12341427057981491, 'timestamp': '2025-09-10 02:40:04.196876', 'step': 12015, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 160], 'flops': 4746299996032}, 'timestamp': '2025-09-10 02:40:04.227188', 'step': 12015, 'epoch': 2} {'type': 'loss', 'content': 0.07114289700984955, 'timestamp': '2025-09-10 02:40:04.251026', 'step': 12016, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 3322488817984}, 'timestamp': '2025-09-10 02:40:04.282758', 'step': 12016, 'epoch': 2} {'type': 'loss', 'content': 0.10972870141267776, 'timestamp': '2025-09-10 02:40:04.285557', 'step': 12017, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 144], 'flops': 4271696270016}, 'timestamp': '2025-09-10 02:40:04.316750', 'step': 12017, 'epoch': 2} {'type': 'loss', 'content': 0.0734110027551651, 'timestamp': '2025-09-10 02:40:04.319549', 'step': 12018, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 144], 'flops': 4271696270016}, 'timestamp': '2025-09-10 02:40:04.349810', 'step': 12018, 'epoch': 2} {'type': 'loss', 'content': 0.05212017148733139, 'timestamp': '2025-09-10 02:40:04.352264', 'step': 12019, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 2847885091968}, 'timestamp': '2025-09-10 02:40:04.382465', 'step': 12019, 'epoch': 2} {'type': 'loss', 'content': 0.09805407375097275, 'timestamp': '2025-09-10 02:40:04.406075', 'step': 12020, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 3322488817984}, 'timestamp': '2025-09-10 02:40:04.436649', 'step': 12020, 'epoch': 2} {'type': 'loss', 'content': 0.08952213823795319, 'timestamp': '2025-09-10 02:40:04.438939', 'step': 12021, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 3322488817984}, 'timestamp': '2025-09-10 02:40:04.470484', 'step': 12021, 'epoch': 2} {'type': 'loss', 'content': 0.09008413553237915, 'timestamp': '2025-09-10 02:40:04.472760', 'step': 12022, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 3322488817984}, 'timestamp': '2025-09-10 02:40:04.502772', 'step': 12022, 'epoch': 2} {'type': 'loss', 'content': 0.10060509294271469, 'timestamp': '2025-09-10 02:40:04.505196', 'step': 12023, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 2847885091968}, 'timestamp': '2025-09-10 02:40:04.535378', 'step': 12023, 'epoch': 2} {'type': 'loss', 'content': 0.15526051819324493, 'timestamp': '2025-09-10 02:40:04.558799', 'step': 12024, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 2847885091968}, 'timestamp': '2025-09-10 02:40:04.589201', 'step': 12024, 'epoch': 2} {'type': 'loss', 'content': 0.09067371487617493, 'timestamp': '2025-09-10 02:40:04.591583', 'step': 12025, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 3322488817984}, 'timestamp': '2025-09-10 02:40:04.620729', 'step': 12025, 'epoch': 2} {'type': 'loss', 'content': 0.06685329228639603, 'timestamp': '2025-09-10 02:40:04.623315', 'step': 12026, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 2847885091968}, 'timestamp': '2025-09-10 02:40:04.654261', 'step': 12026, 'epoch': 2} {'type': 'loss', 'content': 0.1289689987897873, 'timestamp': '2025-09-10 02:40:04.656687', 'step': 12027, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 2847885091968}, 'timestamp': '2025-09-10 02:40:04.686484', 'step': 12027, 'epoch': 2} {'type': 'loss', 'content': 0.0788007527589798, 'timestamp': '2025-09-10 02:40:04.710239', 'step': 12028, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 2847885091968}, 'timestamp': '2025-09-10 02:40:04.740309', 'step': 12028, 'epoch': 2} {'type': 'loss', 'content': 0.1338767111301422, 'timestamp': '2025-09-10 02:40:04.742694', 'step': 12029, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 3322488817984}, 'timestamp': '2025-09-10 02:40:04.773205', 'step': 12029, 'epoch': 2} {'type': 'loss', 'content': 0.18244478106498718, 'timestamp': '2025-09-10 02:40:04.775365', 'step': 12030, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 2847885091968}, 'timestamp': '2025-09-10 02:40:04.806292', 'step': 12030, 'epoch': 2} {'type': 'loss', 'content': 0.07045141607522964, 'timestamp': '2025-09-10 02:40:04.808706', 'step': 12031, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 3322488817984}, 'timestamp': '2025-09-10 02:40:04.838910', 'step': 12031, 'epoch': 2} {'type': 'loss', 'content': 0.0923791453242302, 'timestamp': '2025-09-10 02:40:04.862591', 'step': 12032, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 144], 'flops': 4271696270016}, 'timestamp': '2025-09-10 02:40:04.893284', 'step': 12032, 'epoch': 2} {'type': 'loss', 'content': 0.13149453699588776, 'timestamp': '2025-09-10 02:40:04.895632', 'step': 12033, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 128], 'flops': 3797092544000}, 'timestamp': '2025-09-10 02:40:04.925528', 'step': 12033, 'epoch': 2} {'type': 'loss', 'content': 0.05560294911265373, 'timestamp': '2025-09-10 02:40:04.928074', 'step': 12034, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 2847885091968}, 'timestamp': '2025-09-10 02:40:04.957887', 'step': 12034, 'epoch': 2} {'type': 'loss', 'content': 0.10208792984485626, 'timestamp': '2025-09-10 02:40:04.960233', 'step': 12035, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 2847885091968}, 'timestamp': '2025-09-10 02:40:04.990206', 'step': 12035, 'epoch': 2} {'type': 'loss', 'content': 0.06867995113134384, 'timestamp': '2025-09-10 02:40:05.013622', 'step': 12036, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 80], 'flops': 2373281365952}, 'timestamp': '2025-09-10 02:40:05.043604', 'step': 12036, 'epoch': 2} {'type': 'loss', 'content': 0.11249227821826935, 'timestamp': '2025-09-10 02:40:05.045899', 'step': 12037, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 3322488817984}, 'timestamp': '2025-09-10 02:40:05.075387', 'step': 12037, 'epoch': 2} {'type': 'loss', 'content': 0.08783259242773056, 'timestamp': '2025-09-10 02:40:05.077875', 'step': 12038, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 144], 'flops': 4271696270016}, 'timestamp': '2025-09-10 02:40:05.108141', 'step': 12038, 'epoch': 2} {'type': 'loss', 'content': 0.05040813982486725, 'timestamp': '2025-09-10 02:40:05.110688', 'step': 12039, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 2847885091968}, 'timestamp': '2025-09-10 02:40:05.141133', 'step': 12039, 'epoch': 2} {'type': 'loss', 'content': 0.06110868602991104, 'timestamp': '2025-09-10 02:40:05.166064', 'step': 12040, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 3322488817984}, 'timestamp': '2025-09-10 02:40:05.197130', 'step': 12040, 'epoch': 2} {'type': 'loss', 'content': 0.05929037928581238, 'timestamp': '2025-09-10 02:40:05.199904', 'step': 12041, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 144], 'flops': 4271696270016}, 'timestamp': '2025-09-10 02:40:05.230478', 'step': 12041, 'epoch': 2} {'type': 'loss', 'content': 0.11354642361402512, 'timestamp': '2025-09-10 02:40:05.233106', 'step': 12042, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 144], 'flops': 4271696270016}, 'timestamp': '2025-09-10 02:40:05.263795', 'step': 12042, 'epoch': 2} {'type': 'loss', 'content': 0.15348263084888458, 'timestamp': '2025-09-10 02:40:05.266342', 'step': 12043, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 128], 'flops': 3797092544000}, 'timestamp': '2025-09-10 02:40:05.296472', 'step': 12043, 'epoch': 2} {'type': 'loss', 'content': 0.03798913583159447, 'timestamp': '2025-09-10 02:40:05.320238', 'step': 12044, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 2847885091968}, 'timestamp': '2025-09-10 02:40:05.350953', 'step': 12044, 'epoch': 2} {'type': 'loss', 'content': 0.1468392014503479, 'timestamp': '2025-09-10 02:40:05.352974', 'step': 12045, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 2847885091968}, 'timestamp': '2025-09-10 02:40:05.382797', 'step': 12045, 'epoch': 2} {'type': 'loss', 'content': 0.07892414927482605, 'timestamp': '2025-09-10 02:40:05.385538', 'step': 12046, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 2847885091968}, 'timestamp': '2025-09-10 02:40:05.416053', 'step': 12046, 'epoch': 2} {'type': 'loss', 'content': 0.07902918010950089, 'timestamp': '2025-09-10 02:40:05.418476', 'step': 12047, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 3322488817984}, 'timestamp': '2025-09-10 02:40:05.448449', 'step': 12047, 'epoch': 2} {'type': 'loss', 'content': 0.11196871101856232, 'timestamp': '2025-09-10 02:40:05.471992', 'step': 12048, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 144], 'flops': 4271696270016}, 'timestamp': '2025-09-10 02:40:05.502397', 'step': 12048, 'epoch': 2} {'type': 'loss', 'content': 0.11391167342662811, 'timestamp': '2025-09-10 02:40:05.504743', 'step': 12049, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 128], 'flops': 3797092544000}, 'timestamp': '2025-09-10 02:40:05.535420', 'step': 12049, 'epoch': 2} {'type': 'loss', 'content': 0.07584795355796814, 'timestamp': '2025-09-10 02:40:05.537944', 'step': 12050, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 2847885091968}, 'timestamp': '2025-09-10 02:40:05.567417', 'step': 12050, 'epoch': 2} {'type': 'loss', 'content': 0.06277409940958023, 'timestamp': '2025-09-10 02:40:05.582372', 'step': 12051, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 176], 'flops': 5220903722048}, 'timestamp': '2025-09-10 02:40:05.617345', 'step': 12051, 'epoch': 2} {'type': 'loss', 'content': 0.09253417700529099, 'timestamp': '2025-09-10 02:40:05.642219', 'step': 12052, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 3322488817984}, 'timestamp': '2025-09-10 02:40:05.673281', 'step': 12052, 'epoch': 2} {'type': 'loss', 'content': 0.07996627688407898, 'timestamp': '2025-09-10 02:40:05.677588', 'step': 12053, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 128], 'flops': 3797092544000}, 'timestamp': '2025-09-10 02:40:05.717320', 'step': 12053, 'epoch': 2} {'type': 'loss', 'content': 0.17464935779571533, 'timestamp': '2025-09-10 02:40:05.719701', 'step': 12054, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 80], 'flops': 2373281365952}, 'timestamp': '2025-09-10 02:40:05.749570', 'step': 12054, 'epoch': 2} {'type': 'loss', 'content': 0.12769098579883575, 'timestamp': '2025-09-10 02:40:05.752253', 'step': 12055, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 128], 'flops': 3797092544000}, 'timestamp': '2025-09-10 02:40:05.782420', 'step': 12055, 'epoch': 2} {'type': 'loss', 'content': 0.09004031866788864, 'timestamp': '2025-09-10 02:40:05.807383', 'step': 12056, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 128], 'flops': 3797092544000}, 'timestamp': '2025-09-10 02:40:05.838487', 'step': 12056, 'epoch': 2} {'type': 'loss', 'content': 0.10687743127346039, 'timestamp': '2025-09-10 02:40:05.840877', 'step': 12057, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 128], 'flops': 3797092544000}, 'timestamp': '2025-09-10 02:40:05.870711', 'step': 12057, 'epoch': 2} {'type': 'loss', 'content': 0.0684109553694725, 'timestamp': '2025-09-10 02:40:05.873043', 'step': 12058, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 144], 'flops': 4271696270016}, 'timestamp': '2025-09-10 02:40:05.903454', 'step': 12058, 'epoch': 2} {'type': 'loss', 'content': 0.13204555213451385, 'timestamp': '2025-09-10 02:40:05.905911', 'step': 12059, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 128], 'flops': 3797092544000}, 'timestamp': '2025-09-10 02:40:05.935597', 'step': 12059, 'epoch': 2} {'type': 'loss', 'content': 0.1689942628145218, 'timestamp': '2025-09-10 02:40:05.960088', 'step': 12060, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 3322488817984}, 'timestamp': '2025-09-10 02:40:05.991215', 'step': 12060, 'epoch': 2} {'type': 'loss', 'content': 0.10514336824417114, 'timestamp': '2025-09-10 02:40:05.993500', 'step': 12061, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 2847885091968}, 'timestamp': '2025-09-10 02:40:06.023807', 'step': 12061, 'epoch': 2} {'type': 'loss', 'content': 0.06362366676330566, 'timestamp': '2025-09-10 02:40:06.026495', 'step': 12062, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 144], 'flops': 4271696270016}, 'timestamp': '2025-09-10 02:40:06.057483', 'step': 12062, 'epoch': 2} {'type': 'loss', 'content': 0.13113906979560852, 'timestamp': '2025-09-10 02:40:06.060213', 'step': 12063, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 2847885091968}, 'timestamp': '2025-09-10 02:40:06.090659', 'step': 12063, 'epoch': 2} {'type': 'loss', 'content': 0.1036505252122879, 'timestamp': '2025-09-10 02:40:06.113950', 'step': 12064, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 3322488817984}, 'timestamp': '2025-09-10 02:40:06.144942', 'step': 12064, 'epoch': 2} {'type': 'loss', 'content': 0.09424777328968048, 'timestamp': '2025-09-10 02:40:06.147766', 'step': 12065, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 3322488817984}, 'timestamp': '2025-09-10 02:40:06.177639', 'step': 12065, 'epoch': 2} {'type': 'loss', 'content': 0.10015171766281128, 'timestamp': '2025-09-10 02:40:06.179972', 'step': 12066, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 2847885091968}, 'timestamp': '2025-09-10 02:40:06.210116', 'step': 12066, 'epoch': 2} {'type': 'loss', 'content': 0.05436716973781586, 'timestamp': '2025-09-10 02:40:06.212501', 'step': 12067, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 3322488817984}, 'timestamp': '2025-09-10 02:40:06.242750', 'step': 12067, 'epoch': 2} {'type': 'loss', 'content': 0.0894278809428215, 'timestamp': '2025-09-10 02:40:06.266793', 'step': 12068, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 3322488817984}, 'timestamp': '2025-09-10 02:40:06.297028', 'step': 12068, 'epoch': 2} {'type': 'loss', 'content': 0.06892083585262299, 'timestamp': '2025-09-10 02:40:06.299585', 'step': 12069, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 2847885091968}, 'timestamp': '2025-09-10 02:40:06.329190', 'step': 12069, 'epoch': 2} {'type': 'loss', 'content': 0.10759326070547104, 'timestamp': '2025-09-10 02:40:06.331518', 'step': 12070, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 3322488817984}, 'timestamp': '2025-09-10 02:40:06.361405', 'step': 12070, 'epoch': 2} {'type': 'loss', 'content': 0.13073278963565826, 'timestamp': '2025-09-10 02:40:06.363812', 'step': 12071, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 3322488817984}, 'timestamp': '2025-09-10 02:40:06.394807', 'step': 12071, 'epoch': 2} {'type': 'loss', 'content': 0.09024803340435028, 'timestamp': '2025-09-10 02:40:06.418350', 'step': 12072, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 3322488817984}, 'timestamp': '2025-09-10 02:40:06.449363', 'step': 12072, 'epoch': 2} {'type': 'loss', 'content': 0.22922974824905396, 'timestamp': '2025-09-10 02:40:06.451789', 'step': 12073, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 3322488817984}, 'timestamp': '2025-09-10 02:40:06.483605', 'step': 12073, 'epoch': 2} {'type': 'loss', 'content': 0.09966616332530975, 'timestamp': '2025-09-10 02:40:06.485890', 'step': 12074, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 128], 'flops': 3797092544000}, 'timestamp': '2025-09-10 02:40:06.515973', 'step': 12074, 'epoch': 2} {'type': 'loss', 'content': 0.06767307221889496, 'timestamp': '2025-09-10 02:40:06.518629', 'step': 12075, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 2847885091968}, 'timestamp': '2025-09-10 02:40:06.549054', 'step': 12075, 'epoch': 2} {'type': 'loss', 'content': 0.05463962256908417, 'timestamp': '2025-09-10 02:40:06.572942', 'step': 12076, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 128], 'flops': 3797092544000}, 'timestamp': '2025-09-10 02:40:06.602946', 'step': 12076, 'epoch': 2} {'type': 'loss', 'content': 0.05960379168391228, 'timestamp': '2025-09-10 02:40:06.605203', 'step': 12077, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 80], 'flops': 2373281365952}, 'timestamp': '2025-09-10 02:40:06.634459', 'step': 12077, 'epoch': 2} {'type': 'loss', 'content': 0.04185662046074867, 'timestamp': '2025-09-10 02:40:06.636943', 'step': 12078, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 128], 'flops': 3797092544000}, 'timestamp': '2025-09-10 02:40:06.667019', 'step': 12078, 'epoch': 2} {'type': 'loss', 'content': 0.10339698940515518, 'timestamp': '2025-09-10 02:40:06.670035', 'step': 12079, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 3322488817984}, 'timestamp': '2025-09-10 02:40:06.699943', 'step': 12079, 'epoch': 2} {'type': 'loss', 'content': 0.22091159224510193, 'timestamp': '2025-09-10 02:40:06.723752', 'step': 12080, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 3322488817984}, 'timestamp': '2025-09-10 02:40:06.753611', 'step': 12080, 'epoch': 2} {'type': 'loss', 'content': 0.09382462501525879, 'timestamp': '2025-09-10 02:40:06.755994', 'step': 12081, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 80], 'flops': 2373281365952}, 'timestamp': '2025-09-10 02:40:06.787534', 'step': 12081, 'epoch': 2} {'type': 'loss', 'content': 0.09378161281347275, 'timestamp': '2025-09-10 02:40:06.789772', 'step': 12082, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 3322488817984}, 'timestamp': '2025-09-10 02:40:06.820106', 'step': 12082, 'epoch': 2} {'type': 'loss', 'content': 0.08128239214420319, 'timestamp': '2025-09-10 02:40:06.822331', 'step': 12083, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 2847885091968}, 'timestamp': '2025-09-10 02:40:06.852184', 'step': 12083, 'epoch': 2} {'type': 'loss', 'content': 0.11562731862068176, 'timestamp': '2025-09-10 02:40:06.875878', 'step': 12084, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 128], 'flops': 3797092544000}, 'timestamp': '2025-09-10 02:40:06.906373', 'step': 12084, 'epoch': 2} {'type': 'loss', 'content': 0.24755826592445374, 'timestamp': '2025-09-10 02:40:06.909152', 'step': 12085, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 2847885091968}, 'timestamp': '2025-09-10 02:40:06.939045', 'step': 12085, 'epoch': 2} {'type': 'loss', 'content': 0.15890508890151978, 'timestamp': '2025-09-10 02:40:06.941249', 'step': 12086, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 80], 'flops': 2373281365952}, 'timestamp': '2025-09-10 02:40:06.970812', 'step': 12086, 'epoch': 2} {'type': 'loss', 'content': 0.10306255519390106, 'timestamp': '2025-09-10 02:40:06.973318', 'step': 12087, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 3322488817984}, 'timestamp': '2025-09-10 02:40:07.003610', 'step': 12087, 'epoch': 2} {'type': 'loss', 'content': 0.0651947408914566, 'timestamp': '2025-09-10 02:40:07.027177', 'step': 12088, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 128], 'flops': 3797092544000}, 'timestamp': '2025-09-10 02:40:07.057714', 'step': 12088, 'epoch': 2} {'type': 'loss', 'content': 0.13323916494846344, 'timestamp': '2025-09-10 02:40:07.060434', 'step': 12089, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 2847885091968}, 'timestamp': '2025-09-10 02:40:07.091950', 'step': 12089, 'epoch': 2} {'type': 'loss', 'content': 0.14446775615215302, 'timestamp': '2025-09-10 02:40:07.094694', 'step': 12090, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 128], 'flops': 3797092544000}, 'timestamp': '2025-09-10 02:40:07.124595', 'step': 12090, 'epoch': 2} {'type': 'loss', 'content': 0.05969978868961334, 'timestamp': '2025-09-10 02:40:07.126869', 'step': 12091, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 128], 'flops': 3797092544000}, 'timestamp': '2025-09-10 02:40:07.157506', 'step': 12091, 'epoch': 2} {'type': 'loss', 'content': 0.09376971423625946, 'timestamp': '2025-09-10 02:40:07.181446', 'step': 12092, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 3322488817984}, 'timestamp': '2025-09-10 02:40:07.219132', 'step': 12092, 'epoch': 2} {'type': 'loss', 'content': 0.15624235570430756, 'timestamp': '2025-09-10 02:40:07.221424', 'step': 12093, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 128], 'flops': 3797092544000}, 'timestamp': '2025-09-10 02:40:07.251376', 'step': 12093, 'epoch': 2} {'type': 'loss', 'content': 0.09605681151151657, 'timestamp': '2025-09-10 02:40:07.254424', 'step': 12094, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 144], 'flops': 4271696270016}, 'timestamp': '2025-09-10 02:40:07.285298', 'step': 12094, 'epoch': 2} {'type': 'loss', 'content': 0.08438795804977417, 'timestamp': '2025-09-10 02:40:07.287691', 'step': 12095, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 3322488817984}, 'timestamp': '2025-09-10 02:40:07.318237', 'step': 12095, 'epoch': 2} {'type': 'loss', 'content': 0.05195746570825577, 'timestamp': '2025-09-10 02:40:07.341888', 'step': 12096, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 2847885091968}, 'timestamp': '2025-09-10 02:40:07.372420', 'step': 12096, 'epoch': 2} {'type': 'loss', 'content': 0.18899297714233398, 'timestamp': '2025-09-10 02:40:07.374918', 'step': 12097, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 128], 'flops': 3797092544000}, 'timestamp': '2025-09-10 02:40:07.408178', 'step': 12097, 'epoch': 2} {'type': 'loss', 'content': 0.14099083840847015, 'timestamp': '2025-09-10 02:40:07.410467', 'step': 12098, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 128], 'flops': 3797092544000}, 'timestamp': '2025-09-10 02:40:07.440290', 'step': 12098, 'epoch': 2} {'type': 'loss', 'content': 0.20155788958072662, 'timestamp': '2025-09-10 02:40:07.442477', 'step': 12099, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 3322488817984}, 'timestamp': '2025-09-10 02:40:07.472356', 'step': 12099, 'epoch': 2} {'type': 'loss', 'content': 0.17466038465499878, 'timestamp': '2025-09-10 02:40:07.499244', 'step': 12100, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 128], 'flops': 3797092544000}, 'timestamp': '2025-09-10 02:40:07.534362', 'step': 12100, 'epoch': 2} {'type': 'loss', 'content': 0.11663367599248886, 'timestamp': '2025-09-10 02:40:07.536854', 'step': 12101, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 160], 'flops': 4746299996032}, 'timestamp': '2025-09-10 02:40:07.571225', 'step': 12101, 'epoch': 2} {'type': 'loss', 'content': 0.08891677111387253, 'timestamp': '2025-09-10 02:40:07.575441', 'step': 12102, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 128], 'flops': 3797092544000}, 'timestamp': '2025-09-10 02:40:07.606727', 'step': 12102, 'epoch': 2} {'type': 'loss', 'content': 0.03951864317059517, 'timestamp': '2025-09-10 02:40:07.609416', 'step': 12103, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 2847885091968}, 'timestamp': '2025-09-10 02:40:07.639686', 'step': 12103, 'epoch': 2} {'type': 'loss', 'content': 0.14826664328575134, 'timestamp': '2025-09-10 02:40:07.663441', 'step': 12104, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 3322488817984}, 'timestamp': '2025-09-10 02:40:07.693656', 'step': 12104, 'epoch': 2} {'type': 'loss', 'content': 0.11885696649551392, 'timestamp': '2025-09-10 02:40:07.696253', 'step': 12105, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 2847885091968}, 'timestamp': '2025-09-10 02:40:07.726248', 'step': 12105, 'epoch': 2} {'type': 'loss', 'content': 0.08246955275535583, 'timestamp': '2025-09-10 02:40:07.728606', 'step': 12106, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 128], 'flops': 3797092544000}, 'timestamp': '2025-09-10 02:40:07.758753', 'step': 12106, 'epoch': 2} {'type': 'loss', 'content': 0.13544751703739166, 'timestamp': '2025-09-10 02:40:07.761126', 'step': 12107, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 144], 'flops': 4271696270016}, 'timestamp': '2025-09-10 02:40:07.790855', 'step': 12107, 'epoch': 2} {'type': 'loss', 'content': 0.08373032510280609, 'timestamp': '2025-09-10 02:40:07.815671', 'step': 12108, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 144], 'flops': 4271696270016}, 'timestamp': '2025-09-10 02:40:07.846055', 'step': 12108, 'epoch': 2} {'type': 'loss', 'content': 0.0745515301823616, 'timestamp': '2025-09-10 02:40:07.848582', 'step': 12109, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 2847885091968}, 'timestamp': '2025-09-10 02:40:07.878779', 'step': 12109, 'epoch': 2} {'type': 'loss', 'content': 0.11299047619104385, 'timestamp': '2025-09-10 02:40:07.881682', 'step': 12110, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 2847885091968}, 'timestamp': '2025-09-10 02:40:07.912087', 'step': 12110, 'epoch': 2} {'type': 'loss', 'content': 0.21368560194969177, 'timestamp': '2025-09-10 02:40:07.914456', 'step': 12111, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 2847885091968}, 'timestamp': '2025-09-10 02:40:07.944424', 'step': 12111, 'epoch': 2} {'type': 'loss', 'content': 0.04678037390112877, 'timestamp': '2025-09-10 02:40:07.968036', 'step': 12112, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 128], 'flops': 3797092544000}, 'timestamp': '2025-09-10 02:40:08.002114', 'step': 12112, 'epoch': 2} {'type': 'loss', 'content': 0.09524978697299957, 'timestamp': '2025-09-10 02:40:08.004766', 'step': 12113, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 3322488817984}, 'timestamp': '2025-09-10 02:40:08.034812', 'step': 12113, 'epoch': 2} {'type': 'loss', 'content': 0.2300703078508377, 'timestamp': '2025-09-10 02:40:08.036927', 'step': 12114, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 128], 'flops': 3797092544000}, 'timestamp': '2025-09-10 02:40:08.067710', 'step': 12114, 'epoch': 2} {'type': 'loss', 'content': 0.11944107711315155, 'timestamp': '2025-09-10 02:40:08.070074', 'step': 12115, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 128], 'flops': 3797092544000}, 'timestamp': '2025-09-10 02:40:08.100200', 'step': 12115, 'epoch': 2} {'type': 'loss', 'content': 0.06116073206067085, 'timestamp': '2025-09-10 02:40:08.124195', 'step': 12116, 'epoch': 2} {'type': 'flops', 'content': [{'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1582003754624}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1898404492032}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1898404492032}, {'type': 'perplexity', 'in_batch_dim': [8, 144], 'batch_size': 8, 'flops': 2847606704256}, {'type': 'perplexity', 'in_batch_dim': [8, 64], 'batch_size': 8, 'flops': 1265603017216}, {'type': 'perplexity', 'in_batch_dim': [8, 112], 'batch_size': 8, 'flops': 2214805229440}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1898404492032}, {'type': 'perplexity', 'in_batch_dim': [8, 112], 'batch_size': 8, 'flops': 2214805229440}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1898404492032}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1898404492032}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1898404492032}, {'type': 'perplexity', 'in_batch_dim': [8, 112], 'batch_size': 8, 'flops': 2214805229440}, {'type': 'perplexity', 'in_batch_dim': [8, 112], 'batch_size': 8, 'flops': 2214805229440}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1582003754624}, {'type': 'perplexity', 'in_batch_dim': [8, 144], 'batch_size': 8, 'flops': 2847606704256}, {'type': 'perplexity', 'in_batch_dim': [8, 112], 'batch_size': 8, 'flops': 2214805229440}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1582003754624}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1898404492032}, {'type': 'perplexity', 'in_batch_dim': [8, 64], 'batch_size': 8, 'flops': 1265603017216}, {'type': 'perplexity', 'in_batch_dim': [8, 64], 'batch_size': 8, 'flops': 1265603017216}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1898404492032}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1582003754624}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1582003754624}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1582003754624}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1582003754624}, {'type': 'perplexity', 'in_batch_dim': [8, 64], 'batch_size': 8, 'flops': 1265603017216}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1898404492032}, {'type': 'perplexity', 'in_batch_dim': [8, 64], 'batch_size': 8, 'flops': 1265603017216}, {'type': 'perplexity', 'in_batch_dim': [8, 64], 'batch_size': 8, 'flops': 1265603017216}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1582003754624}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1582003754624}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1582003754624}, {'type': 'perplexity', 'in_batch_dim': [8, 112], 'batch_size': 8, 'flops': 2214805229440}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1582003754624}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1582003754624}, {'type': 'perplexity', 'in_batch_dim': [8, 160], 'batch_size': 8, 'flops': 3164007441664}, {'type': 'perplexity', 'in_batch_dim': [8, 64], 'batch_size': 8, 'flops': 1265603017216}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1898404492032}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1898404492032}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1898404492032}, {'type': 'perplexity', 'in_batch_dim': [8, 64], 'batch_size': 8, 'flops': 1265603017216}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1582003754624}, {'type': 'perplexity', 'in_batch_dim': [8, 64], 'batch_size': 8, 'flops': 1265603017216}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1582003754624}, {'type': 'perplexity', 'in_batch_dim': [8, 64], 'batch_size': 8, 'flops': 1265603017216}, {'type': 'perplexity', 'in_batch_dim': [8, 112], 'batch_size': 8, 'flops': 2214805229440}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1582003754624}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1898404492032}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1898404492032}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1898404492032}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1898404492032}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1898404492032}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1898404492032}, {'type': 'perplexity', 'in_batch_dim': [8, 64], 'batch_size': 8, 'flops': 1265603017216}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1898404492032}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1582003754624}, {'type': 'perplexity', 'in_batch_dim': [8, 64], 'batch_size': 8, 'flops': 1265603017216}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1898404492032}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1898404492032}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1898404492032}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1898404492032}, {'type': 'perplexity', 'in_batch_dim': [8, 128], 'batch_size': 8, 'flops': 2531205966848}, {'type': 'perplexity', 'in_batch_dim': [8, 112], 'batch_size': 8, 'flops': 2214805229440}, {'type': 'perplexity', 'in_batch_dim': [8, 64], 'batch_size': 8, 'flops': 1265603017216}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1582003754624}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1898404492032}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1582003754624}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1898404492032}, {'type': 'perplexity', 'in_batch_dim': [8, 64], 'batch_size': 8, 'flops': 1265603017216}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1898404492032}, {'type': 'perplexity', 'in_batch_dim': [8, 112], 'batch_size': 8, 'flops': 2214805229440}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1898404492032}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1582003754624}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1582003754624}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1582003754624}, {'type': 'perplexity', 'in_batch_dim': [8, 64], 'batch_size': 8, 'flops': 1265603017216}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1582003754624}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1898404492032}, {'type': 'perplexity', 'in_batch_dim': [8, 64], 'batch_size': 8, 'flops': 1265603017216}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1582003754624}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1898404492032}, {'type': 'perplexity', 'in_batch_dim': [8, 64], 'batch_size': 8, 'flops': 1265603017216}, {'type': 'perplexity', 'in_batch_dim': [8, 112], 'batch_size': 8, 'flops': 2214805229440}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1582003754624}, {'type': 'perplexity', 'in_batch_dim': [8, 144], 'batch_size': 8, 'flops': 2847606704256}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1582003754624}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1582003754624}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1582003754624}, {'type': 'perplexity', 'in_batch_dim': [8, 64], 'batch_size': 8, 'flops': 1265603017216}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1898404492032}, {'type': 'perplexity', 'in_batch_dim': [8, 112], 'batch_size': 8, 'flops': 2214805229440}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1898404492032}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1582003754624}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1582003754624}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1582003754624}, {'type': 'perplexity', 'in_batch_dim': [8, 112], 'batch_size': 8, 'flops': 2214805229440}, {'type': 'perplexity', 'in_batch_dim': [8, 64], 'batch_size': 8, 'flops': 1265603017216}, {'type': 'perplexity', 'in_batch_dim': [8, 112], 'batch_size': 8, 'flops': 2214805229440}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1582003754624}, {'type': 'perplexity', 'in_batch_dim': [8, 64], 'batch_size': 8, 'flops': 1265603017216}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1582003754624}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1582003754624}, {'type': 'perplexity', 'in_batch_dim': [8, 64], 'batch_size': 8, 'flops': 1265603017216}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1582003754624}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1582003754624}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1582003754624}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1898404492032}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1582003754624}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1582003754624}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1898404492032}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1898404492032}, {'type': 'perplexity', 'in_batch_dim': [8, 112], 'batch_size': 8, 'flops': 2214805229440}, {'type': 'perplexity', 'in_batch_dim': [8, 64], 'batch_size': 8, 'flops': 1265603017216}, {'type': 'perplexity', 'in_batch_dim': [8, 112], 'batch_size': 8, 'flops': 2214805229440}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1898404492032}, {'type': 'perplexity', 'in_batch_dim': [8, 112], 'batch_size': 8, 'flops': 2214805229440}, {'type': 'perplexity', 'in_batch_dim': [8, 128], 'batch_size': 8, 'flops': 2531205966848}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1582003754624}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1898404492032}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1898404492032}, {'type': 'perplexity', 'in_batch_dim': [8, 192], 'batch_size': 8, 'flops': 3796808916480}, {'type': 'perplexity', 'in_batch_dim': [8, 64], 'batch_size': 8, 'flops': 1265603017216}, {'type': 'perplexity', 'in_batch_dim': [8, 144], 'batch_size': 8, 'flops': 2847606704256}, {'type': 'perplexity', 'in_batch_dim': [8, 64], 'batch_size': 8, 'flops': 1265603017216}, {'type': 'perplexity', 'in_batch_dim': [8, 144], 'batch_size': 8, 'flops': 2847606704256}, {'type': 'perplexity', 'in_batch_dim': [8, 64], 'batch_size': 8, 'flops': 1265603017216}, {'type': 'perplexity', 'in_batch_dim': [8, 64], 'batch_size': 8, 'flops': 1265603017216}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1898404492032}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1898404492032}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1582003754624}, {'type': 'perplexity', 'in_batch_dim': [8, 128], 'batch_size': 8, 'flops': 2531205966848}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1898404492032}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1898404492032}, {'type': 'perplexity', 'in_batch_dim': [8, 112], 'batch_size': 8, 'flops': 2214805229440}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1582003754624}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1582003754624}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1898404492032}, {'type': 'perplexity', 'in_batch_dim': [8, 64], 'batch_size': 8, 'flops': 1265603017216}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1898404492032}, {'type': 'perplexity', 'in_batch_dim': [8, 112], 'batch_size': 8, 'flops': 2214805229440}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1898404492032}, {'type': 'perplexity', 'in_batch_dim': [8, 64], 'batch_size': 8, 'flops': 1265603017216}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1582003754624}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1898404492032}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1898404492032}, {'type': 'perplexity', 'in_batch_dim': [8, 64], 'batch_size': 8, 'flops': 1265603017216}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1898404492032}, {'type': 'perplexity', 'in_batch_dim': [8, 64], 'batch_size': 8, 'flops': 1265603017216}, {'type': 'perplexity', 'in_batch_dim': [8, 112], 'batch_size': 8, 'flops': 2214805229440}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1582003754624}, {'type': 'perplexity', 'in_batch_dim': [8, 128], 'batch_size': 8, 'flops': 2531205966848}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1582003754624}, {'type': 'perplexity', 'in_batch_dim': [8, 112], 'batch_size': 8, 'flops': 2214805229440}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1898404492032}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1582003754624}, {'type': 'perplexity', 'in_batch_dim': [8, 112], 'batch_size': 8, 'flops': 2214805229440}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1582003754624}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1898404492032}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1898404492032}, {'type': 'perplexity', 'in_batch_dim': [8, 128], 'batch_size': 8, 'flops': 2531205966848}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1898404492032}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1898404492032}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1582003754624}, {'type': 'perplexity', 'in_batch_dim': [8, 112], 'batch_size': 8, 'flops': 2214805229440}, {'type': 'perplexity', 'in_batch_dim': [8, 128], 'batch_size': 8, 'flops': 2531205966848}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1898404492032}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1582003754624}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1582003754624}, {'type': 'perplexity', 'in_batch_dim': [8, 128], 'batch_size': 8, 'flops': 2531205966848}, {'type': 'perplexity', 'in_batch_dim': [8, 112], 'batch_size': 8, 'flops': 2214805229440}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1582003754624}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1582003754624}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1898404492032}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1582003754624}, {'type': 'perplexity', 'in_batch_dim': [8, 64], 'batch_size': 8, 'flops': 1265603017216}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1582003754624}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1582003754624}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1582003754624}, {'type': 'perplexity', 'in_batch_dim': [8, 112], 'batch_size': 8, 'flops': 2214805229440}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1582003754624}, {'type': 'perplexity', 'in_batch_dim': [8, 64], 'batch_size': 8, 'flops': 1265603017216}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1582003754624}, {'type': 'perplexity', 'in_batch_dim': [8, 128], 'batch_size': 8, 'flops': 2531205966848}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1898404492032}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1898404492032}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1582003754624}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1898404492032}, {'type': 'perplexity', 'in_batch_dim': [8, 64], 'batch_size': 8, 'flops': 1265603017216}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1898404492032}, {'type': 'perplexity', 'in_batch_dim': [8, 112], 'batch_size': 8, 'flops': 2214805229440}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1898404492032}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1582003754624}, {'type': 'perplexity', 'in_batch_dim': [8, 112], 'batch_size': 8, 'flops': 2214805229440}, {'type': 'perplexity', 'in_batch_dim': [8, 112], 'batch_size': 8, 'flops': 2214805229440}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1898404492032}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1898404492032}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1898404492032}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1898404492032}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1582003754624}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1582003754624}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1582003754624}, {'type': 'perplexity', 'in_batch_dim': [8, 112], 'batch_size': 8, 'flops': 2214805229440}, {'type': 'perplexity', 'in_batch_dim': [8, 64], 'batch_size': 8, 'flops': 1265603017216}, {'type': 'perplexity', 'in_batch_dim': [8, 144], 'batch_size': 8, 'flops': 2847606704256}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1898404492032}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1898404492032}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1898404492032}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1898404492032}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1898404492032}, {'type': 'perplexity', 'in_batch_dim': [8, 112], 'batch_size': 8, 'flops': 2214805229440}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1582003754624}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1582003754624}, {'type': 'perplexity', 'in_batch_dim': [8, 64], 'batch_size': 8, 'flops': 1265603017216}, {'type': 'perplexity', 'in_batch_dim': [8, 112], 'batch_size': 8, 'flops': 2214805229440}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1582003754624}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1898404492032}, {'type': 'perplexity', 'in_batch_dim': [8, 64], 'batch_size': 8, 'flops': 1265603017216}, {'type': 'perplexity', 'in_batch_dim': [8, 112], 'batch_size': 8, 'flops': 2214805229440}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1898404492032}, {'type': 'perplexity', 'in_batch_dim': [8, 64], 'batch_size': 8, 'flops': 1265603017216}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1582003754624}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1582003754624}, {'type': 'perplexity', 'in_batch_dim': [8, 64], 'batch_size': 8, 'flops': 1265603017216}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1898404492032}, {'type': 'perplexity', 'in_batch_dim': [8, 64], 'batch_size': 8, 'flops': 1265603017216}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1582003754624}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1898404492032}, {'type': 'perplexity', 'in_batch_dim': [8, 112], 'batch_size': 8, 'flops': 2214805229440}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1582003754624}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1898404492032}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1582003754624}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1898404492032}, {'type': 'perplexity', 'in_batch_dim': [8, 112], 'batch_size': 8, 'flops': 2214805229440}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1898404492032}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1898404492032}, {'type': 'perplexity', 'in_batch_dim': [8, 64], 'batch_size': 8, 'flops': 1265603017216}, {'type': 'perplexity', 'in_batch_dim': [8, 64], 'batch_size': 8, 'flops': 1265603017216}, {'type': 'perplexity', 'in_batch_dim': [8, 128], 'batch_size': 8, 'flops': 2531205966848}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1582003754624}, {'type': 'perplexity', 'in_batch_dim': [8, 64], 'batch_size': 8, 'flops': 1265603017216}, {'type': 'perplexity', 'in_batch_dim': [8, 112], 'batch_size': 8, 'flops': 2214805229440}, {'type': 'perplexity', 'in_batch_dim': [8, 112], 'batch_size': 8, 'flops': 2214805229440}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1898404492032}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1582003754624}, {'type': 'perplexity', 'in_batch_dim': [8, 128], 'batch_size': 8, 'flops': 2531205966848}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1898404492032}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1582003754624}, {'type': 'perplexity', 'in_batch_dim': [8, 112], 'batch_size': 8, 'flops': 2214805229440}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1898404492032}, {'type': 'perplexity', 'in_batch_dim': [8, 64], 'batch_size': 8, 'flops': 1265603017216}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1582003754624}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1898404492032}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1898404492032}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1582003754624}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1582003754624}, {'type': 'perplexity', 'in_batch_dim': [8, 64], 'batch_size': 8, 'flops': 1265603017216}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1898404492032}, {'type': 'perplexity', 'in_batch_dim': [8, 112], 'batch_size': 8, 'flops': 2214805229440}, {'type': 'perplexity', 'in_batch_dim': [8, 64], 'batch_size': 8, 'flops': 1265603017216}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1898404492032}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1582003754624}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1582003754624}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1898404492032}, {'type': 'perplexity', 'in_batch_dim': [8, 112], 'batch_size': 8, 'flops': 2214805229440}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1898404492032}, {'type': 'perplexity', 'in_batch_dim': [8, 112], 'batch_size': 8, 'flops': 2214805229440}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1582003754624}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1582003754624}, {'type': 'perplexity', 'in_batch_dim': [8, 64], 'batch_size': 8, 'flops': 1265603017216}, {'type': 'perplexity', 'in_batch_dim': [8, 112], 'batch_size': 8, 'flops': 2214805229440}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1582003754624}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1898404492032}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1582003754624}, {'type': 'perplexity', 'in_batch_dim': [8, 112], 'batch_size': 8, 'flops': 2214805229440}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1898404492032}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1898404492032}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1582003754624}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1898404492032}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1582003754624}, {'type': 'perplexity', 'in_batch_dim': [8, 112], 'batch_size': 8, 'flops': 2214805229440}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1582003754624}, {'type': 'perplexity', 'in_batch_dim': [8, 64], 'batch_size': 8, 'flops': 1265603017216}, {'type': 'perplexity', 'in_batch_dim': [8, 64], 'batch_size': 8, 'flops': 1265603017216}, {'type': 'perplexity', 'in_batch_dim': [8, 48], 'batch_size': 8, 'flops': 949202279808}, {'type': 'perplexity', 'in_batch_dim': [8, 112], 'batch_size': 8, 'flops': 2214805229440}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1582003754624}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1898404492032}, {'type': 'perplexity', 'in_batch_dim': [8, 128], 'batch_size': 8, 'flops': 2531205966848}, {'type': 'perplexity', 'in_batch_dim': [8, 112], 'batch_size': 8, 'flops': 2214805229440}, {'type': 'perplexity', 'in_batch_dim': [8, 128], 'batch_size': 8, 'flops': 2531205966848}, {'type': 'perplexity', 'in_batch_dim': [8, 112], 'batch_size': 8, 'flops': 2214805229440}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1582003754624}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1898404492032}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1582003754624}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1582003754624}, {'type': 'perplexity', 'in_batch_dim': [8, 112], 'batch_size': 8, 'flops': 2214805229440}, {'type': 'perplexity', 'in_batch_dim': [8, 112], 'batch_size': 8, 'flops': 2214805229440}, {'type': 'perplexity', 'in_batch_dim': [8, 64], 'batch_size': 8, 'flops': 1265603017216}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1898404492032}, {'type': 'perplexity', 'in_batch_dim': [8, 112], 'batch_size': 8, 'flops': 2214805229440}, {'type': 'perplexity', 'in_batch_dim': [8, 64], 'batch_size': 8, 'flops': 1265603017216}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1898404492032}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1582003754624}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1898404492032}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1898404492032}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1898404492032}, {'type': 'perplexity', 'in_batch_dim': [8, 48], 'batch_size': 8, 'flops': 949202279808}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1898404492032}, {'type': 'perplexity', 'in_batch_dim': [8, 112], 'batch_size': 8, 'flops': 2214805229440}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1582003754624}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1898404492032}, {'type': 'perplexity', 'in_batch_dim': [8, 64], 'batch_size': 8, 'flops': 1265603017216}, {'type': 'perplexity', 'in_batch_dim': [8, 112], 'batch_size': 8, 'flops': 2214805229440}, {'type': 'perplexity', 'in_batch_dim': [8, 64], 'batch_size': 8, 'flops': 1265603017216}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1582003754624}, {'type': 'perplexity', 'in_batch_dim': [8, 64], 'batch_size': 8, 'flops': 1265603017216}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1582003754624}, {'type': 'perplexity', 'in_batch_dim': [8, 112], 'batch_size': 8, 'flops': 2214805229440}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1582003754624}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1582003754624}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1582003754624}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1582003754624}, {'type': 'perplexity', 'in_batch_dim': [8, 112], 'batch_size': 8, 'flops': 2214805229440}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1582003754624}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1898404492032}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1898404492032}, {'type': 'perplexity', 'in_batch_dim': [8, 112], 'batch_size': 8, 'flops': 2214805229440}, {'type': 'perplexity', 'in_batch_dim': [8, 144], 'batch_size': 8, 'flops': 2847606704256}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1582003754624}, {'type': 'perplexity', 'in_batch_dim': [8, 64], 'batch_size': 8, 'flops': 1265603017216}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1898404492032}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1898404492032}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1582003754624}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1898404492032}, {'type': 'perplexity', 'in_batch_dim': [8, 128], 'batch_size': 8, 'flops': 2531205966848}, {'type': 'perplexity', 'in_batch_dim': [8, 112], 'batch_size': 8, 'flops': 2214805229440}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1898404492032}, {'type': 'perplexity', 'in_batch_dim': [8, 64], 'batch_size': 8, 'flops': 1265603017216}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1898404492032}, {'type': 'perplexity', 'in_batch_dim': [8, 160], 'batch_size': 8, 'flops': 3164007441664}, {'type': 'perplexity', 'in_batch_dim': [8, 64], 'batch_size': 8, 'flops': 1265603017216}, {'type': 'perplexity', 'in_batch_dim': [8, 112], 'batch_size': 8, 'flops': 2214805229440}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1898404492032}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1898404492032}, {'type': 'perplexity', 'in_batch_dim': [8, 144], 'batch_size': 8, 'flops': 2847606704256}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1582003754624}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1898404492032}, {'type': 'perplexity', 'in_batch_dim': [8, 64], 'batch_size': 8, 'flops': 1265603017216}, {'type': 'perplexity', 'in_batch_dim': [8, 64], 'batch_size': 8, 'flops': 1265603017216}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1898404492032}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1898404492032}, {'type': 'perplexity', 'in_batch_dim': [8, 112], 'batch_size': 8, 'flops': 2214805229440}, {'type': 'perplexity', 'in_batch_dim': [8, 112], 'batch_size': 8, 'flops': 2214805229440}, {'type': 'perplexity', 'in_batch_dim': [8, 128], 'batch_size': 8, 'flops': 2531205966848}, {'type': 'perplexity', 'in_batch_dim': [8, 112], 'batch_size': 8, 'flops': 2214805229440}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1582003754624}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1582003754624}, {'type': 'perplexity', 'in_batch_dim': [8, 112], 'batch_size': 8, 'flops': 2214805229440}, {'type': 'perplexity', 'in_batch_dim': [8, 128], 'batch_size': 8, 'flops': 2531205966848}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1582003754624}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1582003754624}, {'type': 'perplexity', 'in_batch_dim': [8, 64], 'batch_size': 8, 'flops': 1265603017216}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1898404492032}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1582003754624}, {'type': 'perplexity', 'in_batch_dim': [8, 112], 'batch_size': 8, 'flops': 2214805229440}, {'type': 'perplexity', 'in_batch_dim': [8, 64], 'batch_size': 8, 'flops': 1265603017216}, {'type': 'perplexity', 'in_batch_dim': [8, 64], 'batch_size': 8, 'flops': 1265603017216}, {'type': 'perplexity', 'in_batch_dim': [8, 128], 'batch_size': 8, 'flops': 2531205966848}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1898404492032}, {'type': 'perplexity', 'in_batch_dim': [8, 112], 'batch_size': 8, 'flops': 2214805229440}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1582003754624}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1582003754624}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1582003754624}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1582003754624}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1582003754624}, {'type': 'perplexity', 'in_batch_dim': [8, 112], 'batch_size': 8, 'flops': 2214805229440}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1898404492032}, {'type': 'perplexity', 'in_batch_dim': [8, 112], 'batch_size': 8, 'flops': 2214805229440}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1898404492032}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1582003754624}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1582003754624}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1898404492032}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1898404492032}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1582003754624}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1582003754624}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1582003754624}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1582003754624}, {'type': 'perplexity', 'in_batch_dim': [8, 48], 'batch_size': 8, 'flops': 949202279808}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1582003754624}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1898404492032}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1582003754624}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1898404492032}, {'type': 'perplexity', 'in_batch_dim': [8, 112], 'batch_size': 8, 'flops': 2214805229440}, {'type': 'perplexity', 'in_batch_dim': [8, 128], 'batch_size': 8, 'flops': 2531205966848}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1898404492032}, {'type': 'perplexity', 'in_batch_dim': [8, 64], 'batch_size': 8, 'flops': 1265603017216}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1898404492032}, {'type': 'perplexity', 'in_batch_dim': [8, 64], 'batch_size': 8, 'flops': 1265603017216}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1582003754624}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1582003754624}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1582003754624}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1582003754624}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1898404492032}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1582003754624}, {'type': 'perplexity', 'in_batch_dim': [8, 112], 'batch_size': 8, 'flops': 2214805229440}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1898404492032}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1582003754624}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1898404492032}, {'type': 'perplexity', 'in_batch_dim': [8, 64], 'batch_size': 8, 'flops': 1265603017216}, {'type': 'perplexity', 'in_batch_dim': [8, 64], 'batch_size': 8, 'flops': 1265603017216}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1898404492032}, {'type': 'perplexity', 'in_batch_dim': [8, 64], 'batch_size': 8, 'flops': 1265603017216}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1582003754624}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1582003754624}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1582003754624}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1582003754624}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1582003754624}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1582003754624}, {'type': 'perplexity', 'in_batch_dim': [8, 64], 'batch_size': 8, 'flops': 1265603017216}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1582003754624}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1582003754624}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1898404492032}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1898404492032}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1898404492032}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1898404492032}, {'type': 'perplexity', 'in_batch_dim': [8, 128], 'batch_size': 8, 'flops': 2531205966848}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1582003754624}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1898404492032}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1898404492032}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1898404492032}, {'type': 'perplexity', 'in_batch_dim': [8, 64], 'batch_size': 8, 'flops': 1265603017216}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1898404492032}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1898404492032}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1582003754624}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1898404492032}, {'type': 'perplexity', 'in_batch_dim': [8, 112], 'batch_size': 8, 'flops': 2214805229440}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1898404492032}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1582003754624}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1898404492032}, {'type': 'perplexity', 'in_batch_dim': [8, 64], 'batch_size': 8, 'flops': 1265603017216}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1898404492032}, {'type': 'perplexity', 'in_batch_dim': [8, 112], 'batch_size': 8, 'flops': 2214805229440}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1898404492032}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1898404492032}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1582003754624}, {'type': 'perplexity', 'in_batch_dim': [8, 64], 'batch_size': 8, 'flops': 1265603017216}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1582003754624}, {'type': 'perplexity', 'in_batch_dim': [8, 128], 'batch_size': 8, 'flops': 2531205966848}, {'type': 'perplexity', 'in_batch_dim': [8, 64], 'batch_size': 8, 'flops': 1265603017216}, {'type': 'perplexity', 'in_batch_dim': [8, 128], 'batch_size': 8, 'flops': 2531205966848}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1898404492032}, {'type': 'perplexity', 'in_batch_dim': [8, 112], 'batch_size': 8, 'flops': 2214805229440}, {'type': 'perplexity', 'in_batch_dim': [8, 112], 'batch_size': 8, 'flops': 2214805229440}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1898404492032}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1898404492032}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1582003754624}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1582003754624}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1582003754624}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1898404492032}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1582003754624}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1582003754624}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1898404492032}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1582003754624}, {'type': 'perplexity', 'in_batch_dim': [8, 64], 'batch_size': 8, 'flops': 1265603017216}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1898404492032}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1898404492032}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1898404492032}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1898404492032}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1582003754624}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1898404492032}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1582003754624}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1582003754624}, {'type': 'perplexity', 'in_batch_dim': [8, 112], 'batch_size': 8, 'flops': 2214805229440}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1582003754624}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1898404492032}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1582003754624}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1582003754624}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1582003754624}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1898404492032}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1582003754624}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1898404492032}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1582003754624}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1582003754624}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1898404492032}, {'type': 'perplexity', 'in_batch_dim': [8, 112], 'batch_size': 8, 'flops': 2214805229440}, {'type': 'perplexity', 'in_batch_dim': [8, 128], 'batch_size': 8, 'flops': 2531205966848}, {'type': 'perplexity', 'in_batch_dim': [8, 112], 'batch_size': 8, 'flops': 2214805229440}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1582003754624}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1898404492032}, {'type': 'perplexity', 'in_batch_dim': [8, 64], 'batch_size': 8, 'flops': 1265603017216}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1898404492032}, {'type': 'perplexity', 'in_batch_dim': [8, 64], 'batch_size': 8, 'flops': 1265603017216}, {'type': 'perplexity', 'in_batch_dim': [8, 144], 'batch_size': 8, 'flops': 2847606704256}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1582003754624}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1898404492032}, {'type': 'perplexity', 'in_batch_dim': [8, 112], 'batch_size': 8, 'flops': 2214805229440}, {'type': 'perplexity', 'in_batch_dim': [8, 144], 'batch_size': 8, 'flops': 2847606704256}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1582003754624}, {'type': 'perplexity', 'in_batch_dim': [8, 112], 'batch_size': 8, 'flops': 2214805229440}, {'type': 'perplexity', 'in_batch_dim': [8, 64], 'batch_size': 8, 'flops': 1265603017216}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1582003754624}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1898404492032}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1898404492032}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1582003754624}, {'type': 'perplexity', 'in_batch_dim': [8, 112], 'batch_size': 8, 'flops': 2214805229440}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1582003754624}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1582003754624}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1582003754624}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1582003754624}, {'type': 'perplexity', 'in_batch_dim': [8, 112], 'batch_size': 8, 'flops': 2214805229440}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1898404492032}, {'type': 'perplexity', 'in_batch_dim': [8, 128], 'batch_size': 8, 'flops': 2531205966848}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1582003754624}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1582003754624}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1898404492032}, {'type': 'perplexity', 'in_batch_dim': [8, 64], 'batch_size': 8, 'flops': 1265603017216}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1898404492032}, {'type': 'perplexity', 'in_batch_dim': [8, 64], 'batch_size': 8, 'flops': 1265603017216}, {'type': 'perplexity', 'in_batch_dim': [8, 112], 'batch_size': 8, 'flops': 2214805229440}, {'type': 'perplexity', 'in_batch_dim': [8, 112], 'batch_size': 8, 'flops': 2214805229440}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1582003754624}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1898404492032}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1582003754624}, {'type': 'perplexity', 'in_batch_dim': [8, 64], 'batch_size': 8, 'flops': 1265603017216}, {'type': 'perplexity', 'in_batch_dim': [8, 64], 'batch_size': 8, 'flops': 1265603017216}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1898404492032}, {'type': 'perplexity', 'in_batch_dim': [8, 64], 'batch_size': 8, 'flops': 1265603017216}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1898404492032}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1898404492032}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1582003754624}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1898404492032}, {'type': 'perplexity', 'in_batch_dim': [8, 144], 'batch_size': 8, 'flops': 2847606704256}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1898404492032}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1582003754624}, {'type': 'perplexity', 'in_batch_dim': [8, 128], 'batch_size': 8, 'flops': 2531205966848}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1898404492032}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1898404492032}, {'type': 'perplexity', 'in_batch_dim': [8, 112], 'batch_size': 8, 'flops': 2214805229440}, {'type': 'perplexity', 'in_batch_dim': [8, 112], 'batch_size': 8, 'flops': 2214805229440}, {'type': 'perplexity', 'in_batch_dim': [8, 64], 'batch_size': 8, 'flops': 1265603017216}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1582003754624}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1898404492032}, {'type': 'perplexity', 'in_batch_dim': [8, 128], 'batch_size': 8, 'flops': 2531205966848}, {'type': 'perplexity', 'in_batch_dim': [8, 144], 'batch_size': 8, 'flops': 2847606704256}, {'type': 'perplexity', 'in_batch_dim': [8, 112], 'batch_size': 8, 'flops': 2214805229440}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1582003754624}, {'type': 'perplexity', 'in_batch_dim': [8, 64], 'batch_size': 8, 'flops': 1265603017216}, {'type': 'perplexity', 'in_batch_dim': [8, 64], 'batch_size': 8, 'flops': 1265603017216}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1898404492032}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1582003754624}, {'type': 'perplexity', 'in_batch_dim': [8, 64], 'batch_size': 8, 'flops': 1265603017216}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1898404492032}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1582003754624}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1898404492032}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1898404492032}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1582003754624}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1582003754624}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1582003754624}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1582003754624}, {'type': 'perplexity', 'in_batch_dim': [3, 48], 'batch_size': 8, 'flops': 949202279808}], 'timestamp': '2025-09-10 02:40:15.934646', 'step': 12116, 'epoch': 2} {'type': 'pplx', 'content': 11455.081619224771, 'timestamp': '2025-09-10 02:40:15.937859', 'step': 12116, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 3322488817984}, 'timestamp': '2025-09-10 02:40:15.967666', 'step': 12116, 'epoch': 2} {'type': 'loss', 'content': 0.10091850906610489, 'timestamp': '2025-09-10 02:40:15.969912', 'step': 12117, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 3322488817984}, 'timestamp': '2025-09-10 02:40:16.000808', 'step': 12117, 'epoch': 2} {'type': 'loss', 'content': 0.1171853318810463, 'timestamp': '2025-09-10 02:40:16.003224', 'step': 12118, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 144], 'flops': 4271696270016}, 'timestamp': '2025-09-10 02:40:16.033510', 'step': 12118, 'epoch': 2} {'type': 'loss', 'content': 0.08604384958744049, 'timestamp': '2025-09-10 02:40:16.036362', 'step': 12119, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 128], 'flops': 3797092544000}, 'timestamp': '2025-09-10 02:40:16.066469', 'step': 12119, 'epoch': 2} {'type': 'loss', 'content': 0.13869856297969818, 'timestamp': '2025-09-10 02:40:16.090151', 'step': 12120, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 3322488817984}, 'timestamp': '2025-09-10 02:40:16.120896', 'step': 12120, 'epoch': 2} {'type': 'loss', 'content': 0.0520700141787529, 'timestamp': '2025-09-10 02:40:16.123230', 'step': 12121, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 3322488817984}, 'timestamp': '2025-09-10 02:40:16.153975', 'step': 12121, 'epoch': 2} {'type': 'loss', 'content': 0.09283777326345444, 'timestamp': '2025-09-10 02:40:16.156579', 'step': 12122, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 128], 'flops': 3797092544000}, 'timestamp': '2025-09-10 02:40:16.188579', 'step': 12122, 'epoch': 2} {'type': 'loss', 'content': 0.11407869309186935, 'timestamp': '2025-09-10 02:40:16.190680', 'step': 12123, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 3322488817984}, 'timestamp': '2025-09-10 02:40:16.221195', 'step': 12123, 'epoch': 2} {'type': 'loss', 'content': 0.11929012089967728, 'timestamp': '2025-09-10 02:40:16.244879', 'step': 12124, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 3322488817984}, 'timestamp': '2025-09-10 02:40:16.276358', 'step': 12124, 'epoch': 2} {'type': 'loss', 'content': 0.060943786054849625, 'timestamp': '2025-09-10 02:40:16.278693', 'step': 12125, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 3322488817984}, 'timestamp': '2025-09-10 02:40:16.311050', 'step': 12125, 'epoch': 2} {'type': 'loss', 'content': 0.11171150207519531, 'timestamp': '2025-09-10 02:40:16.315338', 'step': 12126, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 144], 'flops': 4271696270016}, 'timestamp': '2025-09-10 02:40:16.345584', 'step': 12126, 'epoch': 2} {'type': 'loss', 'content': 0.07591429352760315, 'timestamp': '2025-09-10 02:40:16.348423', 'step': 12127, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 3322488817984}, 'timestamp': '2025-09-10 02:40:16.380032', 'step': 12127, 'epoch': 2} {'type': 'loss', 'content': 0.1337815672159195, 'timestamp': '2025-09-10 02:40:16.404068', 'step': 12128, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 128], 'flops': 3797092544000}, 'timestamp': '2025-09-10 02:40:16.434948', 'step': 12128, 'epoch': 2} {'type': 'loss', 'content': 0.08257021754980087, 'timestamp': '2025-09-10 02:40:16.437644', 'step': 12129, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 144], 'flops': 4271696270016}, 'timestamp': '2025-09-10 02:40:16.468527', 'step': 12129, 'epoch': 2} {'type': 'loss', 'content': 0.14210335910320282, 'timestamp': '2025-09-10 02:40:16.471030', 'step': 12130, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 3322488817984}, 'timestamp': '2025-09-10 02:40:16.501169', 'step': 12130, 'epoch': 2} {'type': 'loss', 'content': 0.15691795945167542, 'timestamp': '2025-09-10 02:40:16.503870', 'step': 12131, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 3322488817984}, 'timestamp': '2025-09-10 02:40:16.534786', 'step': 12131, 'epoch': 2} {'type': 'loss', 'content': 0.1858227550983429, 'timestamp': '2025-09-10 02:40:16.559302', 'step': 12132, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 144], 'flops': 4271696270016}, 'timestamp': '2025-09-10 02:40:16.592879', 'step': 12132, 'epoch': 2} {'type': 'loss', 'content': 0.11962921917438507, 'timestamp': '2025-09-10 02:40:16.595323', 'step': 12133, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 128], 'flops': 3797092544000}, 'timestamp': '2025-09-10 02:40:16.625669', 'step': 12133, 'epoch': 2} {'type': 'loss', 'content': 0.12722614407539368, 'timestamp': '2025-09-10 02:40:16.632161', 'step': 12134, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 144], 'flops': 4271696270016}, 'timestamp': '2025-09-10 02:40:16.672634', 'step': 12134, 'epoch': 2} {'type': 'loss', 'content': 0.13787280023097992, 'timestamp': '2025-09-10 02:40:16.677229', 'step': 12135, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 3322488817984}, 'timestamp': '2025-09-10 02:40:16.708758', 'step': 12135, 'epoch': 2} {'type': 'loss', 'content': 0.13082826137542725, 'timestamp': '2025-09-10 02:40:16.732838', 'step': 12136, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 2847885091968}, 'timestamp': '2025-09-10 02:40:16.763385', 'step': 12136, 'epoch': 2} {'type': 'loss', 'content': 0.10787032544612885, 'timestamp': '2025-09-10 02:40:16.766123', 'step': 12137, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 3322488817984}, 'timestamp': '2025-09-10 02:40:16.796954', 'step': 12137, 'epoch': 2} {'type': 'loss', 'content': 0.09961424767971039, 'timestamp': '2025-09-10 02:40:16.799866', 'step': 12138, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 3322488817984}, 'timestamp': '2025-09-10 02:40:16.830410', 'step': 12138, 'epoch': 2} {'type': 'loss', 'content': 0.15973079204559326, 'timestamp': '2025-09-10 02:40:16.832811', 'step': 12139, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 128], 'flops': 3797092544000}, 'timestamp': '2025-09-10 02:40:16.863208', 'step': 12139, 'epoch': 2} {'type': 'loss', 'content': 0.0898379310965538, 'timestamp': '2025-09-10 02:40:16.887430', 'step': 12140, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 128], 'flops': 3797092544000}, 'timestamp': '2025-09-10 02:40:16.922225', 'step': 12140, 'epoch': 2} {'type': 'loss', 'content': 0.08531277626752853, 'timestamp': '2025-09-10 02:40:16.925353', 'step': 12141, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 3322488817984}, 'timestamp': '2025-09-10 02:40:16.958206', 'step': 12141, 'epoch': 2} {'type': 'loss', 'content': 0.14019200205802917, 'timestamp': '2025-09-10 02:40:16.962596', 'step': 12142, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 2847885091968}, 'timestamp': '2025-09-10 02:40:16.997493', 'step': 12142, 'epoch': 2} {'type': 'loss', 'content': 0.027655845507979393, 'timestamp': '2025-09-10 02:40:17.000870', 'step': 12143, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 2847885091968}, 'timestamp': '2025-09-10 02:40:17.035531', 'step': 12143, 'epoch': 2} {'type': 'loss', 'content': 0.1307803988456726, 'timestamp': '2025-09-10 02:40:17.059340', 'step': 12144, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 128], 'flops': 3797092544000}, 'timestamp': '2025-09-10 02:40:17.089992', 'step': 12144, 'epoch': 2} {'type': 'loss', 'content': 0.08304891735315323, 'timestamp': '2025-09-10 02:40:17.092334', 'step': 12145, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 3322488817984}, 'timestamp': '2025-09-10 02:40:17.124354', 'step': 12145, 'epoch': 2} {'type': 'loss', 'content': 0.18537312746047974, 'timestamp': '2025-09-10 02:40:17.126982', 'step': 12146, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 144], 'flops': 4271696270016}, 'timestamp': '2025-09-10 02:40:17.157027', 'step': 12146, 'epoch': 2} {'type': 'loss', 'content': 0.10043298453092575, 'timestamp': '2025-09-10 02:40:17.159510', 'step': 12147, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 3322488817984}, 'timestamp': '2025-09-10 02:40:17.189964', 'step': 12147, 'epoch': 2} {'type': 'loss', 'content': 0.07636713981628418, 'timestamp': '2025-09-10 02:40:17.214031', 'step': 12148, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 3322488817984}, 'timestamp': '2025-09-10 02:40:17.246398', 'step': 12148, 'epoch': 2} {'type': 'loss', 'content': 0.03732270747423172, 'timestamp': '2025-09-10 02:40:17.249027', 'step': 12149, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 144], 'flops': 4271696270016}, 'timestamp': '2025-09-10 02:40:17.279818', 'step': 12149, 'epoch': 2} {'type': 'loss', 'content': 0.08763531595468521, 'timestamp': '2025-09-10 02:40:17.282495', 'step': 12150, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 2847885091968}, 'timestamp': '2025-09-10 02:40:17.313551', 'step': 12150, 'epoch': 2} {'type': 'loss', 'content': 0.11714484542608261, 'timestamp': '2025-09-10 02:40:17.316397', 'step': 12151, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 2847885091968}, 'timestamp': '2025-09-10 02:40:17.347006', 'step': 12151, 'epoch': 2} {'type': 'loss', 'content': 0.05867224931716919, 'timestamp': '2025-09-10 02:40:17.370746', 'step': 12152, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 2847885091968}, 'timestamp': '2025-09-10 02:40:17.400944', 'step': 12152, 'epoch': 2} {'type': 'loss', 'content': 0.07249223440885544, 'timestamp': '2025-09-10 02:40:17.403469', 'step': 12153, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 3322488817984}, 'timestamp': '2025-09-10 02:40:17.433463', 'step': 12153, 'epoch': 2} {'type': 'loss', 'content': 0.12951397895812988, 'timestamp': '2025-09-10 02:40:17.435863', 'step': 12154, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 3322488817984}, 'timestamp': '2025-09-10 02:40:17.466412', 'step': 12154, 'epoch': 2} {'type': 'loss', 'content': 0.0700346827507019, 'timestamp': '2025-09-10 02:40:17.468976', 'step': 12155, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 160], 'flops': 4746299996032}, 'timestamp': '2025-09-10 02:40:17.499221', 'step': 12155, 'epoch': 2} {'type': 'loss', 'content': 0.09134386479854584, 'timestamp': '2025-09-10 02:40:17.522789', 'step': 12156, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 128], 'flops': 3797092544000}, 'timestamp': '2025-09-10 02:40:17.554981', 'step': 12156, 'epoch': 2} {'type': 'loss', 'content': 0.07088737189769745, 'timestamp': '2025-09-10 02:40:17.557597', 'step': 12157, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 80], 'flops': 2373281365952}, 'timestamp': '2025-09-10 02:40:17.588093', 'step': 12157, 'epoch': 2} {'type': 'loss', 'content': 0.1793052703142166, 'timestamp': '2025-09-10 02:40:17.590427', 'step': 12158, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 2847885091968}, 'timestamp': '2025-09-10 02:40:17.621559', 'step': 12158, 'epoch': 2} {'type': 'loss', 'content': 0.10206591337919235, 'timestamp': '2025-09-10 02:40:17.624092', 'step': 12159, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 3322488817984}, 'timestamp': '2025-09-10 02:40:17.655677', 'step': 12159, 'epoch': 2} {'type': 'loss', 'content': 0.22143098711967468, 'timestamp': '2025-09-10 02:40:17.679819', 'step': 12160, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 2847885091968}, 'timestamp': '2025-09-10 02:40:17.710361', 'step': 12160, 'epoch': 2} {'type': 'loss', 'content': 0.11327799409627914, 'timestamp': '2025-09-10 02:40:17.712808', 'step': 12161, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 3322488817984}, 'timestamp': '2025-09-10 02:40:17.743894', 'step': 12161, 'epoch': 2} {'type': 'loss', 'content': 0.06370095163583755, 'timestamp': '2025-09-10 02:40:17.746179', 'step': 12162, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 128], 'flops': 3797092544000}, 'timestamp': '2025-09-10 02:40:17.777116', 'step': 12162, 'epoch': 2} {'type': 'loss', 'content': 0.08510449528694153, 'timestamp': '2025-09-10 02:40:17.780907', 'step': 12163, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 128], 'flops': 3797092544000}, 'timestamp': '2025-09-10 02:40:17.815380', 'step': 12163, 'epoch': 2} {'type': 'loss', 'content': 0.15814155340194702, 'timestamp': '2025-09-10 02:40:17.839237', 'step': 12164, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 3322488817984}, 'timestamp': '2025-09-10 02:40:17.870658', 'step': 12164, 'epoch': 2} {'type': 'loss', 'content': 0.07325194031000137, 'timestamp': '2025-09-10 02:40:17.873251', 'step': 12165, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 128], 'flops': 3797092544000}, 'timestamp': '2025-09-10 02:40:17.903901', 'step': 12165, 'epoch': 2} {'type': 'loss', 'content': 0.10169776529073715, 'timestamp': '2025-09-10 02:40:17.906429', 'step': 12166, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 3322488817984}, 'timestamp': '2025-09-10 02:40:17.937128', 'step': 12166, 'epoch': 2} {'type': 'loss', 'content': 0.07926269620656967, 'timestamp': '2025-09-10 02:40:17.939561', 'step': 12167, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 2847885091968}, 'timestamp': '2025-09-10 02:40:17.969888', 'step': 12167, 'epoch': 2} {'type': 'loss', 'content': 0.10054705291986465, 'timestamp': '2025-09-10 02:40:17.993903', 'step': 12168, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 160], 'flops': 4746299996032}, 'timestamp': '2025-09-10 02:40:18.025059', 'step': 12168, 'epoch': 2} {'type': 'loss', 'content': 0.05867806077003479, 'timestamp': '2025-09-10 02:40:18.027822', 'step': 12169, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 3322488817984}, 'timestamp': '2025-09-10 02:40:18.059372', 'step': 12169, 'epoch': 2} {'type': 'loss', 'content': 0.10371184349060059, 'timestamp': '2025-09-10 02:40:18.061999', 'step': 12170, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 128], 'flops': 3797092544000}, 'timestamp': '2025-09-10 02:40:18.092155', 'step': 12170, 'epoch': 2} {'type': 'loss', 'content': 0.12374470382928848, 'timestamp': '2025-09-10 02:40:18.094871', 'step': 12171, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 3322488817984}, 'timestamp': '2025-09-10 02:40:18.126286', 'step': 12171, 'epoch': 2} {'type': 'loss', 'content': 0.14168845117092133, 'timestamp': '2025-09-10 02:40:18.150723', 'step': 12172, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 3322488817984}, 'timestamp': '2025-09-10 02:40:18.187370', 'step': 12172, 'epoch': 2} {'type': 'loss', 'content': 0.17549532651901245, 'timestamp': '2025-09-10 02:40:18.189703', 'step': 12173, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 128], 'flops': 3797092544000}, 'timestamp': '2025-09-10 02:40:18.220229', 'step': 12173, 'epoch': 2} {'type': 'loss', 'content': 0.32393401861190796, 'timestamp': '2025-09-10 02:40:18.222611', 'step': 12174, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 128], 'flops': 3797092544000}, 'timestamp': '2025-09-10 02:40:18.252708', 'step': 12174, 'epoch': 2} {'type': 'loss', 'content': 0.21059098839759827, 'timestamp': '2025-09-10 02:40:18.255129', 'step': 12175, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 3322488817984}, 'timestamp': '2025-09-10 02:40:18.285332', 'step': 12175, 'epoch': 2} {'type': 'loss', 'content': 0.14632077515125275, 'timestamp': '2025-09-10 02:40:18.308793', 'step': 12176, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 3322488817984}, 'timestamp': '2025-09-10 02:40:18.340715', 'step': 12176, 'epoch': 2} {'type': 'loss', 'content': 0.1377367526292801, 'timestamp': '2025-09-10 02:40:18.342933', 'step': 12177, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 3322488817984}, 'timestamp': '2025-09-10 02:40:18.373130', 'step': 12177, 'epoch': 2} {'type': 'loss', 'content': 0.0857338160276413, 'timestamp': '2025-09-10 02:40:18.375511', 'step': 12178, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 144], 'flops': 4271696270016}, 'timestamp': '2025-09-10 02:40:18.405743', 'step': 12178, 'epoch': 2} {'type': 'loss', 'content': 0.14419056475162506, 'timestamp': '2025-09-10 02:40:18.408533', 'step': 12179, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 2847885091968}, 'timestamp': '2025-09-10 02:40:18.441170', 'step': 12179, 'epoch': 2} {'type': 'loss', 'content': 0.048808593302965164, 'timestamp': '2025-09-10 02:40:18.465667', 'step': 12180, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 3322488817984}, 'timestamp': '2025-09-10 02:40:18.496205', 'step': 12180, 'epoch': 2} {'type': 'loss', 'content': 0.16059714555740356, 'timestamp': '2025-09-10 02:40:18.498510', 'step': 12181, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 144], 'flops': 4271696270016}, 'timestamp': '2025-09-10 02:40:18.529387', 'step': 12181, 'epoch': 2} {'type': 'loss', 'content': 0.07845838367938995, 'timestamp': '2025-09-10 02:40:18.532905', 'step': 12182, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 3322488817984}, 'timestamp': '2025-09-10 02:40:18.567551', 'step': 12182, 'epoch': 2} {'type': 'loss', 'content': 0.05628290772438049, 'timestamp': '2025-09-10 02:40:18.570352', 'step': 12183, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 2847885091968}, 'timestamp': '2025-09-10 02:40:18.600622', 'step': 12183, 'epoch': 2} {'type': 'loss', 'content': 0.04189623147249222, 'timestamp': '2025-09-10 02:40:18.624251', 'step': 12184, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 3322488817984}, 'timestamp': '2025-09-10 02:40:18.654735', 'step': 12184, 'epoch': 2} {'type': 'loss', 'content': 0.05173570290207863, 'timestamp': '2025-09-10 02:40:18.657122', 'step': 12185, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 2847885091968}, 'timestamp': '2025-09-10 02:40:18.687264', 'step': 12185, 'epoch': 2} {'type': 'loss', 'content': 0.2641446590423584, 'timestamp': '2025-09-10 02:40:18.689696', 'step': 12186, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 3322488817984}, 'timestamp': '2025-09-10 02:40:18.720286', 'step': 12186, 'epoch': 2} {'type': 'loss', 'content': 0.19664356112480164, 'timestamp': '2025-09-10 02:40:18.722908', 'step': 12187, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 80], 'flops': 2373281365952}, 'timestamp': '2025-09-10 02:40:18.752993', 'step': 12187, 'epoch': 2} {'type': 'loss', 'content': 0.19942718744277954, 'timestamp': '2025-09-10 02:40:18.777350', 'step': 12188, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 2847885091968}, 'timestamp': '2025-09-10 02:40:18.807965', 'step': 12188, 'epoch': 2} {'type': 'loss', 'content': 0.04585789144039154, 'timestamp': '2025-09-10 02:40:18.810097', 'step': 12189, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 2847885091968}, 'timestamp': '2025-09-10 02:40:18.841760', 'step': 12189, 'epoch': 2} {'type': 'loss', 'content': 0.026570947840809822, 'timestamp': '2025-09-10 02:40:18.844352', 'step': 12190, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 2847885091968}, 'timestamp': '2025-09-10 02:40:18.874222', 'step': 12190, 'epoch': 2} {'type': 'loss', 'content': 0.11102037131786346, 'timestamp': '2025-09-10 02:40:18.876470', 'step': 12191, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 3322488817984}, 'timestamp': '2025-09-10 02:40:18.906247', 'step': 12191, 'epoch': 2} {'type': 'loss', 'content': 0.11244329810142517, 'timestamp': '2025-09-10 02:40:18.929735', 'step': 12192, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 3322488817984}, 'timestamp': '2025-09-10 02:40:18.959725', 'step': 12192, 'epoch': 2} {'type': 'loss', 'content': 0.10016199201345444, 'timestamp': '2025-09-10 02:40:18.962090', 'step': 12193, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 2847885091968}, 'timestamp': '2025-09-10 02:40:18.991778', 'step': 12193, 'epoch': 2} {'type': 'loss', 'content': 0.08496037125587463, 'timestamp': '2025-09-10 02:40:18.994137', 'step': 12194, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 80], 'flops': 2373281365952}, 'timestamp': '2025-09-10 02:40:19.024253', 'step': 12194, 'epoch': 2} {'type': 'loss', 'content': 0.20554021000862122, 'timestamp': '2025-09-10 02:40:19.027041', 'step': 12195, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 2847885091968}, 'timestamp': '2025-09-10 02:40:19.057409', 'step': 12195, 'epoch': 2} {'type': 'loss', 'content': 0.09747253358364105, 'timestamp': '2025-09-10 02:40:19.080840', 'step': 12196, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 2847885091968}, 'timestamp': '2025-09-10 02:40:19.110764', 'step': 12196, 'epoch': 2} {'type': 'loss', 'content': 0.04734640568494797, 'timestamp': '2025-09-10 02:40:19.113110', 'step': 12197, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 128], 'flops': 3797092544000}, 'timestamp': '2025-09-10 02:40:19.144625', 'step': 12197, 'epoch': 2} {'type': 'loss', 'content': 0.1391240656375885, 'timestamp': '2025-09-10 02:40:19.153428', 'step': 12198, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 2847885091968}, 'timestamp': '2025-09-10 02:40:19.185705', 'step': 12198, 'epoch': 2} {'type': 'loss', 'content': 0.08371111750602722, 'timestamp': '2025-09-10 02:40:19.188672', 'step': 12199, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 3322488817984}, 'timestamp': '2025-09-10 02:40:19.219773', 'step': 12199, 'epoch': 2} {'type': 'loss', 'content': 0.10722178220748901, 'timestamp': '2025-09-10 02:40:19.244497', 'step': 12200, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 3322488817984}, 'timestamp': '2025-09-10 02:40:19.274827', 'step': 12200, 'epoch': 2} {'type': 'loss', 'content': 0.136327862739563, 'timestamp': '2025-09-10 02:40:19.276773', 'step': 12201, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 2847885091968}, 'timestamp': '2025-09-10 02:40:19.307245', 'step': 12201, 'epoch': 2} {'type': 'loss', 'content': 0.06369469314813614, 'timestamp': '2025-09-10 02:40:19.309536', 'step': 12202, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 2847885091968}, 'timestamp': '2025-09-10 02:40:19.339798', 'step': 12202, 'epoch': 2} {'type': 'loss', 'content': 0.12848305702209473, 'timestamp': '2025-09-10 02:40:19.342548', 'step': 12203, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 80], 'flops': 2373281365952}, 'timestamp': '2025-09-10 02:40:19.373406', 'step': 12203, 'epoch': 2} {'type': 'loss', 'content': 0.030710166320204735, 'timestamp': '2025-09-10 02:40:19.396761', 'step': 12204, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 80], 'flops': 2373281365952}, 'timestamp': '2025-09-10 02:40:19.427201', 'step': 12204, 'epoch': 2} {'type': 'loss', 'content': 0.0859575867652893, 'timestamp': '2025-09-10 02:40:19.429161', 'step': 12205, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 2847885091968}, 'timestamp': '2025-09-10 02:40:19.459065', 'step': 12205, 'epoch': 2} {'type': 'loss', 'content': 0.21749049425125122, 'timestamp': '2025-09-10 02:40:19.461175', 'step': 12206, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 2847885091968}, 'timestamp': '2025-09-10 02:40:19.492292', 'step': 12206, 'epoch': 2} {'type': 'loss', 'content': 0.10147837549448013, 'timestamp': '2025-09-10 02:40:19.494633', 'step': 12207, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 3322488817984}, 'timestamp': '2025-09-10 02:40:19.524775', 'step': 12207, 'epoch': 2} {'type': 'loss', 'content': 0.05943930521607399, 'timestamp': '2025-09-10 02:40:19.548887', 'step': 12208, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 144], 'flops': 4271696270016}, 'timestamp': '2025-09-10 02:40:19.578776', 'step': 12208, 'epoch': 2} {'type': 'loss', 'content': 0.13269305229187012, 'timestamp': '2025-09-10 02:40:19.580910', 'step': 12209, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 2847885091968}, 'timestamp': '2025-09-10 02:40:19.610854', 'step': 12209, 'epoch': 2} {'type': 'loss', 'content': 0.057368841022253036, 'timestamp': '2025-09-10 02:40:19.613422', 'step': 12210, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 128], 'flops': 3797092544000}, 'timestamp': '2025-09-10 02:40:19.644195', 'step': 12210, 'epoch': 2} {'type': 'loss', 'content': 0.14865323901176453, 'timestamp': '2025-09-10 02:40:19.651864', 'step': 12211, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 80], 'flops': 2373281365952}, 'timestamp': '2025-09-10 02:40:19.694204', 'step': 12211, 'epoch': 2} {'type': 'loss', 'content': 0.1198546439409256, 'timestamp': '2025-09-10 02:40:19.717731', 'step': 12212, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 3322488817984}, 'timestamp': '2025-09-10 02:40:19.749479', 'step': 12212, 'epoch': 2} {'type': 'loss', 'content': 0.16093416512012482, 'timestamp': '2025-09-10 02:40:19.752532', 'step': 12213, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 3322488817984}, 'timestamp': '2025-09-10 02:40:19.783272', 'step': 12213, 'epoch': 2} {'type': 'loss', 'content': 0.157334566116333, 'timestamp': '2025-09-10 02:40:19.785505', 'step': 12214, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 3322488817984}, 'timestamp': '2025-09-10 02:40:19.816471', 'step': 12214, 'epoch': 2} {'type': 'loss', 'content': 0.1429518759250641, 'timestamp': '2025-09-10 02:40:19.818657', 'step': 12215, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 3322488817984}, 'timestamp': '2025-09-10 02:40:19.848786', 'step': 12215, 'epoch': 2} {'type': 'loss', 'content': 0.14137890934944153, 'timestamp': '2025-09-10 02:40:19.872341', 'step': 12216, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 3322488817984}, 'timestamp': '2025-09-10 02:40:19.902299', 'step': 12216, 'epoch': 2} {'type': 'loss', 'content': 0.11861597001552582, 'timestamp': '2025-09-10 02:40:19.904533', 'step': 12217, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 128], 'flops': 3797092544000}, 'timestamp': '2025-09-10 02:40:19.934566', 'step': 12217, 'epoch': 2} {'type': 'loss', 'content': 0.09694827347993851, 'timestamp': '2025-09-10 02:40:19.936721', 'step': 12218, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 80], 'flops': 2373281365952}, 'timestamp': '2025-09-10 02:40:19.966431', 'step': 12218, 'epoch': 2} {'type': 'loss', 'content': 0.14819450676441193, 'timestamp': '2025-09-10 02:40:19.968874', 'step': 12219, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 3322488817984}, 'timestamp': '2025-09-10 02:40:19.999308', 'step': 12219, 'epoch': 2} {'type': 'loss', 'content': 0.06049749627709389, 'timestamp': '2025-09-10 02:40:20.023016', 'step': 12220, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 2847885091968}, 'timestamp': '2025-09-10 02:40:20.056577', 'step': 12220, 'epoch': 2} {'type': 'loss', 'content': 0.12478628009557724, 'timestamp': '2025-09-10 02:40:20.059585', 'step': 12221, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 2847885091968}, 'timestamp': '2025-09-10 02:40:20.090981', 'step': 12221, 'epoch': 2} {'type': 'loss', 'content': 0.03596888482570648, 'timestamp': '2025-09-10 02:40:20.093638', 'step': 12222, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 2847885091968}, 'timestamp': '2025-09-10 02:40:20.124226', 'step': 12222, 'epoch': 2} {'type': 'loss', 'content': 0.04894287884235382, 'timestamp': '2025-09-10 02:40:20.126665', 'step': 12223, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 144], 'flops': 4271696270016}, 'timestamp': '2025-09-10 02:40:20.158649', 'step': 12223, 'epoch': 2} {'type': 'loss', 'content': 0.10239066183567047, 'timestamp': '2025-09-10 02:40:20.185334', 'step': 12224, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 2847885091968}, 'timestamp': '2025-09-10 02:40:20.216129', 'step': 12224, 'epoch': 2} {'type': 'loss', 'content': 0.1141485795378685, 'timestamp': '2025-09-10 02:40:20.218444', 'step': 12225, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 144], 'flops': 4271696270016}, 'timestamp': '2025-09-10 02:40:20.249491', 'step': 12225, 'epoch': 2} {'type': 'loss', 'content': 0.25964152812957764, 'timestamp': '2025-09-10 02:40:20.252098', 'step': 12226, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 2847885091968}, 'timestamp': '2025-09-10 02:40:20.282078', 'step': 12226, 'epoch': 2} {'type': 'loss', 'content': 0.07935847342014313, 'timestamp': '2025-09-10 02:40:20.284786', 'step': 12227, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 144], 'flops': 4271696270016}, 'timestamp': '2025-09-10 02:40:20.316333', 'step': 12227, 'epoch': 2} {'type': 'loss', 'content': 0.14176613092422485, 'timestamp': '2025-09-10 02:40:20.340387', 'step': 12228, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 3322488817984}, 'timestamp': '2025-09-10 02:40:20.370487', 'step': 12228, 'epoch': 2} {'type': 'loss', 'content': 0.09032437950372696, 'timestamp': '2025-09-10 02:40:20.372990', 'step': 12229, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 3322488817984}, 'timestamp': '2025-09-10 02:40:20.402911', 'step': 12229, 'epoch': 2} {'type': 'loss', 'content': 0.14002150297164917, 'timestamp': '2025-09-10 02:40:20.405752', 'step': 12230, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 2847885091968}, 'timestamp': '2025-09-10 02:40:20.436200', 'step': 12230, 'epoch': 2} {'type': 'loss', 'content': 0.06870761513710022, 'timestamp': '2025-09-10 02:40:20.439388', 'step': 12231, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 2847885091968}, 'timestamp': '2025-09-10 02:40:20.469901', 'step': 12231, 'epoch': 2} {'type': 'loss', 'content': 0.041619881987571716, 'timestamp': '2025-09-10 02:40:20.493819', 'step': 12232, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 2847885091968}, 'timestamp': '2025-09-10 02:40:20.525004', 'step': 12232, 'epoch': 2} {'type': 'loss', 'content': 0.15365257859230042, 'timestamp': '2025-09-10 02:40:20.527531', 'step': 12233, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 128], 'flops': 3797092544000}, 'timestamp': '2025-09-10 02:40:20.557840', 'step': 12233, 'epoch': 2} {'type': 'loss', 'content': 0.1501137912273407, 'timestamp': '2025-09-10 02:40:20.560248', 'step': 12234, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 128], 'flops': 3797092544000}, 'timestamp': '2025-09-10 02:40:20.590186', 'step': 12234, 'epoch': 2} {'type': 'loss', 'content': 0.15666058659553528, 'timestamp': '2025-09-10 02:40:20.592638', 'step': 12235, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 2847885091968}, 'timestamp': '2025-09-10 02:40:20.623373', 'step': 12235, 'epoch': 2} {'type': 'loss', 'content': 0.05312865599989891, 'timestamp': '2025-09-10 02:40:20.647212', 'step': 12236, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 128], 'flops': 3797092544000}, 'timestamp': '2025-09-10 02:40:20.677852', 'step': 12236, 'epoch': 2} {'type': 'loss', 'content': 0.10465756803750992, 'timestamp': '2025-09-10 02:40:20.679848', 'step': 12237, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 3322488817984}, 'timestamp': '2025-09-10 02:40:20.709968', 'step': 12237, 'epoch': 2} {'type': 'loss', 'content': 0.056539442390203476, 'timestamp': '2025-09-10 02:40:20.712592', 'step': 12238, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 2847885091968}, 'timestamp': '2025-09-10 02:40:20.743476', 'step': 12238, 'epoch': 2} {'type': 'loss', 'content': 0.09144564718008041, 'timestamp': '2025-09-10 02:40:20.745708', 'step': 12239, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 3322488817984}, 'timestamp': '2025-09-10 02:40:20.775465', 'step': 12239, 'epoch': 2} {'type': 'loss', 'content': 0.09053365886211395, 'timestamp': '2025-09-10 02:40:20.798578', 'step': 12240, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 80], 'flops': 2373281365952}, 'timestamp': '2025-09-10 02:40:20.828565', 'step': 12240, 'epoch': 2} {'type': 'loss', 'content': 0.19361844658851624, 'timestamp': '2025-09-10 02:40:20.830893', 'step': 12241, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 3322488817984}, 'timestamp': '2025-09-10 02:40:20.861411', 'step': 12241, 'epoch': 2} {'type': 'loss', 'content': 0.20152342319488525, 'timestamp': '2025-09-10 02:40:20.864157', 'step': 12242, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 3322488817984}, 'timestamp': '2025-09-10 02:40:20.894115', 'step': 12242, 'epoch': 2} {'type': 'loss', 'content': 0.0771227702498436, 'timestamp': '2025-09-10 02:40:20.896571', 'step': 12243, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 2847885091968}, 'timestamp': '2025-09-10 02:40:20.926967', 'step': 12243, 'epoch': 2} {'type': 'loss', 'content': 0.2545156180858612, 'timestamp': '2025-09-10 02:40:20.952600', 'step': 12244, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 3322488817984}, 'timestamp': '2025-09-10 02:40:20.983781', 'step': 12244, 'epoch': 2} {'type': 'loss', 'content': 0.08424901962280273, 'timestamp': '2025-09-10 02:40:20.986529', 'step': 12245, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 2847885091968}, 'timestamp': '2025-09-10 02:40:21.017558', 'step': 12245, 'epoch': 2} {'type': 'loss', 'content': 0.0801534578204155, 'timestamp': '2025-09-10 02:40:21.020192', 'step': 12246, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 2847885091968}, 'timestamp': '2025-09-10 02:40:21.051179', 'step': 12246, 'epoch': 2} {'type': 'loss', 'content': 0.0464417040348053, 'timestamp': '2025-09-10 02:40:21.053555', 'step': 12247, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 2847885091968}, 'timestamp': '2025-09-10 02:40:21.083916', 'step': 12247, 'epoch': 2} {'type': 'loss', 'content': 0.04878394678235054, 'timestamp': '2025-09-10 02:40:21.107469', 'step': 12248, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 128], 'flops': 3797092544000}, 'timestamp': '2025-09-10 02:40:21.138144', 'step': 12248, 'epoch': 2} {'type': 'loss', 'content': 0.12003172934055328, 'timestamp': '2025-09-10 02:40:21.140996', 'step': 12249, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 2847885091968}, 'timestamp': '2025-09-10 02:40:21.171757', 'step': 12249, 'epoch': 2} {'type': 'loss', 'content': 0.10011925548315048, 'timestamp': '2025-09-10 02:40:21.174235', 'step': 12250, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 2847885091968}, 'timestamp': '2025-09-10 02:40:21.204736', 'step': 12250, 'epoch': 2} {'type': 'loss', 'content': 0.07687786221504211, 'timestamp': '2025-09-10 02:40:21.207227', 'step': 12251, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 144], 'flops': 4271696270016}, 'timestamp': '2025-09-10 02:40:21.237249', 'step': 12251, 'epoch': 2} {'type': 'loss', 'content': 0.2156018614768982, 'timestamp': '2025-09-10 02:40:21.260899', 'step': 12252, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 2847885091968}, 'timestamp': '2025-09-10 02:40:21.291065', 'step': 12252, 'epoch': 2} {'type': 'loss', 'content': 0.1892736703157425, 'timestamp': '2025-09-10 02:40:21.293972', 'step': 12253, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 3322488817984}, 'timestamp': '2025-09-10 02:40:21.326205', 'step': 12253, 'epoch': 2} {'type': 'loss', 'content': 0.10197633504867554, 'timestamp': '2025-09-10 02:40:21.328438', 'step': 12254, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 144], 'flops': 4271696270016}, 'timestamp': '2025-09-10 02:40:21.358518', 'step': 12254, 'epoch': 2} {'type': 'loss', 'content': 0.10776809602975845, 'timestamp': '2025-09-10 02:40:21.360983', 'step': 12255, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 3322488817984}, 'timestamp': '2025-09-10 02:40:21.398840', 'step': 12255, 'epoch': 2} {'type': 'loss', 'content': 0.06680924445390701, 'timestamp': '2025-09-10 02:40:21.430039', 'step': 12256, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 80], 'flops': 2373281365952}, 'timestamp': '2025-09-10 02:40:21.461648', 'step': 12256, 'epoch': 2} {'type': 'loss', 'content': 0.113813616335392, 'timestamp': '2025-09-10 02:40:21.464770', 'step': 12257, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 3322488817984}, 'timestamp': '2025-09-10 02:40:21.500289', 'step': 12257, 'epoch': 2} {'type': 'loss', 'content': 0.13742920756340027, 'timestamp': '2025-09-10 02:40:21.504197', 'step': 12258, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 2847885091968}, 'timestamp': '2025-09-10 02:40:21.539384', 'step': 12258, 'epoch': 2} {'type': 'loss', 'content': 0.08907094597816467, 'timestamp': '2025-09-10 02:40:21.541953', 'step': 12259, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 2847885091968}, 'timestamp': '2025-09-10 02:40:21.579454', 'step': 12259, 'epoch': 2} {'type': 'loss', 'content': 0.0780566930770874, 'timestamp': '2025-09-10 02:40:21.603837', 'step': 12260, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 128], 'flops': 3797092544000}, 'timestamp': '2025-09-10 02:40:21.638583', 'step': 12260, 'epoch': 2} {'type': 'loss', 'content': 0.06829547882080078, 'timestamp': '2025-09-10 02:40:21.641191', 'step': 12261, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 3322488817984}, 'timestamp': '2025-09-10 02:40:21.673728', 'step': 12261, 'epoch': 2} {'type': 'loss', 'content': 0.12060091644525528, 'timestamp': '2025-09-10 02:40:21.676474', 'step': 12262, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 3322488817984}, 'timestamp': '2025-09-10 02:40:21.707530', 'step': 12262, 'epoch': 2} {'type': 'loss', 'content': 0.1676415354013443, 'timestamp': '2025-09-10 02:40:21.710093', 'step': 12263, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 128], 'flops': 3797092544000}, 'timestamp': '2025-09-10 02:40:21.741598', 'step': 12263, 'epoch': 2} {'type': 'loss', 'content': 0.14343619346618652, 'timestamp': '2025-09-10 02:40:21.764986', 'step': 12264, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 3322488817984}, 'timestamp': '2025-09-10 02:40:21.815318', 'step': 12264, 'epoch': 2} {'type': 'loss', 'content': 0.25669217109680176, 'timestamp': '2025-09-10 02:40:21.819871', 'step': 12265, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 2847885091968}, 'timestamp': '2025-09-10 02:40:21.851381', 'step': 12265, 'epoch': 2} {'type': 'loss', 'content': 0.11437857896089554, 'timestamp': '2025-09-10 02:40:21.853489', 'step': 12266, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 2847885091968}, 'timestamp': '2025-09-10 02:40:21.888181', 'step': 12266, 'epoch': 2} {'type': 'loss', 'content': 0.04245414584875107, 'timestamp': '2025-09-10 02:40:21.890443', 'step': 12267, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 128], 'flops': 3797092544000}, 'timestamp': '2025-09-10 02:40:21.921303', 'step': 12267, 'epoch': 2} {'type': 'loss', 'content': 0.12835806608200073, 'timestamp': '2025-09-10 02:40:21.944768', 'step': 12268, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 128], 'flops': 3797092544000}, 'timestamp': '2025-09-10 02:40:21.985229', 'step': 12268, 'epoch': 2} {'type': 'loss', 'content': 0.07856802642345428, 'timestamp': '2025-09-10 02:40:21.987540', 'step': 12269, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 160], 'flops': 4746299996032}, 'timestamp': '2025-09-10 02:40:22.018805', 'step': 12269, 'epoch': 2} {'type': 'loss', 'content': 0.1538260281085968, 'timestamp': '2025-09-10 02:40:22.022140', 'step': 12270, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 2847885091968}, 'timestamp': '2025-09-10 02:40:22.056572', 'step': 12270, 'epoch': 2} {'type': 'loss', 'content': 0.12500493228435516, 'timestamp': '2025-09-10 02:40:22.060125', 'step': 12271, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 3322488817984}, 'timestamp': '2025-09-10 02:40:22.094757', 'step': 12271, 'epoch': 2} {'type': 'loss', 'content': 0.13134004175662994, 'timestamp': '2025-09-10 02:40:22.119128', 'step': 12272, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 3322488817984}, 'timestamp': '2025-09-10 02:40:22.154387', 'step': 12272, 'epoch': 2} {'type': 'loss', 'content': 0.14942938089370728, 'timestamp': '2025-09-10 02:40:22.159587', 'step': 12273, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 2847885091968}, 'timestamp': '2025-09-10 02:40:22.190187', 'step': 12273, 'epoch': 2} {'type': 'loss', 'content': 0.10791880637407303, 'timestamp': '2025-09-10 02:40:22.194064', 'step': 12274, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 2847885091968}, 'timestamp': '2025-09-10 02:40:22.225458', 'step': 12274, 'epoch': 2} {'type': 'loss', 'content': 0.06933340430259705, 'timestamp': '2025-09-10 02:40:22.229727', 'step': 12275, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 3322488817984}, 'timestamp': '2025-09-10 02:40:22.261500', 'step': 12275, 'epoch': 2} {'type': 'loss', 'content': 0.07719606906175613, 'timestamp': '2025-09-10 02:40:22.285329', 'step': 12276, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 3322488817984}, 'timestamp': '2025-09-10 02:40:22.323348', 'step': 12276, 'epoch': 2} {'type': 'loss', 'content': 0.07404924184083939, 'timestamp': '2025-09-10 02:40:22.326962', 'step': 12277, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 2847885091968}, 'timestamp': '2025-09-10 02:40:22.357459', 'step': 12277, 'epoch': 2} {'type': 'loss', 'content': 0.09643375873565674, 'timestamp': '2025-09-10 02:40:22.359642', 'step': 12278, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 2847885091968}, 'timestamp': '2025-09-10 02:40:22.391142', 'step': 12278, 'epoch': 2} {'type': 'loss', 'content': 0.051361389458179474, 'timestamp': '2025-09-10 02:40:22.393306', 'step': 12279, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 144], 'flops': 4271696270016}, 'timestamp': '2025-09-10 02:40:22.426596', 'step': 12279, 'epoch': 2} {'type': 'loss', 'content': 0.06503934413194656, 'timestamp': '2025-09-10 02:40:22.450190', 'step': 12280, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 2847885091968}, 'timestamp': '2025-09-10 02:40:22.480435', 'step': 12280, 'epoch': 2} {'type': 'loss', 'content': 0.04893482103943825, 'timestamp': '2025-09-10 02:40:22.483012', 'step': 12281, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 3322488817984}, 'timestamp': '2025-09-10 02:40:22.513780', 'step': 12281, 'epoch': 2} {'type': 'loss', 'content': 0.16390006244182587, 'timestamp': '2025-09-10 02:40:22.516449', 'step': 12282, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 80], 'flops': 2373281365952}, 'timestamp': '2025-09-10 02:40:22.546767', 'step': 12282, 'epoch': 2} {'type': 'loss', 'content': 0.02028212882578373, 'timestamp': '2025-09-10 02:40:22.549566', 'step': 12283, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 128], 'flops': 3797092544000}, 'timestamp': '2025-09-10 02:40:22.582325', 'step': 12283, 'epoch': 2} {'type': 'loss', 'content': 0.050066299736499786, 'timestamp': '2025-09-10 02:40:22.606228', 'step': 12284, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 3322488817984}, 'timestamp': '2025-09-10 02:40:22.637131', 'step': 12284, 'epoch': 2} {'type': 'loss', 'content': 0.05778028070926666, 'timestamp': '2025-09-10 02:40:22.639701', 'step': 12285, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 3322488817984}, 'timestamp': '2025-09-10 02:40:22.669939', 'step': 12285, 'epoch': 2} {'type': 'loss', 'content': 0.07893256843090057, 'timestamp': '2025-09-10 02:40:22.673429', 'step': 12286, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 2847885091968}, 'timestamp': '2025-09-10 02:40:22.704772', 'step': 12286, 'epoch': 2} {'type': 'loss', 'content': 0.02211565151810646, 'timestamp': '2025-09-10 02:40:22.707178', 'step': 12287, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 2847885091968}, 'timestamp': '2025-09-10 02:40:22.737536', 'step': 12287, 'epoch': 2} {'type': 'loss', 'content': 0.07607832551002502, 'timestamp': '2025-09-10 02:40:22.761238', 'step': 12288, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 128], 'flops': 3797092544000}, 'timestamp': '2025-09-10 02:40:22.792335', 'step': 12288, 'epoch': 2} {'type': 'loss', 'content': 0.07141736149787903, 'timestamp': '2025-09-10 02:40:22.794733', 'step': 12289, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 3322488817984}, 'timestamp': '2025-09-10 02:40:22.825389', 'step': 12289, 'epoch': 2} {'type': 'loss', 'content': 0.07881435751914978, 'timestamp': '2025-09-10 02:40:22.827784', 'step': 12290, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 128], 'flops': 3797092544000}, 'timestamp': '2025-09-10 02:40:22.858510', 'step': 12290, 'epoch': 2} {'type': 'loss', 'content': 0.1029086783528328, 'timestamp': '2025-09-10 02:40:22.860654', 'step': 12291, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 3322488817984}, 'timestamp': '2025-09-10 02:40:22.892659', 'step': 12291, 'epoch': 2} {'type': 'loss', 'content': 0.12457361817359924, 'timestamp': '2025-09-10 02:40:22.916329', 'step': 12292, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 80], 'flops': 2373281365952}, 'timestamp': '2025-09-10 02:40:22.948124', 'step': 12292, 'epoch': 2} {'type': 'loss', 'content': 0.057004414498806, 'timestamp': '2025-09-10 02:40:22.950533', 'step': 12293, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 128], 'flops': 3797092544000}, 'timestamp': '2025-09-10 02:40:22.981434', 'step': 12293, 'epoch': 2} {'type': 'loss', 'content': 0.06893187761306763, 'timestamp': '2025-09-10 02:40:22.983671', 'step': 12294, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 128], 'flops': 3797092544000}, 'timestamp': '2025-09-10 02:40:23.013996', 'step': 12294, 'epoch': 2} {'type': 'loss', 'content': 0.04500272125005722, 'timestamp': '2025-09-10 02:40:23.016366', 'step': 12295, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 2847885091968}, 'timestamp': '2025-09-10 02:40:23.049879', 'step': 12295, 'epoch': 2} {'type': 'loss', 'content': 0.11423857510089874, 'timestamp': '2025-09-10 02:40:23.073610', 'step': 12296, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 3322488817984}, 'timestamp': '2025-09-10 02:40:23.105165', 'step': 12296, 'epoch': 2} {'type': 'loss', 'content': 0.03433211147785187, 'timestamp': '2025-09-10 02:40:23.108632', 'step': 12297, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 2847885091968}, 'timestamp': '2025-09-10 02:40:23.139041', 'step': 12297, 'epoch': 2} {'type': 'loss', 'content': 0.10991441458463669, 'timestamp': '2025-09-10 02:40:23.141697', 'step': 12298, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 3322488817984}, 'timestamp': '2025-09-10 02:40:23.172739', 'step': 12298, 'epoch': 2} {'type': 'loss', 'content': 0.17346049845218658, 'timestamp': '2025-09-10 02:40:23.176416', 'step': 12299, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 3322488817984}, 'timestamp': '2025-09-10 02:40:23.207969', 'step': 12299, 'epoch': 2} {'type': 'loss', 'content': 0.08054978400468826, 'timestamp': '2025-09-10 02:40:23.232276', 'step': 12300, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 144], 'flops': 4271696270016}, 'timestamp': '2025-09-10 02:40:23.263223', 'step': 12300, 'epoch': 2} {'type': 'loss', 'content': 0.09800183027982712, 'timestamp': '2025-09-10 02:40:23.265753', 'step': 12301, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 128], 'flops': 3797092544000}, 'timestamp': '2025-09-10 02:40:23.297942', 'step': 12301, 'epoch': 2} {'type': 'loss', 'content': 0.10579575598239899, 'timestamp': '2025-09-10 02:40:23.301064', 'step': 12302, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 144], 'flops': 4271696270016}, 'timestamp': '2025-09-10 02:40:23.331774', 'step': 12302, 'epoch': 2} {'type': 'loss', 'content': 0.14389050006866455, 'timestamp': '2025-09-10 02:40:23.334230', 'step': 12303, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 128], 'flops': 3797092544000}, 'timestamp': '2025-09-10 02:40:23.364133', 'step': 12303, 'epoch': 2} {'type': 'loss', 'content': 0.07511566579341888, 'timestamp': '2025-09-10 02:40:23.387986', 'step': 12304, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 128], 'flops': 3797092544000}, 'timestamp': '2025-09-10 02:40:23.418327', 'step': 12304, 'epoch': 2} {'type': 'loss', 'content': 0.13992977142333984, 'timestamp': '2025-09-10 02:40:23.420733', 'step': 12305, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 128], 'flops': 3797092544000}, 'timestamp': '2025-09-10 02:40:23.451289', 'step': 12305, 'epoch': 2} {'type': 'loss', 'content': 0.057301074266433716, 'timestamp': '2025-09-10 02:40:23.453937', 'step': 12306, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 3322488817984}, 'timestamp': '2025-09-10 02:40:23.484616', 'step': 12306, 'epoch': 2} {'type': 'loss', 'content': 0.11630384624004364, 'timestamp': '2025-09-10 02:40:23.487274', 'step': 12307, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 128], 'flops': 3797092544000}, 'timestamp': '2025-09-10 02:40:23.518190', 'step': 12307, 'epoch': 2} {'type': 'loss', 'content': 0.0859222337603569, 'timestamp': '2025-09-10 02:40:23.541841', 'step': 12308, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 3322488817984}, 'timestamp': '2025-09-10 02:40:23.572211', 'step': 12308, 'epoch': 2} {'type': 'loss', 'content': 0.19267775118350983, 'timestamp': '2025-09-10 02:40:23.574777', 'step': 12309, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 2847885091968}, 'timestamp': '2025-09-10 02:40:23.608279', 'step': 12309, 'epoch': 2} {'type': 'loss', 'content': 0.13309913873672485, 'timestamp': '2025-09-10 02:40:23.610493', 'step': 12310, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 2847885091968}, 'timestamp': '2025-09-10 02:40:23.641675', 'step': 12310, 'epoch': 2} {'type': 'loss', 'content': 0.08631793409585953, 'timestamp': '2025-09-10 02:40:23.644158', 'step': 12311, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 144], 'flops': 4271696270016}, 'timestamp': '2025-09-10 02:40:23.675770', 'step': 12311, 'epoch': 2} {'type': 'loss', 'content': 0.12089160829782486, 'timestamp': '2025-09-10 02:40:23.701367', 'step': 12312, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 3322488817984}, 'timestamp': '2025-09-10 02:40:23.732977', 'step': 12312, 'epoch': 2} {'type': 'loss', 'content': 0.04903160780668259, 'timestamp': '2025-09-10 02:40:23.735387', 'step': 12313, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 2847885091968}, 'timestamp': '2025-09-10 02:40:23.766552', 'step': 12313, 'epoch': 2} {'type': 'loss', 'content': 0.07911507785320282, 'timestamp': '2025-09-10 02:40:23.769096', 'step': 12314, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 2847885091968}, 'timestamp': '2025-09-10 02:40:23.799233', 'step': 12314, 'epoch': 2} {'type': 'loss', 'content': 0.08843182772397995, 'timestamp': '2025-09-10 02:40:23.801914', 'step': 12315, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 144], 'flops': 4271696270016}, 'timestamp': '2025-09-10 02:40:23.833386', 'step': 12315, 'epoch': 2} {'type': 'loss', 'content': 0.12146762758493423, 'timestamp': '2025-09-10 02:40:23.857254', 'step': 12316, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 80], 'flops': 2373281365952}, 'timestamp': '2025-09-10 02:40:23.888506', 'step': 12316, 'epoch': 2} {'type': 'loss', 'content': 0.03706593066453934, 'timestamp': '2025-09-10 02:40:23.890664', 'step': 12317, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 128], 'flops': 3797092544000}, 'timestamp': '2025-09-10 02:40:23.921411', 'step': 12317, 'epoch': 2} {'type': 'loss', 'content': 0.22477005422115326, 'timestamp': '2025-09-10 02:40:23.930273', 'step': 12318, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 2847885091968}, 'timestamp': '2025-09-10 02:40:23.975404', 'step': 12318, 'epoch': 2} {'type': 'loss', 'content': 0.13653090596199036, 'timestamp': '2025-09-10 02:40:23.983565', 'step': 12319, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 3322488817984}, 'timestamp': '2025-09-10 02:40:24.015530', 'step': 12319, 'epoch': 2} {'type': 'loss', 'content': 0.07748660445213318, 'timestamp': '2025-09-10 02:40:24.039388', 'step': 12320, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 2847885091968}, 'timestamp': '2025-09-10 02:40:24.070337', 'step': 12320, 'epoch': 2} {'type': 'loss', 'content': 0.12687133252620697, 'timestamp': '2025-09-10 02:40:24.072962', 'step': 12321, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 3322488817984}, 'timestamp': '2025-09-10 02:40:24.103649', 'step': 12321, 'epoch': 2} {'type': 'loss', 'content': 0.10918105393648148, 'timestamp': '2025-09-10 02:40:24.106241', 'step': 12322, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 144], 'flops': 4271696270016}, 'timestamp': '2025-09-10 02:40:24.136964', 'step': 12322, 'epoch': 2} {'type': 'loss', 'content': 0.1537914276123047, 'timestamp': '2025-09-10 02:40:24.139840', 'step': 12323, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 128], 'flops': 3797092544000}, 'timestamp': '2025-09-10 02:40:24.170944', 'step': 12323, 'epoch': 2} {'type': 'loss', 'content': 0.06044115498661995, 'timestamp': '2025-09-10 02:40:24.196084', 'step': 12324, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 128], 'flops': 3797092544000}, 'timestamp': '2025-09-10 02:40:24.227326', 'step': 12324, 'epoch': 2} {'type': 'loss', 'content': 0.11308998614549637, 'timestamp': '2025-09-10 02:40:24.229949', 'step': 12325, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 176], 'flops': 5220903722048}, 'timestamp': '2025-09-10 02:40:24.261033', 'step': 12325, 'epoch': 2} {'type': 'loss', 'content': 0.15447629988193512, 'timestamp': '2025-09-10 02:40:24.265314', 'step': 12326, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 2847885091968}, 'timestamp': '2025-09-10 02:40:24.296654', 'step': 12326, 'epoch': 2} {'type': 'loss', 'content': 0.10496856272220612, 'timestamp': '2025-09-10 02:40:24.299260', 'step': 12327, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 128], 'flops': 3797092544000}, 'timestamp': '2025-09-10 02:40:24.330382', 'step': 12327, 'epoch': 2} {'type': 'loss', 'content': 0.13595424592494965, 'timestamp': '2025-09-10 02:40:24.354856', 'step': 12328, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 3322488817984}, 'timestamp': '2025-09-10 02:40:24.385397', 'step': 12328, 'epoch': 2} {'type': 'loss', 'content': 0.2524796426296234, 'timestamp': '2025-09-10 02:40:24.391790', 'step': 12329, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 3322488817984}, 'timestamp': '2025-09-10 02:40:24.430306', 'step': 12329, 'epoch': 2} {'type': 'loss', 'content': 0.1506846398115158, 'timestamp': '2025-09-10 02:40:24.434599', 'step': 12330, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 3322488817984}, 'timestamp': '2025-09-10 02:40:24.467596', 'step': 12330, 'epoch': 2} {'type': 'loss', 'content': 0.06755811721086502, 'timestamp': '2025-09-10 02:40:24.471235', 'step': 12331, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 2847885091968}, 'timestamp': '2025-09-10 02:40:24.504049', 'step': 12331, 'epoch': 2} {'type': 'loss', 'content': 0.06887296587228775, 'timestamp': '2025-09-10 02:40:24.528463', 'step': 12332, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 2847885091968}, 'timestamp': '2025-09-10 02:40:24.560950', 'step': 12332, 'epoch': 2} {'type': 'loss', 'content': 0.0696585401892662, 'timestamp': '2025-09-10 02:40:24.565445', 'step': 12333, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 2847885091968}, 'timestamp': '2025-09-10 02:40:24.597564', 'step': 12333, 'epoch': 2} {'type': 'loss', 'content': 0.0933544933795929, 'timestamp': '2025-09-10 02:40:24.600104', 'step': 12334, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 2847885091968}, 'timestamp': '2025-09-10 02:40:24.629969', 'step': 12334, 'epoch': 2} {'type': 'loss', 'content': 0.12867897748947144, 'timestamp': '2025-09-10 02:40:24.634343', 'step': 12335, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 3322488817984}, 'timestamp': '2025-09-10 02:40:24.664632', 'step': 12335, 'epoch': 2} {'type': 'loss', 'content': 0.11231506615877151, 'timestamp': '2025-09-10 02:40:24.688753', 'step': 12336, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 128], 'flops': 3797092544000}, 'timestamp': '2025-09-10 02:40:24.721695', 'step': 12336, 'epoch': 2} {'type': 'loss', 'content': 0.1243591457605362, 'timestamp': '2025-09-10 02:40:24.724886', 'step': 12337, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 144], 'flops': 4271696270016}, 'timestamp': '2025-09-10 02:40:24.756024', 'step': 12337, 'epoch': 2} {'type': 'loss', 'content': 0.06749006360769272, 'timestamp': '2025-09-10 02:40:24.758415', 'step': 12338, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 2847885091968}, 'timestamp': '2025-09-10 02:40:24.788396', 'step': 12338, 'epoch': 2} {'type': 'loss', 'content': 0.1980879157781601, 'timestamp': '2025-09-10 02:40:24.791019', 'step': 12339, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 2847885091968}, 'timestamp': '2025-09-10 02:40:24.823397', 'step': 12339, 'epoch': 2} {'type': 'loss', 'content': 0.12890589237213135, 'timestamp': '2025-09-10 02:40:24.847253', 'step': 12340, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 2847885091968}, 'timestamp': '2025-09-10 02:40:24.877610', 'step': 12340, 'epoch': 2} {'type': 'loss', 'content': 0.1468992382287979, 'timestamp': '2025-09-10 02:40:24.880363', 'step': 12341, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 2847885091968}, 'timestamp': '2025-09-10 02:40:24.910203', 'step': 12341, 'epoch': 2} {'type': 'loss', 'content': 0.10577139258384705, 'timestamp': '2025-09-10 02:40:24.912516', 'step': 12342, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 144], 'flops': 4271696270016}, 'timestamp': '2025-09-10 02:40:24.942721', 'step': 12342, 'epoch': 2} {'type': 'loss', 'content': 0.0655190721154213, 'timestamp': '2025-09-10 02:40:24.945213', 'step': 12343, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 128], 'flops': 3797092544000}, 'timestamp': '2025-09-10 02:40:24.976030', 'step': 12343, 'epoch': 2} {'type': 'loss', 'content': 0.10979469120502472, 'timestamp': '2025-09-10 02:40:24.999801', 'step': 12344, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 128], 'flops': 3797092544000}, 'timestamp': '2025-09-10 02:40:25.030286', 'step': 12344, 'epoch': 2} {'type': 'loss', 'content': 0.08808046579360962, 'timestamp': '2025-09-10 02:40:25.033552', 'step': 12345, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 3322488817984}, 'timestamp': '2025-09-10 02:40:25.064271', 'step': 12345, 'epoch': 2} {'type': 'loss', 'content': 0.1286223828792572, 'timestamp': '2025-09-10 02:40:25.066656', 'step': 12346, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 3322488817984}, 'timestamp': '2025-09-10 02:40:25.097323', 'step': 12346, 'epoch': 2} {'type': 'loss', 'content': 0.10824029892683029, 'timestamp': '2025-09-10 02:40:25.099732', 'step': 12347, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 3322488817984}, 'timestamp': '2025-09-10 02:40:25.130375', 'step': 12347, 'epoch': 2} {'type': 'loss', 'content': 0.0795932412147522, 'timestamp': '2025-09-10 02:40:25.154306', 'step': 12348, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 3322488817984}, 'timestamp': '2025-09-10 02:40:25.185204', 'step': 12348, 'epoch': 2} {'type': 'loss', 'content': 0.13888800144195557, 'timestamp': '2025-09-10 02:40:25.187635', 'step': 12349, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 2847885091968}, 'timestamp': '2025-09-10 02:40:25.217997', 'step': 12349, 'epoch': 2} {'type': 'loss', 'content': 0.12464702129364014, 'timestamp': '2025-09-10 02:40:25.220574', 'step': 12350, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 128], 'flops': 3797092544000}, 'timestamp': '2025-09-10 02:40:25.251472', 'step': 12350, 'epoch': 2} {'type': 'loss', 'content': 0.14431732892990112, 'timestamp': '2025-09-10 02:40:25.253983', 'step': 12351, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 3322488817984}, 'timestamp': '2025-09-10 02:40:25.284138', 'step': 12351, 'epoch': 2} {'type': 'loss', 'content': 0.08037064969539642, 'timestamp': '2025-09-10 02:40:25.307982', 'step': 12352, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 2847885091968}, 'timestamp': '2025-09-10 02:40:25.338218', 'step': 12352, 'epoch': 2} {'type': 'loss', 'content': 0.2044253647327423, 'timestamp': '2025-09-10 02:40:25.341252', 'step': 12353, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 2847885091968}, 'timestamp': '2025-09-10 02:40:25.373431', 'step': 12353, 'epoch': 2} {'type': 'loss', 'content': 0.15564607083797455, 'timestamp': '2025-09-10 02:40:25.376441', 'step': 12354, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 144], 'flops': 4271696270016}, 'timestamp': '2025-09-10 02:40:25.408520', 'step': 12354, 'epoch': 2} {'type': 'loss', 'content': 0.18386900424957275, 'timestamp': '2025-09-10 02:40:25.411483', 'step': 12355, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 3322488817984}, 'timestamp': '2025-09-10 02:40:25.443364', 'step': 12355, 'epoch': 2} {'type': 'loss', 'content': 0.060073159635066986, 'timestamp': '2025-09-10 02:40:25.467545', 'step': 12356, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 80], 'flops': 2373281365952}, 'timestamp': '2025-09-10 02:40:25.499249', 'step': 12356, 'epoch': 2} {'type': 'loss', 'content': 0.11176390200853348, 'timestamp': '2025-09-10 02:40:25.504247', 'step': 12357, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 3322488817984}, 'timestamp': '2025-09-10 02:40:25.535763', 'step': 12357, 'epoch': 2} {'type': 'loss', 'content': 0.06263194233179092, 'timestamp': '2025-09-10 02:40:25.538870', 'step': 12358, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 2847885091968}, 'timestamp': '2025-09-10 02:40:25.570585', 'step': 12358, 'epoch': 2} {'type': 'loss', 'content': 0.16120274364948273, 'timestamp': '2025-09-10 02:40:25.573762', 'step': 12359, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 144], 'flops': 4271696270016}, 'timestamp': '2025-09-10 02:40:25.607661', 'step': 12359, 'epoch': 2} {'type': 'loss', 'content': 0.14585505425930023, 'timestamp': '2025-09-10 02:40:25.631868', 'step': 12360, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 2847885091968}, 'timestamp': '2025-09-10 02:40:25.663303', 'step': 12360, 'epoch': 2} {'type': 'loss', 'content': 0.0875946581363678, 'timestamp': '2025-09-10 02:40:25.666233', 'step': 12361, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 144], 'flops': 4271696270016}, 'timestamp': '2025-09-10 02:40:25.697132', 'step': 12361, 'epoch': 2} {'type': 'loss', 'content': 0.1261083334684372, 'timestamp': '2025-09-10 02:40:25.699669', 'step': 12362, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 2847885091968}, 'timestamp': '2025-09-10 02:40:25.732142', 'step': 12362, 'epoch': 2} {'type': 'loss', 'content': 0.14531435072422028, 'timestamp': '2025-09-10 02:40:25.734784', 'step': 12363, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 3322488817984}, 'timestamp': '2025-09-10 02:40:25.765882', 'step': 12363, 'epoch': 2} {'type': 'loss', 'content': 0.05259133130311966, 'timestamp': '2025-09-10 02:40:25.790177', 'step': 12364, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 3322488817984}, 'timestamp': '2025-09-10 02:40:25.820858', 'step': 12364, 'epoch': 2} {'type': 'loss', 'content': 0.09335781633853912, 'timestamp': '2025-09-10 02:40:25.823481', 'step': 12365, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 128], 'flops': 3797092544000}, 'timestamp': '2025-09-10 02:40:25.854558', 'step': 12365, 'epoch': 2} {'type': 'loss', 'content': 0.10015901923179626, 'timestamp': '2025-09-10 02:40:25.857469', 'step': 12366, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 3322488817984}, 'timestamp': '2025-09-10 02:40:25.887933', 'step': 12366, 'epoch': 2} {'type': 'loss', 'content': 0.11551670730113983, 'timestamp': '2025-09-10 02:40:25.890105', 'step': 12367, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 2847885091968}, 'timestamp': '2025-09-10 02:40:25.924071', 'step': 12367, 'epoch': 2} {'type': 'loss', 'content': 0.12046218663454056, 'timestamp': '2025-09-10 02:40:25.947742', 'step': 12368, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 128], 'flops': 3797092544000}, 'timestamp': '2025-09-10 02:40:25.978441', 'step': 12368, 'epoch': 2} {'type': 'loss', 'content': 0.08417107909917831, 'timestamp': '2025-09-10 02:40:25.981756', 'step': 12369, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 128], 'flops': 3797092544000}, 'timestamp': '2025-09-10 02:40:26.012448', 'step': 12369, 'epoch': 2} {'type': 'loss', 'content': 0.09854598343372345, 'timestamp': '2025-09-10 02:40:26.015086', 'step': 12370, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 3322488817984}, 'timestamp': '2025-09-10 02:40:26.045999', 'step': 12370, 'epoch': 2} {'type': 'loss', 'content': 0.14306345582008362, 'timestamp': '2025-09-10 02:40:26.048609', 'step': 12371, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 3322488817984}, 'timestamp': '2025-09-10 02:40:26.079043', 'step': 12371, 'epoch': 2} {'type': 'loss', 'content': 0.12329071015119553, 'timestamp': '2025-09-10 02:40:26.102644', 'step': 12372, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 2847885091968}, 'timestamp': '2025-09-10 02:40:26.134648', 'step': 12372, 'epoch': 2} {'type': 'loss', 'content': 0.06441400200128555, 'timestamp': '2025-09-10 02:40:26.138677', 'step': 12373, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 3322488817984}, 'timestamp': '2025-09-10 02:40:26.169536', 'step': 12373, 'epoch': 2} {'type': 'loss', 'content': 0.1914554089307785, 'timestamp': '2025-09-10 02:40:26.174463', 'step': 12374, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 128], 'flops': 3797092544000}, 'timestamp': '2025-09-10 02:40:26.204387', 'step': 12374, 'epoch': 2} {'type': 'loss', 'content': 0.14890842139720917, 'timestamp': '2025-09-10 02:40:26.207258', 'step': 12375, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 3322488817984}, 'timestamp': '2025-09-10 02:40:26.239522', 'step': 12375, 'epoch': 2} {'type': 'loss', 'content': 0.10315420478582382, 'timestamp': '2025-09-10 02:40:26.262847', 'step': 12376, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 3322488817984}, 'timestamp': '2025-09-10 02:40:26.294301', 'step': 12376, 'epoch': 2} {'type': 'loss', 'content': 0.1386609971523285, 'timestamp': '2025-09-10 02:40:26.296858', 'step': 12377, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 3322488817984}, 'timestamp': '2025-09-10 02:40:26.327107', 'step': 12377, 'epoch': 2} {'type': 'loss', 'content': 0.09671641886234283, 'timestamp': '2025-09-10 02:40:26.329876', 'step': 12378, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 128], 'flops': 3797092544000}, 'timestamp': '2025-09-10 02:40:26.360962', 'step': 12378, 'epoch': 2} {'type': 'loss', 'content': 0.0920877680182457, 'timestamp': '2025-09-10 02:40:26.364315', 'step': 12379, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 3322488817984}, 'timestamp': '2025-09-10 02:40:26.394755', 'step': 12379, 'epoch': 2} {'type': 'loss', 'content': 0.09465187788009644, 'timestamp': '2025-09-10 02:40:26.418429', 'step': 12380, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 3322488817984}, 'timestamp': '2025-09-10 02:40:26.448913', 'step': 12380, 'epoch': 2} {'type': 'loss', 'content': 0.08380241692066193, 'timestamp': '2025-09-10 02:40:26.451338', 'step': 12381, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 2847885091968}, 'timestamp': '2025-09-10 02:40:26.481319', 'step': 12381, 'epoch': 2} {'type': 'loss', 'content': 0.08984261006116867, 'timestamp': '2025-09-10 02:40:26.483796', 'step': 12382, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 2847885091968}, 'timestamp': '2025-09-10 02:40:26.513785', 'step': 12382, 'epoch': 2} {'type': 'loss', 'content': 0.13063016533851624, 'timestamp': '2025-09-10 02:40:26.517123', 'step': 12383, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 128], 'flops': 3797092544000}, 'timestamp': '2025-09-10 02:40:26.547356', 'step': 12383, 'epoch': 2} {'type': 'loss', 'content': 0.07594482600688934, 'timestamp': '2025-09-10 02:40:26.571034', 'step': 12384, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 128], 'flops': 3797092544000}, 'timestamp': '2025-09-10 02:40:26.607051', 'step': 12384, 'epoch': 2} {'type': 'loss', 'content': 0.12218639254570007, 'timestamp': '2025-09-10 02:40:26.609499', 'step': 12385, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 2847885091968}, 'timestamp': '2025-09-10 02:40:26.641415', 'step': 12385, 'epoch': 2} {'type': 'loss', 'content': 0.117473304271698, 'timestamp': '2025-09-10 02:40:26.644102', 'step': 12386, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 144], 'flops': 4271696270016}, 'timestamp': '2025-09-10 02:40:26.675479', 'step': 12386, 'epoch': 2} {'type': 'loss', 'content': 0.2232685685157776, 'timestamp': '2025-09-10 02:40:26.677992', 'step': 12387, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 128], 'flops': 3797092544000}, 'timestamp': '2025-09-10 02:40:26.709720', 'step': 12387, 'epoch': 2} {'type': 'loss', 'content': 0.11532887816429138, 'timestamp': '2025-09-10 02:40:26.733700', 'step': 12388, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 2847885091968}, 'timestamp': '2025-09-10 02:40:26.764367', 'step': 12388, 'epoch': 2} {'type': 'loss', 'content': 0.11926543712615967, 'timestamp': '2025-09-10 02:40:26.767410', 'step': 12389, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 144], 'flops': 4271696270016}, 'timestamp': '2025-09-10 02:40:26.799040', 'step': 12389, 'epoch': 2} {'type': 'loss', 'content': 0.10966063290834427, 'timestamp': '2025-09-10 02:40:26.801489', 'step': 12390, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 128], 'flops': 3797092544000}, 'timestamp': '2025-09-10 02:40:26.832581', 'step': 12390, 'epoch': 2} {'type': 'loss', 'content': 0.0767180398106575, 'timestamp': '2025-09-10 02:40:26.835091', 'step': 12391, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 128], 'flops': 3797092544000}, 'timestamp': '2025-09-10 02:40:26.866984', 'step': 12391, 'epoch': 2} {'type': 'loss', 'content': 0.1177329421043396, 'timestamp': '2025-09-10 02:40:26.890852', 'step': 12392, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 2847885091968}, 'timestamp': '2025-09-10 02:40:26.920729', 'step': 12392, 'epoch': 2} {'type': 'loss', 'content': 0.03910282254219055, 'timestamp': '2025-09-10 02:40:26.923062', 'step': 12393, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 2847885091968}, 'timestamp': '2025-09-10 02:40:26.953218', 'step': 12393, 'epoch': 2} {'type': 'loss', 'content': 0.15049828588962555, 'timestamp': '2025-09-10 02:40:26.955972', 'step': 12394, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 3322488817984}, 'timestamp': '2025-09-10 02:40:26.986227', 'step': 12394, 'epoch': 2} {'type': 'loss', 'content': 0.09455760568380356, 'timestamp': '2025-09-10 02:40:26.988688', 'step': 12395, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 3322488817984}, 'timestamp': '2025-09-10 02:40:27.019496', 'step': 12395, 'epoch': 2} {'type': 'loss', 'content': 0.10758285224437714, 'timestamp': '2025-09-10 02:40:27.044624', 'step': 12396, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 3322488817984}, 'timestamp': '2025-09-10 02:40:27.075890', 'step': 12396, 'epoch': 2} {'type': 'loss', 'content': 0.12786416709423065, 'timestamp': '2025-09-10 02:40:27.078532', 'step': 12397, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 2847885091968}, 'timestamp': '2025-09-10 02:40:27.113481', 'step': 12397, 'epoch': 2} {'type': 'loss', 'content': 0.12410059571266174, 'timestamp': '2025-09-10 02:40:27.115843', 'step': 12398, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 128], 'flops': 3797092544000}, 'timestamp': '2025-09-10 02:40:27.147607', 'step': 12398, 'epoch': 2} {'type': 'loss', 'content': 0.10004767030477524, 'timestamp': '2025-09-10 02:40:27.150000', 'step': 12399, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 128], 'flops': 3797092544000}, 'timestamp': '2025-09-10 02:40:27.180434', 'step': 12399, 'epoch': 2} {'type': 'loss', 'content': 0.06301325559616089, 'timestamp': '2025-09-10 02:40:27.204140', 'step': 12400, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 2847885091968}, 'timestamp': '2025-09-10 02:40:27.235222', 'step': 12400, 'epoch': 2} {'type': 'loss', 'content': 0.1028204932808876, 'timestamp': '2025-09-10 02:40:27.238133', 'step': 12401, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 3322488817984}, 'timestamp': '2025-09-10 02:40:27.269336', 'step': 12401, 'epoch': 2} {'type': 'loss', 'content': 0.17445440590381622, 'timestamp': '2025-09-10 02:40:27.271678', 'step': 12402, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 2847885091968}, 'timestamp': '2025-09-10 02:40:27.302316', 'step': 12402, 'epoch': 2} {'type': 'loss', 'content': 0.042181890457868576, 'timestamp': '2025-09-10 02:40:27.304661', 'step': 12403, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 128], 'flops': 3797092544000}, 'timestamp': '2025-09-10 02:40:27.335618', 'step': 12403, 'epoch': 2} {'type': 'loss', 'content': 0.21689583361148834, 'timestamp': '2025-09-10 02:40:27.359889', 'step': 12404, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 2847885091968}, 'timestamp': '2025-09-10 02:40:27.391195', 'step': 12404, 'epoch': 2} {'type': 'loss', 'content': 0.13166148960590363, 'timestamp': '2025-09-10 02:40:27.393764', 'step': 12405, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 160], 'flops': 4746299996032}, 'timestamp': '2025-09-10 02:40:27.424766', 'step': 12405, 'epoch': 2} {'type': 'loss', 'content': 0.2027619630098343, 'timestamp': '2025-09-10 02:40:27.428701', 'step': 12406, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 144], 'flops': 4271696270016}, 'timestamp': '2025-09-10 02:40:27.459262', 'step': 12406, 'epoch': 2} {'type': 'loss', 'content': 0.15638647973537445, 'timestamp': '2025-09-10 02:40:27.461788', 'step': 12407, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 128], 'flops': 3797092544000}, 'timestamp': '2025-09-10 02:40:27.492354', 'step': 12407, 'epoch': 2} {'type': 'loss', 'content': 0.08278059214353561, 'timestamp': '2025-09-10 02:40:27.516094', 'step': 12408, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 3322488817984}, 'timestamp': '2025-09-10 02:40:27.547288', 'step': 12408, 'epoch': 2} {'type': 'loss', 'content': 0.13177832961082458, 'timestamp': '2025-09-10 02:40:27.549616', 'step': 12409, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 3322488817984}, 'timestamp': '2025-09-10 02:40:27.579674', 'step': 12409, 'epoch': 2} {'type': 'loss', 'content': 0.047673482447862625, 'timestamp': '2025-09-10 02:40:27.582553', 'step': 12410, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 2847885091968}, 'timestamp': '2025-09-10 02:40:27.614281', 'step': 12410, 'epoch': 2} {'type': 'loss', 'content': 0.07499490678310394, 'timestamp': '2025-09-10 02:40:27.616841', 'step': 12411, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 144], 'flops': 4271696270016}, 'timestamp': '2025-09-10 02:40:27.647509', 'step': 12411, 'epoch': 2} {'type': 'loss', 'content': 0.1072268858551979, 'timestamp': '2025-09-10 02:40:27.671120', 'step': 12412, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 128], 'flops': 3797092544000}, 'timestamp': '2025-09-10 02:40:27.702050', 'step': 12412, 'epoch': 2} {'type': 'loss', 'content': 0.12084618955850601, 'timestamp': '2025-09-10 02:40:27.706253', 'step': 12413, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 3322488817984}, 'timestamp': '2025-09-10 02:40:27.741910', 'step': 12413, 'epoch': 2} {'type': 'loss', 'content': 0.08994059264659882, 'timestamp': '2025-09-10 02:40:27.746246', 'step': 12414, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 144], 'flops': 4271696270016}, 'timestamp': '2025-09-10 02:40:27.782876', 'step': 12414, 'epoch': 2} {'type': 'loss', 'content': 0.09738203138113022, 'timestamp': '2025-09-10 02:40:27.787349', 'step': 12415, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 128], 'flops': 3797092544000}, 'timestamp': '2025-09-10 02:40:27.824370', 'step': 12415, 'epoch': 2} {'type': 'loss', 'content': 0.12674471735954285, 'timestamp': '2025-09-10 02:40:27.848674', 'step': 12416, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 160], 'flops': 4746299996032}, 'timestamp': '2025-09-10 02:40:27.881325', 'step': 12416, 'epoch': 2} {'type': 'loss', 'content': 0.10482996702194214, 'timestamp': '2025-09-10 02:40:27.884407', 'step': 12417, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 2847885091968}, 'timestamp': '2025-09-10 02:40:27.916341', 'step': 12417, 'epoch': 2} {'type': 'loss', 'content': 0.09342678636312485, 'timestamp': '2025-09-10 02:40:27.918920', 'step': 12418, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 3322488817984}, 'timestamp': '2025-09-10 02:40:27.949633', 'step': 12418, 'epoch': 2} {'type': 'loss', 'content': 0.11485860496759415, 'timestamp': '2025-09-10 02:40:27.952576', 'step': 12419, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 3322488817984}, 'timestamp': '2025-09-10 02:40:27.983913', 'step': 12419, 'epoch': 2} {'type': 'loss', 'content': 0.118169404566288, 'timestamp': '2025-09-10 02:40:28.007723', 'step': 12420, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 3322488817984}, 'timestamp': '2025-09-10 02:40:28.038477', 'step': 12420, 'epoch': 2} {'type': 'loss', 'content': 0.08298753947019577, 'timestamp': '2025-09-10 02:40:28.044226', 'step': 12421, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 128], 'flops': 3797092544000}, 'timestamp': '2025-09-10 02:40:28.074202', 'step': 12421, 'epoch': 2} {'type': 'loss', 'content': 0.14290139079093933, 'timestamp': '2025-09-10 02:40:28.076908', 'step': 12422, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 2847885091968}, 'timestamp': '2025-09-10 02:40:28.107061', 'step': 12422, 'epoch': 2} {'type': 'loss', 'content': 0.06706228852272034, 'timestamp': '2025-09-10 02:40:28.109889', 'step': 12423, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 128], 'flops': 3797092544000}, 'timestamp': '2025-09-10 02:40:28.139934', 'step': 12423, 'epoch': 2} {'type': 'loss', 'content': 0.10189518332481384, 'timestamp': '2025-09-10 02:40:28.163814', 'step': 12424, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 128], 'flops': 3797092544000}, 'timestamp': '2025-09-10 02:40:28.195383', 'step': 12424, 'epoch': 2} {'type': 'loss', 'content': 0.08701738715171814, 'timestamp': '2025-09-10 02:40:28.199094', 'step': 12425, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 2847885091968}, 'timestamp': '2025-09-10 02:40:28.230525', 'step': 12425, 'epoch': 2} {'type': 'loss', 'content': 0.1341208517551422, 'timestamp': '2025-09-10 02:40:28.233107', 'step': 12426, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 128], 'flops': 3797092544000}, 'timestamp': '2025-09-10 02:40:28.265552', 'step': 12426, 'epoch': 2} {'type': 'loss', 'content': 0.12278939038515091, 'timestamp': '2025-09-10 02:40:28.267930', 'step': 12427, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 2847885091968}, 'timestamp': '2025-09-10 02:40:28.298748', 'step': 12427, 'epoch': 2} {'type': 'loss', 'content': 0.03562983125448227, 'timestamp': '2025-09-10 02:40:28.322588', 'step': 12428, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 80], 'flops': 2373281365952}, 'timestamp': '2025-09-10 02:40:28.360263', 'step': 12428, 'epoch': 2} {'type': 'loss', 'content': 0.20529930293560028, 'timestamp': '2025-09-10 02:40:28.367187', 'step': 12429, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 2847885091968}, 'timestamp': '2025-09-10 02:40:28.411600', 'step': 12429, 'epoch': 2} {'type': 'loss', 'content': 0.10281404852867126, 'timestamp': '2025-09-10 02:40:28.414488', 'step': 12430, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 2847885091968}, 'timestamp': '2025-09-10 02:40:28.445920', 'step': 12430, 'epoch': 2} {'type': 'loss', 'content': 0.09365448355674744, 'timestamp': '2025-09-10 02:40:28.448439', 'step': 12431, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 2847885091968}, 'timestamp': '2025-09-10 02:40:28.479029', 'step': 12431, 'epoch': 2} {'type': 'loss', 'content': 0.02426956221461296, 'timestamp': '2025-09-10 02:40:28.503098', 'step': 12432, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 144], 'flops': 4271696270016}, 'timestamp': '2025-09-10 02:40:28.534789', 'step': 12432, 'epoch': 2} {'type': 'loss', 'content': 0.09667467325925827, 'timestamp': '2025-09-10 02:40:28.537334', 'step': 12433, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 3322488817984}, 'timestamp': '2025-09-10 02:40:28.571395', 'step': 12433, 'epoch': 2} {'type': 'loss', 'content': 0.1285616010427475, 'timestamp': '2025-09-10 02:40:28.573851', 'step': 12434, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 3322488817984}, 'timestamp': '2025-09-10 02:40:28.606891', 'step': 12434, 'epoch': 2} {'type': 'loss', 'content': 0.05502459034323692, 'timestamp': '2025-09-10 02:40:28.609397', 'step': 12435, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 80], 'flops': 2373281365952}, 'timestamp': '2025-09-10 02:40:28.639680', 'step': 12435, 'epoch': 2} {'type': 'loss', 'content': 0.0870334804058075, 'timestamp': '2025-09-10 02:40:28.666133', 'step': 12436, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 3322488817984}, 'timestamp': '2025-09-10 02:40:28.696813', 'step': 12436, 'epoch': 2} {'type': 'loss', 'content': 0.1019689217209816, 'timestamp': '2025-09-10 02:40:28.699509', 'step': 12437, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 160], 'flops': 4746299996032}, 'timestamp': '2025-09-10 02:40:28.729744', 'step': 12437, 'epoch': 2} {'type': 'loss', 'content': 0.06127293035387993, 'timestamp': '2025-09-10 02:40:28.732607', 'step': 12438, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 144], 'flops': 4271696270016}, 'timestamp': '2025-09-10 02:40:28.763447', 'step': 12438, 'epoch': 2} {'type': 'loss', 'content': 0.24064671993255615, 'timestamp': '2025-09-10 02:40:28.766711', 'step': 12439, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 128], 'flops': 3797092544000}, 'timestamp': '2025-09-10 02:40:28.797722', 'step': 12439, 'epoch': 2} {'type': 'loss', 'content': 0.0930677205324173, 'timestamp': '2025-09-10 02:40:28.821393', 'step': 12440, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 128], 'flops': 3797092544000}, 'timestamp': '2025-09-10 02:40:28.852906', 'step': 12440, 'epoch': 2} {'type': 'loss', 'content': 0.10315213352441788, 'timestamp': '2025-09-10 02:40:28.855660', 'step': 12441, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 160], 'flops': 4746299996032}, 'timestamp': '2025-09-10 02:40:28.887416', 'step': 12441, 'epoch': 2} {'type': 'loss', 'content': 0.08362987637519836, 'timestamp': '2025-09-10 02:40:28.890153', 'step': 12442, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 128], 'flops': 3797092544000}, 'timestamp': '2025-09-10 02:40:28.920215', 'step': 12442, 'epoch': 2} {'type': 'loss', 'content': 0.07620351016521454, 'timestamp': '2025-09-10 02:40:28.922983', 'step': 12443, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 3322488817984}, 'timestamp': '2025-09-10 02:40:28.953788', 'step': 12443, 'epoch': 2} {'type': 'loss', 'content': 0.14400236308574677, 'timestamp': '2025-09-10 02:40:28.977748', 'step': 12444, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 128], 'flops': 3797092544000}, 'timestamp': '2025-09-10 02:40:29.008462', 'step': 12444, 'epoch': 2} {'type': 'loss', 'content': 0.11868736892938614, 'timestamp': '2025-09-10 02:40:29.011109', 'step': 12445, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 144], 'flops': 4271696270016}, 'timestamp': '2025-09-10 02:40:29.042128', 'step': 12445, 'epoch': 2} {'type': 'loss', 'content': 0.11569135636091232, 'timestamp': '2025-09-10 02:40:29.048746', 'step': 12446, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 80], 'flops': 2373281365952}, 'timestamp': '2025-09-10 02:40:29.079898', 'step': 12446, 'epoch': 2} {'type': 'loss', 'content': 0.10924521088600159, 'timestamp': '2025-09-10 02:40:29.082866', 'step': 12447, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 128], 'flops': 3797092544000}, 'timestamp': '2025-09-10 02:40:29.113825', 'step': 12447, 'epoch': 2} {'type': 'loss', 'content': 0.09176519513130188, 'timestamp': '2025-09-10 02:40:29.137909', 'step': 12448, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 2847885091968}, 'timestamp': '2025-09-10 02:40:29.169707', 'step': 12448, 'epoch': 2} {'type': 'loss', 'content': 0.0546286404132843, 'timestamp': '2025-09-10 02:40:29.173063', 'step': 12449, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 2847885091968}, 'timestamp': '2025-09-10 02:40:29.211556', 'step': 12449, 'epoch': 2} {'type': 'loss', 'content': 0.051555294543504715, 'timestamp': '2025-09-10 02:40:29.213984', 'step': 12450, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 2847885091968}, 'timestamp': '2025-09-10 02:40:29.244157', 'step': 12450, 'epoch': 2} {'type': 'loss', 'content': 0.16709451377391815, 'timestamp': '2025-09-10 02:40:29.247091', 'step': 12451, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 80], 'flops': 2373281365952}, 'timestamp': '2025-09-10 02:40:29.279155', 'step': 12451, 'epoch': 2} {'type': 'loss', 'content': 0.14857809245586395, 'timestamp': '2025-09-10 02:40:29.303678', 'step': 12452, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 2847885091968}, 'timestamp': '2025-09-10 02:40:29.333968', 'step': 12452, 'epoch': 2} {'type': 'loss', 'content': 0.09592888504266739, 'timestamp': '2025-09-10 02:40:29.336561', 'step': 12453, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 3322488817984}, 'timestamp': '2025-09-10 02:40:29.367460', 'step': 12453, 'epoch': 2} {'type': 'loss', 'content': 0.22113467752933502, 'timestamp': '2025-09-10 02:40:29.369821', 'step': 12454, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 3322488817984}, 'timestamp': '2025-09-10 02:40:29.400067', 'step': 12454, 'epoch': 2} {'type': 'loss', 'content': 0.1135823130607605, 'timestamp': '2025-09-10 02:40:29.402338', 'step': 12455, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 128], 'flops': 3797092544000}, 'timestamp': '2025-09-10 02:40:29.432272', 'step': 12455, 'epoch': 2} {'type': 'loss', 'content': 0.1092478409409523, 'timestamp': '2025-09-10 02:40:29.456086', 'step': 12456, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 2847885091968}, 'timestamp': '2025-09-10 02:40:29.498981', 'step': 12456, 'epoch': 2} {'type': 'loss', 'content': 0.07574620842933655, 'timestamp': '2025-09-10 02:40:29.503958', 'step': 12457, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 144], 'flops': 4271696270016}, 'timestamp': '2025-09-10 02:40:29.541879', 'step': 12457, 'epoch': 2} {'type': 'loss', 'content': 0.09007686376571655, 'timestamp': '2025-09-10 02:40:29.544553', 'step': 12458, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 2847885091968}, 'timestamp': '2025-09-10 02:40:29.575473', 'step': 12458, 'epoch': 2} {'type': 'loss', 'content': 0.1799037903547287, 'timestamp': '2025-09-10 02:40:29.579214', 'step': 12459, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 192], 'flops': 5695507448064}, 'timestamp': '2025-09-10 02:40:29.613751', 'step': 12459, 'epoch': 2} {'type': 'loss', 'content': 0.07656000554561615, 'timestamp': '2025-09-10 02:40:29.639103', 'step': 12460, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 2847885091968}, 'timestamp': '2025-09-10 02:40:29.671199', 'step': 12460, 'epoch': 2} {'type': 'loss', 'content': 0.10577287524938583, 'timestamp': '2025-09-10 02:40:29.674125', 'step': 12461, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 3322488817984}, 'timestamp': '2025-09-10 02:40:29.704192', 'step': 12461, 'epoch': 2} {'type': 'loss', 'content': 0.0917007103562355, 'timestamp': '2025-09-10 02:40:29.707125', 'step': 12462, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 3322488817984}, 'timestamp': '2025-09-10 02:40:29.737190', 'step': 12462, 'epoch': 2} {'type': 'loss', 'content': 0.1317416876554489, 'timestamp': '2025-09-10 02:40:29.739901', 'step': 12463, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 128], 'flops': 3797092544000}, 'timestamp': '2025-09-10 02:40:29.770131', 'step': 12463, 'epoch': 2} {'type': 'loss', 'content': 0.08223935961723328, 'timestamp': '2025-09-10 02:40:29.794332', 'step': 12464, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 128], 'flops': 3797092544000}, 'timestamp': '2025-09-10 02:40:29.825728', 'step': 12464, 'epoch': 2} {'type': 'loss', 'content': 0.05977452173829079, 'timestamp': '2025-09-10 02:40:29.831406', 'step': 12465, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 3322488817984}, 'timestamp': '2025-09-10 02:40:29.869637', 'step': 12465, 'epoch': 2} {'type': 'loss', 'content': 0.09217358380556107, 'timestamp': '2025-09-10 02:40:29.875996', 'step': 12466, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 128], 'flops': 3797092544000}, 'timestamp': '2025-09-10 02:40:29.911153', 'step': 12466, 'epoch': 2} {'type': 'loss', 'content': 0.07427242398262024, 'timestamp': '2025-09-10 02:40:29.914295', 'step': 12467, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 128], 'flops': 3797092544000}, 'timestamp': '2025-09-10 02:40:29.944474', 'step': 12467, 'epoch': 2} {'type': 'loss', 'content': 0.08428926765918732, 'timestamp': '2025-09-10 02:40:29.968298', 'step': 12468, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 80], 'flops': 2373281365952}, 'timestamp': '2025-09-10 02:40:30.000275', 'step': 12468, 'epoch': 2} {'type': 'loss', 'content': 0.10318191349506378, 'timestamp': '2025-09-10 02:40:30.003744', 'step': 12469, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 3322488817984}, 'timestamp': '2025-09-10 02:40:30.035306', 'step': 12469, 'epoch': 2} {'type': 'loss', 'content': 0.08401857316493988, 'timestamp': '2025-09-10 02:40:30.037974', 'step': 12470, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 128], 'flops': 3797092544000}, 'timestamp': '2025-09-10 02:40:30.069155', 'step': 12470, 'epoch': 2} {'type': 'loss', 'content': 0.1553758829832077, 'timestamp': '2025-09-10 02:40:30.071562', 'step': 12471, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 3322488817984}, 'timestamp': '2025-09-10 02:40:30.102720', 'step': 12471, 'epoch': 2} {'type': 'loss', 'content': 0.06412944197654724, 'timestamp': '2025-09-10 02:40:30.126300', 'step': 12472, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 128], 'flops': 3797092544000}, 'timestamp': '2025-09-10 02:40:30.156844', 'step': 12472, 'epoch': 2} {'type': 'loss', 'content': 0.14886465668678284, 'timestamp': '2025-09-10 02:40:30.159375', 'step': 12473, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 2847885091968}, 'timestamp': '2025-09-10 02:40:30.191491', 'step': 12473, 'epoch': 2} {'type': 'loss', 'content': 0.09823346138000488, 'timestamp': '2025-09-10 02:40:30.198981', 'step': 12474, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 3322488817984}, 'timestamp': '2025-09-10 02:40:30.230204', 'step': 12474, 'epoch': 2} {'type': 'loss', 'content': 0.07810422778129578, 'timestamp': '2025-09-10 02:40:30.232700', 'step': 12475, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 2847885091968}, 'timestamp': '2025-09-10 02:40:30.263792', 'step': 12475, 'epoch': 2} {'type': 'loss', 'content': 0.08975891768932343, 'timestamp': '2025-09-10 02:40:30.287674', 'step': 12476, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 2847885091968}, 'timestamp': '2025-09-10 02:40:30.318128', 'step': 12476, 'epoch': 2} {'type': 'loss', 'content': 0.14934633672237396, 'timestamp': '2025-09-10 02:40:30.320651', 'step': 12477, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 2847885091968}, 'timestamp': '2025-09-10 02:40:30.351845', 'step': 12477, 'epoch': 2} {'type': 'loss', 'content': 0.12234067916870117, 'timestamp': '2025-09-10 02:40:30.354449', 'step': 12478, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 3322488817984}, 'timestamp': '2025-09-10 02:40:30.384884', 'step': 12478, 'epoch': 2} {'type': 'loss', 'content': 0.10730554908514023, 'timestamp': '2025-09-10 02:40:30.389867', 'step': 12479, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 128], 'flops': 3797092544000}, 'timestamp': '2025-09-10 02:40:30.426397', 'step': 12479, 'epoch': 2} {'type': 'loss', 'content': 0.10219669342041016, 'timestamp': '2025-09-10 02:40:30.452110', 'step': 12480, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 128], 'flops': 3797092544000}, 'timestamp': '2025-09-10 02:40:30.483366', 'step': 12480, 'epoch': 2} {'type': 'loss', 'content': 0.02848268859088421, 'timestamp': '2025-09-10 02:40:30.486880', 'step': 12481, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 2847885091968}, 'timestamp': '2025-09-10 02:40:30.518048', 'step': 12481, 'epoch': 2} {'type': 'loss', 'content': 0.09806827455759048, 'timestamp': '2025-09-10 02:40:30.520362', 'step': 12482, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 3322488817984}, 'timestamp': '2025-09-10 02:40:30.551151', 'step': 12482, 'epoch': 2} {'type': 'loss', 'content': 0.08340153098106384, 'timestamp': '2025-09-10 02:40:30.553438', 'step': 12483, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 128], 'flops': 3797092544000}, 'timestamp': '2025-09-10 02:40:30.583591', 'step': 12483, 'epoch': 2} {'type': 'loss', 'content': 0.10925345122814178, 'timestamp': '2025-09-10 02:40:30.607422', 'step': 12484, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 144], 'flops': 4271696270016}, 'timestamp': '2025-09-10 02:40:30.639422', 'step': 12484, 'epoch': 2} {'type': 'loss', 'content': 0.1273462027311325, 'timestamp': '2025-09-10 02:40:30.641937', 'step': 12485, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 160], 'flops': 4746299996032}, 'timestamp': '2025-09-10 02:40:30.672477', 'step': 12485, 'epoch': 2} {'type': 'loss', 'content': 0.10851854830980301, 'timestamp': '2025-09-10 02:40:30.675276', 'step': 12486, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 3322488817984}, 'timestamp': '2025-09-10 02:40:30.705464', 'step': 12486, 'epoch': 2} {'type': 'loss', 'content': 0.1417151540517807, 'timestamp': '2025-09-10 02:40:30.708236', 'step': 12487, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 128], 'flops': 3797092544000}, 'timestamp': '2025-09-10 02:40:30.739470', 'step': 12487, 'epoch': 2} {'type': 'loss', 'content': 0.12153565883636475, 'timestamp': '2025-09-10 02:40:30.763755', 'step': 12488, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 128], 'flops': 3797092544000}, 'timestamp': '2025-09-10 02:40:30.794709', 'step': 12488, 'epoch': 2} {'type': 'loss', 'content': 0.13907817006111145, 'timestamp': '2025-09-10 02:40:30.797582', 'step': 12489, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 2847885091968}, 'timestamp': '2025-09-10 02:40:30.828277', 'step': 12489, 'epoch': 2} {'type': 'loss', 'content': 0.08298762142658234, 'timestamp': '2025-09-10 02:40:30.831270', 'step': 12490, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 128], 'flops': 3797092544000}, 'timestamp': '2025-09-10 02:40:30.861672', 'step': 12490, 'epoch': 2} {'type': 'loss', 'content': 0.1014767736196518, 'timestamp': '2025-09-10 02:40:30.864239', 'step': 12491, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 3322488817984}, 'timestamp': '2025-09-10 02:40:30.896376', 'step': 12491, 'epoch': 2} {'type': 'loss', 'content': 0.1002899557352066, 'timestamp': '2025-09-10 02:40:30.920257', 'step': 12492, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 128], 'flops': 3797092544000}, 'timestamp': '2025-09-10 02:40:30.951287', 'step': 12492, 'epoch': 2} {'type': 'loss', 'content': 0.13598477840423584, 'timestamp': '2025-09-10 02:40:30.953821', 'step': 12493, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 2847885091968}, 'timestamp': '2025-09-10 02:40:30.984195', 'step': 12493, 'epoch': 2} {'type': 'loss', 'content': 0.09922467917203903, 'timestamp': '2025-09-10 02:40:30.986667', 'step': 12494, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 3322488817984}, 'timestamp': '2025-09-10 02:40:31.016979', 'step': 12494, 'epoch': 2} {'type': 'loss', 'content': 0.07198365777730942, 'timestamp': '2025-09-10 02:40:31.019363', 'step': 12495, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 3322488817984}, 'timestamp': '2025-09-10 02:40:31.049587', 'step': 12495, 'epoch': 2} {'type': 'loss', 'content': 0.1708967387676239, 'timestamp': '2025-09-10 02:40:31.075057', 'step': 12496, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 144], 'flops': 4271696270016}, 'timestamp': '2025-09-10 02:40:31.105806', 'step': 12496, 'epoch': 2} {'type': 'loss', 'content': 0.119052954018116, 'timestamp': '2025-09-10 02:40:31.108534', 'step': 12497, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 128], 'flops': 3797092544000}, 'timestamp': '2025-09-10 02:40:31.139810', 'step': 12497, 'epoch': 2} {'type': 'loss', 'content': 0.14257289469242096, 'timestamp': '2025-09-10 02:40:31.142530', 'step': 12498, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 3322488817984}, 'timestamp': '2025-09-10 02:40:31.175021', 'step': 12498, 'epoch': 2} {'type': 'loss', 'content': 0.13379625976085663, 'timestamp': '2025-09-10 02:40:31.177568', 'step': 12499, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 3322488817984}, 'timestamp': '2025-09-10 02:40:31.208065', 'step': 12499, 'epoch': 2} {'type': 'loss', 'content': 0.15769684314727783, 'timestamp': '2025-09-10 02:40:31.231934', 'step': 12500, 'epoch': 2} {'type': 'info', 'content': 'Checkpoint saved at step 12500', 'timestamp': '2025-09-10 02:40:37.487731', 'step': 12500, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 3322488817984}, 'timestamp': '2025-09-10 02:40:37.521330', 'step': 12500, 'epoch': 2} {'type': 'loss', 'content': 0.07865440845489502, 'timestamp': '2025-09-10 02:40:37.523683', 'step': 12501, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 128], 'flops': 3797092544000}, 'timestamp': '2025-09-10 02:40:37.554144', 'step': 12501, 'epoch': 2} {'type': 'loss', 'content': 0.1147419661283493, 'timestamp': '2025-09-10 02:40:37.557632', 'step': 12502, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 3322488817984}, 'timestamp': '2025-09-10 02:40:37.593267', 'step': 12502, 'epoch': 2} {'type': 'loss', 'content': 0.15630683302879333, 'timestamp': '2025-09-10 02:40:37.596049', 'step': 12503, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 3322488817984}, 'timestamp': '2025-09-10 02:40:37.627274', 'step': 12503, 'epoch': 2} {'type': 'loss', 'content': 0.14081691205501556, 'timestamp': '2025-09-10 02:40:37.650935', 'step': 12504, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 2847885091968}, 'timestamp': '2025-09-10 02:40:37.683472', 'step': 12504, 'epoch': 2} {'type': 'loss', 'content': 0.09219682216644287, 'timestamp': '2025-09-10 02:40:37.686153', 'step': 12505, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 160], 'flops': 4746299996032}, 'timestamp': '2025-09-10 02:40:37.716598', 'step': 12505, 'epoch': 2} {'type': 'loss', 'content': 0.08211252093315125, 'timestamp': '2025-09-10 02:40:37.719226', 'step': 12506, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 2847885091968}, 'timestamp': '2025-09-10 02:40:37.750631', 'step': 12506, 'epoch': 2} {'type': 'loss', 'content': 0.18885663151741028, 'timestamp': '2025-09-10 02:40:37.753128', 'step': 12507, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 2847885091968}, 'timestamp': '2025-09-10 02:40:37.786799', 'step': 12507, 'epoch': 2} {'type': 'loss', 'content': 0.08142384886741638, 'timestamp': '2025-09-10 02:40:37.810661', 'step': 12508, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 176], 'flops': 5220903722048}, 'timestamp': '2025-09-10 02:40:37.841167', 'step': 12508, 'epoch': 2} {'type': 'loss', 'content': 0.11452221125364304, 'timestamp': '2025-09-10 02:40:37.843610', 'step': 12509, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 2847885091968}, 'timestamp': '2025-09-10 02:40:37.874211', 'step': 12509, 'epoch': 2} {'type': 'loss', 'content': 0.1078072041273117, 'timestamp': '2025-09-10 02:40:37.876712', 'step': 12510, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 3322488817984}, 'timestamp': '2025-09-10 02:40:37.906613', 'step': 12510, 'epoch': 2} {'type': 'loss', 'content': 0.12207301706075668, 'timestamp': '2025-09-10 02:40:37.908922', 'step': 12511, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 3322488817984}, 'timestamp': '2025-09-10 02:40:37.939016', 'step': 12511, 'epoch': 2} {'type': 'loss', 'content': 0.15164083242416382, 'timestamp': '2025-09-10 02:40:37.962950', 'step': 12512, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 3322488817984}, 'timestamp': '2025-09-10 02:40:37.995124', 'step': 12512, 'epoch': 2} {'type': 'loss', 'content': 0.08295148611068726, 'timestamp': '2025-09-10 02:40:37.997846', 'step': 12513, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 128], 'flops': 3797092544000}, 'timestamp': '2025-09-10 02:40:38.029893', 'step': 12513, 'epoch': 2} {'type': 'loss', 'content': 0.09421221166849136, 'timestamp': '2025-09-10 02:40:38.032993', 'step': 12514, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 144], 'flops': 4271696270016}, 'timestamp': '2025-09-10 02:40:38.065331', 'step': 12514, 'epoch': 2} {'type': 'loss', 'content': 0.11631708592176437, 'timestamp': '2025-09-10 02:40:38.067733', 'step': 12515, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 2847885091968}, 'timestamp': '2025-09-10 02:40:38.099185', 'step': 12515, 'epoch': 2} {'type': 'loss', 'content': 0.0642281100153923, 'timestamp': '2025-09-10 02:40:38.122832', 'step': 12516, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 80], 'flops': 2373281365952}, 'timestamp': '2025-09-10 02:40:38.152903', 'step': 12516, 'epoch': 2} {'type': 'loss', 'content': 0.20499737560749054, 'timestamp': '2025-09-10 02:40:38.155608', 'step': 12517, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 2847885091968}, 'timestamp': '2025-09-10 02:40:38.188386', 'step': 12517, 'epoch': 2} {'type': 'loss', 'content': 0.04236239939928055, 'timestamp': '2025-09-10 02:40:38.190913', 'step': 12518, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 160], 'flops': 4746299996032}, 'timestamp': '2025-09-10 02:40:38.221784', 'step': 12518, 'epoch': 2} {'type': 'loss', 'content': 0.07812662422657013, 'timestamp': '2025-09-10 02:40:38.227916', 'step': 12519, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 128], 'flops': 3797092544000}, 'timestamp': '2025-09-10 02:40:38.259924', 'step': 12519, 'epoch': 2} {'type': 'loss', 'content': 0.06646430492401123, 'timestamp': '2025-09-10 02:40:38.283721', 'step': 12520, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 3322488817984}, 'timestamp': '2025-09-10 02:40:38.314357', 'step': 12520, 'epoch': 2} {'type': 'loss', 'content': 0.06627512723207474, 'timestamp': '2025-09-10 02:40:38.316710', 'step': 12521, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 2847885091968}, 'timestamp': '2025-09-10 02:40:38.348188', 'step': 12521, 'epoch': 2} {'type': 'loss', 'content': 0.10526374727487564, 'timestamp': '2025-09-10 02:40:38.350806', 'step': 12522, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 2847885091968}, 'timestamp': '2025-09-10 02:40:38.381587', 'step': 12522, 'epoch': 2} {'type': 'loss', 'content': 0.058537308126688004, 'timestamp': '2025-09-10 02:40:38.384214', 'step': 12523, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 2847885091968}, 'timestamp': '2025-09-10 02:40:38.414128', 'step': 12523, 'epoch': 2} {'type': 'loss', 'content': 0.1511276662349701, 'timestamp': '2025-09-10 02:40:38.438079', 'step': 12524, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 2847885091968}, 'timestamp': '2025-09-10 02:40:38.469319', 'step': 12524, 'epoch': 2} {'type': 'loss', 'content': 0.11870002746582031, 'timestamp': '2025-09-10 02:40:38.471771', 'step': 12525, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 3322488817984}, 'timestamp': '2025-09-10 02:40:38.502627', 'step': 12525, 'epoch': 2} {'type': 'loss', 'content': 0.175919309258461, 'timestamp': '2025-09-10 02:40:38.505343', 'step': 12526, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 2847885091968}, 'timestamp': '2025-09-10 02:40:38.536476', 'step': 12526, 'epoch': 2} {'type': 'loss', 'content': 0.12796711921691895, 'timestamp': '2025-09-10 02:40:38.538917', 'step': 12527, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 3322488817984}, 'timestamp': '2025-09-10 02:40:38.569128', 'step': 12527, 'epoch': 2} {'type': 'loss', 'content': 0.06385666877031326, 'timestamp': '2025-09-10 02:40:38.593128', 'step': 12528, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 160], 'flops': 4746299996032}, 'timestamp': '2025-09-10 02:40:38.624978', 'step': 12528, 'epoch': 2} {'type': 'loss', 'content': 0.1114869937300682, 'timestamp': '2025-09-10 02:40:38.628741', 'step': 12529, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 3322488817984}, 'timestamp': '2025-09-10 02:40:38.666778', 'step': 12529, 'epoch': 2} {'type': 'loss', 'content': 0.15833915770053864, 'timestamp': '2025-09-10 02:40:38.675383', 'step': 12530, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 2847885091968}, 'timestamp': '2025-09-10 02:40:38.715446', 'step': 12530, 'epoch': 2} {'type': 'loss', 'content': 0.12087657302618027, 'timestamp': '2025-09-10 02:40:38.718473', 'step': 12531, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 3322488817984}, 'timestamp': '2025-09-10 02:40:38.749820', 'step': 12531, 'epoch': 2} {'type': 'loss', 'content': 0.020714815706014633, 'timestamp': '2025-09-10 02:40:38.773688', 'step': 12532, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 2847885091968}, 'timestamp': '2025-09-10 02:40:38.805343', 'step': 12532, 'epoch': 2} {'type': 'loss', 'content': 0.06800993531942368, 'timestamp': '2025-09-10 02:40:38.808381', 'step': 12533, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 128], 'flops': 3797092544000}, 'timestamp': '2025-09-10 02:40:38.838459', 'step': 12533, 'epoch': 2} {'type': 'loss', 'content': 0.11363005638122559, 'timestamp': '2025-09-10 02:40:38.840942', 'step': 12534, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 2847885091968}, 'timestamp': '2025-09-10 02:40:38.871293', 'step': 12534, 'epoch': 2} {'type': 'loss', 'content': 0.06664489954710007, 'timestamp': '2025-09-10 02:40:38.874262', 'step': 12535, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 2847885091968}, 'timestamp': '2025-09-10 02:40:38.904184', 'step': 12535, 'epoch': 2} {'type': 'loss', 'content': 0.15824353694915771, 'timestamp': '2025-09-10 02:40:38.927575', 'step': 12536, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 2847885091968}, 'timestamp': '2025-09-10 02:40:38.959043', 'step': 12536, 'epoch': 2} {'type': 'loss', 'content': 0.06982459127902985, 'timestamp': '2025-09-10 02:40:38.961663', 'step': 12537, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 3322488817984}, 'timestamp': '2025-09-10 02:40:38.992520', 'step': 12537, 'epoch': 2} {'type': 'loss', 'content': 0.11358223110437393, 'timestamp': '2025-09-10 02:40:38.995434', 'step': 12538, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 144], 'flops': 4271696270016}, 'timestamp': '2025-09-10 02:40:39.026283', 'step': 12538, 'epoch': 2} {'type': 'loss', 'content': 0.12136657536029816, 'timestamp': '2025-09-10 02:40:39.028764', 'step': 12539, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 3322488817984}, 'timestamp': '2025-09-10 02:40:39.059306', 'step': 12539, 'epoch': 2} {'type': 'loss', 'content': 0.057512201368808746, 'timestamp': '2025-09-10 02:40:39.082943', 'step': 12540, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 128], 'flops': 3797092544000}, 'timestamp': '2025-09-10 02:40:39.113937', 'step': 12540, 'epoch': 2} {'type': 'loss', 'content': 0.12477762252092361, 'timestamp': '2025-09-10 02:40:39.116337', 'step': 12541, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 2847885091968}, 'timestamp': '2025-09-10 02:40:39.146369', 'step': 12541, 'epoch': 2} {'type': 'loss', 'content': 0.2045302391052246, 'timestamp': '2025-09-10 02:40:39.149091', 'step': 12542, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 144], 'flops': 4271696270016}, 'timestamp': '2025-09-10 02:40:39.180137', 'step': 12542, 'epoch': 2} {'type': 'loss', 'content': 0.0905177965760231, 'timestamp': '2025-09-10 02:40:39.182705', 'step': 12543, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 3322488817984}, 'timestamp': '2025-09-10 02:40:39.213795', 'step': 12543, 'epoch': 2} {'type': 'loss', 'content': 0.04951922222971916, 'timestamp': '2025-09-10 02:40:39.244222', 'step': 12544, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 3322488817984}, 'timestamp': '2025-09-10 02:40:39.275888', 'step': 12544, 'epoch': 2} {'type': 'loss', 'content': 0.08613946288824081, 'timestamp': '2025-09-10 02:40:39.279801', 'step': 12545, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 2847885091968}, 'timestamp': '2025-09-10 02:40:39.310635', 'step': 12545, 'epoch': 2} {'type': 'loss', 'content': 0.07173245400190353, 'timestamp': '2025-09-10 02:40:39.313397', 'step': 12546, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 128], 'flops': 3797092544000}, 'timestamp': '2025-09-10 02:40:39.344578', 'step': 12546, 'epoch': 2} {'type': 'loss', 'content': 0.18518346548080444, 'timestamp': '2025-09-10 02:40:39.346991', 'step': 12547, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 2847885091968}, 'timestamp': '2025-09-10 02:40:39.377610', 'step': 12547, 'epoch': 2} {'type': 'loss', 'content': 0.07821699976921082, 'timestamp': '2025-09-10 02:40:39.402220', 'step': 12548, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 128], 'flops': 3797092544000}, 'timestamp': '2025-09-10 02:40:39.433481', 'step': 12548, 'epoch': 2} {'type': 'loss', 'content': 0.0757313221693039, 'timestamp': '2025-09-10 02:40:39.435927', 'step': 12549, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 3322488817984}, 'timestamp': '2025-09-10 02:40:39.466477', 'step': 12549, 'epoch': 2} {'type': 'loss', 'content': 0.14870478212833405, 'timestamp': '2025-09-10 02:40:39.469553', 'step': 12550, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 3322488817984}, 'timestamp': '2025-09-10 02:40:39.500906', 'step': 12550, 'epoch': 2} {'type': 'loss', 'content': 0.1899845153093338, 'timestamp': '2025-09-10 02:40:39.503324', 'step': 12551, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 128], 'flops': 3797092544000}, 'timestamp': '2025-09-10 02:40:39.534244', 'step': 12551, 'epoch': 2} {'type': 'loss', 'content': 0.1848689615726471, 'timestamp': '2025-09-10 02:40:39.557929', 'step': 12552, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 2847885091968}, 'timestamp': '2025-09-10 02:40:39.588118', 'step': 12552, 'epoch': 2} {'type': 'loss', 'content': 0.0497906431555748, 'timestamp': '2025-09-10 02:40:39.590693', 'step': 12553, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 2847885091968}, 'timestamp': '2025-09-10 02:40:39.621702', 'step': 12553, 'epoch': 2} {'type': 'loss', 'content': 0.11832865327596664, 'timestamp': '2025-09-10 02:40:39.624254', 'step': 12554, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 2847885091968}, 'timestamp': '2025-09-10 02:40:39.654526', 'step': 12554, 'epoch': 2} {'type': 'loss', 'content': 0.17979289591312408, 'timestamp': '2025-09-10 02:40:39.656931', 'step': 12555, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 2847885091968}, 'timestamp': '2025-09-10 02:40:39.687785', 'step': 12555, 'epoch': 2} {'type': 'loss', 'content': 0.19677454233169556, 'timestamp': '2025-09-10 02:40:39.711730', 'step': 12556, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 2847885091968}, 'timestamp': '2025-09-10 02:40:39.742122', 'step': 12556, 'epoch': 2} {'type': 'loss', 'content': 0.10019509494304657, 'timestamp': '2025-09-10 02:40:39.744524', 'step': 12557, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 3322488817984}, 'timestamp': '2025-09-10 02:40:39.775433', 'step': 12557, 'epoch': 2} {'type': 'loss', 'content': 0.09180271625518799, 'timestamp': '2025-09-10 02:40:39.777811', 'step': 12558, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 2847885091968}, 'timestamp': '2025-09-10 02:40:39.809668', 'step': 12558, 'epoch': 2} {'type': 'loss', 'content': 0.15520577132701874, 'timestamp': '2025-09-10 02:40:39.812882', 'step': 12559, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 144], 'flops': 4271696270016}, 'timestamp': '2025-09-10 02:40:39.843868', 'step': 12559, 'epoch': 2} {'type': 'loss', 'content': 0.12376982718706131, 'timestamp': '2025-09-10 02:40:39.867630', 'step': 12560, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 144], 'flops': 4271696270016}, 'timestamp': '2025-09-10 02:40:39.898878', 'step': 12560, 'epoch': 2} {'type': 'loss', 'content': 0.06969330459833145, 'timestamp': '2025-09-10 02:40:39.901409', 'step': 12561, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 2847885091968}, 'timestamp': '2025-09-10 02:40:39.932744', 'step': 12561, 'epoch': 2} {'type': 'loss', 'content': 0.18086716532707214, 'timestamp': '2025-09-10 02:40:39.936448', 'step': 12562, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 3322488817984}, 'timestamp': '2025-09-10 02:40:39.970146', 'step': 12562, 'epoch': 2} {'type': 'loss', 'content': 0.1892523467540741, 'timestamp': '2025-09-10 02:40:39.972744', 'step': 12563, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 3322488817984}, 'timestamp': '2025-09-10 02:40:40.003128', 'step': 12563, 'epoch': 2} {'type': 'loss', 'content': 0.1515565812587738, 'timestamp': '2025-09-10 02:40:40.027896', 'step': 12564, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 2847885091968}, 'timestamp': '2025-09-10 02:40:40.058143', 'step': 12564, 'epoch': 2} {'type': 'loss', 'content': 0.1355152279138565, 'timestamp': '2025-09-10 02:40:40.060556', 'step': 12565, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 3322488817984}, 'timestamp': '2025-09-10 02:40:40.091928', 'step': 12565, 'epoch': 2} {'type': 'loss', 'content': 0.06567028164863586, 'timestamp': '2025-09-10 02:40:40.095774', 'step': 12566, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 128], 'flops': 3797092544000}, 'timestamp': '2025-09-10 02:40:40.127299', 'step': 12566, 'epoch': 2} {'type': 'loss', 'content': 0.09963354468345642, 'timestamp': '2025-09-10 02:40:40.129508', 'step': 12567, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 2847885091968}, 'timestamp': '2025-09-10 02:40:40.160493', 'step': 12567, 'epoch': 2} {'type': 'loss', 'content': 0.08176452666521072, 'timestamp': '2025-09-10 02:40:40.184099', 'step': 12568, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 3322488817984}, 'timestamp': '2025-09-10 02:40:40.216896', 'step': 12568, 'epoch': 2} {'type': 'loss', 'content': 0.19169144332408905, 'timestamp': '2025-09-10 02:40:40.224533', 'step': 12569, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 2847885091968}, 'timestamp': '2025-09-10 02:40:40.263953', 'step': 12569, 'epoch': 2} {'type': 'loss', 'content': 0.20373158156871796, 'timestamp': '2025-09-10 02:40:40.266443', 'step': 12570, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 3322488817984}, 'timestamp': '2025-09-10 02:40:40.297821', 'step': 12570, 'epoch': 2} {'type': 'loss', 'content': 0.12250573933124542, 'timestamp': '2025-09-10 02:40:40.300854', 'step': 12571, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 3322488817984}, 'timestamp': '2025-09-10 02:40:40.332880', 'step': 12571, 'epoch': 2} {'type': 'loss', 'content': 0.08048124611377716, 'timestamp': '2025-09-10 02:40:40.356823', 'step': 12572, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 128], 'flops': 3797092544000}, 'timestamp': '2025-09-10 02:40:40.388274', 'step': 12572, 'epoch': 2} {'type': 'loss', 'content': 0.12697777152061462, 'timestamp': '2025-09-10 02:40:40.392089', 'step': 12573, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 144], 'flops': 4271696270016}, 'timestamp': '2025-09-10 02:40:40.422360', 'step': 12573, 'epoch': 2} {'type': 'loss', 'content': 0.1556171476840973, 'timestamp': '2025-09-10 02:40:40.424872', 'step': 12574, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 80], 'flops': 2373281365952}, 'timestamp': '2025-09-10 02:40:40.459405', 'step': 12574, 'epoch': 2} {'type': 'loss', 'content': 0.0900372713804245, 'timestamp': '2025-09-10 02:40:40.462144', 'step': 12575, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 3322488817984}, 'timestamp': '2025-09-10 02:40:40.493410', 'step': 12575, 'epoch': 2} {'type': 'loss', 'content': 0.11313054710626602, 'timestamp': '2025-09-10 02:40:40.516700', 'step': 12576, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 80], 'flops': 2373281365952}, 'timestamp': '2025-09-10 02:40:40.549301', 'step': 12576, 'epoch': 2} {'type': 'loss', 'content': 0.10827289521694183, 'timestamp': '2025-09-10 02:40:40.552323', 'step': 12577, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 128], 'flops': 3797092544000}, 'timestamp': '2025-09-10 02:40:40.584037', 'step': 12577, 'epoch': 2} {'type': 'loss', 'content': 0.1569083034992218, 'timestamp': '2025-09-10 02:40:40.586984', 'step': 12578, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 2847885091968}, 'timestamp': '2025-09-10 02:40:40.621418', 'step': 12578, 'epoch': 2} {'type': 'loss', 'content': 0.10791261494159698, 'timestamp': '2025-09-10 02:40:40.624524', 'step': 12579, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 128], 'flops': 3797092544000}, 'timestamp': '2025-09-10 02:40:40.657174', 'step': 12579, 'epoch': 2} {'type': 'loss', 'content': 0.1697973757982254, 'timestamp': '2025-09-10 02:40:40.681407', 'step': 12580, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 2847885091968}, 'timestamp': '2025-09-10 02:40:40.713600', 'step': 12580, 'epoch': 2} {'type': 'loss', 'content': 0.10977495461702347, 'timestamp': '2025-09-10 02:40:40.715943', 'step': 12581, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 2847885091968}, 'timestamp': '2025-09-10 02:40:40.745650', 'step': 12581, 'epoch': 2} {'type': 'loss', 'content': 0.12109392136335373, 'timestamp': '2025-09-10 02:40:40.747971', 'step': 12582, 'epoch': 2} {'type': 'flops', 'content': [{'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1582003754624}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1898404492032}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1898404492032}, {'type': 'perplexity', 'in_batch_dim': [8, 144], 'batch_size': 8, 'flops': 2847606704256}, {'type': 'perplexity', 'in_batch_dim': [8, 64], 'batch_size': 8, 'flops': 1265603017216}, {'type': 'perplexity', 'in_batch_dim': [8, 112], 'batch_size': 8, 'flops': 2214805229440}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1898404492032}, {'type': 'perplexity', 'in_batch_dim': [8, 112], 'batch_size': 8, 'flops': 2214805229440}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1898404492032}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1898404492032}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1898404492032}, {'type': 'perplexity', 'in_batch_dim': [8, 112], 'batch_size': 8, 'flops': 2214805229440}, {'type': 'perplexity', 'in_batch_dim': [8, 112], 'batch_size': 8, 'flops': 2214805229440}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1582003754624}, {'type': 'perplexity', 'in_batch_dim': [8, 144], 'batch_size': 8, 'flops': 2847606704256}, {'type': 'perplexity', 'in_batch_dim': [8, 112], 'batch_size': 8, 'flops': 2214805229440}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1582003754624}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1898404492032}, {'type': 'perplexity', 'in_batch_dim': [8, 64], 'batch_size': 8, 'flops': 1265603017216}, {'type': 'perplexity', 'in_batch_dim': [8, 64], 'batch_size': 8, 'flops': 1265603017216}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1898404492032}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1582003754624}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1582003754624}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1582003754624}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1582003754624}, {'type': 'perplexity', 'in_batch_dim': [8, 64], 'batch_size': 8, 'flops': 1265603017216}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1898404492032}, {'type': 'perplexity', 'in_batch_dim': [8, 64], 'batch_size': 8, 'flops': 1265603017216}, {'type': 'perplexity', 'in_batch_dim': [8, 64], 'batch_size': 8, 'flops': 1265603017216}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1582003754624}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1582003754624}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1582003754624}, {'type': 'perplexity', 'in_batch_dim': [8, 112], 'batch_size': 8, 'flops': 2214805229440}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1582003754624}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1582003754624}, {'type': 'perplexity', 'in_batch_dim': [8, 160], 'batch_size': 8, 'flops': 3164007441664}, {'type': 'perplexity', 'in_batch_dim': [8, 64], 'batch_size': 8, 'flops': 1265603017216}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1898404492032}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1898404492032}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1898404492032}, {'type': 'perplexity', 'in_batch_dim': [8, 64], 'batch_size': 8, 'flops': 1265603017216}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1582003754624}, {'type': 'perplexity', 'in_batch_dim': [8, 64], 'batch_size': 8, 'flops': 1265603017216}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1582003754624}, {'type': 'perplexity', 'in_batch_dim': [8, 64], 'batch_size': 8, 'flops': 1265603017216}, {'type': 'perplexity', 'in_batch_dim': [8, 112], 'batch_size': 8, 'flops': 2214805229440}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1582003754624}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1898404492032}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1898404492032}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1898404492032}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1898404492032}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1898404492032}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1898404492032}, {'type': 'perplexity', 'in_batch_dim': [8, 64], 'batch_size': 8, 'flops': 1265603017216}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1898404492032}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1582003754624}, {'type': 'perplexity', 'in_batch_dim': [8, 64], 'batch_size': 8, 'flops': 1265603017216}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1898404492032}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1898404492032}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1898404492032}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1898404492032}, {'type': 'perplexity', 'in_batch_dim': [8, 128], 'batch_size': 8, 'flops': 2531205966848}, {'type': 'perplexity', 'in_batch_dim': [8, 112], 'batch_size': 8, 'flops': 2214805229440}, {'type': 'perplexity', 'in_batch_dim': [8, 64], 'batch_size': 8, 'flops': 1265603017216}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1582003754624}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1898404492032}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1582003754624}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1898404492032}, {'type': 'perplexity', 'in_batch_dim': [8, 64], 'batch_size': 8, 'flops': 1265603017216}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1898404492032}, {'type': 'perplexity', 'in_batch_dim': [8, 112], 'batch_size': 8, 'flops': 2214805229440}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1898404492032}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1582003754624}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1582003754624}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1582003754624}, {'type': 'perplexity', 'in_batch_dim': [8, 64], 'batch_size': 8, 'flops': 1265603017216}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1582003754624}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1898404492032}, {'type': 'perplexity', 'in_batch_dim': [8, 64], 'batch_size': 8, 'flops': 1265603017216}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1582003754624}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1898404492032}, {'type': 'perplexity', 'in_batch_dim': [8, 64], 'batch_size': 8, 'flops': 1265603017216}, {'type': 'perplexity', 'in_batch_dim': [8, 112], 'batch_size': 8, 'flops': 2214805229440}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1582003754624}, {'type': 'perplexity', 'in_batch_dim': [8, 144], 'batch_size': 8, 'flops': 2847606704256}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1582003754624}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1582003754624}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1582003754624}, {'type': 'perplexity', 'in_batch_dim': [8, 64], 'batch_size': 8, 'flops': 1265603017216}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1898404492032}, {'type': 'perplexity', 'in_batch_dim': [8, 112], 'batch_size': 8, 'flops': 2214805229440}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1898404492032}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1582003754624}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1582003754624}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1582003754624}, {'type': 'perplexity', 'in_batch_dim': [8, 112], 'batch_size': 8, 'flops': 2214805229440}, {'type': 'perplexity', 'in_batch_dim': [8, 64], 'batch_size': 8, 'flops': 1265603017216}, {'type': 'perplexity', 'in_batch_dim': [8, 112], 'batch_size': 8, 'flops': 2214805229440}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1582003754624}, {'type': 'perplexity', 'in_batch_dim': [8, 64], 'batch_size': 8, 'flops': 1265603017216}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1582003754624}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1582003754624}, {'type': 'perplexity', 'in_batch_dim': [8, 64], 'batch_size': 8, 'flops': 1265603017216}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1582003754624}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1582003754624}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1582003754624}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1898404492032}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1582003754624}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1582003754624}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1898404492032}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1898404492032}, {'type': 'perplexity', 'in_batch_dim': [8, 112], 'batch_size': 8, 'flops': 2214805229440}, {'type': 'perplexity', 'in_batch_dim': [8, 64], 'batch_size': 8, 'flops': 1265603017216}, {'type': 'perplexity', 'in_batch_dim': [8, 112], 'batch_size': 8, 'flops': 2214805229440}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1898404492032}, {'type': 'perplexity', 'in_batch_dim': [8, 112], 'batch_size': 8, 'flops': 2214805229440}, {'type': 'perplexity', 'in_batch_dim': [8, 128], 'batch_size': 8, 'flops': 2531205966848}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1582003754624}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1898404492032}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1898404492032}, {'type': 'perplexity', 'in_batch_dim': [8, 192], 'batch_size': 8, 'flops': 3796808916480}, {'type': 'perplexity', 'in_batch_dim': [8, 64], 'batch_size': 8, 'flops': 1265603017216}, {'type': 'perplexity', 'in_batch_dim': [8, 144], 'batch_size': 8, 'flops': 2847606704256}, {'type': 'perplexity', 'in_batch_dim': [8, 64], 'batch_size': 8, 'flops': 1265603017216}, {'type': 'perplexity', 'in_batch_dim': [8, 144], 'batch_size': 8, 'flops': 2847606704256}, {'type': 'perplexity', 'in_batch_dim': [8, 64], 'batch_size': 8, 'flops': 1265603017216}, {'type': 'perplexity', 'in_batch_dim': [8, 64], 'batch_size': 8, 'flops': 1265603017216}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1898404492032}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1898404492032}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1582003754624}, {'type': 'perplexity', 'in_batch_dim': [8, 128], 'batch_size': 8, 'flops': 2531205966848}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1898404492032}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1898404492032}, {'type': 'perplexity', 'in_batch_dim': [8, 112], 'batch_size': 8, 'flops': 2214805229440}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1582003754624}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1582003754624}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1898404492032}, {'type': 'perplexity', 'in_batch_dim': [8, 64], 'batch_size': 8, 'flops': 1265603017216}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1898404492032}, {'type': 'perplexity', 'in_batch_dim': [8, 112], 'batch_size': 8, 'flops': 2214805229440}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1898404492032}, {'type': 'perplexity', 'in_batch_dim': [8, 64], 'batch_size': 8, 'flops': 1265603017216}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1582003754624}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1898404492032}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1898404492032}, {'type': 'perplexity', 'in_batch_dim': [8, 64], 'batch_size': 8, 'flops': 1265603017216}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1898404492032}, {'type': 'perplexity', 'in_batch_dim': [8, 64], 'batch_size': 8, 'flops': 1265603017216}, {'type': 'perplexity', 'in_batch_dim': [8, 112], 'batch_size': 8, 'flops': 2214805229440}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1582003754624}, {'type': 'perplexity', 'in_batch_dim': [8, 128], 'batch_size': 8, 'flops': 2531205966848}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1582003754624}, {'type': 'perplexity', 'in_batch_dim': [8, 112], 'batch_size': 8, 'flops': 2214805229440}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1898404492032}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1582003754624}, {'type': 'perplexity', 'in_batch_dim': [8, 112], 'batch_size': 8, 'flops': 2214805229440}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1582003754624}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1898404492032}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1898404492032}, {'type': 'perplexity', 'in_batch_dim': [8, 128], 'batch_size': 8, 'flops': 2531205966848}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1898404492032}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1898404492032}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1582003754624}, {'type': 'perplexity', 'in_batch_dim': [8, 112], 'batch_size': 8, 'flops': 2214805229440}, {'type': 'perplexity', 'in_batch_dim': [8, 128], 'batch_size': 8, 'flops': 2531205966848}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1898404492032}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1582003754624}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1582003754624}, {'type': 'perplexity', 'in_batch_dim': [8, 128], 'batch_size': 8, 'flops': 2531205966848}, {'type': 'perplexity', 'in_batch_dim': [8, 112], 'batch_size': 8, 'flops': 2214805229440}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1582003754624}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1582003754624}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1898404492032}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1582003754624}, {'type': 'perplexity', 'in_batch_dim': [8, 64], 'batch_size': 8, 'flops': 1265603017216}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1582003754624}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1582003754624}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1582003754624}, {'type': 'perplexity', 'in_batch_dim': [8, 112], 'batch_size': 8, 'flops': 2214805229440}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1582003754624}, {'type': 'perplexity', 'in_batch_dim': [8, 64], 'batch_size': 8, 'flops': 1265603017216}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1582003754624}, {'type': 'perplexity', 'in_batch_dim': [8, 128], 'batch_size': 8, 'flops': 2531205966848}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1898404492032}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1898404492032}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1582003754624}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1898404492032}, {'type': 'perplexity', 'in_batch_dim': [8, 64], 'batch_size': 8, 'flops': 1265603017216}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1898404492032}, {'type': 'perplexity', 'in_batch_dim': [8, 112], 'batch_size': 8, 'flops': 2214805229440}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1898404492032}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1582003754624}, {'type': 'perplexity', 'in_batch_dim': [8, 112], 'batch_size': 8, 'flops': 2214805229440}, {'type': 'perplexity', 'in_batch_dim': [8, 112], 'batch_size': 8, 'flops': 2214805229440}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1898404492032}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1898404492032}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1898404492032}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1898404492032}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1582003754624}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1582003754624}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1582003754624}, {'type': 'perplexity', 'in_batch_dim': [8, 112], 'batch_size': 8, 'flops': 2214805229440}, {'type': 'perplexity', 'in_batch_dim': [8, 64], 'batch_size': 8, 'flops': 1265603017216}, {'type': 'perplexity', 'in_batch_dim': [8, 144], 'batch_size': 8, 'flops': 2847606704256}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1898404492032}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1898404492032}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1898404492032}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1898404492032}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1898404492032}, {'type': 'perplexity', 'in_batch_dim': [8, 112], 'batch_size': 8, 'flops': 2214805229440}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1582003754624}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1582003754624}, {'type': 'perplexity', 'in_batch_dim': [8, 64], 'batch_size': 8, 'flops': 1265603017216}, {'type': 'perplexity', 'in_batch_dim': [8, 112], 'batch_size': 8, 'flops': 2214805229440}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1582003754624}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1898404492032}, {'type': 'perplexity', 'in_batch_dim': [8, 64], 'batch_size': 8, 'flops': 1265603017216}, {'type': 'perplexity', 'in_batch_dim': [8, 112], 'batch_size': 8, 'flops': 2214805229440}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1898404492032}, {'type': 'perplexity', 'in_batch_dim': [8, 64], 'batch_size': 8, 'flops': 1265603017216}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1582003754624}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1582003754624}, {'type': 'perplexity', 'in_batch_dim': [8, 64], 'batch_size': 8, 'flops': 1265603017216}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1898404492032}, {'type': 'perplexity', 'in_batch_dim': [8, 64], 'batch_size': 8, 'flops': 1265603017216}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1582003754624}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1898404492032}, {'type': 'perplexity', 'in_batch_dim': [8, 112], 'batch_size': 8, 'flops': 2214805229440}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1582003754624}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1898404492032}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1582003754624}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1898404492032}, {'type': 'perplexity', 'in_batch_dim': [8, 112], 'batch_size': 8, 'flops': 2214805229440}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1898404492032}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1898404492032}, {'type': 'perplexity', 'in_batch_dim': [8, 64], 'batch_size': 8, 'flops': 1265603017216}, {'type': 'perplexity', 'in_batch_dim': [8, 64], 'batch_size': 8, 'flops': 1265603017216}, {'type': 'perplexity', 'in_batch_dim': [8, 128], 'batch_size': 8, 'flops': 2531205966848}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1582003754624}, {'type': 'perplexity', 'in_batch_dim': [8, 64], 'batch_size': 8, 'flops': 1265603017216}, {'type': 'perplexity', 'in_batch_dim': [8, 112], 'batch_size': 8, 'flops': 2214805229440}, {'type': 'perplexity', 'in_batch_dim': [8, 112], 'batch_size': 8, 'flops': 2214805229440}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1898404492032}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1582003754624}, {'type': 'perplexity', 'in_batch_dim': [8, 128], 'batch_size': 8, 'flops': 2531205966848}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1898404492032}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1582003754624}, {'type': 'perplexity', 'in_batch_dim': [8, 112], 'batch_size': 8, 'flops': 2214805229440}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1898404492032}, {'type': 'perplexity', 'in_batch_dim': [8, 64], 'batch_size': 8, 'flops': 1265603017216}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1582003754624}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1898404492032}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1898404492032}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1582003754624}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1582003754624}, {'type': 'perplexity', 'in_batch_dim': [8, 64], 'batch_size': 8, 'flops': 1265603017216}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1898404492032}, {'type': 'perplexity', 'in_batch_dim': [8, 112], 'batch_size': 8, 'flops': 2214805229440}, {'type': 'perplexity', 'in_batch_dim': [8, 64], 'batch_size': 8, 'flops': 1265603017216}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1898404492032}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1582003754624}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1582003754624}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1898404492032}, {'type': 'perplexity', 'in_batch_dim': [8, 112], 'batch_size': 8, 'flops': 2214805229440}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1898404492032}, {'type': 'perplexity', 'in_batch_dim': [8, 112], 'batch_size': 8, 'flops': 2214805229440}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1582003754624}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1582003754624}, {'type': 'perplexity', 'in_batch_dim': [8, 64], 'batch_size': 8, 'flops': 1265603017216}, {'type': 'perplexity', 'in_batch_dim': [8, 112], 'batch_size': 8, 'flops': 2214805229440}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1582003754624}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1898404492032}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1582003754624}, {'type': 'perplexity', 'in_batch_dim': [8, 112], 'batch_size': 8, 'flops': 2214805229440}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1898404492032}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1898404492032}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1582003754624}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1898404492032}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1582003754624}, {'type': 'perplexity', 'in_batch_dim': [8, 112], 'batch_size': 8, 'flops': 2214805229440}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1582003754624}, {'type': 'perplexity', 'in_batch_dim': [8, 64], 'batch_size': 8, 'flops': 1265603017216}, {'type': 'perplexity', 'in_batch_dim': [8, 64], 'batch_size': 8, 'flops': 1265603017216}, {'type': 'perplexity', 'in_batch_dim': [8, 48], 'batch_size': 8, 'flops': 949202279808}, {'type': 'perplexity', 'in_batch_dim': [8, 112], 'batch_size': 8, 'flops': 2214805229440}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1582003754624}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1898404492032}, {'type': 'perplexity', 'in_batch_dim': [8, 128], 'batch_size': 8, 'flops': 2531205966848}, {'type': 'perplexity', 'in_batch_dim': [8, 112], 'batch_size': 8, 'flops': 2214805229440}, {'type': 'perplexity', 'in_batch_dim': [8, 128], 'batch_size': 8, 'flops': 2531205966848}, {'type': 'perplexity', 'in_batch_dim': [8, 112], 'batch_size': 8, 'flops': 2214805229440}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1582003754624}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1898404492032}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1582003754624}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1582003754624}, {'type': 'perplexity', 'in_batch_dim': [8, 112], 'batch_size': 8, 'flops': 2214805229440}, {'type': 'perplexity', 'in_batch_dim': [8, 112], 'batch_size': 8, 'flops': 2214805229440}, {'type': 'perplexity', 'in_batch_dim': [8, 64], 'batch_size': 8, 'flops': 1265603017216}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1898404492032}, {'type': 'perplexity', 'in_batch_dim': [8, 112], 'batch_size': 8, 'flops': 2214805229440}, {'type': 'perplexity', 'in_batch_dim': [8, 64], 'batch_size': 8, 'flops': 1265603017216}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1898404492032}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1582003754624}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1898404492032}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1898404492032}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1898404492032}, {'type': 'perplexity', 'in_batch_dim': [8, 48], 'batch_size': 8, 'flops': 949202279808}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1898404492032}, {'type': 'perplexity', 'in_batch_dim': [8, 112], 'batch_size': 8, 'flops': 2214805229440}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1582003754624}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1898404492032}, {'type': 'perplexity', 'in_batch_dim': [8, 64], 'batch_size': 8, 'flops': 1265603017216}, {'type': 'perplexity', 'in_batch_dim': [8, 112], 'batch_size': 8, 'flops': 2214805229440}, {'type': 'perplexity', 'in_batch_dim': [8, 64], 'batch_size': 8, 'flops': 1265603017216}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1582003754624}, {'type': 'perplexity', 'in_batch_dim': [8, 64], 'batch_size': 8, 'flops': 1265603017216}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1582003754624}, {'type': 'perplexity', 'in_batch_dim': [8, 112], 'batch_size': 8, 'flops': 2214805229440}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1582003754624}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1582003754624}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1582003754624}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1582003754624}, {'type': 'perplexity', 'in_batch_dim': [8, 112], 'batch_size': 8, 'flops': 2214805229440}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1582003754624}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1898404492032}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1898404492032}, {'type': 'perplexity', 'in_batch_dim': [8, 112], 'batch_size': 8, 'flops': 2214805229440}, {'type': 'perplexity', 'in_batch_dim': [8, 144], 'batch_size': 8, 'flops': 2847606704256}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1582003754624}, {'type': 'perplexity', 'in_batch_dim': [8, 64], 'batch_size': 8, 'flops': 1265603017216}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1898404492032}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1898404492032}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1582003754624}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1898404492032}, {'type': 'perplexity', 'in_batch_dim': [8, 128], 'batch_size': 8, 'flops': 2531205966848}, {'type': 'perplexity', 'in_batch_dim': [8, 112], 'batch_size': 8, 'flops': 2214805229440}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1898404492032}, {'type': 'perplexity', 'in_batch_dim': [8, 64], 'batch_size': 8, 'flops': 1265603017216}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1898404492032}, {'type': 'perplexity', 'in_batch_dim': [8, 160], 'batch_size': 8, 'flops': 3164007441664}, {'type': 'perplexity', 'in_batch_dim': [8, 64], 'batch_size': 8, 'flops': 1265603017216}, {'type': 'perplexity', 'in_batch_dim': [8, 112], 'batch_size': 8, 'flops': 2214805229440}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1898404492032}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1898404492032}, {'type': 'perplexity', 'in_batch_dim': [8, 144], 'batch_size': 8, 'flops': 2847606704256}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1582003754624}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1898404492032}, {'type': 'perplexity', 'in_batch_dim': [8, 64], 'batch_size': 8, 'flops': 1265603017216}, {'type': 'perplexity', 'in_batch_dim': [8, 64], 'batch_size': 8, 'flops': 1265603017216}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1898404492032}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1898404492032}, {'type': 'perplexity', 'in_batch_dim': [8, 112], 'batch_size': 8, 'flops': 2214805229440}, {'type': 'perplexity', 'in_batch_dim': [8, 112], 'batch_size': 8, 'flops': 2214805229440}, {'type': 'perplexity', 'in_batch_dim': [8, 128], 'batch_size': 8, 'flops': 2531205966848}, {'type': 'perplexity', 'in_batch_dim': [8, 112], 'batch_size': 8, 'flops': 2214805229440}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1582003754624}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1582003754624}, {'type': 'perplexity', 'in_batch_dim': [8, 112], 'batch_size': 8, 'flops': 2214805229440}, {'type': 'perplexity', 'in_batch_dim': [8, 128], 'batch_size': 8, 'flops': 2531205966848}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1582003754624}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1582003754624}, {'type': 'perplexity', 'in_batch_dim': [8, 64], 'batch_size': 8, 'flops': 1265603017216}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1898404492032}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1582003754624}, {'type': 'perplexity', 'in_batch_dim': [8, 112], 'batch_size': 8, 'flops': 2214805229440}, {'type': 'perplexity', 'in_batch_dim': [8, 64], 'batch_size': 8, 'flops': 1265603017216}, {'type': 'perplexity', 'in_batch_dim': [8, 64], 'batch_size': 8, 'flops': 1265603017216}, {'type': 'perplexity', 'in_batch_dim': [8, 128], 'batch_size': 8, 'flops': 2531205966848}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1898404492032}, {'type': 'perplexity', 'in_batch_dim': [8, 112], 'batch_size': 8, 'flops': 2214805229440}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1582003754624}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1582003754624}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1582003754624}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1582003754624}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1582003754624}, {'type': 'perplexity', 'in_batch_dim': [8, 112], 'batch_size': 8, 'flops': 2214805229440}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1898404492032}, {'type': 'perplexity', 'in_batch_dim': [8, 112], 'batch_size': 8, 'flops': 2214805229440}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1898404492032}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1582003754624}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1582003754624}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1898404492032}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1898404492032}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1582003754624}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1582003754624}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1582003754624}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1582003754624}, {'type': 'perplexity', 'in_batch_dim': [8, 48], 'batch_size': 8, 'flops': 949202279808}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1582003754624}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1898404492032}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1582003754624}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1898404492032}, {'type': 'perplexity', 'in_batch_dim': [8, 112], 'batch_size': 8, 'flops': 2214805229440}, {'type': 'perplexity', 'in_batch_dim': [8, 128], 'batch_size': 8, 'flops': 2531205966848}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1898404492032}, {'type': 'perplexity', 'in_batch_dim': [8, 64], 'batch_size': 8, 'flops': 1265603017216}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1898404492032}, {'type': 'perplexity', 'in_batch_dim': [8, 64], 'batch_size': 8, 'flops': 1265603017216}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1582003754624}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1582003754624}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1582003754624}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1582003754624}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1898404492032}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1582003754624}, {'type': 'perplexity', 'in_batch_dim': [8, 112], 'batch_size': 8, 'flops': 2214805229440}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1898404492032}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1582003754624}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1898404492032}, {'type': 'perplexity', 'in_batch_dim': [8, 64], 'batch_size': 8, 'flops': 1265603017216}, {'type': 'perplexity', 'in_batch_dim': [8, 64], 'batch_size': 8, 'flops': 1265603017216}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1898404492032}, {'type': 'perplexity', 'in_batch_dim': [8, 64], 'batch_size': 8, 'flops': 1265603017216}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1582003754624}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1582003754624}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1582003754624}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1582003754624}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1582003754624}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1582003754624}, {'type': 'perplexity', 'in_batch_dim': [8, 64], 'batch_size': 8, 'flops': 1265603017216}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1582003754624}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1582003754624}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1898404492032}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1898404492032}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1898404492032}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1898404492032}, {'type': 'perplexity', 'in_batch_dim': [8, 128], 'batch_size': 8, 'flops': 2531205966848}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1582003754624}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1898404492032}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1898404492032}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1898404492032}, {'type': 'perplexity', 'in_batch_dim': [8, 64], 'batch_size': 8, 'flops': 1265603017216}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1898404492032}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1898404492032}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1582003754624}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1898404492032}, {'type': 'perplexity', 'in_batch_dim': [8, 112], 'batch_size': 8, 'flops': 2214805229440}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1898404492032}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1582003754624}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1898404492032}, {'type': 'perplexity', 'in_batch_dim': [8, 64], 'batch_size': 8, 'flops': 1265603017216}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1898404492032}, {'type': 'perplexity', 'in_batch_dim': [8, 112], 'batch_size': 8, 'flops': 2214805229440}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1898404492032}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1898404492032}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1582003754624}, {'type': 'perplexity', 'in_batch_dim': [8, 64], 'batch_size': 8, 'flops': 1265603017216}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1582003754624}, {'type': 'perplexity', 'in_batch_dim': [8, 128], 'batch_size': 8, 'flops': 2531205966848}, {'type': 'perplexity', 'in_batch_dim': [8, 64], 'batch_size': 8, 'flops': 1265603017216}, {'type': 'perplexity', 'in_batch_dim': [8, 128], 'batch_size': 8, 'flops': 2531205966848}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1898404492032}, {'type': 'perplexity', 'in_batch_dim': [8, 112], 'batch_size': 8, 'flops': 2214805229440}, {'type': 'perplexity', 'in_batch_dim': [8, 112], 'batch_size': 8, 'flops': 2214805229440}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1898404492032}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1898404492032}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1582003754624}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1582003754624}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1582003754624}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1898404492032}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1582003754624}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1582003754624}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1898404492032}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1582003754624}, {'type': 'perplexity', 'in_batch_dim': [8, 64], 'batch_size': 8, 'flops': 1265603017216}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1898404492032}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1898404492032}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1898404492032}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1898404492032}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1582003754624}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1898404492032}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1582003754624}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1582003754624}, {'type': 'perplexity', 'in_batch_dim': [8, 112], 'batch_size': 8, 'flops': 2214805229440}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1582003754624}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1898404492032}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1582003754624}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1582003754624}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1582003754624}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1898404492032}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1582003754624}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1898404492032}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1582003754624}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1582003754624}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1898404492032}, {'type': 'perplexity', 'in_batch_dim': [8, 112], 'batch_size': 8, 'flops': 2214805229440}, {'type': 'perplexity', 'in_batch_dim': [8, 128], 'batch_size': 8, 'flops': 2531205966848}, {'type': 'perplexity', 'in_batch_dim': [8, 112], 'batch_size': 8, 'flops': 2214805229440}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1582003754624}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1898404492032}, {'type': 'perplexity', 'in_batch_dim': [8, 64], 'batch_size': 8, 'flops': 1265603017216}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1898404492032}, {'type': 'perplexity', 'in_batch_dim': [8, 64], 'batch_size': 8, 'flops': 1265603017216}, {'type': 'perplexity', 'in_batch_dim': [8, 144], 'batch_size': 8, 'flops': 2847606704256}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1582003754624}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1898404492032}, {'type': 'perplexity', 'in_batch_dim': [8, 112], 'batch_size': 8, 'flops': 2214805229440}, {'type': 'perplexity', 'in_batch_dim': [8, 144], 'batch_size': 8, 'flops': 2847606704256}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1582003754624}, {'type': 'perplexity', 'in_batch_dim': [8, 112], 'batch_size': 8, 'flops': 2214805229440}, {'type': 'perplexity', 'in_batch_dim': [8, 64], 'batch_size': 8, 'flops': 1265603017216}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1582003754624}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1898404492032}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1898404492032}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1582003754624}, {'type': 'perplexity', 'in_batch_dim': [8, 112], 'batch_size': 8, 'flops': 2214805229440}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1582003754624}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1582003754624}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1582003754624}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1582003754624}, {'type': 'perplexity', 'in_batch_dim': [8, 112], 'batch_size': 8, 'flops': 2214805229440}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1898404492032}, {'type': 'perplexity', 'in_batch_dim': [8, 128], 'batch_size': 8, 'flops': 2531205966848}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1582003754624}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1582003754624}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1898404492032}, {'type': 'perplexity', 'in_batch_dim': [8, 64], 'batch_size': 8, 'flops': 1265603017216}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1898404492032}, {'type': 'perplexity', 'in_batch_dim': [8, 64], 'batch_size': 8, 'flops': 1265603017216}, {'type': 'perplexity', 'in_batch_dim': [8, 112], 'batch_size': 8, 'flops': 2214805229440}, {'type': 'perplexity', 'in_batch_dim': [8, 112], 'batch_size': 8, 'flops': 2214805229440}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1582003754624}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1898404492032}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1582003754624}, {'type': 'perplexity', 'in_batch_dim': [8, 64], 'batch_size': 8, 'flops': 1265603017216}, {'type': 'perplexity', 'in_batch_dim': [8, 64], 'batch_size': 8, 'flops': 1265603017216}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1898404492032}, {'type': 'perplexity', 'in_batch_dim': [8, 64], 'batch_size': 8, 'flops': 1265603017216}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1898404492032}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1898404492032}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1582003754624}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1898404492032}, {'type': 'perplexity', 'in_batch_dim': [8, 144], 'batch_size': 8, 'flops': 2847606704256}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1898404492032}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1582003754624}, {'type': 'perplexity', 'in_batch_dim': [8, 128], 'batch_size': 8, 'flops': 2531205966848}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1898404492032}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1898404492032}, {'type': 'perplexity', 'in_batch_dim': [8, 112], 'batch_size': 8, 'flops': 2214805229440}, {'type': 'perplexity', 'in_batch_dim': [8, 112], 'batch_size': 8, 'flops': 2214805229440}, {'type': 'perplexity', 'in_batch_dim': [8, 64], 'batch_size': 8, 'flops': 1265603017216}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1582003754624}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1898404492032}, {'type': 'perplexity', 'in_batch_dim': [8, 128], 'batch_size': 8, 'flops': 2531205966848}, {'type': 'perplexity', 'in_batch_dim': [8, 144], 'batch_size': 8, 'flops': 2847606704256}, {'type': 'perplexity', 'in_batch_dim': [8, 112], 'batch_size': 8, 'flops': 2214805229440}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1582003754624}, {'type': 'perplexity', 'in_batch_dim': [8, 64], 'batch_size': 8, 'flops': 1265603017216}, {'type': 'perplexity', 'in_batch_dim': [8, 64], 'batch_size': 8, 'flops': 1265603017216}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1898404492032}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1582003754624}, {'type': 'perplexity', 'in_batch_dim': [8, 64], 'batch_size': 8, 'flops': 1265603017216}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1898404492032}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1582003754624}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1898404492032}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1898404492032}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1582003754624}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1582003754624}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1582003754624}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1582003754624}, {'type': 'perplexity', 'in_batch_dim': [3, 48], 'batch_size': 8, 'flops': 949202279808}], 'timestamp': '2025-09-10 02:40:48.468594', 'step': 12582, 'epoch': 2} {'type': 'pplx', 'content': 13167.352674132135, 'timestamp': '2025-09-10 02:40:48.472062', 'step': 12582, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 3322488817984}, 'timestamp': '2025-09-10 02:40:48.501806', 'step': 12582, 'epoch': 2} {'type': 'loss', 'content': 0.049990542232990265, 'timestamp': '2025-09-10 02:40:48.505356', 'step': 12583, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 128], 'flops': 3797092544000}, 'timestamp': '2025-09-10 02:40:48.535306', 'step': 12583, 'epoch': 2} {'type': 'loss', 'content': 0.09129219502210617, 'timestamp': '2025-09-10 02:40:48.559739', 'step': 12584, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 2847885091968}, 'timestamp': '2025-09-10 02:40:48.590966', 'step': 12584, 'epoch': 2} {'type': 'loss', 'content': 0.09074384719133377, 'timestamp': '2025-09-10 02:40:48.593603', 'step': 12585, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 160], 'flops': 4746299996032}, 'timestamp': '2025-09-10 02:40:48.625053', 'step': 12585, 'epoch': 2} {'type': 'loss', 'content': 0.10203023999929428, 'timestamp': '2025-09-10 02:40:48.627915', 'step': 12586, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 2847885091968}, 'timestamp': '2025-09-10 02:40:48.657733', 'step': 12586, 'epoch': 2} {'type': 'loss', 'content': 0.06373812258243561, 'timestamp': '2025-09-10 02:40:48.659977', 'step': 12587, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 128], 'flops': 3797092544000}, 'timestamp': '2025-09-10 02:40:48.689660', 'step': 12587, 'epoch': 2} {'type': 'loss', 'content': 0.13461849093437195, 'timestamp': '2025-09-10 02:40:48.713263', 'step': 12588, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 128], 'flops': 3797092544000}, 'timestamp': '2025-09-10 02:40:48.743240', 'step': 12588, 'epoch': 2} {'type': 'loss', 'content': 0.08850506693124771, 'timestamp': '2025-09-10 02:40:48.745596', 'step': 12589, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 144], 'flops': 4271696270016}, 'timestamp': '2025-09-10 02:40:48.775751', 'step': 12589, 'epoch': 2} {'type': 'loss', 'content': 0.11256320774555206, 'timestamp': '2025-09-10 02:40:48.778267', 'step': 12590, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 128], 'flops': 3797092544000}, 'timestamp': '2025-09-10 02:40:48.809748', 'step': 12590, 'epoch': 2} {'type': 'loss', 'content': 0.04101870581507683, 'timestamp': '2025-09-10 02:40:48.812030', 'step': 12591, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 128], 'flops': 3797092544000}, 'timestamp': '2025-09-10 02:40:48.842223', 'step': 12591, 'epoch': 2} {'type': 'loss', 'content': 0.0649104192852974, 'timestamp': '2025-09-10 02:40:48.865789', 'step': 12592, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 2847885091968}, 'timestamp': '2025-09-10 02:40:48.896171', 'step': 12592, 'epoch': 2} {'type': 'loss', 'content': 0.09886406362056732, 'timestamp': '2025-09-10 02:40:48.898694', 'step': 12593, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 3322488817984}, 'timestamp': '2025-09-10 02:40:48.930217', 'step': 12593, 'epoch': 2} {'type': 'loss', 'content': 0.08250964432954788, 'timestamp': '2025-09-10 02:40:48.932774', 'step': 12594, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 3322488817984}, 'timestamp': '2025-09-10 02:40:48.963396', 'step': 12594, 'epoch': 2} {'type': 'loss', 'content': 0.08102740347385406, 'timestamp': '2025-09-10 02:40:48.965736', 'step': 12595, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 128], 'flops': 3797092544000}, 'timestamp': '2025-09-10 02:40:48.995604', 'step': 12595, 'epoch': 2} {'type': 'loss', 'content': 0.13032329082489014, 'timestamp': '2025-09-10 02:40:49.019324', 'step': 12596, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 128], 'flops': 3797092544000}, 'timestamp': '2025-09-10 02:40:49.049656', 'step': 12596, 'epoch': 2} {'type': 'loss', 'content': 0.08877679705619812, 'timestamp': '2025-09-10 02:40:49.052001', 'step': 12597, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 2847885091968}, 'timestamp': '2025-09-10 02:40:49.081645', 'step': 12597, 'epoch': 2} {'type': 'loss', 'content': 0.12782487273216248, 'timestamp': '2025-09-10 02:40:49.084131', 'step': 12598, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 2847885091968}, 'timestamp': '2025-09-10 02:40:49.114604', 'step': 12598, 'epoch': 2} {'type': 'loss', 'content': 0.20822636783123016, 'timestamp': '2025-09-10 02:40:49.116948', 'step': 12599, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 144], 'flops': 4271696270016}, 'timestamp': '2025-09-10 02:40:49.147186', 'step': 12599, 'epoch': 2} {'type': 'loss', 'content': 0.12410514801740646, 'timestamp': '2025-09-10 02:40:49.171203', 'step': 12600, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 128], 'flops': 3797092544000}, 'timestamp': '2025-09-10 02:40:49.201888', 'step': 12600, 'epoch': 2} {'type': 'loss', 'content': 0.10394447296857834, 'timestamp': '2025-09-10 02:40:49.205548', 'step': 12601, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 3322488817984}, 'timestamp': '2025-09-10 02:40:49.235562', 'step': 12601, 'epoch': 2} {'type': 'loss', 'content': 0.108866386115551, 'timestamp': '2025-09-10 02:40:49.237944', 'step': 12602, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 3322488817984}, 'timestamp': '2025-09-10 02:40:49.274785', 'step': 12602, 'epoch': 2} {'type': 'loss', 'content': 0.052589740604162216, 'timestamp': '2025-09-10 02:40:49.277087', 'step': 12603, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 128], 'flops': 3797092544000}, 'timestamp': '2025-09-10 02:40:49.306968', 'step': 12603, 'epoch': 2} {'type': 'loss', 'content': 0.09959589689970016, 'timestamp': '2025-09-10 02:40:49.333480', 'step': 12604, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 128], 'flops': 3797092544000}, 'timestamp': '2025-09-10 02:40:49.364924', 'step': 12604, 'epoch': 2} {'type': 'loss', 'content': 0.07108756899833679, 'timestamp': '2025-09-10 02:40:49.367714', 'step': 12605, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 3322488817984}, 'timestamp': '2025-09-10 02:40:49.398404', 'step': 12605, 'epoch': 2} {'type': 'loss', 'content': 0.06969545036554337, 'timestamp': '2025-09-10 02:40:49.403593', 'step': 12606, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 2847885091968}, 'timestamp': '2025-09-10 02:40:49.435559', 'step': 12606, 'epoch': 2} {'type': 'loss', 'content': 0.10237985104322433, 'timestamp': '2025-09-10 02:40:49.438281', 'step': 12607, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 2847885091968}, 'timestamp': '2025-09-10 02:40:49.472554', 'step': 12607, 'epoch': 2} {'type': 'loss', 'content': 0.1394396424293518, 'timestamp': '2025-09-10 02:40:49.496126', 'step': 12608, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 2847885091968}, 'timestamp': '2025-09-10 02:40:49.532094', 'step': 12608, 'epoch': 2} {'type': 'loss', 'content': 0.012187744490802288, 'timestamp': '2025-09-10 02:40:49.534530', 'step': 12609, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 128], 'flops': 3797092544000}, 'timestamp': '2025-09-10 02:40:49.565407', 'step': 12609, 'epoch': 2} {'type': 'loss', 'content': 0.14052700996398926, 'timestamp': '2025-09-10 02:40:49.575344', 'step': 12610, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 80], 'flops': 2373281365952}, 'timestamp': '2025-09-10 02:40:49.609552', 'step': 12610, 'epoch': 2} {'type': 'loss', 'content': 0.06493960320949554, 'timestamp': '2025-09-10 02:40:49.612225', 'step': 12611, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 144], 'flops': 4271696270016}, 'timestamp': '2025-09-10 02:40:49.642594', 'step': 12611, 'epoch': 2} {'type': 'loss', 'content': 0.09367890655994415, 'timestamp': '2025-09-10 02:40:49.666362', 'step': 12612, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 144], 'flops': 4271696270016}, 'timestamp': '2025-09-10 02:40:49.711737', 'step': 12612, 'epoch': 2} {'type': 'loss', 'content': 0.1256355494260788, 'timestamp': '2025-09-10 02:40:49.714411', 'step': 12613, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 2847885091968}, 'timestamp': '2025-09-10 02:40:49.744773', 'step': 12613, 'epoch': 2} {'type': 'loss', 'content': 0.0722665786743164, 'timestamp': '2025-09-10 02:40:49.749347', 'step': 12614, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 80], 'flops': 2373281365952}, 'timestamp': '2025-09-10 02:40:49.782000', 'step': 12614, 'epoch': 2} {'type': 'loss', 'content': 0.09498035907745361, 'timestamp': '2025-09-10 02:40:49.786288', 'step': 12615, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 3322488817984}, 'timestamp': '2025-09-10 02:40:49.816941', 'step': 12615, 'epoch': 2} {'type': 'loss', 'content': 0.08703505247831345, 'timestamp': '2025-09-10 02:40:49.843508', 'step': 12616, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 128], 'flops': 3797092544000}, 'timestamp': '2025-09-10 02:40:49.878545', 'step': 12616, 'epoch': 2} {'type': 'loss', 'content': 0.10528086125850677, 'timestamp': '2025-09-10 02:40:49.881186', 'step': 12617, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 128], 'flops': 3797092544000}, 'timestamp': '2025-09-10 02:40:49.913308', 'step': 12617, 'epoch': 2} {'type': 'loss', 'content': 0.10846777260303497, 'timestamp': '2025-09-10 02:40:49.923585', 'step': 12618, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 128], 'flops': 3797092544000}, 'timestamp': '2025-09-10 02:40:49.956617', 'step': 12618, 'epoch': 2} {'type': 'loss', 'content': 0.1178431510925293, 'timestamp': '2025-09-10 02:40:49.959003', 'step': 12619, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 128], 'flops': 3797092544000}, 'timestamp': '2025-09-10 02:40:49.989863', 'step': 12619, 'epoch': 2} {'type': 'loss', 'content': 0.13168227672576904, 'timestamp': '2025-09-10 02:40:50.013450', 'step': 12620, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 128], 'flops': 3797092544000}, 'timestamp': '2025-09-10 02:40:50.051063', 'step': 12620, 'epoch': 2} {'type': 'loss', 'content': 0.04088244214653969, 'timestamp': '2025-09-10 02:40:50.056237', 'step': 12621, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 128], 'flops': 3797092544000}, 'timestamp': '2025-09-10 02:40:50.086757', 'step': 12621, 'epoch': 2} {'type': 'loss', 'content': 0.0895012840628624, 'timestamp': '2025-09-10 02:40:50.090677', 'step': 12622, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 144], 'flops': 4271696270016}, 'timestamp': '2025-09-10 02:40:50.123278', 'step': 12622, 'epoch': 2} {'type': 'loss', 'content': 0.1452033519744873, 'timestamp': '2025-09-10 02:40:50.125816', 'step': 12623, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 2847885091968}, 'timestamp': '2025-09-10 02:40:50.156757', 'step': 12623, 'epoch': 2} {'type': 'loss', 'content': 0.12979429960250854, 'timestamp': '2025-09-10 02:40:50.180623', 'step': 12624, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 80], 'flops': 2373281365952}, 'timestamp': '2025-09-10 02:40:50.217796', 'step': 12624, 'epoch': 2} {'type': 'loss', 'content': 0.13144145905971527, 'timestamp': '2025-09-10 02:40:50.220275', 'step': 12625, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 3322488817984}, 'timestamp': '2025-09-10 02:40:50.250887', 'step': 12625, 'epoch': 2} {'type': 'loss', 'content': 0.08483950048685074, 'timestamp': '2025-09-10 02:40:50.261758', 'step': 12626, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 128], 'flops': 3797092544000}, 'timestamp': '2025-09-10 02:40:50.299136', 'step': 12626, 'epoch': 2} {'type': 'loss', 'content': 0.18541480600833893, 'timestamp': '2025-09-10 02:40:50.302239', 'step': 12627, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 2847885091968}, 'timestamp': '2025-09-10 02:40:50.339366', 'step': 12627, 'epoch': 2} {'type': 'loss', 'content': 0.05010952427983284, 'timestamp': '2025-09-10 02:40:50.363376', 'step': 12628, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 80], 'flops': 2373281365952}, 'timestamp': '2025-09-10 02:40:50.393865', 'step': 12628, 'epoch': 2} {'type': 'loss', 'content': 0.16179542243480682, 'timestamp': '2025-09-10 02:40:50.396987', 'step': 12629, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 2847885091968}, 'timestamp': '2025-09-10 02:40:50.434779', 'step': 12629, 'epoch': 2} {'type': 'loss', 'content': 0.10745982080698013, 'timestamp': '2025-09-10 02:40:50.437191', 'step': 12630, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 128], 'flops': 3797092544000}, 'timestamp': '2025-09-10 02:40:50.468419', 'step': 12630, 'epoch': 2} {'type': 'loss', 'content': 0.15980760753154755, 'timestamp': '2025-09-10 02:40:50.470959', 'step': 12631, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 3322488817984}, 'timestamp': '2025-09-10 02:40:50.501652', 'step': 12631, 'epoch': 2} {'type': 'loss', 'content': 0.14575909078121185, 'timestamp': '2025-09-10 02:40:50.534407', 'step': 12632, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 2847885091968}, 'timestamp': '2025-09-10 02:40:50.575844', 'step': 12632, 'epoch': 2} {'type': 'loss', 'content': 0.03032420389354229, 'timestamp': '2025-09-10 02:40:50.579763', 'step': 12633, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 3322488817984}, 'timestamp': '2025-09-10 02:40:50.611561', 'step': 12633, 'epoch': 2} {'type': 'loss', 'content': 0.0922761857509613, 'timestamp': '2025-09-10 02:40:50.614238', 'step': 12634, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 160], 'flops': 4746299996032}, 'timestamp': '2025-09-10 02:40:50.645219', 'step': 12634, 'epoch': 2} {'type': 'loss', 'content': 0.1180819571018219, 'timestamp': '2025-09-10 02:40:50.647847', 'step': 12635, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 3322488817984}, 'timestamp': '2025-09-10 02:40:50.680712', 'step': 12635, 'epoch': 2} {'type': 'loss', 'content': 0.041703976690769196, 'timestamp': '2025-09-10 02:40:50.704824', 'step': 12636, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 192], 'flops': 5695507448064}, 'timestamp': '2025-09-10 02:40:50.736341', 'step': 12636, 'epoch': 2} {'type': 'loss', 'content': 0.08453628420829773, 'timestamp': '2025-09-10 02:40:50.738863', 'step': 12637, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 80], 'flops': 2373281365952}, 'timestamp': '2025-09-10 02:40:50.770466', 'step': 12637, 'epoch': 2} {'type': 'loss', 'content': 0.03880715370178223, 'timestamp': '2025-09-10 02:40:50.773023', 'step': 12638, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 128], 'flops': 3797092544000}, 'timestamp': '2025-09-10 02:40:50.803413', 'step': 12638, 'epoch': 2} {'type': 'loss', 'content': 0.10793010890483856, 'timestamp': '2025-09-10 02:40:50.805759', 'step': 12639, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 128], 'flops': 3797092544000}, 'timestamp': '2025-09-10 02:40:50.835952', 'step': 12639, 'epoch': 2} {'type': 'loss', 'content': 0.09951211512088776, 'timestamp': '2025-09-10 02:40:50.859477', 'step': 12640, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 3322488817984}, 'timestamp': '2025-09-10 02:40:50.890283', 'step': 12640, 'epoch': 2} {'type': 'loss', 'content': 0.1024659126996994, 'timestamp': '2025-09-10 02:40:50.892800', 'step': 12641, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 128], 'flops': 3797092544000}, 'timestamp': '2025-09-10 02:40:50.923168', 'step': 12641, 'epoch': 2} {'type': 'loss', 'content': 0.11219747364521027, 'timestamp': '2025-09-10 02:40:50.927132', 'step': 12642, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 3322488817984}, 'timestamp': '2025-09-10 02:40:50.958415', 'step': 12642, 'epoch': 2} {'type': 'loss', 'content': 0.09930214285850525, 'timestamp': '2025-09-10 02:40:50.961251', 'step': 12643, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 3322488817984}, 'timestamp': '2025-09-10 02:40:50.992277', 'step': 12643, 'epoch': 2} {'type': 'loss', 'content': 0.11052224040031433, 'timestamp': '2025-09-10 02:40:51.016578', 'step': 12644, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 2847885091968}, 'timestamp': '2025-09-10 02:40:51.046748', 'step': 12644, 'epoch': 2} {'type': 'loss', 'content': 0.12799111008644104, 'timestamp': '2025-09-10 02:40:51.048939', 'step': 12645, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 3322488817984}, 'timestamp': '2025-09-10 02:40:51.078623', 'step': 12645, 'epoch': 2} {'type': 'loss', 'content': 0.13720639050006866, 'timestamp': '2025-09-10 02:40:51.081120', 'step': 12646, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 2847885091968}, 'timestamp': '2025-09-10 02:40:51.111357', 'step': 12646, 'epoch': 2} {'type': 'loss', 'content': 0.07843371480703354, 'timestamp': '2025-09-10 02:40:51.113730', 'step': 12647, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 2847885091968}, 'timestamp': '2025-09-10 02:40:51.143673', 'step': 12647, 'epoch': 2} {'type': 'loss', 'content': 0.1706760823726654, 'timestamp': '2025-09-10 02:40:51.167495', 'step': 12648, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 2847885091968}, 'timestamp': '2025-09-10 02:40:51.197180', 'step': 12648, 'epoch': 2} {'type': 'loss', 'content': 0.1609933227300644, 'timestamp': '2025-09-10 02:40:51.199540', 'step': 12649, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 3322488817984}, 'timestamp': '2025-09-10 02:40:51.229710', 'step': 12649, 'epoch': 2} {'type': 'loss', 'content': 0.09443608671426773, 'timestamp': '2025-09-10 02:40:51.232535', 'step': 12650, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 2847885091968}, 'timestamp': '2025-09-10 02:40:51.262458', 'step': 12650, 'epoch': 2} {'type': 'loss', 'content': 0.11668657511472702, 'timestamp': '2025-09-10 02:40:51.264780', 'step': 12651, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 128], 'flops': 3797092544000}, 'timestamp': '2025-09-10 02:40:51.294443', 'step': 12651, 'epoch': 2} {'type': 'loss', 'content': 0.12352877110242844, 'timestamp': '2025-09-10 02:40:51.318535', 'step': 12652, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 80], 'flops': 2373281365952}, 'timestamp': '2025-09-10 02:40:51.349119', 'step': 12652, 'epoch': 2} {'type': 'loss', 'content': 0.06858661770820618, 'timestamp': '2025-09-10 02:40:51.351626', 'step': 12653, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 2847885091968}, 'timestamp': '2025-09-10 02:40:51.382264', 'step': 12653, 'epoch': 2} {'type': 'loss', 'content': 0.1093178316950798, 'timestamp': '2025-09-10 02:40:51.384770', 'step': 12654, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 144], 'flops': 4271696270016}, 'timestamp': '2025-09-10 02:40:51.417823', 'step': 12654, 'epoch': 2} {'type': 'loss', 'content': 0.0889260396361351, 'timestamp': '2025-09-10 02:40:51.420966', 'step': 12655, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 2847885091968}, 'timestamp': '2025-09-10 02:40:51.452912', 'step': 12655, 'epoch': 2} {'type': 'loss', 'content': 0.08093886822462082, 'timestamp': '2025-09-10 02:40:51.476607', 'step': 12656, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 144], 'flops': 4271696270016}, 'timestamp': '2025-09-10 02:40:51.507807', 'step': 12656, 'epoch': 2} {'type': 'loss', 'content': 0.20328521728515625, 'timestamp': '2025-09-10 02:40:51.510313', 'step': 12657, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 2847885091968}, 'timestamp': '2025-09-10 02:40:51.540326', 'step': 12657, 'epoch': 2} {'type': 'loss', 'content': 0.16466949880123138, 'timestamp': '2025-09-10 02:40:51.543058', 'step': 12658, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 128], 'flops': 3797092544000}, 'timestamp': '2025-09-10 02:40:51.573251', 'step': 12658, 'epoch': 2} {'type': 'loss', 'content': 0.09235751628875732, 'timestamp': '2025-09-10 02:40:51.575383', 'step': 12659, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 3322488817984}, 'timestamp': '2025-09-10 02:40:51.604820', 'step': 12659, 'epoch': 2} {'type': 'loss', 'content': 0.1759900152683258, 'timestamp': '2025-09-10 02:40:51.629801', 'step': 12660, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 2847885091968}, 'timestamp': '2025-09-10 02:40:51.661689', 'step': 12660, 'epoch': 2} {'type': 'loss', 'content': 0.19752369821071625, 'timestamp': '2025-09-10 02:40:51.664156', 'step': 12661, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 3322488817984}, 'timestamp': '2025-09-10 02:40:51.693822', 'step': 12661, 'epoch': 2} {'type': 'loss', 'content': 0.1566530019044876, 'timestamp': '2025-09-10 02:40:51.696430', 'step': 12662, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 176], 'flops': 5220903722048}, 'timestamp': '2025-09-10 02:40:51.727752', 'step': 12662, 'epoch': 2} {'type': 'loss', 'content': 0.08915199339389801, 'timestamp': '2025-09-10 02:40:51.732149', 'step': 12663, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 2847885091968}, 'timestamp': '2025-09-10 02:40:51.764521', 'step': 12663, 'epoch': 2} {'type': 'loss', 'content': 0.08917804807424545, 'timestamp': '2025-09-10 02:40:51.788118', 'step': 12664, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 3322488817984}, 'timestamp': '2025-09-10 02:40:51.818973', 'step': 12664, 'epoch': 2} {'type': 'loss', 'content': 0.08414723724126816, 'timestamp': '2025-09-10 02:40:51.821232', 'step': 12665, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 144], 'flops': 4271696270016}, 'timestamp': '2025-09-10 02:40:51.851594', 'step': 12665, 'epoch': 2} {'type': 'loss', 'content': 0.08891307562589645, 'timestamp': '2025-09-10 02:40:51.853978', 'step': 12666, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 128], 'flops': 3797092544000}, 'timestamp': '2025-09-10 02:40:51.884510', 'step': 12666, 'epoch': 2} {'type': 'loss', 'content': 0.08215626329183578, 'timestamp': '2025-09-10 02:40:51.886902', 'step': 12667, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 144], 'flops': 4271696270016}, 'timestamp': '2025-09-10 02:40:51.916899', 'step': 12667, 'epoch': 2} {'type': 'loss', 'content': 0.09273447096347809, 'timestamp': '2025-09-10 02:40:51.940853', 'step': 12668, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 3322488817984}, 'timestamp': '2025-09-10 02:40:51.971300', 'step': 12668, 'epoch': 2} {'type': 'loss', 'content': 0.14151911437511444, 'timestamp': '2025-09-10 02:40:51.973612', 'step': 12669, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 3322488817984}, 'timestamp': '2025-09-10 02:40:52.003807', 'step': 12669, 'epoch': 2} {'type': 'loss', 'content': 0.1357143521308899, 'timestamp': '2025-09-10 02:40:52.006791', 'step': 12670, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 128], 'flops': 3797092544000}, 'timestamp': '2025-09-10 02:40:52.037188', 'step': 12670, 'epoch': 2} {'type': 'loss', 'content': 0.0594078004360199, 'timestamp': '2025-09-10 02:40:52.041029', 'step': 12671, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 3322488817984}, 'timestamp': '2025-09-10 02:40:52.072054', 'step': 12671, 'epoch': 2} {'type': 'loss', 'content': 0.0791076272726059, 'timestamp': '2025-09-10 02:40:52.095727', 'step': 12672, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 3322488817984}, 'timestamp': '2025-09-10 02:40:52.126483', 'step': 12672, 'epoch': 2} {'type': 'loss', 'content': 0.054671961814165115, 'timestamp': '2025-09-10 02:40:52.129236', 'step': 12673, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 128], 'flops': 3797092544000}, 'timestamp': '2025-09-10 02:40:52.159274', 'step': 12673, 'epoch': 2} {'type': 'loss', 'content': 0.17256060242652893, 'timestamp': '2025-09-10 02:40:52.161624', 'step': 12674, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 128], 'flops': 3797092544000}, 'timestamp': '2025-09-10 02:40:52.191597', 'step': 12674, 'epoch': 2} {'type': 'loss', 'content': 0.07451971620321274, 'timestamp': '2025-09-10 02:40:52.194479', 'step': 12675, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 3322488817984}, 'timestamp': '2025-09-10 02:40:52.225027', 'step': 12675, 'epoch': 2} {'type': 'loss', 'content': 0.11256609112024307, 'timestamp': '2025-09-10 02:40:52.248481', 'step': 12676, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 3322488817984}, 'timestamp': '2025-09-10 02:40:52.278782', 'step': 12676, 'epoch': 2} {'type': 'loss', 'content': 0.07895291596651077, 'timestamp': '2025-09-10 02:40:52.281504', 'step': 12677, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 128], 'flops': 3797092544000}, 'timestamp': '2025-09-10 02:40:52.311743', 'step': 12677, 'epoch': 2} {'type': 'loss', 'content': 0.13612541556358337, 'timestamp': '2025-09-10 02:40:52.314457', 'step': 12678, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 3322488817984}, 'timestamp': '2025-09-10 02:40:52.344269', 'step': 12678, 'epoch': 2} {'type': 'loss', 'content': 0.06993383914232254, 'timestamp': '2025-09-10 02:40:52.347125', 'step': 12679, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 128], 'flops': 3797092544000}, 'timestamp': '2025-09-10 02:40:52.377088', 'step': 12679, 'epoch': 2} {'type': 'loss', 'content': 0.10758700966835022, 'timestamp': '2025-09-10 02:40:52.400988', 'step': 12680, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 144], 'flops': 4271696270016}, 'timestamp': '2025-09-10 02:40:52.432032', 'step': 12680, 'epoch': 2} {'type': 'loss', 'content': 0.2562626302242279, 'timestamp': '2025-09-10 02:40:52.434507', 'step': 12681, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 128], 'flops': 3797092544000}, 'timestamp': '2025-09-10 02:40:52.465037', 'step': 12681, 'epoch': 2} {'type': 'loss', 'content': 0.16182725131511688, 'timestamp': '2025-09-10 02:40:52.467462', 'step': 12682, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 2847885091968}, 'timestamp': '2025-09-10 02:40:52.497490', 'step': 12682, 'epoch': 2} {'type': 'loss', 'content': 0.04101867601275444, 'timestamp': '2025-09-10 02:40:52.499977', 'step': 12683, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 2847885091968}, 'timestamp': '2025-09-10 02:40:52.529870', 'step': 12683, 'epoch': 2} {'type': 'loss', 'content': 0.09417017549276352, 'timestamp': '2025-09-10 02:40:52.553385', 'step': 12684, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 128], 'flops': 3797092544000}, 'timestamp': '2025-09-10 02:40:52.584973', 'step': 12684, 'epoch': 2} {'type': 'loss', 'content': 0.06995926797389984, 'timestamp': '2025-09-10 02:40:52.587446', 'step': 12685, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 2847885091968}, 'timestamp': '2025-09-10 02:40:52.617851', 'step': 12685, 'epoch': 2} {'type': 'loss', 'content': 0.04065882787108421, 'timestamp': '2025-09-10 02:40:52.620612', 'step': 12686, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 128], 'flops': 3797092544000}, 'timestamp': '2025-09-10 02:40:52.652193', 'step': 12686, 'epoch': 2} {'type': 'loss', 'content': 0.11284554749727249, 'timestamp': '2025-09-10 02:40:52.654752', 'step': 12687, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 3322488817984}, 'timestamp': '2025-09-10 02:40:52.687292', 'step': 12687, 'epoch': 2} {'type': 'loss', 'content': 0.08245968818664551, 'timestamp': '2025-09-10 02:40:52.711243', 'step': 12688, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 2847885091968}, 'timestamp': '2025-09-10 02:40:52.741497', 'step': 12688, 'epoch': 2} {'type': 'loss', 'content': 0.17284338176250458, 'timestamp': '2025-09-10 02:40:52.744034', 'step': 12689, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 2847885091968}, 'timestamp': '2025-09-10 02:40:52.774745', 'step': 12689, 'epoch': 2} {'type': 'loss', 'content': 0.12460149824619293, 'timestamp': '2025-09-10 02:40:52.777304', 'step': 12690, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 3322488817984}, 'timestamp': '2025-09-10 02:40:52.807035', 'step': 12690, 'epoch': 2} {'type': 'loss', 'content': 0.04801883175969124, 'timestamp': '2025-09-10 02:40:52.809556', 'step': 12691, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 80], 'flops': 2373281365952}, 'timestamp': '2025-09-10 02:40:52.839712', 'step': 12691, 'epoch': 2} {'type': 'loss', 'content': 0.06380464881658554, 'timestamp': '2025-09-10 02:40:52.863659', 'step': 12692, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 2847885091968}, 'timestamp': '2025-09-10 02:40:52.893579', 'step': 12692, 'epoch': 2} {'type': 'loss', 'content': 0.03284880891442299, 'timestamp': '2025-09-10 02:40:52.896187', 'step': 12693, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 128], 'flops': 3797092544000}, 'timestamp': '2025-09-10 02:40:52.926386', 'step': 12693, 'epoch': 2} {'type': 'loss', 'content': 0.17118245363235474, 'timestamp': '2025-09-10 02:40:52.929141', 'step': 12694, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 2847885091968}, 'timestamp': '2025-09-10 02:40:52.958648', 'step': 12694, 'epoch': 2} {'type': 'loss', 'content': 0.025831691920757294, 'timestamp': '2025-09-10 02:40:52.961075', 'step': 12695, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 128], 'flops': 3797092544000}, 'timestamp': '2025-09-10 02:40:52.991189', 'step': 12695, 'epoch': 2} {'type': 'loss', 'content': 0.09784163534641266, 'timestamp': '2025-09-10 02:40:53.014728', 'step': 12696, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 3322488817984}, 'timestamp': '2025-09-10 02:40:53.046185', 'step': 12696, 'epoch': 2} {'type': 'loss', 'content': 0.07256928831338882, 'timestamp': '2025-09-10 02:40:53.048649', 'step': 12697, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 80], 'flops': 2373281365952}, 'timestamp': '2025-09-10 02:40:53.080300', 'step': 12697, 'epoch': 2} {'type': 'loss', 'content': 0.15496763586997986, 'timestamp': '2025-09-10 02:40:53.082890', 'step': 12698, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 2847885091968}, 'timestamp': '2025-09-10 02:40:53.112409', 'step': 12698, 'epoch': 2} {'type': 'loss', 'content': 0.11151781678199768, 'timestamp': '2025-09-10 02:40:53.115359', 'step': 12699, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 3322488817984}, 'timestamp': '2025-09-10 02:40:53.144993', 'step': 12699, 'epoch': 2} {'type': 'loss', 'content': 0.11506055295467377, 'timestamp': '2025-09-10 02:40:53.168740', 'step': 12700, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 3322488817984}, 'timestamp': '2025-09-10 02:40:53.198867', 'step': 12700, 'epoch': 2} {'type': 'loss', 'content': 0.07780619710683823, 'timestamp': '2025-09-10 02:40:53.202604', 'step': 12701, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 2847885091968}, 'timestamp': '2025-09-10 02:40:53.232892', 'step': 12701, 'epoch': 2} {'type': 'loss', 'content': 0.10343118011951447, 'timestamp': '2025-09-10 02:40:53.235297', 'step': 12702, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 2847885091968}, 'timestamp': '2025-09-10 02:40:53.265825', 'step': 12702, 'epoch': 2} {'type': 'loss', 'content': 0.08796588331460953, 'timestamp': '2025-09-10 02:40:53.268424', 'step': 12703, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 80], 'flops': 2373281365952}, 'timestamp': '2025-09-10 02:40:53.305512', 'step': 12703, 'epoch': 2} {'type': 'loss', 'content': 0.1267898827791214, 'timestamp': '2025-09-10 02:40:53.329133', 'step': 12704, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 2847885091968}, 'timestamp': '2025-09-10 02:40:53.359032', 'step': 12704, 'epoch': 2} {'type': 'loss', 'content': 0.04037592560052872, 'timestamp': '2025-09-10 02:40:53.361503', 'step': 12705, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 3322488817984}, 'timestamp': '2025-09-10 02:40:53.391204', 'step': 12705, 'epoch': 2} {'type': 'loss', 'content': 0.10245721787214279, 'timestamp': '2025-09-10 02:40:53.393667', 'step': 12706, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 128], 'flops': 3797092544000}, 'timestamp': '2025-09-10 02:40:53.423762', 'step': 12706, 'epoch': 2} {'type': 'loss', 'content': 0.1230565533041954, 'timestamp': '2025-09-10 02:40:53.426197', 'step': 12707, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 144], 'flops': 4271696270016}, 'timestamp': '2025-09-10 02:40:53.456230', 'step': 12707, 'epoch': 2} {'type': 'loss', 'content': 0.09656636416912079, 'timestamp': '2025-09-10 02:40:53.481426', 'step': 12708, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 3322488817984}, 'timestamp': '2025-09-10 02:40:53.511122', 'step': 12708, 'epoch': 2} {'type': 'loss', 'content': 0.055163685232400894, 'timestamp': '2025-09-10 02:40:53.513454', 'step': 12709, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 128], 'flops': 3797092544000}, 'timestamp': '2025-09-10 02:40:53.543695', 'step': 12709, 'epoch': 2} {'type': 'loss', 'content': 0.13121628761291504, 'timestamp': '2025-09-10 02:40:53.547225', 'step': 12710, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 3322488817984}, 'timestamp': '2025-09-10 02:40:53.577053', 'step': 12710, 'epoch': 2} {'type': 'loss', 'content': 0.06212284415960312, 'timestamp': '2025-09-10 02:40:53.579273', 'step': 12711, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 2847885091968}, 'timestamp': '2025-09-10 02:40:53.609347', 'step': 12711, 'epoch': 2} {'type': 'loss', 'content': 0.09890962392091751, 'timestamp': '2025-09-10 02:40:53.633347', 'step': 12712, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 3322488817984}, 'timestamp': '2025-09-10 02:40:53.663985', 'step': 12712, 'epoch': 2} {'type': 'loss', 'content': 0.1296752244234085, 'timestamp': '2025-09-10 02:40:53.666334', 'step': 12713, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 3322488817984}, 'timestamp': '2025-09-10 02:40:53.696287', 'step': 12713, 'epoch': 2} {'type': 'loss', 'content': 0.15035414695739746, 'timestamp': '2025-09-10 02:40:53.698848', 'step': 12714, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 128], 'flops': 3797092544000}, 'timestamp': '2025-09-10 02:40:53.730538', 'step': 12714, 'epoch': 2} {'type': 'loss', 'content': 0.17307831346988678, 'timestamp': '2025-09-10 02:40:53.733543', 'step': 12715, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 3322488817984}, 'timestamp': '2025-09-10 02:40:53.764445', 'step': 12715, 'epoch': 2} {'type': 'loss', 'content': 0.11265795677900314, 'timestamp': '2025-09-10 02:40:53.788322', 'step': 12716, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 80], 'flops': 2373281365952}, 'timestamp': '2025-09-10 02:40:53.819063', 'step': 12716, 'epoch': 2} {'type': 'loss', 'content': 0.1040438711643219, 'timestamp': '2025-09-10 02:40:53.821367', 'step': 12717, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 128], 'flops': 3797092544000}, 'timestamp': '2025-09-10 02:40:53.851417', 'step': 12717, 'epoch': 2} {'type': 'loss', 'content': 0.08883719146251678, 'timestamp': '2025-09-10 02:40:53.853851', 'step': 12718, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 2847885091968}, 'timestamp': '2025-09-10 02:40:53.884645', 'step': 12718, 'epoch': 2} {'type': 'loss', 'content': 0.059404246509075165, 'timestamp': '2025-09-10 02:40:53.887061', 'step': 12719, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 3322488817984}, 'timestamp': '2025-09-10 02:40:53.917635', 'step': 12719, 'epoch': 2} {'type': 'loss', 'content': 0.04796247556805611, 'timestamp': '2025-09-10 02:40:53.941200', 'step': 12720, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 144], 'flops': 4271696270016}, 'timestamp': '2025-09-10 02:40:53.972153', 'step': 12720, 'epoch': 2} {'type': 'loss', 'content': 0.05913929641246796, 'timestamp': '2025-09-10 02:40:53.974500', 'step': 12721, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 3322488817984}, 'timestamp': '2025-09-10 02:40:54.004657', 'step': 12721, 'epoch': 2} {'type': 'loss', 'content': 0.06733657419681549, 'timestamp': '2025-09-10 02:40:54.007103', 'step': 12722, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 128], 'flops': 3797092544000}, 'timestamp': '2025-09-10 02:40:54.036967', 'step': 12722, 'epoch': 2} {'type': 'loss', 'content': 0.052392665296792984, 'timestamp': '2025-09-10 02:40:54.039735', 'step': 12723, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 128], 'flops': 3797092544000}, 'timestamp': '2025-09-10 02:40:54.070911', 'step': 12723, 'epoch': 2} {'type': 'loss', 'content': 0.07472050189971924, 'timestamp': '2025-09-10 02:40:54.094834', 'step': 12724, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 2847885091968}, 'timestamp': '2025-09-10 02:40:54.126029', 'step': 12724, 'epoch': 2} {'type': 'loss', 'content': 0.18989911675453186, 'timestamp': '2025-09-10 02:40:54.129453', 'step': 12725, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 2847885091968}, 'timestamp': '2025-09-10 02:40:54.160191', 'step': 12725, 'epoch': 2} {'type': 'loss', 'content': 0.1301957070827484, 'timestamp': '2025-09-10 02:40:54.162799', 'step': 12726, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 80], 'flops': 2373281365952}, 'timestamp': '2025-09-10 02:40:54.194877', 'step': 12726, 'epoch': 2} {'type': 'loss', 'content': 0.11774300038814545, 'timestamp': '2025-09-10 02:40:54.197561', 'step': 12727, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 3322488817984}, 'timestamp': '2025-09-10 02:40:54.227994', 'step': 12727, 'epoch': 2} {'type': 'loss', 'content': 0.05782125145196915, 'timestamp': '2025-09-10 02:40:54.251815', 'step': 12728, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 144], 'flops': 4271696270016}, 'timestamp': '2025-09-10 02:40:54.281974', 'step': 12728, 'epoch': 2} {'type': 'loss', 'content': 0.12606675922870636, 'timestamp': '2025-09-10 02:40:54.286396', 'step': 12729, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 3322488817984}, 'timestamp': '2025-09-10 02:40:54.317383', 'step': 12729, 'epoch': 2} {'type': 'loss', 'content': 0.11556435376405716, 'timestamp': '2025-09-10 02:40:54.319601', 'step': 12730, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 3322488817984}, 'timestamp': '2025-09-10 02:40:54.350147', 'step': 12730, 'epoch': 2} {'type': 'loss', 'content': 0.12996260821819305, 'timestamp': '2025-09-10 02:40:54.352673', 'step': 12731, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 3322488817984}, 'timestamp': '2025-09-10 02:40:54.382717', 'step': 12731, 'epoch': 2} {'type': 'loss', 'content': 0.04534054175019264, 'timestamp': '2025-09-10 02:40:54.406654', 'step': 12732, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 144], 'flops': 4271696270016}, 'timestamp': '2025-09-10 02:40:54.436789', 'step': 12732, 'epoch': 2} {'type': 'loss', 'content': 0.1723499596118927, 'timestamp': '2025-09-10 02:40:54.439481', 'step': 12733, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 128], 'flops': 3797092544000}, 'timestamp': '2025-09-10 02:40:54.469910', 'step': 12733, 'epoch': 2} {'type': 'loss', 'content': 0.1824733316898346, 'timestamp': '2025-09-10 02:40:54.473558', 'step': 12734, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 80], 'flops': 2373281365952}, 'timestamp': '2025-09-10 02:40:54.503047', 'step': 12734, 'epoch': 2} {'type': 'loss', 'content': 0.1464196741580963, 'timestamp': '2025-09-10 02:40:54.505499', 'step': 12735, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 128], 'flops': 3797092544000}, 'timestamp': '2025-09-10 02:40:54.536364', 'step': 12735, 'epoch': 2} {'type': 'loss', 'content': 0.09198124706745148, 'timestamp': '2025-09-10 02:40:54.559814', 'step': 12736, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 128], 'flops': 3797092544000}, 'timestamp': '2025-09-10 02:40:54.590947', 'step': 12736, 'epoch': 2} {'type': 'loss', 'content': 0.048218946903944016, 'timestamp': '2025-09-10 02:40:54.593438', 'step': 12737, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 3322488817984}, 'timestamp': '2025-09-10 02:40:54.623388', 'step': 12737, 'epoch': 2} {'type': 'loss', 'content': 0.11146900802850723, 'timestamp': '2025-09-10 02:40:54.625735', 'step': 12738, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 2847885091968}, 'timestamp': '2025-09-10 02:40:54.656759', 'step': 12738, 'epoch': 2} {'type': 'loss', 'content': 0.20830415189266205, 'timestamp': '2025-09-10 02:40:54.659805', 'step': 12739, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 3322488817984}, 'timestamp': '2025-09-10 02:40:54.694954', 'step': 12739, 'epoch': 2} {'type': 'loss', 'content': 0.08606265485286713, 'timestamp': '2025-09-10 02:40:54.719182', 'step': 12740, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 3322488817984}, 'timestamp': '2025-09-10 02:40:54.750961', 'step': 12740, 'epoch': 2} {'type': 'loss', 'content': 0.15959098935127258, 'timestamp': '2025-09-10 02:40:54.753877', 'step': 12741, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 80], 'flops': 2373281365952}, 'timestamp': '2025-09-10 02:40:54.786049', 'step': 12741, 'epoch': 2} {'type': 'loss', 'content': 0.14863495528697968, 'timestamp': '2025-09-10 02:40:54.789239', 'step': 12742, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 144], 'flops': 4271696270016}, 'timestamp': '2025-09-10 02:40:54.820349', 'step': 12742, 'epoch': 2} {'type': 'loss', 'content': 0.1022237241268158, 'timestamp': '2025-09-10 02:40:54.823560', 'step': 12743, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 2847885091968}, 'timestamp': '2025-09-10 02:40:54.854316', 'step': 12743, 'epoch': 2} {'type': 'loss', 'content': 0.10411179810762405, 'timestamp': '2025-09-10 02:40:54.878109', 'step': 12744, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 3322488817984}, 'timestamp': '2025-09-10 02:40:54.908573', 'step': 12744, 'epoch': 2} {'type': 'loss', 'content': 0.16494996845722198, 'timestamp': '2025-09-10 02:40:54.911145', 'step': 12745, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 80], 'flops': 2373281365952}, 'timestamp': '2025-09-10 02:40:54.940480', 'step': 12745, 'epoch': 2} {'type': 'loss', 'content': 0.2477254569530487, 'timestamp': '2025-09-10 02:40:54.942672', 'step': 12746, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 144], 'flops': 4271696270016}, 'timestamp': '2025-09-10 02:40:54.973148', 'step': 12746, 'epoch': 2} {'type': 'loss', 'content': 0.08020872622728348, 'timestamp': '2025-09-10 02:40:54.975806', 'step': 12747, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 3322488817984}, 'timestamp': '2025-09-10 02:40:55.007400', 'step': 12747, 'epoch': 2} {'type': 'loss', 'content': 0.09362466633319855, 'timestamp': '2025-09-10 02:40:55.031064', 'step': 12748, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 2847885091968}, 'timestamp': '2025-09-10 02:40:55.061290', 'step': 12748, 'epoch': 2} {'type': 'loss', 'content': 0.16274692118167877, 'timestamp': '2025-09-10 02:40:55.063995', 'step': 12749, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 128], 'flops': 3797092544000}, 'timestamp': '2025-09-10 02:40:55.093495', 'step': 12749, 'epoch': 2} {'type': 'loss', 'content': 0.1095268651843071, 'timestamp': '2025-09-10 02:40:55.095739', 'step': 12750, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 80], 'flops': 2373281365952}, 'timestamp': '2025-09-10 02:40:55.124693', 'step': 12750, 'epoch': 2} {'type': 'loss', 'content': 0.17007973790168762, 'timestamp': '2025-09-10 02:40:55.126910', 'step': 12751, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 128], 'flops': 3797092544000}, 'timestamp': '2025-09-10 02:40:55.156335', 'step': 12751, 'epoch': 2} {'type': 'loss', 'content': 0.11209388822317123, 'timestamp': '2025-09-10 02:40:55.180059', 'step': 12752, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 128], 'flops': 3797092544000}, 'timestamp': '2025-09-10 02:40:55.213889', 'step': 12752, 'epoch': 2} {'type': 'loss', 'content': 0.1669761687517166, 'timestamp': '2025-09-10 02:40:55.216035', 'step': 12753, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 3322488817984}, 'timestamp': '2025-09-10 02:40:55.245418', 'step': 12753, 'epoch': 2} {'type': 'loss', 'content': 0.05724930763244629, 'timestamp': '2025-09-10 02:40:55.247666', 'step': 12754, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 2847885091968}, 'timestamp': '2025-09-10 02:40:55.277126', 'step': 12754, 'epoch': 2} {'type': 'loss', 'content': 0.08977234363555908, 'timestamp': '2025-09-10 02:40:55.284876', 'step': 12755, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 144], 'flops': 4271696270016}, 'timestamp': '2025-09-10 02:40:55.316251', 'step': 12755, 'epoch': 2} {'type': 'loss', 'content': 0.08243495970964432, 'timestamp': '2025-09-10 02:40:55.340130', 'step': 12756, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 3322488817984}, 'timestamp': '2025-09-10 02:40:55.369993', 'step': 12756, 'epoch': 2} {'type': 'loss', 'content': 0.157875195145607, 'timestamp': '2025-09-10 02:40:55.372767', 'step': 12757, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 3322488817984}, 'timestamp': '2025-09-10 02:40:55.402829', 'step': 12757, 'epoch': 2} {'type': 'loss', 'content': 0.12637634575366974, 'timestamp': '2025-09-10 02:40:55.405884', 'step': 12758, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 2847885091968}, 'timestamp': '2025-09-10 02:40:55.436629', 'step': 12758, 'epoch': 2} {'type': 'loss', 'content': 0.11974203586578369, 'timestamp': '2025-09-10 02:40:55.438899', 'step': 12759, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 2847885091968}, 'timestamp': '2025-09-10 02:40:55.468597', 'step': 12759, 'epoch': 2} {'type': 'loss', 'content': 0.12277010083198547, 'timestamp': '2025-09-10 02:40:55.492181', 'step': 12760, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 3322488817984}, 'timestamp': '2025-09-10 02:40:55.522197', 'step': 12760, 'epoch': 2} {'type': 'loss', 'content': 0.1064138263463974, 'timestamp': '2025-09-10 02:40:55.524966', 'step': 12761, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 144], 'flops': 4271696270016}, 'timestamp': '2025-09-10 02:40:55.555679', 'step': 12761, 'epoch': 2} {'type': 'loss', 'content': 0.10423184931278229, 'timestamp': '2025-09-10 02:40:55.558343', 'step': 12762, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 3322488817984}, 'timestamp': '2025-09-10 02:40:55.589798', 'step': 12762, 'epoch': 2} {'type': 'loss', 'content': 0.17282801866531372, 'timestamp': '2025-09-10 02:40:55.592080', 'step': 12763, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 3322488817984}, 'timestamp': '2025-09-10 02:40:55.621592', 'step': 12763, 'epoch': 2} {'type': 'loss', 'content': 0.041655950248241425, 'timestamp': '2025-09-10 02:40:55.645194', 'step': 12764, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 3322488817984}, 'timestamp': '2025-09-10 02:40:55.675295', 'step': 12764, 'epoch': 2} {'type': 'loss', 'content': 0.05687466636300087, 'timestamp': '2025-09-10 02:40:55.677365', 'step': 12765, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 128], 'flops': 3797092544000}, 'timestamp': '2025-09-10 02:40:55.706516', 'step': 12765, 'epoch': 2} {'type': 'loss', 'content': 0.10890225321054459, 'timestamp': '2025-09-10 02:40:55.709482', 'step': 12766, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 3322488817984}, 'timestamp': '2025-09-10 02:40:55.739195', 'step': 12766, 'epoch': 2} {'type': 'loss', 'content': 0.09434521943330765, 'timestamp': '2025-09-10 02:40:55.741874', 'step': 12767, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 128], 'flops': 3797092544000}, 'timestamp': '2025-09-10 02:40:55.771298', 'step': 12767, 'epoch': 2} {'type': 'loss', 'content': 0.11283034831285477, 'timestamp': '2025-09-10 02:40:55.794900', 'step': 12768, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 3322488817984}, 'timestamp': '2025-09-10 02:40:55.825437', 'step': 12768, 'epoch': 2} {'type': 'loss', 'content': 0.11853744089603424, 'timestamp': '2025-09-10 02:40:55.828000', 'step': 12769, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 144], 'flops': 4271696270016}, 'timestamp': '2025-09-10 02:40:55.858018', 'step': 12769, 'epoch': 2} {'type': 'loss', 'content': 0.09642384201288223, 'timestamp': '2025-09-10 02:40:55.860668', 'step': 12770, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 3322488817984}, 'timestamp': '2025-09-10 02:40:55.889950', 'step': 12770, 'epoch': 2} {'type': 'loss', 'content': 0.056680697947740555, 'timestamp': '2025-09-10 02:40:55.892988', 'step': 12771, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 128], 'flops': 3797092544000}, 'timestamp': '2025-09-10 02:40:55.923694', 'step': 12771, 'epoch': 2} {'type': 'loss', 'content': 0.10073359310626984, 'timestamp': '2025-09-10 02:40:55.947409', 'step': 12772, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 128], 'flops': 3797092544000}, 'timestamp': '2025-09-10 02:40:55.977594', 'step': 12772, 'epoch': 2} {'type': 'loss', 'content': 0.1479974240064621, 'timestamp': '2025-09-10 02:40:55.979840', 'step': 12773, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 3322488817984}, 'timestamp': '2025-09-10 02:40:56.012198', 'step': 12773, 'epoch': 2} {'type': 'loss', 'content': 0.24550111591815948, 'timestamp': '2025-09-10 02:40:56.014660', 'step': 12774, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 2847885091968}, 'timestamp': '2025-09-10 02:40:56.044331', 'step': 12774, 'epoch': 2} {'type': 'loss', 'content': 0.13118907809257507, 'timestamp': '2025-09-10 02:40:56.046544', 'step': 12775, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 2847885091968}, 'timestamp': '2025-09-10 02:40:56.076636', 'step': 12775, 'epoch': 2} {'type': 'loss', 'content': 0.15820907056331635, 'timestamp': '2025-09-10 02:40:56.100439', 'step': 12776, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 80], 'flops': 2373281365952}, 'timestamp': '2025-09-10 02:40:56.130593', 'step': 12776, 'epoch': 2} {'type': 'loss', 'content': 0.06758205592632294, 'timestamp': '2025-09-10 02:40:56.133075', 'step': 12777, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 3322488817984}, 'timestamp': '2025-09-10 02:40:56.163736', 'step': 12777, 'epoch': 2} {'type': 'loss', 'content': 0.06484021246433258, 'timestamp': '2025-09-10 02:40:56.166094', 'step': 12778, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 3322488817984}, 'timestamp': '2025-09-10 02:40:56.196228', 'step': 12778, 'epoch': 2} {'type': 'loss', 'content': 0.07288235425949097, 'timestamp': '2025-09-10 02:40:56.198676', 'step': 12779, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 3322488817984}, 'timestamp': '2025-09-10 02:40:56.228066', 'step': 12779, 'epoch': 2} {'type': 'loss', 'content': 0.15655428171157837, 'timestamp': '2025-09-10 02:40:56.251743', 'step': 12780, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 2847885091968}, 'timestamp': '2025-09-10 02:40:56.281914', 'step': 12780, 'epoch': 2} {'type': 'loss', 'content': 0.09460340440273285, 'timestamp': '2025-09-10 02:40:56.285075', 'step': 12781, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 144], 'flops': 4271696270016}, 'timestamp': '2025-09-10 02:40:56.314602', 'step': 12781, 'epoch': 2} {'type': 'loss', 'content': 0.07503655552864075, 'timestamp': '2025-09-10 02:40:56.317295', 'step': 12782, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 2847885091968}, 'timestamp': '2025-09-10 02:40:56.346921', 'step': 12782, 'epoch': 2} {'type': 'loss', 'content': 0.11154711991548538, 'timestamp': '2025-09-10 02:40:56.349460', 'step': 12783, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 2847885091968}, 'timestamp': '2025-09-10 02:40:56.379427', 'step': 12783, 'epoch': 2} {'type': 'loss', 'content': 0.21282759308815002, 'timestamp': '2025-09-10 02:40:56.402754', 'step': 12784, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 2847885091968}, 'timestamp': '2025-09-10 02:40:56.439278', 'step': 12784, 'epoch': 2} {'type': 'loss', 'content': 0.08110629767179489, 'timestamp': '2025-09-10 02:40:56.441237', 'step': 12785, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 3322488817984}, 'timestamp': '2025-09-10 02:40:56.470480', 'step': 12785, 'epoch': 2} {'type': 'loss', 'content': 0.07341726124286652, 'timestamp': '2025-09-10 02:40:56.472994', 'step': 12786, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 3322488817984}, 'timestamp': '2025-09-10 02:40:56.502923', 'step': 12786, 'epoch': 2} {'type': 'loss', 'content': 0.06109223887324333, 'timestamp': '2025-09-10 02:40:56.505419', 'step': 12787, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 3322488817984}, 'timestamp': '2025-09-10 02:40:56.537000', 'step': 12787, 'epoch': 2} {'type': 'loss', 'content': 0.0970655158162117, 'timestamp': '2025-09-10 02:40:56.560714', 'step': 12788, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 128], 'flops': 3797092544000}, 'timestamp': '2025-09-10 02:40:56.592500', 'step': 12788, 'epoch': 2} {'type': 'loss', 'content': 0.11166457086801529, 'timestamp': '2025-09-10 02:40:56.594897', 'step': 12789, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 192], 'flops': 5695507448064}, 'timestamp': '2025-09-10 02:40:56.625743', 'step': 12789, 'epoch': 2} {'type': 'loss', 'content': 0.07850703597068787, 'timestamp': '2025-09-10 02:40:56.630711', 'step': 12790, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 3322488817984}, 'timestamp': '2025-09-10 02:40:56.660438', 'step': 12790, 'epoch': 2} {'type': 'loss', 'content': 0.12221549451351166, 'timestamp': '2025-09-10 02:40:56.662735', 'step': 12791, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 128], 'flops': 3797092544000}, 'timestamp': '2025-09-10 02:40:56.693645', 'step': 12791, 'epoch': 2} {'type': 'loss', 'content': 0.15542320907115936, 'timestamp': '2025-09-10 02:40:56.719647', 'step': 12792, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 3322488817984}, 'timestamp': '2025-09-10 02:40:56.757290', 'step': 12792, 'epoch': 2} {'type': 'loss', 'content': 0.1251997947692871, 'timestamp': '2025-09-10 02:40:56.759267', 'step': 12793, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 2847885091968}, 'timestamp': '2025-09-10 02:40:56.790063', 'step': 12793, 'epoch': 2} {'type': 'loss', 'content': 0.049062617123126984, 'timestamp': '2025-09-10 02:40:56.792668', 'step': 12794, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 3322488817984}, 'timestamp': '2025-09-10 02:40:56.823164', 'step': 12794, 'epoch': 2} {'type': 'loss', 'content': 0.18352490663528442, 'timestamp': '2025-09-10 02:40:56.825555', 'step': 12795, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 128], 'flops': 3797092544000}, 'timestamp': '2025-09-10 02:40:56.855154', 'step': 12795, 'epoch': 2} {'type': 'loss', 'content': 0.11072005331516266, 'timestamp': '2025-09-10 02:40:56.878722', 'step': 12796, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 3322488817984}, 'timestamp': '2025-09-10 02:40:56.908673', 'step': 12796, 'epoch': 2} {'type': 'loss', 'content': 0.10330735892057419, 'timestamp': '2025-09-10 02:40:56.912793', 'step': 12797, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 3322488817984}, 'timestamp': '2025-09-10 02:40:56.943246', 'step': 12797, 'epoch': 2} {'type': 'loss', 'content': 0.12754997611045837, 'timestamp': '2025-09-10 02:40:56.946123', 'step': 12798, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 144], 'flops': 4271696270016}, 'timestamp': '2025-09-10 02:40:56.975883', 'step': 12798, 'epoch': 2} {'type': 'loss', 'content': 0.11737869679927826, 'timestamp': '2025-09-10 02:40:56.978500', 'step': 12799, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 2847885091968}, 'timestamp': '2025-09-10 02:40:57.008050', 'step': 12799, 'epoch': 2} {'type': 'loss', 'content': 0.08883625268936157, 'timestamp': '2025-09-10 02:40:57.033161', 'step': 12800, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 3322488817984}, 'timestamp': '2025-09-10 02:40:57.062411', 'step': 12800, 'epoch': 2} {'type': 'loss', 'content': 0.04439092054963112, 'timestamp': '2025-09-10 02:40:57.064732', 'step': 12801, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 2847885091968}, 'timestamp': '2025-09-10 02:40:57.096271', 'step': 12801, 'epoch': 2} {'type': 'loss', 'content': 0.01591734029352665, 'timestamp': '2025-09-10 02:40:57.098408', 'step': 12802, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 2847885091968}, 'timestamp': '2025-09-10 02:40:57.129347', 'step': 12802, 'epoch': 2} {'type': 'loss', 'content': 0.16556324064731598, 'timestamp': '2025-09-10 02:40:57.131583', 'step': 12803, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 2847885091968}, 'timestamp': '2025-09-10 02:40:57.161649', 'step': 12803, 'epoch': 2} {'type': 'loss', 'content': 0.12971231341362, 'timestamp': '2025-09-10 02:40:57.185293', 'step': 12804, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 128], 'flops': 3797092544000}, 'timestamp': '2025-09-10 02:40:57.215549', 'step': 12804, 'epoch': 2} {'type': 'loss', 'content': 0.08535052090883255, 'timestamp': '2025-09-10 02:40:57.221442', 'step': 12805, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 128], 'flops': 3797092544000}, 'timestamp': '2025-09-10 02:40:57.251817', 'step': 12805, 'epoch': 2} {'type': 'loss', 'content': 0.1388961523771286, 'timestamp': '2025-09-10 02:40:57.254063', 'step': 12806, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 128], 'flops': 3797092544000}, 'timestamp': '2025-09-10 02:40:57.283635', 'step': 12806, 'epoch': 2} {'type': 'loss', 'content': 0.14520488679409027, 'timestamp': '2025-09-10 02:40:57.285834', 'step': 12807, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 3322488817984}, 'timestamp': '2025-09-10 02:40:57.315864', 'step': 12807, 'epoch': 2} {'type': 'loss', 'content': 0.11192171275615692, 'timestamp': '2025-09-10 02:40:57.339342', 'step': 12808, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 2847885091968}, 'timestamp': '2025-09-10 02:40:57.369699', 'step': 12808, 'epoch': 2} {'type': 'loss', 'content': 0.12567652761936188, 'timestamp': '2025-09-10 02:40:57.371980', 'step': 12809, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 144], 'flops': 4271696270016}, 'timestamp': '2025-09-10 02:40:57.402209', 'step': 12809, 'epoch': 2} {'type': 'loss', 'content': 0.15514984726905823, 'timestamp': '2025-09-10 02:40:57.404967', 'step': 12810, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 2847885091968}, 'timestamp': '2025-09-10 02:40:57.436649', 'step': 12810, 'epoch': 2} {'type': 'loss', 'content': 0.12770958244800568, 'timestamp': '2025-09-10 02:40:57.439035', 'step': 12811, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 3322488817984}, 'timestamp': '2025-09-10 02:40:57.468660', 'step': 12811, 'epoch': 2} {'type': 'loss', 'content': 0.15716254711151123, 'timestamp': '2025-09-10 02:40:57.492006', 'step': 12812, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 3322488817984}, 'timestamp': '2025-09-10 02:40:57.521564', 'step': 12812, 'epoch': 2} {'type': 'loss', 'content': 0.061253223568201065, 'timestamp': '2025-09-10 02:40:57.523794', 'step': 12813, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 144], 'flops': 4271696270016}, 'timestamp': '2025-09-10 02:40:57.553567', 'step': 12813, 'epoch': 2} {'type': 'loss', 'content': 0.08657272160053253, 'timestamp': '2025-09-10 02:40:57.556208', 'step': 12814, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 128], 'flops': 3797092544000}, 'timestamp': '2025-09-10 02:40:57.585695', 'step': 12814, 'epoch': 2} {'type': 'loss', 'content': 0.13588960468769073, 'timestamp': '2025-09-10 02:40:57.587941', 'step': 12815, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 2847885091968}, 'timestamp': '2025-09-10 02:40:57.618133', 'step': 12815, 'epoch': 2} {'type': 'loss', 'content': 0.11975304782390594, 'timestamp': '2025-09-10 02:40:57.641781', 'step': 12816, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 3322488817984}, 'timestamp': '2025-09-10 02:40:57.671213', 'step': 12816, 'epoch': 2} {'type': 'loss', 'content': 0.11385291069746017, 'timestamp': '2025-09-10 02:40:57.673478', 'step': 12817, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 144], 'flops': 4271696270016}, 'timestamp': '2025-09-10 02:40:57.705741', 'step': 12817, 'epoch': 2} {'type': 'loss', 'content': 0.0783635675907135, 'timestamp': '2025-09-10 02:40:57.708312', 'step': 12818, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 3322488817984}, 'timestamp': '2025-09-10 02:40:57.739090', 'step': 12818, 'epoch': 2} {'type': 'loss', 'content': 0.049877047538757324, 'timestamp': '2025-09-10 02:40:57.741322', 'step': 12819, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 128], 'flops': 3797092544000}, 'timestamp': '2025-09-10 02:40:57.770919', 'step': 12819, 'epoch': 2} {'type': 'loss', 'content': 0.10816123336553574, 'timestamp': '2025-09-10 02:40:57.794417', 'step': 12820, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 3322488817984}, 'timestamp': '2025-09-10 02:40:57.824740', 'step': 12820, 'epoch': 2} {'type': 'loss', 'content': 0.07985000312328339, 'timestamp': '2025-09-10 02:40:57.827086', 'step': 12821, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 3322488817984}, 'timestamp': '2025-09-10 02:40:57.856758', 'step': 12821, 'epoch': 2} {'type': 'loss', 'content': 0.08278441429138184, 'timestamp': '2025-09-10 02:40:57.858920', 'step': 12822, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 3322488817984}, 'timestamp': '2025-09-10 02:40:57.888399', 'step': 12822, 'epoch': 2} {'type': 'loss', 'content': 0.11604183167219162, 'timestamp': '2025-09-10 02:40:57.890794', 'step': 12823, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 3322488817984}, 'timestamp': '2025-09-10 02:40:57.920822', 'step': 12823, 'epoch': 2} {'type': 'loss', 'content': 0.11762924492359161, 'timestamp': '2025-09-10 02:40:57.944845', 'step': 12824, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 144], 'flops': 4271696270016}, 'timestamp': '2025-09-10 02:40:57.975811', 'step': 12824, 'epoch': 2} {'type': 'loss', 'content': 0.07599334418773651, 'timestamp': '2025-09-10 02:40:57.977873', 'step': 12825, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 2847885091968}, 'timestamp': '2025-09-10 02:40:58.006958', 'step': 12825, 'epoch': 2} {'type': 'loss', 'content': 0.12262685596942902, 'timestamp': '2025-09-10 02:40:58.010684', 'step': 12826, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 3322488817984}, 'timestamp': '2025-09-10 02:40:58.044056', 'step': 12826, 'epoch': 2} {'type': 'loss', 'content': 0.054119277745485306, 'timestamp': '2025-09-10 02:40:58.046336', 'step': 12827, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 3322488817984}, 'timestamp': '2025-09-10 02:40:58.075882', 'step': 12827, 'epoch': 2} {'type': 'loss', 'content': 0.061555832624435425, 'timestamp': '2025-09-10 02:40:58.099274', 'step': 12828, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 2847885091968}, 'timestamp': '2025-09-10 02:40:58.129553', 'step': 12828, 'epoch': 2} {'type': 'loss', 'content': 0.13086985051631927, 'timestamp': '2025-09-10 02:40:58.131967', 'step': 12829, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 2847885091968}, 'timestamp': '2025-09-10 02:40:58.161885', 'step': 12829, 'epoch': 2} {'type': 'loss', 'content': 0.09679552912712097, 'timestamp': '2025-09-10 02:40:58.164301', 'step': 12830, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 80], 'flops': 2373281365952}, 'timestamp': '2025-09-10 02:40:58.193943', 'step': 12830, 'epoch': 2} {'type': 'loss', 'content': 0.07840923964977264, 'timestamp': '2025-09-10 02:40:58.196094', 'step': 12831, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 3322488817984}, 'timestamp': '2025-09-10 02:40:58.225493', 'step': 12831, 'epoch': 2} {'type': 'loss', 'content': 0.16606731712818146, 'timestamp': '2025-09-10 02:40:58.249173', 'step': 12832, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 144], 'flops': 4271696270016}, 'timestamp': '2025-09-10 02:40:58.279245', 'step': 12832, 'epoch': 2} {'type': 'loss', 'content': 0.08473838865756989, 'timestamp': '2025-09-10 02:40:58.286681', 'step': 12833, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 3322488817984}, 'timestamp': '2025-09-10 02:40:58.319689', 'step': 12833, 'epoch': 2} {'type': 'loss', 'content': 0.05904819816350937, 'timestamp': '2025-09-10 02:40:58.323121', 'step': 12834, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 128], 'flops': 3797092544000}, 'timestamp': '2025-09-10 02:40:58.352911', 'step': 12834, 'epoch': 2} {'type': 'loss', 'content': 0.06063937395811081, 'timestamp': '2025-09-10 02:40:58.355372', 'step': 12835, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 3322488817984}, 'timestamp': '2025-09-10 02:40:58.385095', 'step': 12835, 'epoch': 2} {'type': 'loss', 'content': 0.07311299443244934, 'timestamp': '2025-09-10 02:40:58.408696', 'step': 12836, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 3322488817984}, 'timestamp': '2025-09-10 02:40:58.438487', 'step': 12836, 'epoch': 2} {'type': 'loss', 'content': 0.07886314392089844, 'timestamp': '2025-09-10 02:40:58.440843', 'step': 12837, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 2847885091968}, 'timestamp': '2025-09-10 02:40:58.470505', 'step': 12837, 'epoch': 2} {'type': 'loss', 'content': 0.06956493854522705, 'timestamp': '2025-09-10 02:40:58.472640', 'step': 12838, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 2847885091968}, 'timestamp': '2025-09-10 02:40:58.501610', 'step': 12838, 'epoch': 2} {'type': 'loss', 'content': 0.13868378102779388, 'timestamp': '2025-09-10 02:40:58.503810', 'step': 12839, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 2847885091968}, 'timestamp': '2025-09-10 02:40:58.535194', 'step': 12839, 'epoch': 2} {'type': 'loss', 'content': 0.11502493172883987, 'timestamp': '2025-09-10 02:40:58.558735', 'step': 12840, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 144], 'flops': 4271696270016}, 'timestamp': '2025-09-10 02:40:58.588957', 'step': 12840, 'epoch': 2} {'type': 'loss', 'content': 0.10570724308490753, 'timestamp': '2025-09-10 02:40:58.591296', 'step': 12841, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 3322488817984}, 'timestamp': '2025-09-10 02:40:58.621164', 'step': 12841, 'epoch': 2} {'type': 'loss', 'content': 0.0808672308921814, 'timestamp': '2025-09-10 02:40:58.624428', 'step': 12842, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 2847885091968}, 'timestamp': '2025-09-10 02:40:58.655353', 'step': 12842, 'epoch': 2} {'type': 'loss', 'content': 0.05835990235209465, 'timestamp': '2025-09-10 02:40:58.657922', 'step': 12843, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 3322488817984}, 'timestamp': '2025-09-10 02:40:58.688549', 'step': 12843, 'epoch': 2} {'type': 'loss', 'content': 0.10070294141769409, 'timestamp': '2025-09-10 02:40:58.712223', 'step': 12844, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 128], 'flops': 3797092544000}, 'timestamp': '2025-09-10 02:40:58.742528', 'step': 12844, 'epoch': 2} {'type': 'loss', 'content': 0.09404684603214264, 'timestamp': '2025-09-10 02:40:58.744983', 'step': 12845, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 3322488817984}, 'timestamp': '2025-09-10 02:40:58.774646', 'step': 12845, 'epoch': 2} {'type': 'loss', 'content': 0.06873302906751633, 'timestamp': '2025-09-10 02:40:58.778761', 'step': 12846, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 2847885091968}, 'timestamp': '2025-09-10 02:40:58.809800', 'step': 12846, 'epoch': 2} {'type': 'loss', 'content': 0.09353639930486679, 'timestamp': '2025-09-10 02:40:58.812166', 'step': 12847, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 144], 'flops': 4271696270016}, 'timestamp': '2025-09-10 02:40:58.842505', 'step': 12847, 'epoch': 2} {'type': 'loss', 'content': 0.18453532457351685, 'timestamp': '2025-09-10 02:40:58.866317', 'step': 12848, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 128], 'flops': 3797092544000}, 'timestamp': '2025-09-10 02:40:58.897553', 'step': 12848, 'epoch': 2} {'type': 'loss', 'content': 0.10497071593999863, 'timestamp': '2025-09-10 02:40:58.900262', 'step': 12849, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 2847885091968}, 'timestamp': '2025-09-10 02:40:58.931033', 'step': 12849, 'epoch': 2} {'type': 'loss', 'content': 0.1539384126663208, 'timestamp': '2025-09-10 02:40:58.933376', 'step': 12850, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 128], 'flops': 3797092544000}, 'timestamp': '2025-09-10 02:40:58.964155', 'step': 12850, 'epoch': 2} {'type': 'loss', 'content': 0.08912903815507889, 'timestamp': '2025-09-10 02:40:58.966702', 'step': 12851, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 2847885091968}, 'timestamp': '2025-09-10 02:40:58.996746', 'step': 12851, 'epoch': 2} {'type': 'loss', 'content': 0.1910058706998825, 'timestamp': '2025-09-10 02:40:59.020501', 'step': 12852, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 128], 'flops': 3797092544000}, 'timestamp': '2025-09-10 02:40:59.050450', 'step': 12852, 'epoch': 2} {'type': 'loss', 'content': 0.04997984319925308, 'timestamp': '2025-09-10 02:40:59.052933', 'step': 12853, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 2847885091968}, 'timestamp': '2025-09-10 02:40:59.083309', 'step': 12853, 'epoch': 2} {'type': 'loss', 'content': 0.050926946103572845, 'timestamp': '2025-09-10 02:40:59.085651', 'step': 12854, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 3322488817984}, 'timestamp': '2025-09-10 02:40:59.116228', 'step': 12854, 'epoch': 2} {'type': 'loss', 'content': 0.12506824731826782, 'timestamp': '2025-09-10 02:40:59.118658', 'step': 12855, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 128], 'flops': 3797092544000}, 'timestamp': '2025-09-10 02:40:59.148981', 'step': 12855, 'epoch': 2} {'type': 'loss', 'content': 0.125692680478096, 'timestamp': '2025-09-10 02:40:59.172469', 'step': 12856, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 3322488817984}, 'timestamp': '2025-09-10 02:40:59.202748', 'step': 12856, 'epoch': 2} {'type': 'loss', 'content': 0.09627123177051544, 'timestamp': '2025-09-10 02:40:59.206094', 'step': 12857, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 3322488817984}, 'timestamp': '2025-09-10 02:40:59.236988', 'step': 12857, 'epoch': 2} {'type': 'loss', 'content': 0.15512555837631226, 'timestamp': '2025-09-10 02:40:59.239337', 'step': 12858, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 2847885091968}, 'timestamp': '2025-09-10 02:40:59.269752', 'step': 12858, 'epoch': 2} {'type': 'loss', 'content': 0.1484474539756775, 'timestamp': '2025-09-10 02:40:59.272718', 'step': 12859, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 128], 'flops': 3797092544000}, 'timestamp': '2025-09-10 02:40:59.302953', 'step': 12859, 'epoch': 2} {'type': 'loss', 'content': 0.07983915507793427, 'timestamp': '2025-09-10 02:40:59.326826', 'step': 12860, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 3322488817984}, 'timestamp': '2025-09-10 02:40:59.359691', 'step': 12860, 'epoch': 2} {'type': 'loss', 'content': 0.07070142775774002, 'timestamp': '2025-09-10 02:40:59.362077', 'step': 12861, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 2847885091968}, 'timestamp': '2025-09-10 02:40:59.392089', 'step': 12861, 'epoch': 2} {'type': 'loss', 'content': 0.09325549006462097, 'timestamp': '2025-09-10 02:40:59.394114', 'step': 12862, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 144], 'flops': 4271696270016}, 'timestamp': '2025-09-10 02:40:59.423961', 'step': 12862, 'epoch': 2} {'type': 'loss', 'content': 0.10942985117435455, 'timestamp': '2025-09-10 02:40:59.426676', 'step': 12863, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 2847885091968}, 'timestamp': '2025-09-10 02:40:59.457965', 'step': 12863, 'epoch': 2} {'type': 'loss', 'content': 0.043816495686769485, 'timestamp': '2025-09-10 02:40:59.481477', 'step': 12864, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 3322488817984}, 'timestamp': '2025-09-10 02:40:59.511601', 'step': 12864, 'epoch': 2} {'type': 'loss', 'content': 0.054453566670417786, 'timestamp': '2025-09-10 02:40:59.513869', 'step': 12865, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 3322488817984}, 'timestamp': '2025-09-10 02:40:59.543425', 'step': 12865, 'epoch': 2} {'type': 'loss', 'content': 0.11209700256586075, 'timestamp': '2025-09-10 02:40:59.545860', 'step': 12866, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 3322488817984}, 'timestamp': '2025-09-10 02:40:59.576618', 'step': 12866, 'epoch': 2} {'type': 'loss', 'content': 0.11148351430892944, 'timestamp': '2025-09-10 02:40:59.579259', 'step': 12867, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 128], 'flops': 3797092544000}, 'timestamp': '2025-09-10 02:40:59.609179', 'step': 12867, 'epoch': 2} {'type': 'loss', 'content': 0.11066291481256485, 'timestamp': '2025-09-10 02:40:59.632712', 'step': 12868, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 2847885091968}, 'timestamp': '2025-09-10 02:40:59.665125', 'step': 12868, 'epoch': 2} {'type': 'loss', 'content': 0.07086065411567688, 'timestamp': '2025-09-10 02:40:59.667521', 'step': 12869, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 128], 'flops': 3797092544000}, 'timestamp': '2025-09-10 02:40:59.697649', 'step': 12869, 'epoch': 2} {'type': 'loss', 'content': 0.10633375495672226, 'timestamp': '2025-09-10 02:40:59.699948', 'step': 12870, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 3322488817984}, 'timestamp': '2025-09-10 02:40:59.730123', 'step': 12870, 'epoch': 2} {'type': 'loss', 'content': 0.1226997822523117, 'timestamp': '2025-09-10 02:40:59.732610', 'step': 12871, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 160], 'flops': 4746299996032}, 'timestamp': '2025-09-10 02:40:59.764036', 'step': 12871, 'epoch': 2} {'type': 'loss', 'content': 0.14388924837112427, 'timestamp': '2025-09-10 02:40:59.787955', 'step': 12872, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 3322488817984}, 'timestamp': '2025-09-10 02:40:59.817709', 'step': 12872, 'epoch': 2} {'type': 'loss', 'content': 0.06720483303070068, 'timestamp': '2025-09-10 02:40:59.820132', 'step': 12873, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 2847885091968}, 'timestamp': '2025-09-10 02:40:59.849799', 'step': 12873, 'epoch': 2} {'type': 'loss', 'content': 0.06332193315029144, 'timestamp': '2025-09-10 02:40:59.852220', 'step': 12874, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 3322488817984}, 'timestamp': '2025-09-10 02:40:59.881981', 'step': 12874, 'epoch': 2} {'type': 'loss', 'content': 0.10940245538949966, 'timestamp': '2025-09-10 02:40:59.884495', 'step': 12875, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 3322488817984}, 'timestamp': '2025-09-10 02:40:59.914142', 'step': 12875, 'epoch': 2} {'type': 'loss', 'content': 0.1721545308828354, 'timestamp': '2025-09-10 02:40:59.938048', 'step': 12876, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 128], 'flops': 3797092544000}, 'timestamp': '2025-09-10 02:40:59.969476', 'step': 12876, 'epoch': 2} {'type': 'loss', 'content': 0.10198285430669785, 'timestamp': '2025-09-10 02:40:59.972073', 'step': 12877, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 144], 'flops': 4271696270016}, 'timestamp': '2025-09-10 02:41:00.002365', 'step': 12877, 'epoch': 2} {'type': 'loss', 'content': 0.1358676254749298, 'timestamp': '2025-09-10 02:41:00.004586', 'step': 12878, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 3322488817984}, 'timestamp': '2025-09-10 02:41:00.034176', 'step': 12878, 'epoch': 2} {'type': 'loss', 'content': 0.04956251010298729, 'timestamp': '2025-09-10 02:41:00.036375', 'step': 12879, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 2847885091968}, 'timestamp': '2025-09-10 02:41:00.067608', 'step': 12879, 'epoch': 2} {'type': 'loss', 'content': 0.053697630763053894, 'timestamp': '2025-09-10 02:41:00.091182', 'step': 12880, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 3322488817984}, 'timestamp': '2025-09-10 02:41:00.123348', 'step': 12880, 'epoch': 2} {'type': 'loss', 'content': 0.07072214782238007, 'timestamp': '2025-09-10 02:41:00.125453', 'step': 12881, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 3322488817984}, 'timestamp': '2025-09-10 02:41:00.155028', 'step': 12881, 'epoch': 2} {'type': 'loss', 'content': 0.14210055768489838, 'timestamp': '2025-09-10 02:41:00.157510', 'step': 12882, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 3322488817984}, 'timestamp': '2025-09-10 02:41:00.188651', 'step': 12882, 'epoch': 2} {'type': 'loss', 'content': 0.04724372923374176, 'timestamp': '2025-09-10 02:41:00.191110', 'step': 12883, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 128], 'flops': 3797092544000}, 'timestamp': '2025-09-10 02:41:00.221194', 'step': 12883, 'epoch': 2} {'type': 'loss', 'content': 0.11616235226392746, 'timestamp': '2025-09-10 02:41:00.244848', 'step': 12884, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 144], 'flops': 4271696270016}, 'timestamp': '2025-09-10 02:41:00.275061', 'step': 12884, 'epoch': 2} {'type': 'loss', 'content': 0.0883418470621109, 'timestamp': '2025-09-10 02:41:00.277528', 'step': 12885, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 3322488817984}, 'timestamp': '2025-09-10 02:41:00.310254', 'step': 12885, 'epoch': 2} {'type': 'loss', 'content': 0.07262072712182999, 'timestamp': '2025-09-10 02:41:00.312790', 'step': 12886, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 2847885091968}, 'timestamp': '2025-09-10 02:41:00.344958', 'step': 12886, 'epoch': 2} {'type': 'loss', 'content': 0.09668795019388199, 'timestamp': '2025-09-10 02:41:00.348808', 'step': 12887, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 2847885091968}, 'timestamp': '2025-09-10 02:41:00.378814', 'step': 12887, 'epoch': 2} {'type': 'loss', 'content': 0.15508495271205902, 'timestamp': '2025-09-10 02:41:00.402428', 'step': 12888, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 2847885091968}, 'timestamp': '2025-09-10 02:41:00.432219', 'step': 12888, 'epoch': 2} {'type': 'loss', 'content': 0.0475362204015255, 'timestamp': '2025-09-10 02:41:00.434605', 'step': 12889, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 3322488817984}, 'timestamp': '2025-09-10 02:41:00.465324', 'step': 12889, 'epoch': 2} {'type': 'loss', 'content': 0.048858147114515305, 'timestamp': '2025-09-10 02:41:00.467736', 'step': 12890, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 2847885091968}, 'timestamp': '2025-09-10 02:41:00.497544', 'step': 12890, 'epoch': 2} {'type': 'loss', 'content': 0.07164429873228073, 'timestamp': '2025-09-10 02:41:00.500266', 'step': 12891, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 128], 'flops': 3797092544000}, 'timestamp': '2025-09-10 02:41:00.530397', 'step': 12891, 'epoch': 2} {'type': 'loss', 'content': 0.10253647714853287, 'timestamp': '2025-09-10 02:41:00.553847', 'step': 12892, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 3322488817984}, 'timestamp': '2025-09-10 02:41:00.583627', 'step': 12892, 'epoch': 2} {'type': 'loss', 'content': 0.11261359602212906, 'timestamp': '2025-09-10 02:41:00.586314', 'step': 12893, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 2847885091968}, 'timestamp': '2025-09-10 02:41:00.616455', 'step': 12893, 'epoch': 2} {'type': 'loss', 'content': 0.12548111379146576, 'timestamp': '2025-09-10 02:41:00.619013', 'step': 12894, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 128], 'flops': 3797092544000}, 'timestamp': '2025-09-10 02:41:00.648819', 'step': 12894, 'epoch': 2} {'type': 'loss', 'content': 0.11861523240804672, 'timestamp': '2025-09-10 02:41:00.652971', 'step': 12895, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 128], 'flops': 3797092544000}, 'timestamp': '2025-09-10 02:41:00.683491', 'step': 12895, 'epoch': 2} {'type': 'loss', 'content': 0.09343115240335464, 'timestamp': '2025-09-10 02:41:00.707832', 'step': 12896, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 128], 'flops': 3797092544000}, 'timestamp': '2025-09-10 02:41:00.738032', 'step': 12896, 'epoch': 2} {'type': 'loss', 'content': 0.07375029474496841, 'timestamp': '2025-09-10 02:41:00.740581', 'step': 12897, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 3322488817984}, 'timestamp': '2025-09-10 02:41:00.771535', 'step': 12897, 'epoch': 2} {'type': 'loss', 'content': 0.09651795029640198, 'timestamp': '2025-09-10 02:41:00.773903', 'step': 12898, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 128], 'flops': 3797092544000}, 'timestamp': '2025-09-10 02:41:00.804280', 'step': 12898, 'epoch': 2} {'type': 'loss', 'content': 0.11825872957706451, 'timestamp': '2025-09-10 02:41:00.806797', 'step': 12899, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 128], 'flops': 3797092544000}, 'timestamp': '2025-09-10 02:41:00.838593', 'step': 12899, 'epoch': 2} {'type': 'loss', 'content': 0.08279971778392792, 'timestamp': '2025-09-10 02:41:00.862055', 'step': 12900, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 3322488817984}, 'timestamp': '2025-09-10 02:41:00.892836', 'step': 12900, 'epoch': 2} {'type': 'loss', 'content': 0.11767946928739548, 'timestamp': '2025-09-10 02:41:00.895261', 'step': 12901, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 3322488817984}, 'timestamp': '2025-09-10 02:41:00.925604', 'step': 12901, 'epoch': 2} {'type': 'loss', 'content': 0.11770080775022507, 'timestamp': '2025-09-10 02:41:00.928086', 'step': 12902, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 2847885091968}, 'timestamp': '2025-09-10 02:41:00.958398', 'step': 12902, 'epoch': 2} {'type': 'loss', 'content': 0.09737110137939453, 'timestamp': '2025-09-10 02:41:00.961337', 'step': 12903, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 128], 'flops': 3797092544000}, 'timestamp': '2025-09-10 02:41:00.993388', 'step': 12903, 'epoch': 2} {'type': 'loss', 'content': 0.08721823245286942, 'timestamp': '2025-09-10 02:41:01.017094', 'step': 12904, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 128], 'flops': 3797092544000}, 'timestamp': '2025-09-10 02:41:01.047379', 'step': 12904, 'epoch': 2} {'type': 'loss', 'content': 0.09210573881864548, 'timestamp': '2025-09-10 02:41:01.049629', 'step': 12905, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 3322488817984}, 'timestamp': '2025-09-10 02:41:01.080937', 'step': 12905, 'epoch': 2} {'type': 'loss', 'content': 0.12370407581329346, 'timestamp': '2025-09-10 02:41:01.082826', 'step': 12906, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 3322488817984}, 'timestamp': '2025-09-10 02:41:01.113880', 'step': 12906, 'epoch': 2} {'type': 'loss', 'content': 0.051134541630744934, 'timestamp': '2025-09-10 02:41:01.116205', 'step': 12907, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 2847885091968}, 'timestamp': '2025-09-10 02:41:01.147610', 'step': 12907, 'epoch': 2} {'type': 'loss', 'content': 0.05117166042327881, 'timestamp': '2025-09-10 02:41:01.171430', 'step': 12908, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 3322488817984}, 'timestamp': '2025-09-10 02:41:01.202194', 'step': 12908, 'epoch': 2} {'type': 'loss', 'content': 0.11975008994340897, 'timestamp': '2025-09-10 02:41:01.204506', 'step': 12909, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 144], 'flops': 4271696270016}, 'timestamp': '2025-09-10 02:41:01.234756', 'step': 12909, 'epoch': 2} {'type': 'loss', 'content': 0.11422178149223328, 'timestamp': '2025-09-10 02:41:01.237356', 'step': 12910, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 2847885091968}, 'timestamp': '2025-09-10 02:41:01.268422', 'step': 12910, 'epoch': 2} {'type': 'loss', 'content': 0.12925423681735992, 'timestamp': '2025-09-10 02:41:01.270828', 'step': 12911, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 2847885091968}, 'timestamp': '2025-09-10 02:41:01.300735', 'step': 12911, 'epoch': 2} {'type': 'loss', 'content': 0.07818486541509628, 'timestamp': '2025-09-10 02:41:01.324388', 'step': 12912, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 160], 'flops': 4746299996032}, 'timestamp': '2025-09-10 02:41:01.354633', 'step': 12912, 'epoch': 2} {'type': 'loss', 'content': 0.06630762666463852, 'timestamp': '2025-09-10 02:41:01.356971', 'step': 12913, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 3322488817984}, 'timestamp': '2025-09-10 02:41:01.388680', 'step': 12913, 'epoch': 2} {'type': 'loss', 'content': 0.15776681900024414, 'timestamp': '2025-09-10 02:41:01.391343', 'step': 12914, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 3322488817984}, 'timestamp': '2025-09-10 02:41:01.421719', 'step': 12914, 'epoch': 2} {'type': 'loss', 'content': 0.10271633416414261, 'timestamp': '2025-09-10 02:41:01.423812', 'step': 12915, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 3322488817984}, 'timestamp': '2025-09-10 02:41:01.453329', 'step': 12915, 'epoch': 2} {'type': 'loss', 'content': 0.09644229710102081, 'timestamp': '2025-09-10 02:41:01.477239', 'step': 12916, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 128], 'flops': 3797092544000}, 'timestamp': '2025-09-10 02:41:01.507615', 'step': 12916, 'epoch': 2} {'type': 'loss', 'content': 0.1138758435845375, 'timestamp': '2025-09-10 02:41:01.510154', 'step': 12917, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 128], 'flops': 3797092544000}, 'timestamp': '2025-09-10 02:41:01.540300', 'step': 12917, 'epoch': 2} {'type': 'loss', 'content': 0.1893673986196518, 'timestamp': '2025-09-10 02:41:01.542736', 'step': 12918, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 3322488817984}, 'timestamp': '2025-09-10 02:41:01.573658', 'step': 12918, 'epoch': 2} {'type': 'loss', 'content': 0.0821126401424408, 'timestamp': '2025-09-10 02:41:01.575844', 'step': 12919, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 2847885091968}, 'timestamp': '2025-09-10 02:41:01.605899', 'step': 12919, 'epoch': 2} {'type': 'loss', 'content': 0.13351669907569885, 'timestamp': '2025-09-10 02:41:01.629643', 'step': 12920, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 144], 'flops': 4271696270016}, 'timestamp': '2025-09-10 02:41:01.659930', 'step': 12920, 'epoch': 2} {'type': 'loss', 'content': 0.06999921053647995, 'timestamp': '2025-09-10 02:41:01.663642', 'step': 12921, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 3322488817984}, 'timestamp': '2025-09-10 02:41:01.693826', 'step': 12921, 'epoch': 2} {'type': 'loss', 'content': 0.08273543417453766, 'timestamp': '2025-09-10 02:41:01.696303', 'step': 12922, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 2847885091968}, 'timestamp': '2025-09-10 02:41:01.727132', 'step': 12922, 'epoch': 2} {'type': 'loss', 'content': 0.08037076145410538, 'timestamp': '2025-09-10 02:41:01.729572', 'step': 12923, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 3322488817984}, 'timestamp': '2025-09-10 02:41:01.760136', 'step': 12923, 'epoch': 2} {'type': 'loss', 'content': 0.045369647443294525, 'timestamp': '2025-09-10 02:41:01.783585', 'step': 12924, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 2847885091968}, 'timestamp': '2025-09-10 02:41:01.814318', 'step': 12924, 'epoch': 2} {'type': 'loss', 'content': 0.04628174379467964, 'timestamp': '2025-09-10 02:41:01.817599', 'step': 12925, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 2847885091968}, 'timestamp': '2025-09-10 02:41:01.849042', 'step': 12925, 'epoch': 2} {'type': 'loss', 'content': 0.14037494361400604, 'timestamp': '2025-09-10 02:41:01.851339', 'step': 12926, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 128], 'flops': 3797092544000}, 'timestamp': '2025-09-10 02:41:01.881028', 'step': 12926, 'epoch': 2} {'type': 'loss', 'content': 0.10607768595218658, 'timestamp': '2025-09-10 02:41:01.883128', 'step': 12927, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 2847885091968}, 'timestamp': '2025-09-10 02:41:01.912597', 'step': 12927, 'epoch': 2} {'type': 'loss', 'content': 0.14864952862262726, 'timestamp': '2025-09-10 02:41:01.936224', 'step': 12928, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 144], 'flops': 4271696270016}, 'timestamp': '2025-09-10 02:41:01.967377', 'step': 12928, 'epoch': 2} {'type': 'loss', 'content': 0.05212857574224472, 'timestamp': '2025-09-10 02:41:01.969731', 'step': 12929, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 2847885091968}, 'timestamp': '2025-09-10 02:41:01.999889', 'step': 12929, 'epoch': 2} {'type': 'loss', 'content': 0.16564829647541046, 'timestamp': '2025-09-10 02:41:02.002220', 'step': 12930, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 128], 'flops': 3797092544000}, 'timestamp': '2025-09-10 02:41:02.031817', 'step': 12930, 'epoch': 2} {'type': 'loss', 'content': 0.10915274918079376, 'timestamp': '2025-09-10 02:41:02.034175', 'step': 12931, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 2847885091968}, 'timestamp': '2025-09-10 02:41:02.064195', 'step': 12931, 'epoch': 2} {'type': 'loss', 'content': 0.07033511251211166, 'timestamp': '2025-09-10 02:41:02.087807', 'step': 12932, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 128], 'flops': 3797092544000}, 'timestamp': '2025-09-10 02:41:02.118722', 'step': 12932, 'epoch': 2} {'type': 'loss', 'content': 0.09634176641702652, 'timestamp': '2025-09-10 02:41:02.121076', 'step': 12933, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 2847885091968}, 'timestamp': '2025-09-10 02:41:02.152038', 'step': 12933, 'epoch': 2} {'type': 'loss', 'content': 0.12648695707321167, 'timestamp': '2025-09-10 02:41:02.154721', 'step': 12934, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 3322488817984}, 'timestamp': '2025-09-10 02:41:02.191754', 'step': 12934, 'epoch': 2} {'type': 'loss', 'content': 0.09737030416727066, 'timestamp': '2025-09-10 02:41:02.194671', 'step': 12935, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 128], 'flops': 3797092544000}, 'timestamp': '2025-09-10 02:41:02.226004', 'step': 12935, 'epoch': 2} {'type': 'loss', 'content': 0.08008338510990143, 'timestamp': '2025-09-10 02:41:02.250089', 'step': 12936, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 128], 'flops': 3797092544000}, 'timestamp': '2025-09-10 02:41:02.281853', 'step': 12936, 'epoch': 2} {'type': 'loss', 'content': 0.14828097820281982, 'timestamp': '2025-09-10 02:41:02.284929', 'step': 12937, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 128], 'flops': 3797092544000}, 'timestamp': '2025-09-10 02:41:02.319733', 'step': 12937, 'epoch': 2} {'type': 'loss', 'content': 0.13870957493782043, 'timestamp': '2025-09-10 02:41:02.322754', 'step': 12938, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 3322488817984}, 'timestamp': '2025-09-10 02:41:02.353666', 'step': 12938, 'epoch': 2} {'type': 'loss', 'content': 0.1172030046582222, 'timestamp': '2025-09-10 02:41:02.356050', 'step': 12939, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 3322488817984}, 'timestamp': '2025-09-10 02:41:02.385583', 'step': 12939, 'epoch': 2} {'type': 'loss', 'content': 0.08167875558137894, 'timestamp': '2025-09-10 02:41:02.409295', 'step': 12940, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 3322488817984}, 'timestamp': '2025-09-10 02:41:02.438890', 'step': 12940, 'epoch': 2} {'type': 'loss', 'content': 0.16343866288661957, 'timestamp': '2025-09-10 02:41:02.441173', 'step': 12941, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 2847885091968}, 'timestamp': '2025-09-10 02:41:02.472255', 'step': 12941, 'epoch': 2} {'type': 'loss', 'content': 0.05626458302140236, 'timestamp': '2025-09-10 02:41:02.475441', 'step': 12942, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 2847885091968}, 'timestamp': '2025-09-10 02:41:02.505505', 'step': 12942, 'epoch': 2} {'type': 'loss', 'content': 0.08132068812847137, 'timestamp': '2025-09-10 02:41:02.507787', 'step': 12943, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 128], 'flops': 3797092544000}, 'timestamp': '2025-09-10 02:41:02.537521', 'step': 12943, 'epoch': 2} {'type': 'loss', 'content': 0.12100064009428024, 'timestamp': '2025-09-10 02:41:02.561109', 'step': 12944, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 2847885091968}, 'timestamp': '2025-09-10 02:41:02.592302', 'step': 12944, 'epoch': 2} {'type': 'loss', 'content': 0.10704606026411057, 'timestamp': '2025-09-10 02:41:02.594874', 'step': 12945, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 128], 'flops': 3797092544000}, 'timestamp': '2025-09-10 02:41:02.624858', 'step': 12945, 'epoch': 2} {'type': 'loss', 'content': 0.16056542098522186, 'timestamp': '2025-09-10 02:41:02.627049', 'step': 12946, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 128], 'flops': 3797092544000}, 'timestamp': '2025-09-10 02:41:02.656652', 'step': 12946, 'epoch': 2} {'type': 'loss', 'content': 0.05802462249994278, 'timestamp': '2025-09-10 02:41:02.659044', 'step': 12947, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 3322488817984}, 'timestamp': '2025-09-10 02:41:02.689132', 'step': 12947, 'epoch': 2} {'type': 'loss', 'content': 0.08818891644477844, 'timestamp': '2025-09-10 02:41:02.713497', 'step': 12948, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 3322488817984}, 'timestamp': '2025-09-10 02:41:02.744789', 'step': 12948, 'epoch': 2} {'type': 'loss', 'content': 0.07802120596170425, 'timestamp': '2025-09-10 02:41:02.747821', 'step': 12949, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 128], 'flops': 3797092544000}, 'timestamp': '2025-09-10 02:41:02.777615', 'step': 12949, 'epoch': 2} {'type': 'loss', 'content': 0.1582886278629303, 'timestamp': '2025-09-10 02:41:02.779868', 'step': 12950, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 2847885091968}, 'timestamp': '2025-09-10 02:41:02.809379', 'step': 12950, 'epoch': 2} {'type': 'loss', 'content': 0.03430801257491112, 'timestamp': '2025-09-10 02:41:02.811663', 'step': 12951, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 3322488817984}, 'timestamp': '2025-09-10 02:41:02.841780', 'step': 12951, 'epoch': 2} {'type': 'loss', 'content': 0.07750268280506134, 'timestamp': '2025-09-10 02:41:02.865506', 'step': 12952, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 3322488817984}, 'timestamp': '2025-09-10 02:41:02.895560', 'step': 12952, 'epoch': 2} {'type': 'loss', 'content': 0.12802250683307648, 'timestamp': '2025-09-10 02:41:02.898078', 'step': 12953, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 144], 'flops': 4271696270016}, 'timestamp': '2025-09-10 02:41:02.928164', 'step': 12953, 'epoch': 2} {'type': 'loss', 'content': 0.07361749559640884, 'timestamp': '2025-09-10 02:41:02.930613', 'step': 12954, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 2847885091968}, 'timestamp': '2025-09-10 02:41:02.960405', 'step': 12954, 'epoch': 2} {'type': 'loss', 'content': 0.13729600608348846, 'timestamp': '2025-09-10 02:41:02.962837', 'step': 12955, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 3322488817984}, 'timestamp': '2025-09-10 02:41:02.993511', 'step': 12955, 'epoch': 2} {'type': 'loss', 'content': 0.17782792448997498, 'timestamp': '2025-09-10 02:41:03.017237', 'step': 12956, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 3322488817984}, 'timestamp': '2025-09-10 02:41:03.048072', 'step': 12956, 'epoch': 2} {'type': 'loss', 'content': 0.08519653975963593, 'timestamp': '2025-09-10 02:41:03.050653', 'step': 12957, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 2847885091968}, 'timestamp': '2025-09-10 02:41:03.081175', 'step': 12957, 'epoch': 2} {'type': 'loss', 'content': 0.05585756152868271, 'timestamp': '2025-09-10 02:41:03.083492', 'step': 12958, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 3322488817984}, 'timestamp': '2025-09-10 02:41:03.113500', 'step': 12958, 'epoch': 2} {'type': 'loss', 'content': 0.10698774456977844, 'timestamp': '2025-09-10 02:41:03.116931', 'step': 12959, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 160], 'flops': 4746299996032}, 'timestamp': '2025-09-10 02:41:03.147392', 'step': 12959, 'epoch': 2} {'type': 'loss', 'content': 0.1727893054485321, 'timestamp': '2025-09-10 02:41:03.171082', 'step': 12960, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 3322488817984}, 'timestamp': '2025-09-10 02:41:03.201795', 'step': 12960, 'epoch': 2} {'type': 'loss', 'content': 0.08355318754911423, 'timestamp': '2025-09-10 02:41:03.204468', 'step': 12961, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 3322488817984}, 'timestamp': '2025-09-10 02:41:03.234692', 'step': 12961, 'epoch': 2} {'type': 'loss', 'content': 0.12910835444927216, 'timestamp': '2025-09-10 02:41:03.237179', 'step': 12962, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 2847885091968}, 'timestamp': '2025-09-10 02:41:03.267138', 'step': 12962, 'epoch': 2} {'type': 'loss', 'content': 0.01898619718849659, 'timestamp': '2025-09-10 02:41:03.269565', 'step': 12963, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 2847885091968}, 'timestamp': '2025-09-10 02:41:03.300256', 'step': 12963, 'epoch': 2} {'type': 'loss', 'content': 0.10705658793449402, 'timestamp': '2025-09-10 02:41:03.328155', 'step': 12964, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 160], 'flops': 4746299996032}, 'timestamp': '2025-09-10 02:41:03.358310', 'step': 12964, 'epoch': 2} {'type': 'loss', 'content': 0.09311218559741974, 'timestamp': '2025-09-10 02:41:03.360604', 'step': 12965, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 176], 'flops': 5220903722048}, 'timestamp': '2025-09-10 02:41:03.391931', 'step': 12965, 'epoch': 2} {'type': 'loss', 'content': 0.1586969941854477, 'timestamp': '2025-09-10 02:41:03.395728', 'step': 12966, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 3322488817984}, 'timestamp': '2025-09-10 02:41:03.426132', 'step': 12966, 'epoch': 2} {'type': 'loss', 'content': 0.13310778141021729, 'timestamp': '2025-09-10 02:41:03.428384', 'step': 12967, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 2847885091968}, 'timestamp': '2025-09-10 02:41:03.458288', 'step': 12967, 'epoch': 2} {'type': 'loss', 'content': 0.1170414388179779, 'timestamp': '2025-09-10 02:41:03.482311', 'step': 12968, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 2847885091968}, 'timestamp': '2025-09-10 02:41:03.514403', 'step': 12968, 'epoch': 2} {'type': 'loss', 'content': 0.09387388080358505, 'timestamp': '2025-09-10 02:41:03.516719', 'step': 12969, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 2847885091968}, 'timestamp': '2025-09-10 02:41:03.546597', 'step': 12969, 'epoch': 2} {'type': 'loss', 'content': 0.09474717080593109, 'timestamp': '2025-09-10 02:41:03.549072', 'step': 12970, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 128], 'flops': 3797092544000}, 'timestamp': '2025-09-10 02:41:03.579125', 'step': 12970, 'epoch': 2} {'type': 'loss', 'content': 0.09586072713136673, 'timestamp': '2025-09-10 02:41:03.581745', 'step': 12971, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 128], 'flops': 3797092544000}, 'timestamp': '2025-09-10 02:41:03.613418', 'step': 12971, 'epoch': 2} {'type': 'loss', 'content': 0.06959078460931778, 'timestamp': '2025-09-10 02:41:03.637033', 'step': 12972, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 80], 'flops': 2373281365952}, 'timestamp': '2025-09-10 02:41:03.667304', 'step': 12972, 'epoch': 2} {'type': 'loss', 'content': 0.09036188572645187, 'timestamp': '2025-09-10 02:41:03.669663', 'step': 12973, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 2847885091968}, 'timestamp': '2025-09-10 02:41:03.700447', 'step': 12973, 'epoch': 2} {'type': 'loss', 'content': 0.09573744237422943, 'timestamp': '2025-09-10 02:41:03.702937', 'step': 12974, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 144], 'flops': 4271696270016}, 'timestamp': '2025-09-10 02:41:03.733224', 'step': 12974, 'epoch': 2} {'type': 'loss', 'content': 0.09822795540094376, 'timestamp': '2025-09-10 02:41:03.736079', 'step': 12975, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 2847885091968}, 'timestamp': '2025-09-10 02:41:03.766246', 'step': 12975, 'epoch': 2} {'type': 'loss', 'content': 0.05276386812329292, 'timestamp': '2025-09-10 02:41:03.789944', 'step': 12976, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 144], 'flops': 4271696270016}, 'timestamp': '2025-09-10 02:41:03.819958', 'step': 12976, 'epoch': 2} {'type': 'loss', 'content': 0.09027153998613358, 'timestamp': '2025-09-10 02:41:03.822306', 'step': 12977, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 128], 'flops': 3797092544000}, 'timestamp': '2025-09-10 02:41:03.856207', 'step': 12977, 'epoch': 2} {'type': 'loss', 'content': 0.1422891616821289, 'timestamp': '2025-09-10 02:41:03.859548', 'step': 12978, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 3322488817984}, 'timestamp': '2025-09-10 02:41:03.894889', 'step': 12978, 'epoch': 2} {'type': 'loss', 'content': 0.1058092936873436, 'timestamp': '2025-09-10 02:41:03.899293', 'step': 12979, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 2847885091968}, 'timestamp': '2025-09-10 02:41:03.933720', 'step': 12979, 'epoch': 2} {'type': 'loss', 'content': 0.07351400703191757, 'timestamp': '2025-09-10 02:41:03.959918', 'step': 12980, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 128], 'flops': 3797092544000}, 'timestamp': '2025-09-10 02:41:03.996075', 'step': 12980, 'epoch': 2} {'type': 'loss', 'content': 0.1010642871260643, 'timestamp': '2025-09-10 02:41:03.998337', 'step': 12981, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 3322488817984}, 'timestamp': '2025-09-10 02:41:04.028665', 'step': 12981, 'epoch': 2} {'type': 'loss', 'content': 0.06885742396116257, 'timestamp': '2025-09-10 02:41:04.031247', 'step': 12982, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 80], 'flops': 2373281365952}, 'timestamp': '2025-09-10 02:41:04.060933', 'step': 12982, 'epoch': 2} {'type': 'loss', 'content': 0.05011335387825966, 'timestamp': '2025-09-10 02:41:04.063742', 'step': 12983, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 3322488817984}, 'timestamp': '2025-09-10 02:41:04.094137', 'step': 12983, 'epoch': 2} {'type': 'loss', 'content': 0.08554476499557495, 'timestamp': '2025-09-10 02:41:04.117683', 'step': 12984, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 3322488817984}, 'timestamp': '2025-09-10 02:41:04.148481', 'step': 12984, 'epoch': 2} {'type': 'loss', 'content': 0.045491598546504974, 'timestamp': '2025-09-10 02:41:04.150788', 'step': 12985, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 208], 'flops': 6170111174080}, 'timestamp': '2025-09-10 02:41:04.180995', 'step': 12985, 'epoch': 2} {'type': 'loss', 'content': 0.16248475015163422, 'timestamp': '2025-09-10 02:41:04.187942', 'step': 12986, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 176], 'flops': 5220903722048}, 'timestamp': '2025-09-10 02:41:04.218412', 'step': 12986, 'epoch': 2} {'type': 'loss', 'content': 0.07706095278263092, 'timestamp': '2025-09-10 02:41:04.222523', 'step': 12987, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 128], 'flops': 3797092544000}, 'timestamp': '2025-09-10 02:41:04.253333', 'step': 12987, 'epoch': 2} {'type': 'loss', 'content': 0.08150525391101837, 'timestamp': '2025-09-10 02:41:04.277309', 'step': 12988, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 2847885091968}, 'timestamp': '2025-09-10 02:41:04.307508', 'step': 12988, 'epoch': 2} {'type': 'loss', 'content': 0.06981928646564484, 'timestamp': '2025-09-10 02:41:04.312627', 'step': 12989, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 3322488817984}, 'timestamp': '2025-09-10 02:41:04.343343', 'step': 12989, 'epoch': 2} {'type': 'loss', 'content': 0.0460682213306427, 'timestamp': '2025-09-10 02:41:04.345708', 'step': 12990, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 2847885091968}, 'timestamp': '2025-09-10 02:41:04.375659', 'step': 12990, 'epoch': 2} {'type': 'loss', 'content': 0.08539822697639465, 'timestamp': '2025-09-10 02:41:04.378190', 'step': 12991, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 2847885091968}, 'timestamp': '2025-09-10 02:41:04.407909', 'step': 12991, 'epoch': 2} {'type': 'loss', 'content': 0.15632537007331848, 'timestamp': '2025-09-10 02:41:04.431776', 'step': 12992, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 3322488817984}, 'timestamp': '2025-09-10 02:41:04.462199', 'step': 12992, 'epoch': 2} {'type': 'loss', 'content': 0.0874311774969101, 'timestamp': '2025-09-10 02:41:04.464615', 'step': 12993, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 3322488817984}, 'timestamp': '2025-09-10 02:41:04.495569', 'step': 12993, 'epoch': 2} {'type': 'loss', 'content': 0.16053783893585205, 'timestamp': '2025-09-10 02:41:04.497810', 'step': 12994, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 192], 'flops': 5695507448064}, 'timestamp': '2025-09-10 02:41:04.528782', 'step': 12994, 'epoch': 2} {'type': 'loss', 'content': 0.11813434213399887, 'timestamp': '2025-09-10 02:41:04.533522', 'step': 12995, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 2847885091968}, 'timestamp': '2025-09-10 02:41:04.564966', 'step': 12995, 'epoch': 2} {'type': 'loss', 'content': 0.13561145961284637, 'timestamp': '2025-09-10 02:41:04.588621', 'step': 12996, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 2847885091968}, 'timestamp': '2025-09-10 02:41:04.619032', 'step': 12996, 'epoch': 2} {'type': 'loss', 'content': 0.06833983957767487, 'timestamp': '2025-09-10 02:41:04.621419', 'step': 12997, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 3322488817984}, 'timestamp': '2025-09-10 02:41:04.652371', 'step': 12997, 'epoch': 2} {'type': 'loss', 'content': 0.09564094245433807, 'timestamp': '2025-09-10 02:41:04.654993', 'step': 12998, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 160], 'flops': 4746299996032}, 'timestamp': '2025-09-10 02:41:04.684937', 'step': 12998, 'epoch': 2} {'type': 'loss', 'content': 0.18157437443733215, 'timestamp': '2025-09-10 02:41:04.687802', 'step': 12999, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 144], 'flops': 4271696270016}, 'timestamp': '2025-09-10 02:41:04.717629', 'step': 12999, 'epoch': 2} {'type': 'loss', 'content': 0.11060254275798798, 'timestamp': '2025-09-10 02:41:04.740963', 'step': 13000, 'epoch': 2} {'type': 'info', 'content': 'Checkpoint saved at step 13000', 'timestamp': '2025-09-10 02:41:11.115045', 'step': 13000, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 160], 'flops': 4746299996032}, 'timestamp': '2025-09-10 02:41:11.173223', 'step': 13000, 'epoch': 2} {'type': 'loss', 'content': 0.04686359316110611, 'timestamp': '2025-09-10 02:41:11.176237', 'step': 13001, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 2847885091968}, 'timestamp': '2025-09-10 02:41:11.210721', 'step': 13001, 'epoch': 2} {'type': 'loss', 'content': 0.09866271913051605, 'timestamp': '2025-09-10 02:41:11.213583', 'step': 13002, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 128], 'flops': 3797092544000}, 'timestamp': '2025-09-10 02:41:11.244170', 'step': 13002, 'epoch': 2} {'type': 'loss', 'content': 0.08686041831970215, 'timestamp': '2025-09-10 02:41:11.246832', 'step': 13003, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 3322488817984}, 'timestamp': '2025-09-10 02:41:11.277131', 'step': 13003, 'epoch': 2} {'type': 'loss', 'content': 0.09695414453744888, 'timestamp': '2025-09-10 02:41:11.301004', 'step': 13004, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 128], 'flops': 3797092544000}, 'timestamp': '2025-09-10 02:41:11.336137', 'step': 13004, 'epoch': 2} {'type': 'loss', 'content': 0.09976563602685928, 'timestamp': '2025-09-10 02:41:11.338668', 'step': 13005, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 3322488817984}, 'timestamp': '2025-09-10 02:41:11.370470', 'step': 13005, 'epoch': 2} {'type': 'loss', 'content': 0.11147084087133408, 'timestamp': '2025-09-10 02:41:11.372945', 'step': 13006, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 3322488817984}, 'timestamp': '2025-09-10 02:41:11.404069', 'step': 13006, 'epoch': 2} {'type': 'loss', 'content': 0.0622018463909626, 'timestamp': '2025-09-10 02:41:11.408113', 'step': 13007, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 2847885091968}, 'timestamp': '2025-09-10 02:41:11.441778', 'step': 13007, 'epoch': 2} {'type': 'loss', 'content': 0.1385745257139206, 'timestamp': '2025-09-10 02:41:11.465813', 'step': 13008, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 128], 'flops': 3797092544000}, 'timestamp': '2025-09-10 02:41:11.497126', 'step': 13008, 'epoch': 2} {'type': 'loss', 'content': 0.13635952770709991, 'timestamp': '2025-09-10 02:41:11.499686', 'step': 13009, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 3322488817984}, 'timestamp': '2025-09-10 02:41:11.530735', 'step': 13009, 'epoch': 2} {'type': 'loss', 'content': 0.07364393770694733, 'timestamp': '2025-09-10 02:41:11.533319', 'step': 13010, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 3322488817984}, 'timestamp': '2025-09-10 02:41:11.563785', 'step': 13010, 'epoch': 2} {'type': 'loss', 'content': 0.18527908623218536, 'timestamp': '2025-09-10 02:41:11.566435', 'step': 13011, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 2847885091968}, 'timestamp': '2025-09-10 02:41:11.596856', 'step': 13011, 'epoch': 2} {'type': 'loss', 'content': 0.1791066974401474, 'timestamp': '2025-09-10 02:41:11.620821', 'step': 13012, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 3322488817984}, 'timestamp': '2025-09-10 02:41:11.652158', 'step': 13012, 'epoch': 2} {'type': 'loss', 'content': 0.1822216957807541, 'timestamp': '2025-09-10 02:41:11.654718', 'step': 13013, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 2847885091968}, 'timestamp': '2025-09-10 02:41:11.685672', 'step': 13013, 'epoch': 2} {'type': 'loss', 'content': 0.15834404528141022, 'timestamp': '2025-09-10 02:41:11.688179', 'step': 13014, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 128], 'flops': 3797092544000}, 'timestamp': '2025-09-10 02:41:11.719914', 'step': 13014, 'epoch': 2} {'type': 'loss', 'content': 0.0702851191163063, 'timestamp': '2025-09-10 02:41:11.725184', 'step': 13015, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 128], 'flops': 3797092544000}, 'timestamp': '2025-09-10 02:41:11.755269', 'step': 13015, 'epoch': 2} {'type': 'loss', 'content': 0.074713334441185, 'timestamp': '2025-09-10 02:41:11.779254', 'step': 13016, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 3322488817984}, 'timestamp': '2025-09-10 02:41:11.810178', 'step': 13016, 'epoch': 2} {'type': 'loss', 'content': 0.11069758236408234, 'timestamp': '2025-09-10 02:41:11.812811', 'step': 13017, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 144], 'flops': 4271696270016}, 'timestamp': '2025-09-10 02:41:11.843477', 'step': 13017, 'epoch': 2} {'type': 'loss', 'content': 0.09255725890398026, 'timestamp': '2025-09-10 02:41:11.846105', 'step': 13018, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 128], 'flops': 3797092544000}, 'timestamp': '2025-09-10 02:41:11.875761', 'step': 13018, 'epoch': 2} {'type': 'loss', 'content': 0.09545228630304337, 'timestamp': '2025-09-10 02:41:11.878175', 'step': 13019, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 3322488817984}, 'timestamp': '2025-09-10 02:41:11.909371', 'step': 13019, 'epoch': 2} {'type': 'loss', 'content': 0.08553840965032578, 'timestamp': '2025-09-10 02:41:11.932868', 'step': 13020, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 128], 'flops': 3797092544000}, 'timestamp': '2025-09-10 02:41:11.963540', 'step': 13020, 'epoch': 2} {'type': 'loss', 'content': 0.08734653145074844, 'timestamp': '2025-09-10 02:41:11.966793', 'step': 13021, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 2847885091968}, 'timestamp': '2025-09-10 02:41:11.997546', 'step': 13021, 'epoch': 2} {'type': 'loss', 'content': 0.06830631196498871, 'timestamp': '2025-09-10 02:41:11.999894', 'step': 13022, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 176], 'flops': 5220903722048}, 'timestamp': '2025-09-10 02:41:12.030677', 'step': 13022, 'epoch': 2} {'type': 'loss', 'content': 0.1152048334479332, 'timestamp': '2025-09-10 02:41:12.035042', 'step': 13023, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 192], 'flops': 5695507448064}, 'timestamp': '2025-09-10 02:41:12.066007', 'step': 13023, 'epoch': 2} {'type': 'loss', 'content': 0.1043558344244957, 'timestamp': '2025-09-10 02:41:12.091410', 'step': 13024, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 176], 'flops': 5220903722048}, 'timestamp': '2025-09-10 02:41:12.125786', 'step': 13024, 'epoch': 2} {'type': 'loss', 'content': 0.11129520833492279, 'timestamp': '2025-09-10 02:41:12.128380', 'step': 13025, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 3322488817984}, 'timestamp': '2025-09-10 02:41:12.157928', 'step': 13025, 'epoch': 2} {'type': 'loss', 'content': 0.26160117983818054, 'timestamp': '2025-09-10 02:41:12.160558', 'step': 13026, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 3322488817984}, 'timestamp': '2025-09-10 02:41:12.191909', 'step': 13026, 'epoch': 2} {'type': 'loss', 'content': 0.0741051733493805, 'timestamp': '2025-09-10 02:41:12.195341', 'step': 13027, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 144], 'flops': 4271696270016}, 'timestamp': '2025-09-10 02:41:12.228334', 'step': 13027, 'epoch': 2} {'type': 'loss', 'content': 0.10776232928037643, 'timestamp': '2025-09-10 02:41:12.251978', 'step': 13028, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 3322488817984}, 'timestamp': '2025-09-10 02:41:12.283132', 'step': 13028, 'epoch': 2} {'type': 'loss', 'content': 0.1340150684118271, 'timestamp': '2025-09-10 02:41:12.285777', 'step': 13029, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 128], 'flops': 3797092544000}, 'timestamp': '2025-09-10 02:41:12.317322', 'step': 13029, 'epoch': 2} {'type': 'loss', 'content': 0.10922021418809891, 'timestamp': '2025-09-10 02:41:12.319768', 'step': 13030, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 128], 'flops': 3797092544000}, 'timestamp': '2025-09-10 02:41:12.353793', 'step': 13030, 'epoch': 2} {'type': 'loss', 'content': 0.06351775676012039, 'timestamp': '2025-09-10 02:41:12.356309', 'step': 13031, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 160], 'flops': 4746299996032}, 'timestamp': '2025-09-10 02:41:12.388210', 'step': 13031, 'epoch': 2} {'type': 'loss', 'content': 0.09100072830915451, 'timestamp': '2025-09-10 02:41:12.412384', 'step': 13032, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 3322488817984}, 'timestamp': '2025-09-10 02:41:12.442303', 'step': 13032, 'epoch': 2} {'type': 'loss', 'content': 0.03214695677161217, 'timestamp': '2025-09-10 02:41:12.444895', 'step': 13033, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 2847885091968}, 'timestamp': '2025-09-10 02:41:12.474901', 'step': 13033, 'epoch': 2} {'type': 'loss', 'content': 0.08439799398183823, 'timestamp': '2025-09-10 02:41:12.477901', 'step': 13034, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 176], 'flops': 5220903722048}, 'timestamp': '2025-09-10 02:41:12.508945', 'step': 13034, 'epoch': 2} {'type': 'loss', 'content': 0.07987341284751892, 'timestamp': '2025-09-10 02:41:12.512784', 'step': 13035, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 128], 'flops': 3797092544000}, 'timestamp': '2025-09-10 02:41:12.542584', 'step': 13035, 'epoch': 2} {'type': 'loss', 'content': 0.02599082514643669, 'timestamp': '2025-09-10 02:41:12.566407', 'step': 13036, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 128], 'flops': 3797092544000}, 'timestamp': '2025-09-10 02:41:12.597036', 'step': 13036, 'epoch': 2} {'type': 'loss', 'content': 0.1388581544160843, 'timestamp': '2025-09-10 02:41:12.599449', 'step': 13037, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 2847885091968}, 'timestamp': '2025-09-10 02:41:12.630806', 'step': 13037, 'epoch': 2} {'type': 'loss', 'content': 0.09073352068662643, 'timestamp': '2025-09-10 02:41:12.633154', 'step': 13038, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 128], 'flops': 3797092544000}, 'timestamp': '2025-09-10 02:41:12.663206', 'step': 13038, 'epoch': 2} {'type': 'loss', 'content': 0.09881763160228729, 'timestamp': '2025-09-10 02:41:12.665567', 'step': 13039, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 80], 'flops': 2373281365952}, 'timestamp': '2025-09-10 02:41:12.695682', 'step': 13039, 'epoch': 2} {'type': 'loss', 'content': 0.1569763571023941, 'timestamp': '2025-09-10 02:41:12.719635', 'step': 13040, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 3322488817984}, 'timestamp': '2025-09-10 02:41:12.753080', 'step': 13040, 'epoch': 2} {'type': 'loss', 'content': 0.07911816984415054, 'timestamp': '2025-09-10 02:41:12.762226', 'step': 13041, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 2847885091968}, 'timestamp': '2025-09-10 02:41:12.792122', 'step': 13041, 'epoch': 2} {'type': 'loss', 'content': 0.08575589954853058, 'timestamp': '2025-09-10 02:41:12.794795', 'step': 13042, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 3322488817984}, 'timestamp': '2025-09-10 02:41:12.825183', 'step': 13042, 'epoch': 2} {'type': 'loss', 'content': 0.10386624187231064, 'timestamp': '2025-09-10 02:41:12.827491', 'step': 13043, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 3322488817984}, 'timestamp': '2025-09-10 02:41:12.857562', 'step': 13043, 'epoch': 2} {'type': 'loss', 'content': 0.12529651820659637, 'timestamp': '2025-09-10 02:41:12.880996', 'step': 13044, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 128], 'flops': 3797092544000}, 'timestamp': '2025-09-10 02:41:12.912638', 'step': 13044, 'epoch': 2} {'type': 'loss', 'content': 0.1577552706003189, 'timestamp': '2025-09-10 02:41:12.915419', 'step': 13045, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 3322488817984}, 'timestamp': '2025-09-10 02:41:12.945780', 'step': 13045, 'epoch': 2} {'type': 'loss', 'content': 0.020884795114398003, 'timestamp': '2025-09-10 02:41:12.950632', 'step': 13046, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 80], 'flops': 2373281365952}, 'timestamp': '2025-09-10 02:41:12.981625', 'step': 13046, 'epoch': 2} {'type': 'loss', 'content': 0.05694737285375595, 'timestamp': '2025-09-10 02:41:12.984381', 'step': 13047, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 2847885091968}, 'timestamp': '2025-09-10 02:41:13.015419', 'step': 13047, 'epoch': 2} {'type': 'loss', 'content': 0.06413807719945908, 'timestamp': '2025-09-10 02:41:13.038918', 'step': 13048, 'epoch': 2} {'type': 'flops', 'content': [{'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1582003754624}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1898404492032}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1898404492032}, {'type': 'perplexity', 'in_batch_dim': [8, 144], 'batch_size': 8, 'flops': 2847606704256}, {'type': 'perplexity', 'in_batch_dim': [8, 64], 'batch_size': 8, 'flops': 1265603017216}, {'type': 'perplexity', 'in_batch_dim': [8, 112], 'batch_size': 8, 'flops': 2214805229440}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1898404492032}, {'type': 'perplexity', 'in_batch_dim': [8, 112], 'batch_size': 8, 'flops': 2214805229440}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1898404492032}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1898404492032}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1898404492032}, {'type': 'perplexity', 'in_batch_dim': [8, 112], 'batch_size': 8, 'flops': 2214805229440}, {'type': 'perplexity', 'in_batch_dim': [8, 112], 'batch_size': 8, 'flops': 2214805229440}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1582003754624}, {'type': 'perplexity', 'in_batch_dim': [8, 144], 'batch_size': 8, 'flops': 2847606704256}, {'type': 'perplexity', 'in_batch_dim': [8, 112], 'batch_size': 8, 'flops': 2214805229440}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1582003754624}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1898404492032}, {'type': 'perplexity', 'in_batch_dim': [8, 64], 'batch_size': 8, 'flops': 1265603017216}, {'type': 'perplexity', 'in_batch_dim': [8, 64], 'batch_size': 8, 'flops': 1265603017216}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1898404492032}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1582003754624}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1582003754624}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1582003754624}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1582003754624}, {'type': 'perplexity', 'in_batch_dim': [8, 64], 'batch_size': 8, 'flops': 1265603017216}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1898404492032}, {'type': 'perplexity', 'in_batch_dim': [8, 64], 'batch_size': 8, 'flops': 1265603017216}, {'type': 'perplexity', 'in_batch_dim': [8, 64], 'batch_size': 8, 'flops': 1265603017216}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1582003754624}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1582003754624}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1582003754624}, {'type': 'perplexity', 'in_batch_dim': [8, 112], 'batch_size': 8, 'flops': 2214805229440}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1582003754624}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1582003754624}, {'type': 'perplexity', 'in_batch_dim': [8, 160], 'batch_size': 8, 'flops': 3164007441664}, {'type': 'perplexity', 'in_batch_dim': [8, 64], 'batch_size': 8, 'flops': 1265603017216}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1898404492032}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1898404492032}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1898404492032}, {'type': 'perplexity', 'in_batch_dim': [8, 64], 'batch_size': 8, 'flops': 1265603017216}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1582003754624}, {'type': 'perplexity', 'in_batch_dim': [8, 64], 'batch_size': 8, 'flops': 1265603017216}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1582003754624}, {'type': 'perplexity', 'in_batch_dim': [8, 64], 'batch_size': 8, 'flops': 1265603017216}, {'type': 'perplexity', 'in_batch_dim': [8, 112], 'batch_size': 8, 'flops': 2214805229440}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1582003754624}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1898404492032}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1898404492032}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1898404492032}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1898404492032}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1898404492032}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1898404492032}, {'type': 'perplexity', 'in_batch_dim': [8, 64], 'batch_size': 8, 'flops': 1265603017216}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1898404492032}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1582003754624}, {'type': 'perplexity', 'in_batch_dim': [8, 64], 'batch_size': 8, 'flops': 1265603017216}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1898404492032}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1898404492032}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1898404492032}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1898404492032}, {'type': 'perplexity', 'in_batch_dim': [8, 128], 'batch_size': 8, 'flops': 2531205966848}, {'type': 'perplexity', 'in_batch_dim': [8, 112], 'batch_size': 8, 'flops': 2214805229440}, {'type': 'perplexity', 'in_batch_dim': [8, 64], 'batch_size': 8, 'flops': 1265603017216}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1582003754624}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1898404492032}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1582003754624}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1898404492032}, {'type': 'perplexity', 'in_batch_dim': [8, 64], 'batch_size': 8, 'flops': 1265603017216}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1898404492032}, {'type': 'perplexity', 'in_batch_dim': [8, 112], 'batch_size': 8, 'flops': 2214805229440}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1898404492032}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1582003754624}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1582003754624}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1582003754624}, {'type': 'perplexity', 'in_batch_dim': [8, 64], 'batch_size': 8, 'flops': 1265603017216}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1582003754624}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1898404492032}, {'type': 'perplexity', 'in_batch_dim': [8, 64], 'batch_size': 8, 'flops': 1265603017216}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1582003754624}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1898404492032}, {'type': 'perplexity', 'in_batch_dim': [8, 64], 'batch_size': 8, 'flops': 1265603017216}, {'type': 'perplexity', 'in_batch_dim': [8, 112], 'batch_size': 8, 'flops': 2214805229440}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1582003754624}, {'type': 'perplexity', 'in_batch_dim': [8, 144], 'batch_size': 8, 'flops': 2847606704256}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1582003754624}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1582003754624}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1582003754624}, {'type': 'perplexity', 'in_batch_dim': [8, 64], 'batch_size': 8, 'flops': 1265603017216}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1898404492032}, {'type': 'perplexity', 'in_batch_dim': [8, 112], 'batch_size': 8, 'flops': 2214805229440}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1898404492032}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1582003754624}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1582003754624}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1582003754624}, {'type': 'perplexity', 'in_batch_dim': [8, 112], 'batch_size': 8, 'flops': 2214805229440}, {'type': 'perplexity', 'in_batch_dim': [8, 64], 'batch_size': 8, 'flops': 1265603017216}, {'type': 'perplexity', 'in_batch_dim': [8, 112], 'batch_size': 8, 'flops': 2214805229440}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1582003754624}, {'type': 'perplexity', 'in_batch_dim': [8, 64], 'batch_size': 8, 'flops': 1265603017216}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1582003754624}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1582003754624}, {'type': 'perplexity', 'in_batch_dim': [8, 64], 'batch_size': 8, 'flops': 1265603017216}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1582003754624}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1582003754624}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1582003754624}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1898404492032}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1582003754624}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1582003754624}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1898404492032}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1898404492032}, {'type': 'perplexity', 'in_batch_dim': [8, 112], 'batch_size': 8, 'flops': 2214805229440}, {'type': 'perplexity', 'in_batch_dim': [8, 64], 'batch_size': 8, 'flops': 1265603017216}, {'type': 'perplexity', 'in_batch_dim': [8, 112], 'batch_size': 8, 'flops': 2214805229440}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1898404492032}, {'type': 'perplexity', 'in_batch_dim': [8, 112], 'batch_size': 8, 'flops': 2214805229440}, {'type': 'perplexity', 'in_batch_dim': [8, 128], 'batch_size': 8, 'flops': 2531205966848}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1582003754624}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1898404492032}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1898404492032}, {'type': 'perplexity', 'in_batch_dim': [8, 192], 'batch_size': 8, 'flops': 3796808916480}, {'type': 'perplexity', 'in_batch_dim': [8, 64], 'batch_size': 8, 'flops': 1265603017216}, {'type': 'perplexity', 'in_batch_dim': [8, 144], 'batch_size': 8, 'flops': 2847606704256}, {'type': 'perplexity', 'in_batch_dim': [8, 64], 'batch_size': 8, 'flops': 1265603017216}, {'type': 'perplexity', 'in_batch_dim': [8, 144], 'batch_size': 8, 'flops': 2847606704256}, {'type': 'perplexity', 'in_batch_dim': [8, 64], 'batch_size': 8, 'flops': 1265603017216}, {'type': 'perplexity', 'in_batch_dim': [8, 64], 'batch_size': 8, 'flops': 1265603017216}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1898404492032}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1898404492032}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1582003754624}, {'type': 'perplexity', 'in_batch_dim': [8, 128], 'batch_size': 8, 'flops': 2531205966848}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1898404492032}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1898404492032}, {'type': 'perplexity', 'in_batch_dim': [8, 112], 'batch_size': 8, 'flops': 2214805229440}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1582003754624}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1582003754624}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1898404492032}, {'type': 'perplexity', 'in_batch_dim': [8, 64], 'batch_size': 8, 'flops': 1265603017216}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1898404492032}, {'type': 'perplexity', 'in_batch_dim': [8, 112], 'batch_size': 8, 'flops': 2214805229440}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1898404492032}, {'type': 'perplexity', 'in_batch_dim': [8, 64], 'batch_size': 8, 'flops': 1265603017216}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1582003754624}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1898404492032}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1898404492032}, {'type': 'perplexity', 'in_batch_dim': [8, 64], 'batch_size': 8, 'flops': 1265603017216}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1898404492032}, {'type': 'perplexity', 'in_batch_dim': [8, 64], 'batch_size': 8, 'flops': 1265603017216}, {'type': 'perplexity', 'in_batch_dim': [8, 112], 'batch_size': 8, 'flops': 2214805229440}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1582003754624}, {'type': 'perplexity', 'in_batch_dim': [8, 128], 'batch_size': 8, 'flops': 2531205966848}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1582003754624}, {'type': 'perplexity', 'in_batch_dim': [8, 112], 'batch_size': 8, 'flops': 2214805229440}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1898404492032}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1582003754624}, {'type': 'perplexity', 'in_batch_dim': [8, 112], 'batch_size': 8, 'flops': 2214805229440}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1582003754624}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1898404492032}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1898404492032}, {'type': 'perplexity', 'in_batch_dim': [8, 128], 'batch_size': 8, 'flops': 2531205966848}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1898404492032}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1898404492032}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1582003754624}, {'type': 'perplexity', 'in_batch_dim': [8, 112], 'batch_size': 8, 'flops': 2214805229440}, {'type': 'perplexity', 'in_batch_dim': [8, 128], 'batch_size': 8, 'flops': 2531205966848}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1898404492032}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1582003754624}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1582003754624}, {'type': 'perplexity', 'in_batch_dim': [8, 128], 'batch_size': 8, 'flops': 2531205966848}, {'type': 'perplexity', 'in_batch_dim': [8, 112], 'batch_size': 8, 'flops': 2214805229440}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1582003754624}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1582003754624}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1898404492032}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1582003754624}, {'type': 'perplexity', 'in_batch_dim': [8, 64], 'batch_size': 8, 'flops': 1265603017216}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1582003754624}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1582003754624}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1582003754624}, {'type': 'perplexity', 'in_batch_dim': [8, 112], 'batch_size': 8, 'flops': 2214805229440}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1582003754624}, {'type': 'perplexity', 'in_batch_dim': [8, 64], 'batch_size': 8, 'flops': 1265603017216}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1582003754624}, {'type': 'perplexity', 'in_batch_dim': [8, 128], 'batch_size': 8, 'flops': 2531205966848}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1898404492032}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1898404492032}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1582003754624}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1898404492032}, {'type': 'perplexity', 'in_batch_dim': [8, 64], 'batch_size': 8, 'flops': 1265603017216}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1898404492032}, {'type': 'perplexity', 'in_batch_dim': [8, 112], 'batch_size': 8, 'flops': 2214805229440}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1898404492032}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1582003754624}, {'type': 'perplexity', 'in_batch_dim': [8, 112], 'batch_size': 8, 'flops': 2214805229440}, {'type': 'perplexity', 'in_batch_dim': [8, 112], 'batch_size': 8, 'flops': 2214805229440}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1898404492032}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1898404492032}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1898404492032}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1898404492032}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1582003754624}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1582003754624}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1582003754624}, {'type': 'perplexity', 'in_batch_dim': [8, 112], 'batch_size': 8, 'flops': 2214805229440}, {'type': 'perplexity', 'in_batch_dim': [8, 64], 'batch_size': 8, 'flops': 1265603017216}, {'type': 'perplexity', 'in_batch_dim': [8, 144], 'batch_size': 8, 'flops': 2847606704256}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1898404492032}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1898404492032}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1898404492032}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1898404492032}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1898404492032}, {'type': 'perplexity', 'in_batch_dim': [8, 112], 'batch_size': 8, 'flops': 2214805229440}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1582003754624}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1582003754624}, {'type': 'perplexity', 'in_batch_dim': [8, 64], 'batch_size': 8, 'flops': 1265603017216}, {'type': 'perplexity', 'in_batch_dim': [8, 112], 'batch_size': 8, 'flops': 2214805229440}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1582003754624}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1898404492032}, {'type': 'perplexity', 'in_batch_dim': [8, 64], 'batch_size': 8, 'flops': 1265603017216}, {'type': 'perplexity', 'in_batch_dim': [8, 112], 'batch_size': 8, 'flops': 2214805229440}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1898404492032}, {'type': 'perplexity', 'in_batch_dim': [8, 64], 'batch_size': 8, 'flops': 1265603017216}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1582003754624}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1582003754624}, {'type': 'perplexity', 'in_batch_dim': [8, 64], 'batch_size': 8, 'flops': 1265603017216}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1898404492032}, {'type': 'perplexity', 'in_batch_dim': [8, 64], 'batch_size': 8, 'flops': 1265603017216}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1582003754624}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1898404492032}, {'type': 'perplexity', 'in_batch_dim': [8, 112], 'batch_size': 8, 'flops': 2214805229440}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1582003754624}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1898404492032}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1582003754624}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1898404492032}, {'type': 'perplexity', 'in_batch_dim': [8, 112], 'batch_size': 8, 'flops': 2214805229440}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1898404492032}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1898404492032}, {'type': 'perplexity', 'in_batch_dim': [8, 64], 'batch_size': 8, 'flops': 1265603017216}, {'type': 'perplexity', 'in_batch_dim': [8, 64], 'batch_size': 8, 'flops': 1265603017216}, {'type': 'perplexity', 'in_batch_dim': [8, 128], 'batch_size': 8, 'flops': 2531205966848}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1582003754624}, {'type': 'perplexity', 'in_batch_dim': [8, 64], 'batch_size': 8, 'flops': 1265603017216}, {'type': 'perplexity', 'in_batch_dim': [8, 112], 'batch_size': 8, 'flops': 2214805229440}, {'type': 'perplexity', 'in_batch_dim': [8, 112], 'batch_size': 8, 'flops': 2214805229440}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1898404492032}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1582003754624}, {'type': 'perplexity', 'in_batch_dim': [8, 128], 'batch_size': 8, 'flops': 2531205966848}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1898404492032}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1582003754624}, {'type': 'perplexity', 'in_batch_dim': [8, 112], 'batch_size': 8, 'flops': 2214805229440}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1898404492032}, {'type': 'perplexity', 'in_batch_dim': [8, 64], 'batch_size': 8, 'flops': 1265603017216}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1582003754624}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1898404492032}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1898404492032}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1582003754624}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1582003754624}, {'type': 'perplexity', 'in_batch_dim': [8, 64], 'batch_size': 8, 'flops': 1265603017216}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1898404492032}, {'type': 'perplexity', 'in_batch_dim': [8, 112], 'batch_size': 8, 'flops': 2214805229440}, {'type': 'perplexity', 'in_batch_dim': [8, 64], 'batch_size': 8, 'flops': 1265603017216}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1898404492032}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1582003754624}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1582003754624}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1898404492032}, {'type': 'perplexity', 'in_batch_dim': [8, 112], 'batch_size': 8, 'flops': 2214805229440}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1898404492032}, {'type': 'perplexity', 'in_batch_dim': [8, 112], 'batch_size': 8, 'flops': 2214805229440}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1582003754624}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1582003754624}, {'type': 'perplexity', 'in_batch_dim': [8, 64], 'batch_size': 8, 'flops': 1265603017216}, {'type': 'perplexity', 'in_batch_dim': [8, 112], 'batch_size': 8, 'flops': 2214805229440}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1582003754624}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1898404492032}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1582003754624}, {'type': 'perplexity', 'in_batch_dim': [8, 112], 'batch_size': 8, 'flops': 2214805229440}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1898404492032}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1898404492032}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1582003754624}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1898404492032}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1582003754624}, {'type': 'perplexity', 'in_batch_dim': [8, 112], 'batch_size': 8, 'flops': 2214805229440}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1582003754624}, {'type': 'perplexity', 'in_batch_dim': [8, 64], 'batch_size': 8, 'flops': 1265603017216}, {'type': 'perplexity', 'in_batch_dim': [8, 64], 'batch_size': 8, 'flops': 1265603017216}, {'type': 'perplexity', 'in_batch_dim': [8, 48], 'batch_size': 8, 'flops': 949202279808}, {'type': 'perplexity', 'in_batch_dim': [8, 112], 'batch_size': 8, 'flops': 2214805229440}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1582003754624}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1898404492032}, {'type': 'perplexity', 'in_batch_dim': [8, 128], 'batch_size': 8, 'flops': 2531205966848}, {'type': 'perplexity', 'in_batch_dim': [8, 112], 'batch_size': 8, 'flops': 2214805229440}, {'type': 'perplexity', 'in_batch_dim': [8, 128], 'batch_size': 8, 'flops': 2531205966848}, {'type': 'perplexity', 'in_batch_dim': [8, 112], 'batch_size': 8, 'flops': 2214805229440}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1582003754624}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1898404492032}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1582003754624}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1582003754624}, {'type': 'perplexity', 'in_batch_dim': [8, 112], 'batch_size': 8, 'flops': 2214805229440}, {'type': 'perplexity', 'in_batch_dim': [8, 112], 'batch_size': 8, 'flops': 2214805229440}, {'type': 'perplexity', 'in_batch_dim': [8, 64], 'batch_size': 8, 'flops': 1265603017216}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1898404492032}, {'type': 'perplexity', 'in_batch_dim': [8, 112], 'batch_size': 8, 'flops': 2214805229440}, {'type': 'perplexity', 'in_batch_dim': [8, 64], 'batch_size': 8, 'flops': 1265603017216}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1898404492032}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1582003754624}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1898404492032}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1898404492032}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1898404492032}, {'type': 'perplexity', 'in_batch_dim': [8, 48], 'batch_size': 8, 'flops': 949202279808}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1898404492032}, {'type': 'perplexity', 'in_batch_dim': [8, 112], 'batch_size': 8, 'flops': 2214805229440}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1582003754624}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1898404492032}, {'type': 'perplexity', 'in_batch_dim': [8, 64], 'batch_size': 8, 'flops': 1265603017216}, {'type': 'perplexity', 'in_batch_dim': [8, 112], 'batch_size': 8, 'flops': 2214805229440}, {'type': 'perplexity', 'in_batch_dim': [8, 64], 'batch_size': 8, 'flops': 1265603017216}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1582003754624}, {'type': 'perplexity', 'in_batch_dim': [8, 64], 'batch_size': 8, 'flops': 1265603017216}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1582003754624}, {'type': 'perplexity', 'in_batch_dim': [8, 112], 'batch_size': 8, 'flops': 2214805229440}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1582003754624}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1582003754624}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1582003754624}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1582003754624}, {'type': 'perplexity', 'in_batch_dim': [8, 112], 'batch_size': 8, 'flops': 2214805229440}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1582003754624}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1898404492032}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1898404492032}, {'type': 'perplexity', 'in_batch_dim': [8, 112], 'batch_size': 8, 'flops': 2214805229440}, {'type': 'perplexity', 'in_batch_dim': [8, 144], 'batch_size': 8, 'flops': 2847606704256}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1582003754624}, {'type': 'perplexity', 'in_batch_dim': [8, 64], 'batch_size': 8, 'flops': 1265603017216}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1898404492032}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1898404492032}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1582003754624}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1898404492032}, {'type': 'perplexity', 'in_batch_dim': [8, 128], 'batch_size': 8, 'flops': 2531205966848}, {'type': 'perplexity', 'in_batch_dim': [8, 112], 'batch_size': 8, 'flops': 2214805229440}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1898404492032}, {'type': 'perplexity', 'in_batch_dim': [8, 64], 'batch_size': 8, 'flops': 1265603017216}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1898404492032}, {'type': 'perplexity', 'in_batch_dim': [8, 160], 'batch_size': 8, 'flops': 3164007441664}, {'type': 'perplexity', 'in_batch_dim': [8, 64], 'batch_size': 8, 'flops': 1265603017216}, {'type': 'perplexity', 'in_batch_dim': [8, 112], 'batch_size': 8, 'flops': 2214805229440}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1898404492032}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1898404492032}, {'type': 'perplexity', 'in_batch_dim': [8, 144], 'batch_size': 8, 'flops': 2847606704256}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1582003754624}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1898404492032}, {'type': 'perplexity', 'in_batch_dim': [8, 64], 'batch_size': 8, 'flops': 1265603017216}, {'type': 'perplexity', 'in_batch_dim': [8, 64], 'batch_size': 8, 'flops': 1265603017216}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1898404492032}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1898404492032}, {'type': 'perplexity', 'in_batch_dim': [8, 112], 'batch_size': 8, 'flops': 2214805229440}, {'type': 'perplexity', 'in_batch_dim': [8, 112], 'batch_size': 8, 'flops': 2214805229440}, {'type': 'perplexity', 'in_batch_dim': [8, 128], 'batch_size': 8, 'flops': 2531205966848}, {'type': 'perplexity', 'in_batch_dim': [8, 112], 'batch_size': 8, 'flops': 2214805229440}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1582003754624}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1582003754624}, {'type': 'perplexity', 'in_batch_dim': [8, 112], 'batch_size': 8, 'flops': 2214805229440}, {'type': 'perplexity', 'in_batch_dim': [8, 128], 'batch_size': 8, 'flops': 2531205966848}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1582003754624}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1582003754624}, {'type': 'perplexity', 'in_batch_dim': [8, 64], 'batch_size': 8, 'flops': 1265603017216}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1898404492032}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1582003754624}, {'type': 'perplexity', 'in_batch_dim': [8, 112], 'batch_size': 8, 'flops': 2214805229440}, {'type': 'perplexity', 'in_batch_dim': [8, 64], 'batch_size': 8, 'flops': 1265603017216}, {'type': 'perplexity', 'in_batch_dim': [8, 64], 'batch_size': 8, 'flops': 1265603017216}, {'type': 'perplexity', 'in_batch_dim': [8, 128], 'batch_size': 8, 'flops': 2531205966848}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1898404492032}, {'type': 'perplexity', 'in_batch_dim': [8, 112], 'batch_size': 8, 'flops': 2214805229440}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1582003754624}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1582003754624}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1582003754624}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1582003754624}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1582003754624}, {'type': 'perplexity', 'in_batch_dim': [8, 112], 'batch_size': 8, 'flops': 2214805229440}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1898404492032}, {'type': 'perplexity', 'in_batch_dim': [8, 112], 'batch_size': 8, 'flops': 2214805229440}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1898404492032}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1582003754624}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1582003754624}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1898404492032}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1898404492032}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1582003754624}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1582003754624}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1582003754624}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1582003754624}, {'type': 'perplexity', 'in_batch_dim': [8, 48], 'batch_size': 8, 'flops': 949202279808}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1582003754624}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1898404492032}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1582003754624}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1898404492032}, {'type': 'perplexity', 'in_batch_dim': [8, 112], 'batch_size': 8, 'flops': 2214805229440}, {'type': 'perplexity', 'in_batch_dim': [8, 128], 'batch_size': 8, 'flops': 2531205966848}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1898404492032}, {'type': 'perplexity', 'in_batch_dim': [8, 64], 'batch_size': 8, 'flops': 1265603017216}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1898404492032}, {'type': 'perplexity', 'in_batch_dim': [8, 64], 'batch_size': 8, 'flops': 1265603017216}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1582003754624}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1582003754624}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1582003754624}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1582003754624}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1898404492032}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1582003754624}, {'type': 'perplexity', 'in_batch_dim': [8, 112], 'batch_size': 8, 'flops': 2214805229440}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1898404492032}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1582003754624}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1898404492032}, {'type': 'perplexity', 'in_batch_dim': [8, 64], 'batch_size': 8, 'flops': 1265603017216}, {'type': 'perplexity', 'in_batch_dim': [8, 64], 'batch_size': 8, 'flops': 1265603017216}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1898404492032}, {'type': 'perplexity', 'in_batch_dim': [8, 64], 'batch_size': 8, 'flops': 1265603017216}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1582003754624}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1582003754624}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1582003754624}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1582003754624}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1582003754624}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1582003754624}, {'type': 'perplexity', 'in_batch_dim': [8, 64], 'batch_size': 8, 'flops': 1265603017216}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1582003754624}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1582003754624}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1898404492032}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1898404492032}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1898404492032}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1898404492032}, {'type': 'perplexity', 'in_batch_dim': [8, 128], 'batch_size': 8, 'flops': 2531205966848}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1582003754624}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1898404492032}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1898404492032}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1898404492032}, {'type': 'perplexity', 'in_batch_dim': [8, 64], 'batch_size': 8, 'flops': 1265603017216}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1898404492032}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1898404492032}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1582003754624}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1898404492032}, {'type': 'perplexity', 'in_batch_dim': [8, 112], 'batch_size': 8, 'flops': 2214805229440}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1898404492032}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1582003754624}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1898404492032}, {'type': 'perplexity', 'in_batch_dim': [8, 64], 'batch_size': 8, 'flops': 1265603017216}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1898404492032}, {'type': 'perplexity', 'in_batch_dim': [8, 112], 'batch_size': 8, 'flops': 2214805229440}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1898404492032}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1898404492032}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1582003754624}, {'type': 'perplexity', 'in_batch_dim': [8, 64], 'batch_size': 8, 'flops': 1265603017216}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1582003754624}, {'type': 'perplexity', 'in_batch_dim': [8, 128], 'batch_size': 8, 'flops': 2531205966848}, {'type': 'perplexity', 'in_batch_dim': [8, 64], 'batch_size': 8, 'flops': 1265603017216}, {'type': 'perplexity', 'in_batch_dim': [8, 128], 'batch_size': 8, 'flops': 2531205966848}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1898404492032}, {'type': 'perplexity', 'in_batch_dim': [8, 112], 'batch_size': 8, 'flops': 2214805229440}, {'type': 'perplexity', 'in_batch_dim': [8, 112], 'batch_size': 8, 'flops': 2214805229440}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1898404492032}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1898404492032}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1582003754624}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1582003754624}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1582003754624}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1898404492032}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1582003754624}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1582003754624}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1898404492032}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1582003754624}, {'type': 'perplexity', 'in_batch_dim': [8, 64], 'batch_size': 8, 'flops': 1265603017216}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1898404492032}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1898404492032}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1898404492032}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1898404492032}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1582003754624}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1898404492032}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1582003754624}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1582003754624}, {'type': 'perplexity', 'in_batch_dim': [8, 112], 'batch_size': 8, 'flops': 2214805229440}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1582003754624}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1898404492032}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1582003754624}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1582003754624}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1582003754624}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1898404492032}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1582003754624}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1898404492032}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1582003754624}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1582003754624}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1898404492032}, {'type': 'perplexity', 'in_batch_dim': [8, 112], 'batch_size': 8, 'flops': 2214805229440}, {'type': 'perplexity', 'in_batch_dim': [8, 128], 'batch_size': 8, 'flops': 2531205966848}, {'type': 'perplexity', 'in_batch_dim': [8, 112], 'batch_size': 8, 'flops': 2214805229440}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1582003754624}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1898404492032}, {'type': 'perplexity', 'in_batch_dim': [8, 64], 'batch_size': 8, 'flops': 1265603017216}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1898404492032}, {'type': 'perplexity', 'in_batch_dim': [8, 64], 'batch_size': 8, 'flops': 1265603017216}, {'type': 'perplexity', 'in_batch_dim': [8, 144], 'batch_size': 8, 'flops': 2847606704256}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1582003754624}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1898404492032}, {'type': 'perplexity', 'in_batch_dim': [8, 112], 'batch_size': 8, 'flops': 2214805229440}, {'type': 'perplexity', 'in_batch_dim': [8, 144], 'batch_size': 8, 'flops': 2847606704256}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1582003754624}, {'type': 'perplexity', 'in_batch_dim': [8, 112], 'batch_size': 8, 'flops': 2214805229440}, {'type': 'perplexity', 'in_batch_dim': [8, 64], 'batch_size': 8, 'flops': 1265603017216}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1582003754624}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1898404492032}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1898404492032}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1582003754624}, {'type': 'perplexity', 'in_batch_dim': [8, 112], 'batch_size': 8, 'flops': 2214805229440}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1582003754624}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1582003754624}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1582003754624}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1582003754624}, {'type': 'perplexity', 'in_batch_dim': [8, 112], 'batch_size': 8, 'flops': 2214805229440}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1898404492032}, {'type': 'perplexity', 'in_batch_dim': [8, 128], 'batch_size': 8, 'flops': 2531205966848}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1582003754624}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1582003754624}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1898404492032}, {'type': 'perplexity', 'in_batch_dim': [8, 64], 'batch_size': 8, 'flops': 1265603017216}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1898404492032}, {'type': 'perplexity', 'in_batch_dim': [8, 64], 'batch_size': 8, 'flops': 1265603017216}, {'type': 'perplexity', 'in_batch_dim': [8, 112], 'batch_size': 8, 'flops': 2214805229440}, {'type': 'perplexity', 'in_batch_dim': [8, 112], 'batch_size': 8, 'flops': 2214805229440}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1582003754624}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1898404492032}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1582003754624}, {'type': 'perplexity', 'in_batch_dim': [8, 64], 'batch_size': 8, 'flops': 1265603017216}, {'type': 'perplexity', 'in_batch_dim': [8, 64], 'batch_size': 8, 'flops': 1265603017216}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1898404492032}, {'type': 'perplexity', 'in_batch_dim': [8, 64], 'batch_size': 8, 'flops': 1265603017216}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1898404492032}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1898404492032}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1582003754624}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1898404492032}, {'type': 'perplexity', 'in_batch_dim': [8, 144], 'batch_size': 8, 'flops': 2847606704256}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1898404492032}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1582003754624}, {'type': 'perplexity', 'in_batch_dim': [8, 128], 'batch_size': 8, 'flops': 2531205966848}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1898404492032}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1898404492032}, {'type': 'perplexity', 'in_batch_dim': [8, 112], 'batch_size': 8, 'flops': 2214805229440}, {'type': 'perplexity', 'in_batch_dim': [8, 112], 'batch_size': 8, 'flops': 2214805229440}, {'type': 'perplexity', 'in_batch_dim': [8, 64], 'batch_size': 8, 'flops': 1265603017216}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1582003754624}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1898404492032}, {'type': 'perplexity', 'in_batch_dim': [8, 128], 'batch_size': 8, 'flops': 2531205966848}, {'type': 'perplexity', 'in_batch_dim': [8, 144], 'batch_size': 8, 'flops': 2847606704256}, {'type': 'perplexity', 'in_batch_dim': [8, 112], 'batch_size': 8, 'flops': 2214805229440}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1582003754624}, {'type': 'perplexity', 'in_batch_dim': [8, 64], 'batch_size': 8, 'flops': 1265603017216}, {'type': 'perplexity', 'in_batch_dim': [8, 64], 'batch_size': 8, 'flops': 1265603017216}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1898404492032}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1582003754624}, {'type': 'perplexity', 'in_batch_dim': [8, 64], 'batch_size': 8, 'flops': 1265603017216}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1898404492032}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1582003754624}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1898404492032}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1898404492032}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1582003754624}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1582003754624}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1582003754624}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1582003754624}, {'type': 'perplexity', 'in_batch_dim': [3, 48], 'batch_size': 8, 'flops': 949202279808}], 'timestamp': '2025-09-10 02:41:20.870385', 'step': 13048, 'epoch': 2} {'type': 'pplx', 'content': 9361.681839653678, 'timestamp': '2025-09-10 02:41:20.873557', 'step': 13048, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 3322488817984}, 'timestamp': '2025-09-10 02:41:20.903910', 'step': 13048, 'epoch': 2} {'type': 'loss', 'content': 0.09107053279876709, 'timestamp': '2025-09-10 02:41:20.906571', 'step': 13049, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 2847885091968}, 'timestamp': '2025-09-10 02:41:20.940047', 'step': 13049, 'epoch': 2} {'type': 'loss', 'content': 0.07497110962867737, 'timestamp': '2025-09-10 02:41:20.943494', 'step': 13050, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 3322488817984}, 'timestamp': '2025-09-10 02:41:20.974130', 'step': 13050, 'epoch': 2} {'type': 'loss', 'content': 0.13962025940418243, 'timestamp': '2025-09-10 02:41:20.976331', 'step': 13051, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 144], 'flops': 4271696270016}, 'timestamp': '2025-09-10 02:41:21.007340', 'step': 13051, 'epoch': 2} {'type': 'loss', 'content': 0.06453035771846771, 'timestamp': '2025-09-10 02:41:21.033894', 'step': 13052, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 2847885091968}, 'timestamp': '2025-09-10 02:41:21.064734', 'step': 13052, 'epoch': 2} {'type': 'loss', 'content': 0.09264302998781204, 'timestamp': '2025-09-10 02:41:21.067437', 'step': 13053, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 3322488817984}, 'timestamp': '2025-09-10 02:41:21.098596', 'step': 13053, 'epoch': 2} {'type': 'loss', 'content': 0.10009889304637909, 'timestamp': '2025-09-10 02:41:21.101556', 'step': 13054, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 2847885091968}, 'timestamp': '2025-09-10 02:41:21.133564', 'step': 13054, 'epoch': 2} {'type': 'loss', 'content': 0.08821652829647064, 'timestamp': '2025-09-10 02:41:21.135765', 'step': 13055, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 144], 'flops': 4271696270016}, 'timestamp': '2025-09-10 02:41:21.167712', 'step': 13055, 'epoch': 2} {'type': 'loss', 'content': 0.07956444472074509, 'timestamp': '2025-09-10 02:41:21.191612', 'step': 13056, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 3322488817984}, 'timestamp': '2025-09-10 02:41:21.222195', 'step': 13056, 'epoch': 2} {'type': 'loss', 'content': 0.18571080267429352, 'timestamp': '2025-09-10 02:41:21.226465', 'step': 13057, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 3322488817984}, 'timestamp': '2025-09-10 02:41:21.259301', 'step': 13057, 'epoch': 2} {'type': 'loss', 'content': 0.07402970641851425, 'timestamp': '2025-09-10 02:41:21.261583', 'step': 13058, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 2847885091968}, 'timestamp': '2025-09-10 02:41:21.293630', 'step': 13058, 'epoch': 2} {'type': 'loss', 'content': 0.1194000393152237, 'timestamp': '2025-09-10 02:41:21.296651', 'step': 13059, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 128], 'flops': 3797092544000}, 'timestamp': '2025-09-10 02:41:21.327339', 'step': 13059, 'epoch': 2} {'type': 'loss', 'content': 0.10155124217271805, 'timestamp': '2025-09-10 02:41:21.351658', 'step': 13060, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 3322488817984}, 'timestamp': '2025-09-10 02:41:21.395162', 'step': 13060, 'epoch': 2} {'type': 'loss', 'content': 0.11384142935276031, 'timestamp': '2025-09-10 02:41:21.397545', 'step': 13061, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 128], 'flops': 3797092544000}, 'timestamp': '2025-09-10 02:41:21.428591', 'step': 13061, 'epoch': 2} {'type': 'loss', 'content': 0.10708197951316833, 'timestamp': '2025-09-10 02:41:21.431253', 'step': 13062, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 3322488817984}, 'timestamp': '2025-09-10 02:41:21.470776', 'step': 13062, 'epoch': 2} {'type': 'loss', 'content': 0.12155357748270035, 'timestamp': '2025-09-10 02:41:21.474404', 'step': 13063, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 3322488817984}, 'timestamp': '2025-09-10 02:41:21.506537', 'step': 13063, 'epoch': 2} {'type': 'loss', 'content': 0.07593149691820145, 'timestamp': '2025-09-10 02:41:21.531130', 'step': 13064, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 3322488817984}, 'timestamp': '2025-09-10 02:41:21.563256', 'step': 13064, 'epoch': 2} {'type': 'loss', 'content': 0.10408512502908707, 'timestamp': '2025-09-10 02:41:21.565798', 'step': 13065, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 2847885091968}, 'timestamp': '2025-09-10 02:41:21.596891', 'step': 13065, 'epoch': 2} {'type': 'loss', 'content': 0.04134185239672661, 'timestamp': '2025-09-10 02:41:21.599423', 'step': 13066, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 144], 'flops': 4271696270016}, 'timestamp': '2025-09-10 02:41:21.631625', 'step': 13066, 'epoch': 2} {'type': 'loss', 'content': 0.10716615617275238, 'timestamp': '2025-09-10 02:41:21.634546', 'step': 13067, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 128], 'flops': 3797092544000}, 'timestamp': '2025-09-10 02:41:21.666583', 'step': 13067, 'epoch': 2} {'type': 'loss', 'content': 0.1305358111858368, 'timestamp': '2025-09-10 02:41:21.691552', 'step': 13068, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 3322488817984}, 'timestamp': '2025-09-10 02:41:21.722240', 'step': 13068, 'epoch': 2} {'type': 'loss', 'content': 0.07058456540107727, 'timestamp': '2025-09-10 02:41:21.724874', 'step': 13069, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 144], 'flops': 4271696270016}, 'timestamp': '2025-09-10 02:41:21.757413', 'step': 13069, 'epoch': 2} {'type': 'loss', 'content': 0.05156487226486206, 'timestamp': '2025-09-10 02:41:21.762834', 'step': 13070, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 3322488817984}, 'timestamp': '2025-09-10 02:41:21.794527', 'step': 13070, 'epoch': 2} {'type': 'loss', 'content': 0.06451074779033661, 'timestamp': '2025-09-10 02:41:21.797110', 'step': 13071, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 80], 'flops': 2373281365952}, 'timestamp': '2025-09-10 02:41:21.827694', 'step': 13071, 'epoch': 2} {'type': 'loss', 'content': 0.08187543600797653, 'timestamp': '2025-09-10 02:41:21.851701', 'step': 13072, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 128], 'flops': 3797092544000}, 'timestamp': '2025-09-10 02:41:21.881886', 'step': 13072, 'epoch': 2} {'type': 'loss', 'content': 0.12811803817749023, 'timestamp': '2025-09-10 02:41:21.887082', 'step': 13073, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 3322488817984}, 'timestamp': '2025-09-10 02:41:21.918630', 'step': 13073, 'epoch': 2} {'type': 'loss', 'content': 0.0913487896323204, 'timestamp': '2025-09-10 02:41:21.921215', 'step': 13074, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 3322488817984}, 'timestamp': '2025-09-10 02:41:21.953581', 'step': 13074, 'epoch': 2} {'type': 'loss', 'content': 0.12079834938049316, 'timestamp': '2025-09-10 02:41:21.956167', 'step': 13075, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 128], 'flops': 3797092544000}, 'timestamp': '2025-09-10 02:41:21.987653', 'step': 13075, 'epoch': 2} {'type': 'loss', 'content': 0.06220598518848419, 'timestamp': '2025-09-10 02:41:22.012805', 'step': 13076, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 3322488817984}, 'timestamp': '2025-09-10 02:41:22.044887', 'step': 13076, 'epoch': 2} {'type': 'loss', 'content': 0.10057125985622406, 'timestamp': '2025-09-10 02:41:22.047539', 'step': 13077, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 3322488817984}, 'timestamp': '2025-09-10 02:41:22.078441', 'step': 13077, 'epoch': 2} {'type': 'loss', 'content': 0.104644775390625, 'timestamp': '2025-09-10 02:41:22.081275', 'step': 13078, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 80], 'flops': 2373281365952}, 'timestamp': '2025-09-10 02:41:22.112978', 'step': 13078, 'epoch': 2} {'type': 'loss', 'content': 0.06552387028932571, 'timestamp': '2025-09-10 02:41:22.114969', 'step': 13079, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 3322488817984}, 'timestamp': '2025-09-10 02:41:22.146645', 'step': 13079, 'epoch': 2} {'type': 'loss', 'content': 0.10880136489868164, 'timestamp': '2025-09-10 02:41:22.171086', 'step': 13080, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 2847885091968}, 'timestamp': '2025-09-10 02:41:22.202363', 'step': 13080, 'epoch': 2} {'type': 'loss', 'content': 0.16821342706680298, 'timestamp': '2025-09-10 02:41:22.205111', 'step': 13081, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 144], 'flops': 4271696270016}, 'timestamp': '2025-09-10 02:41:22.237004', 'step': 13081, 'epoch': 2} {'type': 'loss', 'content': 0.09915371239185333, 'timestamp': '2025-09-10 02:41:22.240192', 'step': 13082, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 128], 'flops': 3797092544000}, 'timestamp': '2025-09-10 02:41:22.270454', 'step': 13082, 'epoch': 2} {'type': 'loss', 'content': 0.08444911986589432, 'timestamp': '2025-09-10 02:41:22.273187', 'step': 13083, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 2847885091968}, 'timestamp': '2025-09-10 02:41:22.303659', 'step': 13083, 'epoch': 2} {'type': 'loss', 'content': 0.12092411518096924, 'timestamp': '2025-09-10 02:41:22.327517', 'step': 13084, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 3322488817984}, 'timestamp': '2025-09-10 02:41:22.359146', 'step': 13084, 'epoch': 2} {'type': 'loss', 'content': 0.14532488584518433, 'timestamp': '2025-09-10 02:41:22.364519', 'step': 13085, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 3322488817984}, 'timestamp': '2025-09-10 02:41:22.395444', 'step': 13085, 'epoch': 2} {'type': 'loss', 'content': 0.16696655750274658, 'timestamp': '2025-09-10 02:41:22.397723', 'step': 13086, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 3322488817984}, 'timestamp': '2025-09-10 02:41:22.428772', 'step': 13086, 'epoch': 2} {'type': 'loss', 'content': 0.20963458716869354, 'timestamp': '2025-09-10 02:41:22.432587', 'step': 13087, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 3322488817984}, 'timestamp': '2025-09-10 02:41:22.464158', 'step': 13087, 'epoch': 2} {'type': 'loss', 'content': 0.11262715607881546, 'timestamp': '2025-09-10 02:41:22.488039', 'step': 13088, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 3322488817984}, 'timestamp': '2025-09-10 02:41:22.519379', 'step': 13088, 'epoch': 2} {'type': 'loss', 'content': 0.10857436805963516, 'timestamp': '2025-09-10 02:41:22.521913', 'step': 13089, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 3322488817984}, 'timestamp': '2025-09-10 02:41:22.554306', 'step': 13089, 'epoch': 2} {'type': 'loss', 'content': 0.1317751109600067, 'timestamp': '2025-09-10 02:41:22.556857', 'step': 13090, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 128], 'flops': 3797092544000}, 'timestamp': '2025-09-10 02:41:22.587190', 'step': 13090, 'epoch': 2} {'type': 'loss', 'content': 0.13853929936885834, 'timestamp': '2025-09-10 02:41:22.589647', 'step': 13091, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 128], 'flops': 3797092544000}, 'timestamp': '2025-09-10 02:41:22.619885', 'step': 13091, 'epoch': 2} {'type': 'loss', 'content': 0.13742440938949585, 'timestamp': '2025-09-10 02:41:22.643496', 'step': 13092, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 128], 'flops': 3797092544000}, 'timestamp': '2025-09-10 02:41:22.673663', 'step': 13092, 'epoch': 2} {'type': 'loss', 'content': 0.14803065359592438, 'timestamp': '2025-09-10 02:41:22.675961', 'step': 13093, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 3322488817984}, 'timestamp': '2025-09-10 02:41:22.706263', 'step': 13093, 'epoch': 2} {'type': 'loss', 'content': 0.08016930520534515, 'timestamp': '2025-09-10 02:41:22.709238', 'step': 13094, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 128], 'flops': 3797092544000}, 'timestamp': '2025-09-10 02:41:22.740720', 'step': 13094, 'epoch': 2} {'type': 'loss', 'content': 0.1621192991733551, 'timestamp': '2025-09-10 02:41:22.742765', 'step': 13095, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 2847885091968}, 'timestamp': '2025-09-10 02:41:22.773391', 'step': 13095, 'epoch': 2} {'type': 'loss', 'content': 0.1255151480436325, 'timestamp': '2025-09-10 02:41:22.797269', 'step': 13096, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 3322488817984}, 'timestamp': '2025-09-10 02:41:22.827664', 'step': 13096, 'epoch': 2} {'type': 'loss', 'content': 0.07873599976301193, 'timestamp': '2025-09-10 02:41:22.829980', 'step': 13097, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 176], 'flops': 5220903722048}, 'timestamp': '2025-09-10 02:41:22.860566', 'step': 13097, 'epoch': 2} {'type': 'loss', 'content': 0.09825512766838074, 'timestamp': '2025-09-10 02:41:22.864842', 'step': 13098, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 128], 'flops': 3797092544000}, 'timestamp': '2025-09-10 02:41:22.895994', 'step': 13098, 'epoch': 2} {'type': 'loss', 'content': 0.12705351412296295, 'timestamp': '2025-09-10 02:41:22.898226', 'step': 13099, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 144], 'flops': 4271696270016}, 'timestamp': '2025-09-10 02:41:22.928431', 'step': 13099, 'epoch': 2} {'type': 'loss', 'content': 0.08820539712905884, 'timestamp': '2025-09-10 02:41:22.952019', 'step': 13100, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 128], 'flops': 3797092544000}, 'timestamp': '2025-09-10 02:41:22.984184', 'step': 13100, 'epoch': 2} {'type': 'loss', 'content': 0.09426110982894897, 'timestamp': '2025-09-10 02:41:22.986269', 'step': 13101, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 144], 'flops': 4271696270016}, 'timestamp': '2025-09-10 02:41:23.016521', 'step': 13101, 'epoch': 2} {'type': 'loss', 'content': 0.20696088671684265, 'timestamp': '2025-09-10 02:41:23.019189', 'step': 13102, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 80], 'flops': 2373281365952}, 'timestamp': '2025-09-10 02:41:23.049435', 'step': 13102, 'epoch': 2} {'type': 'loss', 'content': 0.11295538395643234, 'timestamp': '2025-09-10 02:41:23.051632', 'step': 13103, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 128], 'flops': 3797092544000}, 'timestamp': '2025-09-10 02:41:23.082318', 'step': 13103, 'epoch': 2} {'type': 'loss', 'content': 0.1619415283203125, 'timestamp': '2025-09-10 02:41:23.106034', 'step': 13104, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 128], 'flops': 3797092544000}, 'timestamp': '2025-09-10 02:41:23.137189', 'step': 13104, 'epoch': 2} {'type': 'loss', 'content': 0.10452980548143387, 'timestamp': '2025-09-10 02:41:23.140402', 'step': 13105, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 128], 'flops': 3797092544000}, 'timestamp': '2025-09-10 02:41:23.172137', 'step': 13105, 'epoch': 2} {'type': 'loss', 'content': 0.11363226175308228, 'timestamp': '2025-09-10 02:41:23.174706', 'step': 13106, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 3322488817984}, 'timestamp': '2025-09-10 02:41:23.205317', 'step': 13106, 'epoch': 2} {'type': 'loss', 'content': 0.04976944252848625, 'timestamp': '2025-09-10 02:41:23.207674', 'step': 13107, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 176], 'flops': 5220903722048}, 'timestamp': '2025-09-10 02:41:23.238676', 'step': 13107, 'epoch': 2} {'type': 'loss', 'content': 0.14549213647842407, 'timestamp': '2025-09-10 02:41:23.263592', 'step': 13108, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 2847885091968}, 'timestamp': '2025-09-10 02:41:23.294555', 'step': 13108, 'epoch': 2} {'type': 'loss', 'content': 0.10963208228349686, 'timestamp': '2025-09-10 02:41:23.297009', 'step': 13109, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 3322488817984}, 'timestamp': '2025-09-10 02:41:23.327812', 'step': 13109, 'epoch': 2} {'type': 'loss', 'content': 0.0913698822259903, 'timestamp': '2025-09-10 02:41:23.330141', 'step': 13110, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 2847885091968}, 'timestamp': '2025-09-10 02:41:23.360485', 'step': 13110, 'epoch': 2} {'type': 'loss', 'content': 0.084140345454216, 'timestamp': '2025-09-10 02:41:23.363955', 'step': 13111, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 128], 'flops': 3797092544000}, 'timestamp': '2025-09-10 02:41:23.402650', 'step': 13111, 'epoch': 2} {'type': 'loss', 'content': 0.019553834572434425, 'timestamp': '2025-09-10 02:41:23.426732', 'step': 13112, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 128], 'flops': 3797092544000}, 'timestamp': '2025-09-10 02:41:23.457341', 'step': 13112, 'epoch': 2} {'type': 'loss', 'content': 0.05581362918019295, 'timestamp': '2025-09-10 02:41:23.459853', 'step': 13113, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 144], 'flops': 4271696270016}, 'timestamp': '2025-09-10 02:41:23.490904', 'step': 13113, 'epoch': 2} {'type': 'loss', 'content': 0.13396692276000977, 'timestamp': '2025-09-10 02:41:23.493107', 'step': 13114, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 128], 'flops': 3797092544000}, 'timestamp': '2025-09-10 02:41:23.522954', 'step': 13114, 'epoch': 2} {'type': 'loss', 'content': 0.10619109869003296, 'timestamp': '2025-09-10 02:41:23.525096', 'step': 13115, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 2847885091968}, 'timestamp': '2025-09-10 02:41:23.555395', 'step': 13115, 'epoch': 2} {'type': 'loss', 'content': 0.1645677387714386, 'timestamp': '2025-09-10 02:41:23.579148', 'step': 13116, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 2847885091968}, 'timestamp': '2025-09-10 02:41:23.609033', 'step': 13116, 'epoch': 2} {'type': 'loss', 'content': 0.08533235639333725, 'timestamp': '2025-09-10 02:41:23.611466', 'step': 13117, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 128], 'flops': 3797092544000}, 'timestamp': '2025-09-10 02:41:23.642375', 'step': 13117, 'epoch': 2} {'type': 'loss', 'content': 0.17863458395004272, 'timestamp': '2025-09-10 02:41:23.645981', 'step': 13118, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 3322488817984}, 'timestamp': '2025-09-10 02:41:23.675763', 'step': 13118, 'epoch': 2} {'type': 'loss', 'content': 0.06390781700611115, 'timestamp': '2025-09-10 02:41:23.678193', 'step': 13119, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 3322488817984}, 'timestamp': '2025-09-10 02:41:23.707640', 'step': 13119, 'epoch': 2} {'type': 'loss', 'content': 0.08501237630844116, 'timestamp': '2025-09-10 02:41:23.731342', 'step': 13120, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 2847885091968}, 'timestamp': '2025-09-10 02:41:23.761559', 'step': 13120, 'epoch': 2} {'type': 'loss', 'content': 0.09658833593130112, 'timestamp': '2025-09-10 02:41:23.764591', 'step': 13121, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 3322488817984}, 'timestamp': '2025-09-10 02:41:23.797212', 'step': 13121, 'epoch': 2} {'type': 'loss', 'content': 0.1439291089773178, 'timestamp': '2025-09-10 02:41:23.799383', 'step': 13122, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 3322488817984}, 'timestamp': '2025-09-10 02:41:23.828725', 'step': 13122, 'epoch': 2} {'type': 'loss', 'content': 0.06412125378847122, 'timestamp': '2025-09-10 02:41:23.831423', 'step': 13123, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 2847885091968}, 'timestamp': '2025-09-10 02:41:23.862497', 'step': 13123, 'epoch': 2} {'type': 'loss', 'content': 0.07459168881177902, 'timestamp': '2025-09-10 02:41:23.886014', 'step': 13124, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 128], 'flops': 3797092544000}, 'timestamp': '2025-09-10 02:41:23.915708', 'step': 13124, 'epoch': 2} {'type': 'loss', 'content': 0.055430445820093155, 'timestamp': '2025-09-10 02:41:23.917908', 'step': 13125, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 80], 'flops': 2373281365952}, 'timestamp': '2025-09-10 02:41:23.946794', 'step': 13125, 'epoch': 2} {'type': 'loss', 'content': 0.024713443592190742, 'timestamp': '2025-09-10 02:41:23.950460', 'step': 13126, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 128], 'flops': 3797092544000}, 'timestamp': '2025-09-10 02:41:23.981862', 'step': 13126, 'epoch': 2} {'type': 'loss', 'content': 0.060479145497083664, 'timestamp': '2025-09-10 02:41:23.984302', 'step': 13127, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 2847885091968}, 'timestamp': '2025-09-10 02:41:24.013959', 'step': 13127, 'epoch': 2} {'type': 'loss', 'content': 0.08137737959623337, 'timestamp': '2025-09-10 02:41:24.037612', 'step': 13128, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 176], 'flops': 5220903722048}, 'timestamp': '2025-09-10 02:41:24.067115', 'step': 13128, 'epoch': 2} {'type': 'loss', 'content': 0.05481899529695511, 'timestamp': '2025-09-10 02:41:24.069412', 'step': 13129, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 128], 'flops': 3797092544000}, 'timestamp': '2025-09-10 02:41:24.100024', 'step': 13129, 'epoch': 2} {'type': 'loss', 'content': 0.058677006512880325, 'timestamp': '2025-09-10 02:41:24.102253', 'step': 13130, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 3322488817984}, 'timestamp': '2025-09-10 02:41:24.131963', 'step': 13130, 'epoch': 2} {'type': 'loss', 'content': 0.10666199773550034, 'timestamp': '2025-09-10 02:41:24.134711', 'step': 13131, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 128], 'flops': 3797092544000}, 'timestamp': '2025-09-10 02:41:24.164583', 'step': 13131, 'epoch': 2} {'type': 'loss', 'content': 0.13307161629199982, 'timestamp': '2025-09-10 02:41:24.188338', 'step': 13132, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 160], 'flops': 4746299996032}, 'timestamp': '2025-09-10 02:41:24.219036', 'step': 13132, 'epoch': 2} {'type': 'loss', 'content': 0.16649563610553741, 'timestamp': '2025-09-10 02:41:24.220909', 'step': 13133, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 128], 'flops': 3797092544000}, 'timestamp': '2025-09-10 02:41:24.249987', 'step': 13133, 'epoch': 2} {'type': 'loss', 'content': 0.07583244889974594, 'timestamp': '2025-09-10 02:41:24.254511', 'step': 13134, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 3322488817984}, 'timestamp': '2025-09-10 02:41:24.284450', 'step': 13134, 'epoch': 2} {'type': 'loss', 'content': 0.08561699837446213, 'timestamp': '2025-09-10 02:41:24.289715', 'step': 13135, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 128], 'flops': 3797092544000}, 'timestamp': '2025-09-10 02:41:24.322582', 'step': 13135, 'epoch': 2} {'type': 'loss', 'content': 0.12030690163373947, 'timestamp': '2025-09-10 02:41:24.346214', 'step': 13136, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 2847885091968}, 'timestamp': '2025-09-10 02:41:24.376507', 'step': 13136, 'epoch': 2} {'type': 'loss', 'content': 0.08310731500387192, 'timestamp': '2025-09-10 02:41:24.379375', 'step': 13137, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 2847885091968}, 'timestamp': '2025-09-10 02:41:24.415542', 'step': 13137, 'epoch': 2} {'type': 'loss', 'content': 0.057598818093538284, 'timestamp': '2025-09-10 02:41:24.420070', 'step': 13138, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 128], 'flops': 3797092544000}, 'timestamp': '2025-09-10 02:41:24.452608', 'step': 13138, 'epoch': 2} {'type': 'loss', 'content': 0.05682498216629028, 'timestamp': '2025-09-10 02:41:24.456451', 'step': 13139, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 2847885091968}, 'timestamp': '2025-09-10 02:41:24.486653', 'step': 13139, 'epoch': 2} {'type': 'loss', 'content': 0.08264879882335663, 'timestamp': '2025-09-10 02:41:24.510822', 'step': 13140, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 3322488817984}, 'timestamp': '2025-09-10 02:41:24.541928', 'step': 13140, 'epoch': 2} {'type': 'loss', 'content': 0.10430970788002014, 'timestamp': '2025-09-10 02:41:24.545093', 'step': 13141, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 2847885091968}, 'timestamp': '2025-09-10 02:41:24.578348', 'step': 13141, 'epoch': 2} {'type': 'loss', 'content': 0.1148289144039154, 'timestamp': '2025-09-10 02:41:24.580380', 'step': 13142, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 80], 'flops': 2373281365952}, 'timestamp': '2025-09-10 02:41:24.609810', 'step': 13142, 'epoch': 2} {'type': 'loss', 'content': 0.10624488443136215, 'timestamp': '2025-09-10 02:41:24.612137', 'step': 13143, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 2847885091968}, 'timestamp': '2025-09-10 02:41:24.641416', 'step': 13143, 'epoch': 2} {'type': 'loss', 'content': 0.19417479634284973, 'timestamp': '2025-09-10 02:41:24.664736', 'step': 13144, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 2847885091968}, 'timestamp': '2025-09-10 02:41:24.693446', 'step': 13144, 'epoch': 2} {'type': 'loss', 'content': 0.0740976333618164, 'timestamp': '2025-09-10 02:41:24.695278', 'step': 13145, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 3322488817984}, 'timestamp': '2025-09-10 02:41:24.724283', 'step': 13145, 'epoch': 2} {'type': 'loss', 'content': 0.11342484503984451, 'timestamp': '2025-09-10 02:41:24.726243', 'step': 13146, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 2847885091968}, 'timestamp': '2025-09-10 02:41:24.755227', 'step': 13146, 'epoch': 2} {'type': 'loss', 'content': 0.106416255235672, 'timestamp': '2025-09-10 02:41:24.757269', 'step': 13147, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 128], 'flops': 3797092544000}, 'timestamp': '2025-09-10 02:41:24.787455', 'step': 13147, 'epoch': 2} {'type': 'loss', 'content': 0.10842451453208923, 'timestamp': '2025-09-10 02:41:24.810551', 'step': 13148, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 144], 'flops': 4271696270016}, 'timestamp': '2025-09-10 02:41:24.839730', 'step': 13148, 'epoch': 2} {'type': 'loss', 'content': 0.10195104032754898, 'timestamp': '2025-09-10 02:41:24.841958', 'step': 13149, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 3322488817984}, 'timestamp': '2025-09-10 02:41:24.871940', 'step': 13149, 'epoch': 2} {'type': 'loss', 'content': 0.13531231880187988, 'timestamp': '2025-09-10 02:41:24.874251', 'step': 13150, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 144], 'flops': 4271696270016}, 'timestamp': '2025-09-10 02:41:24.906843', 'step': 13150, 'epoch': 2} {'type': 'loss', 'content': 0.10766465216875076, 'timestamp': '2025-09-10 02:41:24.909293', 'step': 13151, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 144], 'flops': 4271696270016}, 'timestamp': '2025-09-10 02:41:24.939593', 'step': 13151, 'epoch': 2} {'type': 'loss', 'content': 0.0880076065659523, 'timestamp': '2025-09-10 02:41:24.963536', 'step': 13152, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 160], 'flops': 4746299996032}, 'timestamp': '2025-09-10 02:41:24.997143', 'step': 13152, 'epoch': 2} {'type': 'loss', 'content': 0.06573010236024857, 'timestamp': '2025-09-10 02:41:25.000070', 'step': 13153, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 128], 'flops': 3797092544000}, 'timestamp': '2025-09-10 02:41:25.031508', 'step': 13153, 'epoch': 2} {'type': 'loss', 'content': 0.15470805764198303, 'timestamp': '2025-09-10 02:41:25.035080', 'step': 13154, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 144], 'flops': 4271696270016}, 'timestamp': '2025-09-10 02:41:25.066969', 'step': 13154, 'epoch': 2} {'type': 'loss', 'content': 0.14579370617866516, 'timestamp': '2025-09-10 02:41:25.069255', 'step': 13155, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 3322488817984}, 'timestamp': '2025-09-10 02:41:25.098563', 'step': 13155, 'epoch': 2} {'type': 'loss', 'content': 0.1592497080564499, 'timestamp': '2025-09-10 02:41:25.121880', 'step': 13156, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 128], 'flops': 3797092544000}, 'timestamp': '2025-09-10 02:41:25.152405', 'step': 13156, 'epoch': 2} {'type': 'loss', 'content': 0.07474590092897415, 'timestamp': '2025-09-10 02:41:25.154098', 'step': 13157, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 128], 'flops': 3797092544000}, 'timestamp': '2025-09-10 02:41:25.185205', 'step': 13157, 'epoch': 2} {'type': 'loss', 'content': 0.1265280544757843, 'timestamp': '2025-09-10 02:41:25.187293', 'step': 13158, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 2847885091968}, 'timestamp': '2025-09-10 02:41:25.216608', 'step': 13158, 'epoch': 2} {'type': 'loss', 'content': 0.07803403586149216, 'timestamp': '2025-09-10 02:41:25.218275', 'step': 13159, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 3322488817984}, 'timestamp': '2025-09-10 02:41:25.247316', 'step': 13159, 'epoch': 2} {'type': 'loss', 'content': 0.03764998912811279, 'timestamp': '2025-09-10 02:41:25.270322', 'step': 13160, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 128], 'flops': 3797092544000}, 'timestamp': '2025-09-10 02:41:25.300248', 'step': 13160, 'epoch': 2} {'type': 'loss', 'content': 0.11907581239938736, 'timestamp': '2025-09-10 02:41:25.302167', 'step': 13161, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 3322488817984}, 'timestamp': '2025-09-10 02:41:25.331094', 'step': 13161, 'epoch': 2} {'type': 'loss', 'content': 0.050989434123039246, 'timestamp': '2025-09-10 02:41:25.333419', 'step': 13162, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 2847885091968}, 'timestamp': '2025-09-10 02:41:25.363315', 'step': 13162, 'epoch': 2} {'type': 'loss', 'content': 0.10426162928342819, 'timestamp': '2025-09-10 02:41:25.366079', 'step': 13163, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 2847885091968}, 'timestamp': '2025-09-10 02:41:25.402938', 'step': 13163, 'epoch': 2} {'type': 'loss', 'content': 0.07277313619852066, 'timestamp': '2025-09-10 02:41:25.426348', 'step': 13164, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 3322488817984}, 'timestamp': '2025-09-10 02:41:25.456022', 'step': 13164, 'epoch': 2} {'type': 'loss', 'content': 0.09335802495479584, 'timestamp': '2025-09-10 02:41:25.458436', 'step': 13165, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 128], 'flops': 3797092544000}, 'timestamp': '2025-09-10 02:41:25.487774', 'step': 13165, 'epoch': 2} {'type': 'loss', 'content': 0.09162881970405579, 'timestamp': '2025-09-10 02:41:25.490279', 'step': 13166, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 160], 'flops': 4746299996032}, 'timestamp': '2025-09-10 02:41:25.520250', 'step': 13166, 'epoch': 2} {'type': 'loss', 'content': 0.08695130050182343, 'timestamp': '2025-09-10 02:41:25.523031', 'step': 13167, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 144], 'flops': 4271696270016}, 'timestamp': '2025-09-10 02:41:25.553415', 'step': 13167, 'epoch': 2} {'type': 'loss', 'content': 0.09589492529630661, 'timestamp': '2025-09-10 02:41:25.577420', 'step': 13168, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 2847885091968}, 'timestamp': '2025-09-10 02:41:25.608027', 'step': 13168, 'epoch': 2} {'type': 'loss', 'content': 0.10538268089294434, 'timestamp': '2025-09-10 02:41:25.610136', 'step': 13169, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 3322488817984}, 'timestamp': '2025-09-10 02:41:25.639674', 'step': 13169, 'epoch': 2} {'type': 'loss', 'content': 0.09178928285837173, 'timestamp': '2025-09-10 02:41:25.641886', 'step': 13170, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 128], 'flops': 3797092544000}, 'timestamp': '2025-09-10 02:41:25.671889', 'step': 13170, 'epoch': 2} {'type': 'loss', 'content': 0.10875162482261658, 'timestamp': '2025-09-10 02:41:25.673955', 'step': 13171, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 2847885091968}, 'timestamp': '2025-09-10 02:41:25.703733', 'step': 13171, 'epoch': 2} {'type': 'loss', 'content': 0.04495268315076828, 'timestamp': '2025-09-10 02:41:25.727396', 'step': 13172, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 128], 'flops': 3797092544000}, 'timestamp': '2025-09-10 02:41:25.756865', 'step': 13172, 'epoch': 2} {'type': 'loss', 'content': 0.04788292199373245, 'timestamp': '2025-09-10 02:41:25.758965', 'step': 13173, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 3322488817984}, 'timestamp': '2025-09-10 02:41:25.788331', 'step': 13173, 'epoch': 2} {'type': 'loss', 'content': 0.11471693217754364, 'timestamp': '2025-09-10 02:41:25.791163', 'step': 13174, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 144], 'flops': 4271696270016}, 'timestamp': '2025-09-10 02:41:25.821296', 'step': 13174, 'epoch': 2} {'type': 'loss', 'content': 0.07833505421876907, 'timestamp': '2025-09-10 02:41:25.823499', 'step': 13175, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 144], 'flops': 4271696270016}, 'timestamp': '2025-09-10 02:41:25.853249', 'step': 13175, 'epoch': 2} {'type': 'loss', 'content': 0.057203035801649094, 'timestamp': '2025-09-10 02:41:25.876778', 'step': 13176, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 2847885091968}, 'timestamp': '2025-09-10 02:41:25.906737', 'step': 13176, 'epoch': 2} {'type': 'loss', 'content': 0.1091005951166153, 'timestamp': '2025-09-10 02:41:25.909147', 'step': 13177, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 144], 'flops': 4271696270016}, 'timestamp': '2025-09-10 02:41:25.939320', 'step': 13177, 'epoch': 2} {'type': 'loss', 'content': 0.07680235058069229, 'timestamp': '2025-09-10 02:41:25.941974', 'step': 13178, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 3322488817984}, 'timestamp': '2025-09-10 02:41:25.971603', 'step': 13178, 'epoch': 2} {'type': 'loss', 'content': 0.0846063494682312, 'timestamp': '2025-09-10 02:41:25.976319', 'step': 13179, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 2847885091968}, 'timestamp': '2025-09-10 02:41:26.006775', 'step': 13179, 'epoch': 2} {'type': 'loss', 'content': 0.14071868360042572, 'timestamp': '2025-09-10 02:41:26.030359', 'step': 13180, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 144], 'flops': 4271696270016}, 'timestamp': '2025-09-10 02:41:26.060274', 'step': 13180, 'epoch': 2} {'type': 'loss', 'content': 0.07009828835725784, 'timestamp': '2025-09-10 02:41:26.062545', 'step': 13181, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 3322488817984}, 'timestamp': '2025-09-10 02:41:26.092284', 'step': 13181, 'epoch': 2} {'type': 'loss', 'content': 0.1058851107954979, 'timestamp': '2025-09-10 02:41:26.094437', 'step': 13182, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 3322488817984}, 'timestamp': '2025-09-10 02:41:26.123569', 'step': 13182, 'epoch': 2} {'type': 'loss', 'content': 0.11144368350505829, 'timestamp': '2025-09-10 02:41:26.125707', 'step': 13183, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 3322488817984}, 'timestamp': '2025-09-10 02:41:26.154599', 'step': 13183, 'epoch': 2} {'type': 'loss', 'content': 0.07904215157032013, 'timestamp': '2025-09-10 02:41:26.178167', 'step': 13184, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 2847885091968}, 'timestamp': '2025-09-10 02:41:26.209464', 'step': 13184, 'epoch': 2} {'type': 'loss', 'content': 0.07126302272081375, 'timestamp': '2025-09-10 02:41:26.211496', 'step': 13185, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 144], 'flops': 4271696270016}, 'timestamp': '2025-09-10 02:41:26.241231', 'step': 13185, 'epoch': 2} {'type': 'loss', 'content': 0.15509288012981415, 'timestamp': '2025-09-10 02:41:26.243492', 'step': 13186, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 144], 'flops': 4271696270016}, 'timestamp': '2025-09-10 02:41:26.273758', 'step': 13186, 'epoch': 2} {'type': 'loss', 'content': 0.09687243402004242, 'timestamp': '2025-09-10 02:41:26.276505', 'step': 13187, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 3322488817984}, 'timestamp': '2025-09-10 02:41:26.309152', 'step': 13187, 'epoch': 2} {'type': 'loss', 'content': 0.06556226313114166, 'timestamp': '2025-09-10 02:41:26.332603', 'step': 13188, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 2847885091968}, 'timestamp': '2025-09-10 02:41:26.363135', 'step': 13188, 'epoch': 2} {'type': 'loss', 'content': 0.07783135771751404, 'timestamp': '2025-09-10 02:41:26.365715', 'step': 13189, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 2847885091968}, 'timestamp': '2025-09-10 02:41:26.396904', 'step': 13189, 'epoch': 2} {'type': 'loss', 'content': 0.14854231476783752, 'timestamp': '2025-09-10 02:41:26.399749', 'step': 13190, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 144], 'flops': 4271696270016}, 'timestamp': '2025-09-10 02:41:26.429456', 'step': 13190, 'epoch': 2} {'type': 'loss', 'content': 0.08023247867822647, 'timestamp': '2025-09-10 02:41:26.432222', 'step': 13191, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 3322488817984}, 'timestamp': '2025-09-10 02:41:26.461542', 'step': 13191, 'epoch': 2} {'type': 'loss', 'content': 0.09036243706941605, 'timestamp': '2025-09-10 02:41:26.485096', 'step': 13192, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 2847885091968}, 'timestamp': '2025-09-10 02:41:26.514009', 'step': 13192, 'epoch': 2} {'type': 'loss', 'content': 0.06746111810207367, 'timestamp': '2025-09-10 02:41:26.516275', 'step': 13193, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 3322488817984}, 'timestamp': '2025-09-10 02:41:26.546525', 'step': 13193, 'epoch': 2} {'type': 'loss', 'content': 0.08609485626220703, 'timestamp': '2025-09-10 02:41:26.560924', 'step': 13194, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 3322488817984}, 'timestamp': '2025-09-10 02:41:26.592257', 'step': 13194, 'epoch': 2} {'type': 'loss', 'content': 0.06604388356208801, 'timestamp': '2025-09-10 02:41:26.594215', 'step': 13195, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 144], 'flops': 4271696270016}, 'timestamp': '2025-09-10 02:41:26.623754', 'step': 13195, 'epoch': 2} {'type': 'loss', 'content': 0.10523002594709396, 'timestamp': '2025-09-10 02:41:26.647445', 'step': 13196, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 144], 'flops': 4271696270016}, 'timestamp': '2025-09-10 02:41:26.676692', 'step': 13196, 'epoch': 2} {'type': 'loss', 'content': 0.11927451193332672, 'timestamp': '2025-09-10 02:41:26.678875', 'step': 13197, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 128], 'flops': 3797092544000}, 'timestamp': '2025-09-10 02:41:26.709771', 'step': 13197, 'epoch': 2} {'type': 'loss', 'content': 0.1034158393740654, 'timestamp': '2025-09-10 02:41:26.712062', 'step': 13198, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 2847885091968}, 'timestamp': '2025-09-10 02:41:26.742312', 'step': 13198, 'epoch': 2} {'type': 'loss', 'content': 0.10855719447135925, 'timestamp': '2025-09-10 02:41:26.744700', 'step': 13199, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 3322488817984}, 'timestamp': '2025-09-10 02:41:26.774157', 'step': 13199, 'epoch': 2} {'type': 'loss', 'content': 0.09080976247787476, 'timestamp': '2025-09-10 02:41:26.800093', 'step': 13200, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 2847885091968}, 'timestamp': '2025-09-10 02:41:26.830604', 'step': 13200, 'epoch': 2} {'type': 'loss', 'content': 0.07150247693061829, 'timestamp': '2025-09-10 02:41:26.832977', 'step': 13201, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 160], 'flops': 4746299996032}, 'timestamp': '2025-09-10 02:41:26.862706', 'step': 13201, 'epoch': 2} {'type': 'loss', 'content': 0.0834798663854599, 'timestamp': '2025-09-10 02:41:26.865640', 'step': 13202, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 2847885091968}, 'timestamp': '2025-09-10 02:41:26.897049', 'step': 13202, 'epoch': 2} {'type': 'loss', 'content': 0.11117936670780182, 'timestamp': '2025-09-10 02:41:26.900119', 'step': 13203, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 128], 'flops': 3797092544000}, 'timestamp': '2025-09-10 02:41:26.931595', 'step': 13203, 'epoch': 2} {'type': 'loss', 'content': 0.10107410699129105, 'timestamp': '2025-09-10 02:41:26.955185', 'step': 13204, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 2847885091968}, 'timestamp': '2025-09-10 02:41:26.985795', 'step': 13204, 'epoch': 2} {'type': 'loss', 'content': 0.10515231639146805, 'timestamp': '2025-09-10 02:41:26.990296', 'step': 13205, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 2847885091968}, 'timestamp': '2025-09-10 02:41:27.020158', 'step': 13205, 'epoch': 2} {'type': 'loss', 'content': 0.14414796233177185, 'timestamp': '2025-09-10 02:41:27.022235', 'step': 13206, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 2847885091968}, 'timestamp': '2025-09-10 02:41:27.053079', 'step': 13206, 'epoch': 2} {'type': 'loss', 'content': 0.06944579631090164, 'timestamp': '2025-09-10 02:41:27.055547', 'step': 13207, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 3322488817984}, 'timestamp': '2025-09-10 02:41:27.086226', 'step': 13207, 'epoch': 2} {'type': 'loss', 'content': 0.08497811108827591, 'timestamp': '2025-09-10 02:41:27.110247', 'step': 13208, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 2847885091968}, 'timestamp': '2025-09-10 02:41:27.139971', 'step': 13208, 'epoch': 2} {'type': 'loss', 'content': 0.12952838838100433, 'timestamp': '2025-09-10 02:41:27.141948', 'step': 13209, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 2847885091968}, 'timestamp': '2025-09-10 02:41:27.171137', 'step': 13209, 'epoch': 2} {'type': 'loss', 'content': 0.13530392944812775, 'timestamp': '2025-09-10 02:41:27.172942', 'step': 13210, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 2847885091968}, 'timestamp': '2025-09-10 02:41:27.202882', 'step': 13210, 'epoch': 2} {'type': 'loss', 'content': 0.11197468638420105, 'timestamp': '2025-09-10 02:41:27.204828', 'step': 13211, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 2847885091968}, 'timestamp': '2025-09-10 02:41:27.234106', 'step': 13211, 'epoch': 2} {'type': 'loss', 'content': 0.08697310090065002, 'timestamp': '2025-09-10 02:41:27.257737', 'step': 13212, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 144], 'flops': 4271696270016}, 'timestamp': '2025-09-10 02:41:27.288696', 'step': 13212, 'epoch': 2} {'type': 'loss', 'content': 0.058592136949300766, 'timestamp': '2025-09-10 02:41:27.290587', 'step': 13213, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 3322488817984}, 'timestamp': '2025-09-10 02:41:27.321245', 'step': 13213, 'epoch': 2} {'type': 'loss', 'content': 0.08806999027729034, 'timestamp': '2025-09-10 02:41:27.323985', 'step': 13214, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 128], 'flops': 3797092544000}, 'timestamp': '2025-09-10 02:41:27.354197', 'step': 13214, 'epoch': 2} {'type': 'loss', 'content': 0.06620193272829056, 'timestamp': '2025-09-10 02:41:27.356453', 'step': 13215, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 2847885091968}, 'timestamp': '2025-09-10 02:41:27.387134', 'step': 13215, 'epoch': 2} {'type': 'loss', 'content': 0.07179688662290573, 'timestamp': '2025-09-10 02:41:27.410586', 'step': 13216, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 3322488817984}, 'timestamp': '2025-09-10 02:41:27.440898', 'step': 13216, 'epoch': 2} {'type': 'loss', 'content': 0.11055555194616318, 'timestamp': '2025-09-10 02:41:27.442906', 'step': 13217, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 144], 'flops': 4271696270016}, 'timestamp': '2025-09-10 02:41:27.473285', 'step': 13217, 'epoch': 2} {'type': 'loss', 'content': 0.17515939474105835, 'timestamp': '2025-09-10 02:41:27.475964', 'step': 13218, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 3322488817984}, 'timestamp': '2025-09-10 02:41:27.506242', 'step': 13218, 'epoch': 2} {'type': 'loss', 'content': 0.0830811932682991, 'timestamp': '2025-09-10 02:41:27.508205', 'step': 13219, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 2847885091968}, 'timestamp': '2025-09-10 02:41:27.536836', 'step': 13219, 'epoch': 2} {'type': 'loss', 'content': 0.08028280735015869, 'timestamp': '2025-09-10 02:41:27.560213', 'step': 13220, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 2847885091968}, 'timestamp': '2025-09-10 02:41:27.589978', 'step': 13220, 'epoch': 2} {'type': 'loss', 'content': 0.09991162270307541, 'timestamp': '2025-09-10 02:41:27.592091', 'step': 13221, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 128], 'flops': 3797092544000}, 'timestamp': '2025-09-10 02:41:27.621620', 'step': 13221, 'epoch': 2} {'type': 'loss', 'content': 0.1490732878446579, 'timestamp': '2025-09-10 02:41:27.623862', 'step': 13222, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 2847885091968}, 'timestamp': '2025-09-10 02:41:27.652721', 'step': 13222, 'epoch': 2} {'type': 'loss', 'content': 0.029531287029385567, 'timestamp': '2025-09-10 02:41:27.655001', 'step': 13223, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 2847885091968}, 'timestamp': '2025-09-10 02:41:27.684445', 'step': 13223, 'epoch': 2} {'type': 'loss', 'content': 0.07418045401573181, 'timestamp': '2025-09-10 02:41:27.707492', 'step': 13224, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 2847885091968}, 'timestamp': '2025-09-10 02:41:27.736936', 'step': 13224, 'epoch': 2} {'type': 'loss', 'content': 0.2034127116203308, 'timestamp': '2025-09-10 02:41:27.738761', 'step': 13225, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 160], 'flops': 4746299996032}, 'timestamp': '2025-09-10 02:41:27.768519', 'step': 13225, 'epoch': 2} {'type': 'loss', 'content': 0.12855949997901917, 'timestamp': '2025-09-10 02:41:27.771097', 'step': 13226, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 144], 'flops': 4271696270016}, 'timestamp': '2025-09-10 02:41:27.801339', 'step': 13226, 'epoch': 2} {'type': 'loss', 'content': 0.12570449709892273, 'timestamp': '2025-09-10 02:41:27.805173', 'step': 13227, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 128], 'flops': 3797092544000}, 'timestamp': '2025-09-10 02:41:27.835452', 'step': 13227, 'epoch': 2} {'type': 'loss', 'content': 0.11816529929637909, 'timestamp': '2025-09-10 02:41:27.859003', 'step': 13228, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 3322488817984}, 'timestamp': '2025-09-10 02:41:27.889087', 'step': 13228, 'epoch': 2} {'type': 'loss', 'content': 0.13274742662906647, 'timestamp': '2025-09-10 02:41:27.891314', 'step': 13229, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 80], 'flops': 2373281365952}, 'timestamp': '2025-09-10 02:41:27.923298', 'step': 13229, 'epoch': 2} {'type': 'loss', 'content': 0.04174859821796417, 'timestamp': '2025-09-10 02:41:27.929159', 'step': 13230, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 128], 'flops': 3797092544000}, 'timestamp': '2025-09-10 02:41:27.968439', 'step': 13230, 'epoch': 2} {'type': 'loss', 'content': 0.1639748364686966, 'timestamp': '2025-09-10 02:41:27.972103', 'step': 13231, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 2847885091968}, 'timestamp': '2025-09-10 02:41:28.003043', 'step': 13231, 'epoch': 2} {'type': 'loss', 'content': 0.2230633944272995, 'timestamp': '2025-09-10 02:41:28.026783', 'step': 13232, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 3322488817984}, 'timestamp': '2025-09-10 02:41:28.056321', 'step': 13232, 'epoch': 2} {'type': 'loss', 'content': 0.08971273899078369, 'timestamp': '2025-09-10 02:41:28.058356', 'step': 13233, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 2847885091968}, 'timestamp': '2025-09-10 02:41:28.087880', 'step': 13233, 'epoch': 2} {'type': 'loss', 'content': 0.05786348134279251, 'timestamp': '2025-09-10 02:41:28.090029', 'step': 13234, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 128], 'flops': 3797092544000}, 'timestamp': '2025-09-10 02:41:28.120981', 'step': 13234, 'epoch': 2} {'type': 'loss', 'content': 0.1148015484213829, 'timestamp': '2025-09-10 02:41:28.123508', 'step': 13235, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 2847885091968}, 'timestamp': '2025-09-10 02:41:28.154581', 'step': 13235, 'epoch': 2} {'type': 'loss', 'content': 0.0762634202837944, 'timestamp': '2025-09-10 02:41:28.177754', 'step': 13236, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 2847885091968}, 'timestamp': '2025-09-10 02:41:28.207619', 'step': 13236, 'epoch': 2} {'type': 'loss', 'content': 0.08961403369903564, 'timestamp': '2025-09-10 02:41:28.209939', 'step': 13237, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 2847885091968}, 'timestamp': '2025-09-10 02:41:28.240212', 'step': 13237, 'epoch': 2} {'type': 'loss', 'content': 0.10497492551803589, 'timestamp': '2025-09-10 02:41:28.243350', 'step': 13238, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 128], 'flops': 3797092544000}, 'timestamp': '2025-09-10 02:41:28.275381', 'step': 13238, 'epoch': 2} {'type': 'loss', 'content': 0.12519420683383942, 'timestamp': '2025-09-10 02:41:28.277447', 'step': 13239, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 144], 'flops': 4271696270016}, 'timestamp': '2025-09-10 02:41:28.307710', 'step': 13239, 'epoch': 2} {'type': 'loss', 'content': 0.0827370434999466, 'timestamp': '2025-09-10 02:41:28.331640', 'step': 13240, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 128], 'flops': 3797092544000}, 'timestamp': '2025-09-10 02:41:28.362576', 'step': 13240, 'epoch': 2} {'type': 'loss', 'content': 0.12837374210357666, 'timestamp': '2025-09-10 02:41:28.365088', 'step': 13241, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 3322488817984}, 'timestamp': '2025-09-10 02:41:28.395481', 'step': 13241, 'epoch': 2} {'type': 'loss', 'content': 0.041427720338106155, 'timestamp': '2025-09-10 02:41:28.398050', 'step': 13242, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 3322488817984}, 'timestamp': '2025-09-10 02:41:28.428855', 'step': 13242, 'epoch': 2} {'type': 'loss', 'content': 0.13909828662872314, 'timestamp': '2025-09-10 02:41:28.431233', 'step': 13243, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 128], 'flops': 3797092544000}, 'timestamp': '2025-09-10 02:41:28.460889', 'step': 13243, 'epoch': 2} {'type': 'loss', 'content': 0.07905906438827515, 'timestamp': '2025-09-10 02:41:28.484526', 'step': 13244, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 128], 'flops': 3797092544000}, 'timestamp': '2025-09-10 02:41:28.516684', 'step': 13244, 'epoch': 2} {'type': 'loss', 'content': 0.1464153379201889, 'timestamp': '2025-09-10 02:41:28.518880', 'step': 13245, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 2847885091968}, 'timestamp': '2025-09-10 02:41:28.555225', 'step': 13245, 'epoch': 2} {'type': 'loss', 'content': 0.13501819968223572, 'timestamp': '2025-09-10 02:41:28.558194', 'step': 13246, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 3322488817984}, 'timestamp': '2025-09-10 02:41:28.592843', 'step': 13246, 'epoch': 2} {'type': 'loss', 'content': 0.13152217864990234, 'timestamp': '2025-09-10 02:41:28.594913', 'step': 13247, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 3322488817984}, 'timestamp': '2025-09-10 02:41:28.627339', 'step': 13247, 'epoch': 2} {'type': 'loss', 'content': 0.04313110187649727, 'timestamp': '2025-09-10 02:41:28.651013', 'step': 13248, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 3322488817984}, 'timestamp': '2025-09-10 02:41:28.680652', 'step': 13248, 'epoch': 2} {'type': 'loss', 'content': 0.10148192942142487, 'timestamp': '2025-09-10 02:41:28.685883', 'step': 13249, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 3322488817984}, 'timestamp': '2025-09-10 02:41:28.716233', 'step': 13249, 'epoch': 2} {'type': 'loss', 'content': 0.14558392763137817, 'timestamp': '2025-09-10 02:41:28.718227', 'step': 13250, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 3322488817984}, 'timestamp': '2025-09-10 02:41:28.756955', 'step': 13250, 'epoch': 2} {'type': 'loss', 'content': 0.10029811412096024, 'timestamp': '2025-09-10 02:41:28.759413', 'step': 13251, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 3322488817984}, 'timestamp': '2025-09-10 02:41:28.789547', 'step': 13251, 'epoch': 2} {'type': 'loss', 'content': 0.06414162367582321, 'timestamp': '2025-09-10 02:41:28.817262', 'step': 13252, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 128], 'flops': 3797092544000}, 'timestamp': '2025-09-10 02:41:28.847189', 'step': 13252, 'epoch': 2} {'type': 'loss', 'content': 0.07909903675317764, 'timestamp': '2025-09-10 02:41:28.849736', 'step': 13253, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 3322488817984}, 'timestamp': '2025-09-10 02:41:28.879846', 'step': 13253, 'epoch': 2} {'type': 'loss', 'content': 0.08359783887863159, 'timestamp': '2025-09-10 02:41:28.882260', 'step': 13254, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 2847885091968}, 'timestamp': '2025-09-10 02:41:28.921287', 'step': 13254, 'epoch': 2} {'type': 'loss', 'content': 0.08844026923179626, 'timestamp': '2025-09-10 02:41:28.924897', 'step': 13255, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 128], 'flops': 3797092544000}, 'timestamp': '2025-09-10 02:41:28.955431', 'step': 13255, 'epoch': 2} {'type': 'loss', 'content': 0.12152843177318573, 'timestamp': '2025-09-10 02:41:28.978905', 'step': 13256, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 2847885091968}, 'timestamp': '2025-09-10 02:41:29.012955', 'step': 13256, 'epoch': 2} {'type': 'loss', 'content': 0.1066867858171463, 'timestamp': '2025-09-10 02:41:29.016346', 'step': 13257, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 3322488817984}, 'timestamp': '2025-09-10 02:41:29.046620', 'step': 13257, 'epoch': 2} {'type': 'loss', 'content': 0.0428348034620285, 'timestamp': '2025-09-10 02:41:29.054569', 'step': 13258, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 2847885091968}, 'timestamp': '2025-09-10 02:41:29.097698', 'step': 13258, 'epoch': 2} {'type': 'loss', 'content': 0.07244609296321869, 'timestamp': '2025-09-10 02:41:29.100188', 'step': 13259, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 144], 'flops': 4271696270016}, 'timestamp': '2025-09-10 02:41:29.129447', 'step': 13259, 'epoch': 2} {'type': 'loss', 'content': 0.06167718395590782, 'timestamp': '2025-09-10 02:41:29.152568', 'step': 13260, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 2847885091968}, 'timestamp': '2025-09-10 02:41:29.182598', 'step': 13260, 'epoch': 2} {'type': 'loss', 'content': 0.2008279263973236, 'timestamp': '2025-09-10 02:41:29.185261', 'step': 13261, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 144], 'flops': 4271696270016}, 'timestamp': '2025-09-10 02:41:29.216968', 'step': 13261, 'epoch': 2} {'type': 'loss', 'content': 0.1522700935602188, 'timestamp': '2025-09-10 02:41:29.219379', 'step': 13262, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 3322488817984}, 'timestamp': '2025-09-10 02:41:29.250450', 'step': 13262, 'epoch': 2} {'type': 'loss', 'content': 0.007750713266432285, 'timestamp': '2025-09-10 02:41:29.253092', 'step': 13263, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 2847885091968}, 'timestamp': '2025-09-10 02:41:29.289669', 'step': 13263, 'epoch': 2} {'type': 'loss', 'content': 0.06520899385213852, 'timestamp': '2025-09-10 02:41:29.313207', 'step': 13264, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 2847885091968}, 'timestamp': '2025-09-10 02:41:29.348698', 'step': 13264, 'epoch': 2} {'type': 'loss', 'content': 0.08327971398830414, 'timestamp': '2025-09-10 02:41:29.351260', 'step': 13265, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 144], 'flops': 4271696270016}, 'timestamp': '2025-09-10 02:41:29.385715', 'step': 13265, 'epoch': 2} {'type': 'loss', 'content': 0.13886353373527527, 'timestamp': '2025-09-10 02:41:29.396306', 'step': 13266, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 128], 'flops': 3797092544000}, 'timestamp': '2025-09-10 02:41:29.438188', 'step': 13266, 'epoch': 2} {'type': 'loss', 'content': 0.09937556833028793, 'timestamp': '2025-09-10 02:41:29.441490', 'step': 13267, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 3322488817984}, 'timestamp': '2025-09-10 02:41:29.472965', 'step': 13267, 'epoch': 2} {'type': 'loss', 'content': 0.11989500373601913, 'timestamp': '2025-09-10 02:41:29.497382', 'step': 13268, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 144], 'flops': 4271696270016}, 'timestamp': '2025-09-10 02:41:29.528671', 'step': 13268, 'epoch': 2} {'type': 'loss', 'content': 0.08559667319059372, 'timestamp': '2025-09-10 02:41:29.532627', 'step': 13269, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 2847885091968}, 'timestamp': '2025-09-10 02:41:29.565121', 'step': 13269, 'epoch': 2} {'type': 'loss', 'content': 0.07775896042585373, 'timestamp': '2025-09-10 02:41:29.567864', 'step': 13270, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 2847885091968}, 'timestamp': '2025-09-10 02:41:29.598162', 'step': 13270, 'epoch': 2} {'type': 'loss', 'content': 0.04403426870703697, 'timestamp': '2025-09-10 02:41:29.600776', 'step': 13271, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 80], 'flops': 2373281365952}, 'timestamp': '2025-09-10 02:41:29.631185', 'step': 13271, 'epoch': 2} {'type': 'loss', 'content': 0.05480656027793884, 'timestamp': '2025-09-10 02:41:29.654961', 'step': 13272, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 3322488817984}, 'timestamp': '2025-09-10 02:41:29.686364', 'step': 13272, 'epoch': 2} {'type': 'loss', 'content': 0.10607185959815979, 'timestamp': '2025-09-10 02:41:29.688873', 'step': 13273, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 2847885091968}, 'timestamp': '2025-09-10 02:41:29.719027', 'step': 13273, 'epoch': 2} {'type': 'loss', 'content': 0.11035160720348358, 'timestamp': '2025-09-10 02:41:29.721479', 'step': 13274, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 128], 'flops': 3797092544000}, 'timestamp': '2025-09-10 02:41:29.755795', 'step': 13274, 'epoch': 2} {'type': 'loss', 'content': 0.13926644623279572, 'timestamp': '2025-09-10 02:41:29.758269', 'step': 13275, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 2847885091968}, 'timestamp': '2025-09-10 02:41:29.789296', 'step': 13275, 'epoch': 2} {'type': 'loss', 'content': 0.10462014377117157, 'timestamp': '2025-09-10 02:41:29.813002', 'step': 13276, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 3322488817984}, 'timestamp': '2025-09-10 02:41:29.843656', 'step': 13276, 'epoch': 2} {'type': 'loss', 'content': 0.07555218786001205, 'timestamp': '2025-09-10 02:41:29.846068', 'step': 13277, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 2847885091968}, 'timestamp': '2025-09-10 02:41:29.876142', 'step': 13277, 'epoch': 2} {'type': 'loss', 'content': 0.11654673516750336, 'timestamp': '2025-09-10 02:41:29.878812', 'step': 13278, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 2847885091968}, 'timestamp': '2025-09-10 02:41:29.909507', 'step': 13278, 'epoch': 2} {'type': 'loss', 'content': 0.02649712935090065, 'timestamp': '2025-09-10 02:41:29.911850', 'step': 13279, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 2847885091968}, 'timestamp': '2025-09-10 02:41:29.942136', 'step': 13279, 'epoch': 2} {'type': 'loss', 'content': 0.15762382745742798, 'timestamp': '2025-09-10 02:41:29.965931', 'step': 13280, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 3322488817984}, 'timestamp': '2025-09-10 02:41:29.997091', 'step': 13280, 'epoch': 2} {'type': 'loss', 'content': 0.07650668919086456, 'timestamp': '2025-09-10 02:41:30.000068', 'step': 13281, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 128], 'flops': 3797092544000}, 'timestamp': '2025-09-10 02:41:30.031298', 'step': 13281, 'epoch': 2} {'type': 'loss', 'content': 0.16395896673202515, 'timestamp': '2025-09-10 02:41:30.033882', 'step': 13282, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 128], 'flops': 3797092544000}, 'timestamp': '2025-09-10 02:41:30.067618', 'step': 13282, 'epoch': 2} {'type': 'loss', 'content': 0.07278310507535934, 'timestamp': '2025-09-10 02:41:30.070451', 'step': 13283, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 3322488817984}, 'timestamp': '2025-09-10 02:41:30.101647', 'step': 13283, 'epoch': 2} {'type': 'loss', 'content': 0.08436784893274307, 'timestamp': '2025-09-10 02:41:30.125371', 'step': 13284, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 2847885091968}, 'timestamp': '2025-09-10 02:41:30.156770', 'step': 13284, 'epoch': 2} {'type': 'loss', 'content': 0.0847940444946289, 'timestamp': '2025-09-10 02:41:30.159283', 'step': 13285, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 144], 'flops': 4271696270016}, 'timestamp': '2025-09-10 02:41:30.190465', 'step': 13285, 'epoch': 2} {'type': 'loss', 'content': 0.15212537348270416, 'timestamp': '2025-09-10 02:41:30.193174', 'step': 13286, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 128], 'flops': 3797092544000}, 'timestamp': '2025-09-10 02:41:30.223783', 'step': 13286, 'epoch': 2} {'type': 'loss', 'content': 0.10310585051774979, 'timestamp': '2025-09-10 02:41:30.226164', 'step': 13287, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 3322488817984}, 'timestamp': '2025-09-10 02:41:30.258291', 'step': 13287, 'epoch': 2} {'type': 'loss', 'content': 0.03095170296728611, 'timestamp': '2025-09-10 02:41:30.282132', 'step': 13288, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 2847885091968}, 'timestamp': '2025-09-10 02:41:30.317605', 'step': 13288, 'epoch': 2} {'type': 'loss', 'content': 0.08696366101503372, 'timestamp': '2025-09-10 02:41:30.324909', 'step': 13289, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 3322488817984}, 'timestamp': '2025-09-10 02:41:30.367257', 'step': 13289, 'epoch': 2} {'type': 'loss', 'content': 0.11592119932174683, 'timestamp': '2025-09-10 02:41:30.369704', 'step': 13290, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 3322488817984}, 'timestamp': '2025-09-10 02:41:30.400104', 'step': 13290, 'epoch': 2} {'type': 'loss', 'content': 0.14691896736621857, 'timestamp': '2025-09-10 02:41:30.402356', 'step': 13291, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 128], 'flops': 3797092544000}, 'timestamp': '2025-09-10 02:41:30.432628', 'step': 13291, 'epoch': 2} {'type': 'loss', 'content': 0.07856988161802292, 'timestamp': '2025-09-10 02:41:30.457198', 'step': 13292, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 192], 'flops': 5695507448064}, 'timestamp': '2025-09-10 02:41:30.488569', 'step': 13292, 'epoch': 2} {'type': 'loss', 'content': 0.14168725907802582, 'timestamp': '2025-09-10 02:41:30.491473', 'step': 13293, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 3322488817984}, 'timestamp': '2025-09-10 02:41:30.522174', 'step': 13293, 'epoch': 2} {'type': 'loss', 'content': 0.07507730275392532, 'timestamp': '2025-09-10 02:41:30.524906', 'step': 13294, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 3322488817984}, 'timestamp': '2025-09-10 02:41:30.555452', 'step': 13294, 'epoch': 2} {'type': 'loss', 'content': 0.05084041506052017, 'timestamp': '2025-09-10 02:41:30.557810', 'step': 13295, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 128], 'flops': 3797092544000}, 'timestamp': '2025-09-10 02:41:30.588175', 'step': 13295, 'epoch': 2} {'type': 'loss', 'content': 0.1428932249546051, 'timestamp': '2025-09-10 02:41:30.613336', 'step': 13296, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 3322488817984}, 'timestamp': '2025-09-10 02:41:30.643271', 'step': 13296, 'epoch': 2} {'type': 'loss', 'content': 0.09554178267717361, 'timestamp': '2025-09-10 02:41:30.645917', 'step': 13297, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 3322488817984}, 'timestamp': '2025-09-10 02:41:30.676662', 'step': 13297, 'epoch': 2} {'type': 'loss', 'content': 0.09650872647762299, 'timestamp': '2025-09-10 02:41:30.680274', 'step': 13298, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 3322488817984}, 'timestamp': '2025-09-10 02:41:30.711969', 'step': 13298, 'epoch': 2} {'type': 'loss', 'content': 0.08687827736139297, 'timestamp': '2025-09-10 02:41:30.714669', 'step': 13299, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 80], 'flops': 2373281365952}, 'timestamp': '2025-09-10 02:41:30.745038', 'step': 13299, 'epoch': 2} {'type': 'loss', 'content': 0.09579434990882874, 'timestamp': '2025-09-10 02:41:30.768781', 'step': 13300, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 144], 'flops': 4271696270016}, 'timestamp': '2025-09-10 02:41:30.800012', 'step': 13300, 'epoch': 2} {'type': 'loss', 'content': 0.0644749328494072, 'timestamp': '2025-09-10 02:41:30.802583', 'step': 13301, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 3322488817984}, 'timestamp': '2025-09-10 02:41:30.832916', 'step': 13301, 'epoch': 2} {'type': 'loss', 'content': 0.13058586418628693, 'timestamp': '2025-09-10 02:41:30.835877', 'step': 13302, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 2847885091968}, 'timestamp': '2025-09-10 02:41:30.868367', 'step': 13302, 'epoch': 2} {'type': 'loss', 'content': 0.0855783224105835, 'timestamp': '2025-09-10 02:41:30.870583', 'step': 13303, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 3322488817984}, 'timestamp': '2025-09-10 02:41:30.901840', 'step': 13303, 'epoch': 2} {'type': 'loss', 'content': 0.08969505876302719, 'timestamp': '2025-09-10 02:41:30.925488', 'step': 13304, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 144], 'flops': 4271696270016}, 'timestamp': '2025-09-10 02:41:30.956698', 'step': 13304, 'epoch': 2} {'type': 'loss', 'content': 0.12473363429307938, 'timestamp': '2025-09-10 02:41:30.959277', 'step': 13305, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 80], 'flops': 2373281365952}, 'timestamp': '2025-09-10 02:41:30.990225', 'step': 13305, 'epoch': 2} {'type': 'loss', 'content': 0.1672901064157486, 'timestamp': '2025-09-10 02:41:30.992574', 'step': 13306, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 80], 'flops': 2373281365952}, 'timestamp': '2025-09-10 02:41:31.022334', 'step': 13306, 'epoch': 2} {'type': 'loss', 'content': 0.12941700220108032, 'timestamp': '2025-09-10 02:41:31.024970', 'step': 13307, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 2847885091968}, 'timestamp': '2025-09-10 02:41:31.055161', 'step': 13307, 'epoch': 2} {'type': 'loss', 'content': 0.10316147655248642, 'timestamp': '2025-09-10 02:41:31.078985', 'step': 13308, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 2847885091968}, 'timestamp': '2025-09-10 02:41:31.110032', 'step': 13308, 'epoch': 2} {'type': 'loss', 'content': 0.11688930541276932, 'timestamp': '2025-09-10 02:41:31.112321', 'step': 13309, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 3322488817984}, 'timestamp': '2025-09-10 02:41:31.141937', 'step': 13309, 'epoch': 2} {'type': 'loss', 'content': 0.18778793513774872, 'timestamp': '2025-09-10 02:41:31.145118', 'step': 13310, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 144], 'flops': 4271696270016}, 'timestamp': '2025-09-10 02:41:31.175593', 'step': 13310, 'epoch': 2} {'type': 'loss', 'content': 0.10064200311899185, 'timestamp': '2025-09-10 02:41:31.177966', 'step': 13311, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 2847885091968}, 'timestamp': '2025-09-10 02:41:31.209487', 'step': 13311, 'epoch': 2} {'type': 'loss', 'content': 0.11964349448680878, 'timestamp': '2025-09-10 02:41:31.233513', 'step': 13312, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 128], 'flops': 3797092544000}, 'timestamp': '2025-09-10 02:41:31.264192', 'step': 13312, 'epoch': 2} {'type': 'loss', 'content': 0.12127767503261566, 'timestamp': '2025-09-10 02:41:31.266488', 'step': 13313, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 2847885091968}, 'timestamp': '2025-09-10 02:41:31.296254', 'step': 13313, 'epoch': 2} {'type': 'loss', 'content': 0.09161195158958435, 'timestamp': '2025-09-10 02:41:31.299043', 'step': 13314, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 3322488817984}, 'timestamp': '2025-09-10 02:41:31.329446', 'step': 13314, 'epoch': 2} {'type': 'loss', 'content': 0.14282339811325073, 'timestamp': '2025-09-10 02:41:31.332361', 'step': 13315, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 144], 'flops': 4271696270016}, 'timestamp': '2025-09-10 02:41:31.363534', 'step': 13315, 'epoch': 2} {'type': 'loss', 'content': 0.04578344523906708, 'timestamp': '2025-09-10 02:41:31.387442', 'step': 13316, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 128], 'flops': 3797092544000}, 'timestamp': '2025-09-10 02:41:31.423909', 'step': 13316, 'epoch': 2} {'type': 'loss', 'content': 0.11039559543132782, 'timestamp': '2025-09-10 02:41:31.426286', 'step': 13317, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 3322488817984}, 'timestamp': '2025-09-10 02:41:31.456547', 'step': 13317, 'epoch': 2} {'type': 'loss', 'content': 0.11002907156944275, 'timestamp': '2025-09-10 02:41:31.458909', 'step': 13318, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 3322488817984}, 'timestamp': '2025-09-10 02:41:31.489652', 'step': 13318, 'epoch': 2} {'type': 'loss', 'content': 0.12206409871578217, 'timestamp': '2025-09-10 02:41:31.492677', 'step': 13319, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 144], 'flops': 4271696270016}, 'timestamp': '2025-09-10 02:41:31.524727', 'step': 13319, 'epoch': 2} {'type': 'loss', 'content': 0.17364466190338135, 'timestamp': '2025-09-10 02:41:31.548547', 'step': 13320, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 3322488817984}, 'timestamp': '2025-09-10 02:41:31.578369', 'step': 13320, 'epoch': 2} {'type': 'loss', 'content': 0.11847047507762909, 'timestamp': '2025-09-10 02:41:31.580784', 'step': 13321, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 2847885091968}, 'timestamp': '2025-09-10 02:41:31.611909', 'step': 13321, 'epoch': 2} {'type': 'loss', 'content': 0.1081966906785965, 'timestamp': '2025-09-10 02:41:31.614542', 'step': 13322, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 144], 'flops': 4271696270016}, 'timestamp': '2025-09-10 02:41:31.645539', 'step': 13322, 'epoch': 2} {'type': 'loss', 'content': 0.09074588865041733, 'timestamp': '2025-09-10 02:41:31.647822', 'step': 13323, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 128], 'flops': 3797092544000}, 'timestamp': '2025-09-10 02:41:31.679329', 'step': 13323, 'epoch': 2} {'type': 'loss', 'content': 0.05143275856971741, 'timestamp': '2025-09-10 02:41:31.703537', 'step': 13324, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 3322488817984}, 'timestamp': '2025-09-10 02:41:31.735048', 'step': 13324, 'epoch': 2} {'type': 'loss', 'content': 0.10949569195508957, 'timestamp': '2025-09-10 02:41:31.737942', 'step': 13325, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 2847885091968}, 'timestamp': '2025-09-10 02:41:31.768918', 'step': 13325, 'epoch': 2} {'type': 'loss', 'content': 0.10659002512693405, 'timestamp': '2025-09-10 02:41:31.771659', 'step': 13326, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 2847885091968}, 'timestamp': '2025-09-10 02:41:31.802341', 'step': 13326, 'epoch': 2} {'type': 'loss', 'content': 0.09025823324918747, 'timestamp': '2025-09-10 02:41:31.804784', 'step': 13327, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 2847885091968}, 'timestamp': '2025-09-10 02:41:31.834707', 'step': 13327, 'epoch': 2} {'type': 'loss', 'content': 0.13776984810829163, 'timestamp': '2025-09-10 02:41:31.858368', 'step': 13328, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 128], 'flops': 3797092544000}, 'timestamp': '2025-09-10 02:41:31.888929', 'step': 13328, 'epoch': 2} {'type': 'loss', 'content': 0.0745672956109047, 'timestamp': '2025-09-10 02:41:31.892155', 'step': 13329, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 144], 'flops': 4271696270016}, 'timestamp': '2025-09-10 02:41:31.923409', 'step': 13329, 'epoch': 2} {'type': 'loss', 'content': 0.15970072150230408, 'timestamp': '2025-09-10 02:41:31.926108', 'step': 13330, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 3322488817984}, 'timestamp': '2025-09-10 02:41:31.956543', 'step': 13330, 'epoch': 2} {'type': 'loss', 'content': 0.06773290038108826, 'timestamp': '2025-09-10 02:41:31.959253', 'step': 13331, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 128], 'flops': 3797092544000}, 'timestamp': '2025-09-10 02:41:31.990141', 'step': 13331, 'epoch': 2} {'type': 'loss', 'content': 0.07192427664995193, 'timestamp': '2025-09-10 02:41:32.013942', 'step': 13332, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 3322488817984}, 'timestamp': '2025-09-10 02:41:32.045380', 'step': 13332, 'epoch': 2} {'type': 'loss', 'content': 0.10524928569793701, 'timestamp': '2025-09-10 02:41:32.048085', 'step': 13333, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 2847885091968}, 'timestamp': '2025-09-10 02:41:32.079711', 'step': 13333, 'epoch': 2} {'type': 'loss', 'content': 0.027761509642004967, 'timestamp': '2025-09-10 02:41:32.082397', 'step': 13334, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 128], 'flops': 3797092544000}, 'timestamp': '2025-09-10 02:41:32.112780', 'step': 13334, 'epoch': 2} {'type': 'loss', 'content': 0.10781392455101013, 'timestamp': '2025-09-10 02:41:32.115389', 'step': 13335, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 3322488817984}, 'timestamp': '2025-09-10 02:41:32.146090', 'step': 13335, 'epoch': 2} {'type': 'loss', 'content': 0.09700378775596619, 'timestamp': '2025-09-10 02:41:32.169772', 'step': 13336, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 3322488817984}, 'timestamp': '2025-09-10 02:41:32.200202', 'step': 13336, 'epoch': 2} {'type': 'loss', 'content': 0.05198067054152489, 'timestamp': '2025-09-10 02:41:32.202666', 'step': 13337, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 2847885091968}, 'timestamp': '2025-09-10 02:41:32.233767', 'step': 13337, 'epoch': 2} {'type': 'loss', 'content': 0.06515499204397202, 'timestamp': '2025-09-10 02:41:32.236280', 'step': 13338, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 2847885091968}, 'timestamp': '2025-09-10 02:41:32.266164', 'step': 13338, 'epoch': 2} {'type': 'loss', 'content': 0.1135082095861435, 'timestamp': '2025-09-10 02:41:32.268507', 'step': 13339, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 144], 'flops': 4271696270016}, 'timestamp': '2025-09-10 02:41:32.299769', 'step': 13339, 'epoch': 2} {'type': 'loss', 'content': 0.1124999150633812, 'timestamp': '2025-09-10 02:41:32.323431', 'step': 13340, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 128], 'flops': 3797092544000}, 'timestamp': '2025-09-10 02:41:32.353888', 'step': 13340, 'epoch': 2} {'type': 'loss', 'content': 0.17501921951770782, 'timestamp': '2025-09-10 02:41:32.356432', 'step': 13341, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 2847885091968}, 'timestamp': '2025-09-10 02:41:32.388604', 'step': 13341, 'epoch': 2} {'type': 'loss', 'content': 0.10085107386112213, 'timestamp': '2025-09-10 02:41:32.391469', 'step': 13342, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 80], 'flops': 2373281365952}, 'timestamp': '2025-09-10 02:41:32.429261', 'step': 13342, 'epoch': 2} {'type': 'loss', 'content': 0.11857501417398453, 'timestamp': '2025-09-10 02:41:32.432436', 'step': 13343, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 128], 'flops': 3797092544000}, 'timestamp': '2025-09-10 02:41:32.463279', 'step': 13343, 'epoch': 2} {'type': 'loss', 'content': 0.09811101108789444, 'timestamp': '2025-09-10 02:41:32.487678', 'step': 13344, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 128], 'flops': 3797092544000}, 'timestamp': '2025-09-10 02:41:32.520063', 'step': 13344, 'epoch': 2} {'type': 'loss', 'content': 0.04614662751555443, 'timestamp': '2025-09-10 02:41:32.523624', 'step': 13345, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 3322488817984}, 'timestamp': '2025-09-10 02:41:32.557636', 'step': 13345, 'epoch': 2} {'type': 'loss', 'content': 0.15318438410758972, 'timestamp': '2025-09-10 02:41:32.562351', 'step': 13346, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 3322488817984}, 'timestamp': '2025-09-10 02:41:32.595443', 'step': 13346, 'epoch': 2} {'type': 'loss', 'content': 0.0717652291059494, 'timestamp': '2025-09-10 02:41:32.599220', 'step': 13347, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 3322488817984}, 'timestamp': '2025-09-10 02:41:32.632356', 'step': 13347, 'epoch': 2} {'type': 'loss', 'content': 0.10471351444721222, 'timestamp': '2025-09-10 02:41:32.657339', 'step': 13348, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 128], 'flops': 3797092544000}, 'timestamp': '2025-09-10 02:41:32.692180', 'step': 13348, 'epoch': 2} {'type': 'loss', 'content': 0.2324802577495575, 'timestamp': '2025-09-10 02:41:32.695624', 'step': 13349, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 2847885091968}, 'timestamp': '2025-09-10 02:41:32.727673', 'step': 13349, 'epoch': 2} {'type': 'loss', 'content': 0.053219396620988846, 'timestamp': '2025-09-10 02:41:32.730112', 'step': 13350, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 144], 'flops': 4271696270016}, 'timestamp': '2025-09-10 02:41:32.761808', 'step': 13350, 'epoch': 2} {'type': 'loss', 'content': 0.15494218468666077, 'timestamp': '2025-09-10 02:41:32.765468', 'step': 13351, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 2847885091968}, 'timestamp': '2025-09-10 02:41:32.795724', 'step': 13351, 'epoch': 2} {'type': 'loss', 'content': 0.07820004224777222, 'timestamp': '2025-09-10 02:41:32.820067', 'step': 13352, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 2847885091968}, 'timestamp': '2025-09-10 02:41:32.850966', 'step': 13352, 'epoch': 2} {'type': 'loss', 'content': 0.08942956477403641, 'timestamp': '2025-09-10 02:41:32.853316', 'step': 13353, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 2847885091968}, 'timestamp': '2025-09-10 02:41:32.883482', 'step': 13353, 'epoch': 2} {'type': 'loss', 'content': 0.05866342782974243, 'timestamp': '2025-09-10 02:41:32.886214', 'step': 13354, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 2847885091968}, 'timestamp': '2025-09-10 02:41:32.917446', 'step': 13354, 'epoch': 2} {'type': 'loss', 'content': 0.17445100843906403, 'timestamp': '2025-09-10 02:41:32.920610', 'step': 13355, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 80], 'flops': 2373281365952}, 'timestamp': '2025-09-10 02:41:32.952023', 'step': 13355, 'epoch': 2} {'type': 'loss', 'content': 0.11507954448461533, 'timestamp': '2025-09-10 02:41:32.975773', 'step': 13356, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 2847885091968}, 'timestamp': '2025-09-10 02:41:33.005934', 'step': 13356, 'epoch': 2} {'type': 'loss', 'content': 0.12391170114278793, 'timestamp': '2025-09-10 02:41:33.008642', 'step': 13357, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 128], 'flops': 3797092544000}, 'timestamp': '2025-09-10 02:41:33.039381', 'step': 13357, 'epoch': 2} {'type': 'loss', 'content': 0.09072147309780121, 'timestamp': '2025-09-10 02:41:33.042650', 'step': 13358, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 80], 'flops': 2373281365952}, 'timestamp': '2025-09-10 02:41:33.073650', 'step': 13358, 'epoch': 2} {'type': 'loss', 'content': 0.026576166972517967, 'timestamp': '2025-09-10 02:41:33.076662', 'step': 13359, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 3322488817984}, 'timestamp': '2025-09-10 02:41:33.107341', 'step': 13359, 'epoch': 2} {'type': 'loss', 'content': 0.16081635653972626, 'timestamp': '2025-09-10 02:41:33.131095', 'step': 13360, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 128], 'flops': 3797092544000}, 'timestamp': '2025-09-10 02:41:33.161888', 'step': 13360, 'epoch': 2} {'type': 'loss', 'content': 0.11893118917942047, 'timestamp': '2025-09-10 02:41:33.164799', 'step': 13361, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 128], 'flops': 3797092544000}, 'timestamp': '2025-09-10 02:41:33.196973', 'step': 13361, 'epoch': 2} {'type': 'loss', 'content': 0.07965181022882462, 'timestamp': '2025-09-10 02:41:33.199227', 'step': 13362, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 128], 'flops': 3797092544000}, 'timestamp': '2025-09-10 02:41:33.229622', 'step': 13362, 'epoch': 2} {'type': 'loss', 'content': 0.06684773415327072, 'timestamp': '2025-09-10 02:41:33.232064', 'step': 13363, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 160], 'flops': 4746299996032}, 'timestamp': '2025-09-10 02:41:33.264766', 'step': 13363, 'epoch': 2} {'type': 'loss', 'content': 0.17539647221565247, 'timestamp': '2025-09-10 02:41:33.288877', 'step': 13364, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 2847885091968}, 'timestamp': '2025-09-10 02:41:33.321724', 'step': 13364, 'epoch': 2} {'type': 'loss', 'content': 0.1417376846075058, 'timestamp': '2025-09-10 02:41:33.324294', 'step': 13365, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 128], 'flops': 3797092544000}, 'timestamp': '2025-09-10 02:41:33.355919', 'step': 13365, 'epoch': 2} {'type': 'loss', 'content': 0.1023053228855133, 'timestamp': '2025-09-10 02:41:33.358415', 'step': 13366, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 128], 'flops': 3797092544000}, 'timestamp': '2025-09-10 02:41:33.389488', 'step': 13366, 'epoch': 2} {'type': 'loss', 'content': 0.12442251294851303, 'timestamp': '2025-09-10 02:41:33.392599', 'step': 13367, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 3322488817984}, 'timestamp': '2025-09-10 02:41:33.428673', 'step': 13367, 'epoch': 2} {'type': 'loss', 'content': 0.1611815094947815, 'timestamp': '2025-09-10 02:41:33.453663', 'step': 13368, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 3322488817984}, 'timestamp': '2025-09-10 02:41:33.483961', 'step': 13368, 'epoch': 2} {'type': 'loss', 'content': 0.09024367481470108, 'timestamp': '2025-09-10 02:41:33.486098', 'step': 13369, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 2847885091968}, 'timestamp': '2025-09-10 02:41:33.517234', 'step': 13369, 'epoch': 2} {'type': 'loss', 'content': 0.09221503138542175, 'timestamp': '2025-09-10 02:41:33.519600', 'step': 13370, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 3322488817984}, 'timestamp': '2025-09-10 02:41:33.549880', 'step': 13370, 'epoch': 2} {'type': 'loss', 'content': 0.10448982566595078, 'timestamp': '2025-09-10 02:41:33.553154', 'step': 13371, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 128], 'flops': 3797092544000}, 'timestamp': '2025-09-10 02:41:33.584575', 'step': 13371, 'epoch': 2} {'type': 'loss', 'content': 0.10858017951250076, 'timestamp': '2025-09-10 02:41:33.608798', 'step': 13372, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 2847885091968}, 'timestamp': '2025-09-10 02:41:33.640207', 'step': 13372, 'epoch': 2} {'type': 'loss', 'content': 0.08582179248332977, 'timestamp': '2025-09-10 02:41:33.643192', 'step': 13373, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 3322488817984}, 'timestamp': '2025-09-10 02:41:33.674330', 'step': 13373, 'epoch': 2} {'type': 'loss', 'content': 0.2294609248638153, 'timestamp': '2025-09-10 02:41:33.677162', 'step': 13374, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 144], 'flops': 4271696270016}, 'timestamp': '2025-09-10 02:41:33.707770', 'step': 13374, 'epoch': 2} {'type': 'loss', 'content': 0.12923592329025269, 'timestamp': '2025-09-10 02:41:33.710183', 'step': 13375, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 128], 'flops': 3797092544000}, 'timestamp': '2025-09-10 02:41:33.740137', 'step': 13375, 'epoch': 2} {'type': 'loss', 'content': 0.09540089964866638, 'timestamp': '2025-09-10 02:41:33.764247', 'step': 13376, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 128], 'flops': 3797092544000}, 'timestamp': '2025-09-10 02:41:33.794353', 'step': 13376, 'epoch': 2} {'type': 'loss', 'content': 0.11022613942623138, 'timestamp': '2025-09-10 02:41:33.798648', 'step': 13377, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 2847885091968}, 'timestamp': '2025-09-10 02:41:33.830163', 'step': 13377, 'epoch': 2} {'type': 'loss', 'content': 0.07777931541204453, 'timestamp': '2025-09-10 02:41:33.832515', 'step': 13378, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 3322488817984}, 'timestamp': '2025-09-10 02:41:33.863128', 'step': 13378, 'epoch': 2} {'type': 'loss', 'content': 0.16617420315742493, 'timestamp': '2025-09-10 02:41:33.865303', 'step': 13379, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 3322488817984}, 'timestamp': '2025-09-10 02:41:33.895650', 'step': 13379, 'epoch': 2} {'type': 'loss', 'content': 0.09955663233995438, 'timestamp': '2025-09-10 02:41:33.919208', 'step': 13380, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 3322488817984}, 'timestamp': '2025-09-10 02:41:33.949505', 'step': 13380, 'epoch': 2} {'type': 'loss', 'content': 0.09747582674026489, 'timestamp': '2025-09-10 02:41:33.951977', 'step': 13381, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 80], 'flops': 2373281365952}, 'timestamp': '2025-09-10 02:41:33.982538', 'step': 13381, 'epoch': 2} {'type': 'loss', 'content': 0.12714235484600067, 'timestamp': '2025-09-10 02:41:33.985220', 'step': 13382, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 3322488817984}, 'timestamp': '2025-09-10 02:41:34.016102', 'step': 13382, 'epoch': 2} {'type': 'loss', 'content': 0.18476346135139465, 'timestamp': '2025-09-10 02:41:34.018583', 'step': 13383, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 2847885091968}, 'timestamp': '2025-09-10 02:41:34.050291', 'step': 13383, 'epoch': 2} {'type': 'loss', 'content': 0.09556911885738373, 'timestamp': '2025-09-10 02:41:34.073883', 'step': 13384, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 2847885091968}, 'timestamp': '2025-09-10 02:41:34.107014', 'step': 13384, 'epoch': 2} {'type': 'loss', 'content': 0.04866357892751694, 'timestamp': '2025-09-10 02:41:34.109823', 'step': 13385, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 2847885091968}, 'timestamp': '2025-09-10 02:41:34.139661', 'step': 13385, 'epoch': 2} {'type': 'loss', 'content': 0.10836582630872726, 'timestamp': '2025-09-10 02:41:34.142822', 'step': 13386, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 3322488817984}, 'timestamp': '2025-09-10 02:41:34.173702', 'step': 13386, 'epoch': 2} {'type': 'loss', 'content': 0.0486910305917263, 'timestamp': '2025-09-10 02:41:34.176379', 'step': 13387, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 3322488817984}, 'timestamp': '2025-09-10 02:41:34.206672', 'step': 13387, 'epoch': 2} {'type': 'loss', 'content': 0.05512034147977829, 'timestamp': '2025-09-10 02:41:34.230441', 'step': 13388, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 3322488817984}, 'timestamp': '2025-09-10 02:41:34.261033', 'step': 13388, 'epoch': 2} {'type': 'loss', 'content': 0.06379152089357376, 'timestamp': '2025-09-10 02:41:34.265323', 'step': 13389, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 3322488817984}, 'timestamp': '2025-09-10 02:41:34.296031', 'step': 13389, 'epoch': 2} {'type': 'loss', 'content': 0.04013191536068916, 'timestamp': '2025-09-10 02:41:34.299464', 'step': 13390, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 3322488817984}, 'timestamp': '2025-09-10 02:41:34.330043', 'step': 13390, 'epoch': 2} {'type': 'loss', 'content': 0.09969344735145569, 'timestamp': '2025-09-10 02:41:34.332242', 'step': 13391, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 3322488817984}, 'timestamp': '2025-09-10 02:41:34.362085', 'step': 13391, 'epoch': 2} {'type': 'loss', 'content': 0.11840180307626724, 'timestamp': '2025-09-10 02:41:34.385663', 'step': 13392, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 128], 'flops': 3797092544000}, 'timestamp': '2025-09-10 02:41:34.417043', 'step': 13392, 'epoch': 2} {'type': 'loss', 'content': 0.19736459851264954, 'timestamp': '2025-09-10 02:41:34.420444', 'step': 13393, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 3322488817984}, 'timestamp': '2025-09-10 02:41:34.451461', 'step': 13393, 'epoch': 2} {'type': 'loss', 'content': 0.0945136770606041, 'timestamp': '2025-09-10 02:41:34.455034', 'step': 13394, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 128], 'flops': 3797092544000}, 'timestamp': '2025-09-10 02:41:34.486344', 'step': 13394, 'epoch': 2} {'type': 'loss', 'content': 0.10843542963266373, 'timestamp': '2025-09-10 02:41:34.489384', 'step': 13395, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 2847885091968}, 'timestamp': '2025-09-10 02:41:34.520570', 'step': 13395, 'epoch': 2} {'type': 'loss', 'content': 0.09252268075942993, 'timestamp': '2025-09-10 02:41:34.544377', 'step': 13396, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 160], 'flops': 4746299996032}, 'timestamp': '2025-09-10 02:41:34.576028', 'step': 13396, 'epoch': 2} {'type': 'loss', 'content': 0.05700903385877609, 'timestamp': '2025-09-10 02:41:34.578632', 'step': 13397, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 2847885091968}, 'timestamp': '2025-09-10 02:41:34.611599', 'step': 13397, 'epoch': 2} {'type': 'loss', 'content': 0.07541844248771667, 'timestamp': '2025-09-10 02:41:34.613855', 'step': 13398, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 2847885091968}, 'timestamp': '2025-09-10 02:41:34.643759', 'step': 13398, 'epoch': 2} {'type': 'loss', 'content': 0.07792647182941437, 'timestamp': '2025-09-10 02:41:34.645971', 'step': 13399, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 3322488817984}, 'timestamp': '2025-09-10 02:41:34.675911', 'step': 13399, 'epoch': 2} {'type': 'loss', 'content': 0.2044740617275238, 'timestamp': '2025-09-10 02:41:34.699404', 'step': 13400, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 3322488817984}, 'timestamp': '2025-09-10 02:41:34.730560', 'step': 13400, 'epoch': 2} {'type': 'loss', 'content': 0.14016598463058472, 'timestamp': '2025-09-10 02:41:34.733533', 'step': 13401, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 128], 'flops': 3797092544000}, 'timestamp': '2025-09-10 02:41:34.764466', 'step': 13401, 'epoch': 2} {'type': 'loss', 'content': 0.13220719993114471, 'timestamp': '2025-09-10 02:41:34.767429', 'step': 13402, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 160], 'flops': 4746299996032}, 'timestamp': '2025-09-10 02:41:34.798309', 'step': 13402, 'epoch': 2} {'type': 'loss', 'content': 0.040185943245887756, 'timestamp': '2025-09-10 02:41:34.801029', 'step': 13403, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 128], 'flops': 3797092544000}, 'timestamp': '2025-09-10 02:41:34.833439', 'step': 13403, 'epoch': 2} {'type': 'loss', 'content': 0.06443475186824799, 'timestamp': '2025-09-10 02:41:34.857851', 'step': 13404, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 144], 'flops': 4271696270016}, 'timestamp': '2025-09-10 02:41:34.889663', 'step': 13404, 'epoch': 2} {'type': 'loss', 'content': 0.20346586406230927, 'timestamp': '2025-09-10 02:41:34.893066', 'step': 13405, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 144], 'flops': 4271696270016}, 'timestamp': '2025-09-10 02:41:34.923931', 'step': 13405, 'epoch': 2} {'type': 'loss', 'content': 0.08377610146999359, 'timestamp': '2025-09-10 02:41:34.926319', 'step': 13406, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 144], 'flops': 4271696270016}, 'timestamp': '2025-09-10 02:41:34.956370', 'step': 13406, 'epoch': 2} {'type': 'loss', 'content': 0.09143919497728348, 'timestamp': '2025-09-10 02:41:34.958988', 'step': 13407, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 3322488817984}, 'timestamp': '2025-09-10 02:41:34.990058', 'step': 13407, 'epoch': 2} {'type': 'loss', 'content': 0.14659149944782257, 'timestamp': '2025-09-10 02:41:35.015281', 'step': 13408, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 3322488817984}, 'timestamp': '2025-09-10 02:41:35.046128', 'step': 13408, 'epoch': 2} {'type': 'loss', 'content': 0.10553485155105591, 'timestamp': '2025-09-10 02:41:35.048434', 'step': 13409, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 2847885091968}, 'timestamp': '2025-09-10 02:41:35.079509', 'step': 13409, 'epoch': 2} {'type': 'loss', 'content': 0.1451161652803421, 'timestamp': '2025-09-10 02:41:35.082102', 'step': 13410, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 80], 'flops': 2373281365952}, 'timestamp': '2025-09-10 02:41:35.112658', 'step': 13410, 'epoch': 2} {'type': 'loss', 'content': 0.10723879933357239, 'timestamp': '2025-09-10 02:41:35.114811', 'step': 13411, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 128], 'flops': 3797092544000}, 'timestamp': '2025-09-10 02:41:35.144280', 'step': 13411, 'epoch': 2} {'type': 'loss', 'content': 0.1219838336110115, 'timestamp': '2025-09-10 02:41:35.168303', 'step': 13412, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 128], 'flops': 3797092544000}, 'timestamp': '2025-09-10 02:41:35.200138', 'step': 13412, 'epoch': 2} {'type': 'loss', 'content': 0.1295108199119568, 'timestamp': '2025-09-10 02:41:35.203179', 'step': 13413, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 2847885091968}, 'timestamp': '2025-09-10 02:41:35.233161', 'step': 13413, 'epoch': 2} {'type': 'loss', 'content': 0.08309280872344971, 'timestamp': '2025-09-10 02:41:35.235663', 'step': 13414, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 128], 'flops': 3797092544000}, 'timestamp': '2025-09-10 02:41:35.265717', 'step': 13414, 'epoch': 2} {'type': 'loss', 'content': 0.09421783685684204, 'timestamp': '2025-09-10 02:41:35.268320', 'step': 13415, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 3322488817984}, 'timestamp': '2025-09-10 02:41:35.302045', 'step': 13415, 'epoch': 2} {'type': 'loss', 'content': 0.057891596108675, 'timestamp': '2025-09-10 02:41:35.325855', 'step': 13416, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 3322488817984}, 'timestamp': '2025-09-10 02:41:35.356249', 'step': 13416, 'epoch': 2} {'type': 'loss', 'content': 0.08305306732654572, 'timestamp': '2025-09-10 02:41:35.358612', 'step': 13417, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 3322488817984}, 'timestamp': '2025-09-10 02:41:35.389691', 'step': 13417, 'epoch': 2} {'type': 'loss', 'content': 0.10040305554866791, 'timestamp': '2025-09-10 02:41:35.392435', 'step': 13418, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 3322488817984}, 'timestamp': '2025-09-10 02:41:35.425353', 'step': 13418, 'epoch': 2} {'type': 'loss', 'content': 0.15754586458206177, 'timestamp': '2025-09-10 02:41:35.427794', 'step': 13419, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 2847885091968}, 'timestamp': '2025-09-10 02:41:35.457861', 'step': 13419, 'epoch': 2} {'type': 'loss', 'content': 0.06254933029413223, 'timestamp': '2025-09-10 02:41:35.481572', 'step': 13420, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 2847885091968}, 'timestamp': '2025-09-10 02:41:35.511858', 'step': 13420, 'epoch': 2} {'type': 'loss', 'content': 0.12966781854629517, 'timestamp': '2025-09-10 02:41:35.514341', 'step': 13421, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 128], 'flops': 3797092544000}, 'timestamp': '2025-09-10 02:41:35.544395', 'step': 13421, 'epoch': 2} {'type': 'loss', 'content': 0.1294606477022171, 'timestamp': '2025-09-10 02:41:35.546903', 'step': 13422, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 128], 'flops': 3797092544000}, 'timestamp': '2025-09-10 02:41:35.576801', 'step': 13422, 'epoch': 2} {'type': 'loss', 'content': 0.026536917313933372, 'timestamp': '2025-09-10 02:41:35.579749', 'step': 13423, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 2847885091968}, 'timestamp': '2025-09-10 02:41:35.610027', 'step': 13423, 'epoch': 2} {'type': 'loss', 'content': 0.04802463948726654, 'timestamp': '2025-09-10 02:41:35.634031', 'step': 13424, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 2847885091968}, 'timestamp': '2025-09-10 02:41:35.664172', 'step': 13424, 'epoch': 2} {'type': 'loss', 'content': 0.07029762119054794, 'timestamp': '2025-09-10 02:41:35.666690', 'step': 13425, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 3322488817984}, 'timestamp': '2025-09-10 02:41:35.696892', 'step': 13425, 'epoch': 2} {'type': 'loss', 'content': 0.06442314386367798, 'timestamp': '2025-09-10 02:41:35.700765', 'step': 13426, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 128], 'flops': 3797092544000}, 'timestamp': '2025-09-10 02:41:35.732002', 'step': 13426, 'epoch': 2} {'type': 'loss', 'content': 0.06658817827701569, 'timestamp': '2025-09-10 02:41:35.734302', 'step': 13427, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 160], 'flops': 4746299996032}, 'timestamp': '2025-09-10 02:41:35.764998', 'step': 13427, 'epoch': 2} {'type': 'loss', 'content': 0.10640965402126312, 'timestamp': '2025-09-10 02:41:35.788639', 'step': 13428, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 144], 'flops': 4271696270016}, 'timestamp': '2025-09-10 02:41:35.819390', 'step': 13428, 'epoch': 2} {'type': 'loss', 'content': 0.06844469904899597, 'timestamp': '2025-09-10 02:41:35.822099', 'step': 13429, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 2847885091968}, 'timestamp': '2025-09-10 02:41:35.851815', 'step': 13429, 'epoch': 2} {'type': 'loss', 'content': 0.10518087446689606, 'timestamp': '2025-09-10 02:41:35.854826', 'step': 13430, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 160], 'flops': 4746299996032}, 'timestamp': '2025-09-10 02:41:35.885061', 'step': 13430, 'epoch': 2} {'type': 'loss', 'content': 0.0457928441464901, 'timestamp': '2025-09-10 02:41:35.887718', 'step': 13431, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 208], 'flops': 6170111174080}, 'timestamp': '2025-09-10 02:41:35.918301', 'step': 13431, 'epoch': 2} {'type': 'loss', 'content': 0.059119630604982376, 'timestamp': '2025-09-10 02:41:35.945669', 'step': 13432, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 2847885091968}, 'timestamp': '2025-09-10 02:41:35.976023', 'step': 13432, 'epoch': 2} {'type': 'loss', 'content': 0.12660527229309082, 'timestamp': '2025-09-10 02:41:35.978479', 'step': 13433, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 3322488817984}, 'timestamp': '2025-09-10 02:41:36.009424', 'step': 13433, 'epoch': 2} {'type': 'loss', 'content': 0.07298583537340164, 'timestamp': '2025-09-10 02:41:36.012235', 'step': 13434, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 3322488817984}, 'timestamp': '2025-09-10 02:41:36.041371', 'step': 13434, 'epoch': 2} {'type': 'loss', 'content': 0.1847313791513443, 'timestamp': '2025-09-10 02:41:36.043924', 'step': 13435, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 3322488817984}, 'timestamp': '2025-09-10 02:41:36.074161', 'step': 13435, 'epoch': 2} {'type': 'loss', 'content': 0.10687354952096939, 'timestamp': '2025-09-10 02:41:36.097784', 'step': 13436, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 2847885091968}, 'timestamp': '2025-09-10 02:41:36.128312', 'step': 13436, 'epoch': 2} {'type': 'loss', 'content': 0.08287681639194489, 'timestamp': '2025-09-10 02:41:36.131574', 'step': 13437, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 3322488817984}, 'timestamp': '2025-09-10 02:41:36.162101', 'step': 13437, 'epoch': 2} {'type': 'loss', 'content': 0.12046940624713898, 'timestamp': '2025-09-10 02:41:36.164788', 'step': 13438, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 80], 'flops': 2373281365952}, 'timestamp': '2025-09-10 02:41:36.194561', 'step': 13438, 'epoch': 2} {'type': 'loss', 'content': 0.14560529589653015, 'timestamp': '2025-09-10 02:41:36.197330', 'step': 13439, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 192], 'flops': 5695507448064}, 'timestamp': '2025-09-10 02:41:36.228329', 'step': 13439, 'epoch': 2} {'type': 'loss', 'content': 0.09749238193035126, 'timestamp': '2025-09-10 02:41:36.253580', 'step': 13440, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 128], 'flops': 3797092544000}, 'timestamp': '2025-09-10 02:41:36.284611', 'step': 13440, 'epoch': 2} {'type': 'loss', 'content': 0.11343266814947128, 'timestamp': '2025-09-10 02:41:36.287300', 'step': 13441, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 2847885091968}, 'timestamp': '2025-09-10 02:41:36.319027', 'step': 13441, 'epoch': 2} {'type': 'loss', 'content': 0.1437733918428421, 'timestamp': '2025-09-10 02:41:36.321630', 'step': 13442, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 3322488817984}, 'timestamp': '2025-09-10 02:41:36.352211', 'step': 13442, 'epoch': 2} {'type': 'loss', 'content': 0.12740032374858856, 'timestamp': '2025-09-10 02:41:36.354601', 'step': 13443, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 144], 'flops': 4271696270016}, 'timestamp': '2025-09-10 02:41:36.385733', 'step': 13443, 'epoch': 2} {'type': 'loss', 'content': 0.14662979543209076, 'timestamp': '2025-09-10 02:41:36.410167', 'step': 13444, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 2847885091968}, 'timestamp': '2025-09-10 02:41:36.452451', 'step': 13444, 'epoch': 2} {'type': 'loss', 'content': 0.07975386083126068, 'timestamp': '2025-09-10 02:41:36.455466', 'step': 13445, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 3322488817984}, 'timestamp': '2025-09-10 02:41:36.486066', 'step': 13445, 'epoch': 2} {'type': 'loss', 'content': 0.062339115887880325, 'timestamp': '2025-09-10 02:41:36.488980', 'step': 13446, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 2847885091968}, 'timestamp': '2025-09-10 02:41:36.521447', 'step': 13446, 'epoch': 2} {'type': 'loss', 'content': 0.08365914970636368, 'timestamp': '2025-09-10 02:41:36.523883', 'step': 13447, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 144], 'flops': 4271696270016}, 'timestamp': '2025-09-10 02:41:36.553928', 'step': 13447, 'epoch': 2} {'type': 'loss', 'content': 0.11855977773666382, 'timestamp': '2025-09-10 02:41:36.577587', 'step': 13448, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 128], 'flops': 3797092544000}, 'timestamp': '2025-09-10 02:41:36.608629', 'step': 13448, 'epoch': 2} {'type': 'loss', 'content': 0.07505888491868973, 'timestamp': '2025-09-10 02:41:36.611128', 'step': 13449, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 2847885091968}, 'timestamp': '2025-09-10 02:41:36.642837', 'step': 13449, 'epoch': 2} {'type': 'loss', 'content': 0.13512380421161652, 'timestamp': '2025-09-10 02:41:36.645093', 'step': 13450, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 2847885091968}, 'timestamp': '2025-09-10 02:41:36.675255', 'step': 13450, 'epoch': 2} {'type': 'loss', 'content': 0.0985894650220871, 'timestamp': '2025-09-10 02:41:36.677677', 'step': 13451, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 2847885091968}, 'timestamp': '2025-09-10 02:41:36.707825', 'step': 13451, 'epoch': 2} {'type': 'loss', 'content': 0.05730560049414635, 'timestamp': '2025-09-10 02:41:36.731469', 'step': 13452, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 3322488817984}, 'timestamp': '2025-09-10 02:41:36.761592', 'step': 13452, 'epoch': 2} {'type': 'loss', 'content': 0.09034831076860428, 'timestamp': '2025-09-10 02:41:36.763987', 'step': 13453, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 2847885091968}, 'timestamp': '2025-09-10 02:41:36.793633', 'step': 13453, 'epoch': 2} {'type': 'loss', 'content': 0.14071455597877502, 'timestamp': '2025-09-10 02:41:36.796701', 'step': 13454, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 128], 'flops': 3797092544000}, 'timestamp': '2025-09-10 02:41:36.830050', 'step': 13454, 'epoch': 2} {'type': 'loss', 'content': 0.16996869444847107, 'timestamp': '2025-09-10 02:41:36.832527', 'step': 13455, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 128], 'flops': 3797092544000}, 'timestamp': '2025-09-10 02:41:36.863134', 'step': 13455, 'epoch': 2} {'type': 'loss', 'content': 0.08286353945732117, 'timestamp': '2025-09-10 02:41:36.886839', 'step': 13456, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 2847885091968}, 'timestamp': '2025-09-10 02:41:36.917064', 'step': 13456, 'epoch': 2} {'type': 'loss', 'content': 0.11584708094596863, 'timestamp': '2025-09-10 02:41:36.919431', 'step': 13457, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 3322488817984}, 'timestamp': '2025-09-10 02:41:36.949120', 'step': 13457, 'epoch': 2} {'type': 'loss', 'content': 0.09765160828828812, 'timestamp': '2025-09-10 02:41:36.951711', 'step': 13458, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 128], 'flops': 3797092544000}, 'timestamp': '2025-09-10 02:41:36.982469', 'step': 13458, 'epoch': 2} {'type': 'loss', 'content': 0.1439650058746338, 'timestamp': '2025-09-10 02:41:36.985182', 'step': 13459, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 128], 'flops': 3797092544000}, 'timestamp': '2025-09-10 02:41:37.015164', 'step': 13459, 'epoch': 2} {'type': 'loss', 'content': 0.17030198872089386, 'timestamp': '2025-09-10 02:41:37.040385', 'step': 13460, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 2847885091968}, 'timestamp': '2025-09-10 02:41:37.071715', 'step': 13460, 'epoch': 2} {'type': 'loss', 'content': 0.10952349007129669, 'timestamp': '2025-09-10 02:41:37.074343', 'step': 13461, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 3322488817984}, 'timestamp': '2025-09-10 02:41:37.105650', 'step': 13461, 'epoch': 2} {'type': 'loss', 'content': 0.11034717410802841, 'timestamp': '2025-09-10 02:41:37.108360', 'step': 13462, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 2847885091968}, 'timestamp': '2025-09-10 02:41:37.139178', 'step': 13462, 'epoch': 2} {'type': 'loss', 'content': 0.11990851908922195, 'timestamp': '2025-09-10 02:41:37.141594', 'step': 13463, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 2847885091968}, 'timestamp': '2025-09-10 02:41:37.172183', 'step': 13463, 'epoch': 2} {'type': 'loss', 'content': 0.1088617667555809, 'timestamp': '2025-09-10 02:41:37.195741', 'step': 13464, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 2847885091968}, 'timestamp': '2025-09-10 02:41:37.226273', 'step': 13464, 'epoch': 2} {'type': 'loss', 'content': 0.11824619024991989, 'timestamp': '2025-09-10 02:41:37.228920', 'step': 13465, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 2847885091968}, 'timestamp': '2025-09-10 02:41:37.258692', 'step': 13465, 'epoch': 2} {'type': 'loss', 'content': 0.06868357956409454, 'timestamp': '2025-09-10 02:41:37.261184', 'step': 13466, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 3322488817984}, 'timestamp': '2025-09-10 02:41:37.291409', 'step': 13466, 'epoch': 2} {'type': 'loss', 'content': 0.181326761841774, 'timestamp': '2025-09-10 02:41:37.295458', 'step': 13467, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 144], 'flops': 4271696270016}, 'timestamp': '2025-09-10 02:41:37.326468', 'step': 13467, 'epoch': 2} {'type': 'loss', 'content': 0.056095585227012634, 'timestamp': '2025-09-10 02:41:37.350026', 'step': 13468, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 3322488817984}, 'timestamp': '2025-09-10 02:41:37.380528', 'step': 13468, 'epoch': 2} {'type': 'loss', 'content': 0.086549311876297, 'timestamp': '2025-09-10 02:41:37.383727', 'step': 13469, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 3322488817984}, 'timestamp': '2025-09-10 02:41:37.413896', 'step': 13469, 'epoch': 2} {'type': 'loss', 'content': 0.12315402179956436, 'timestamp': '2025-09-10 02:41:37.416179', 'step': 13470, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 3322488817984}, 'timestamp': '2025-09-10 02:41:37.447428', 'step': 13470, 'epoch': 2} {'type': 'loss', 'content': 0.14719773828983307, 'timestamp': '2025-09-10 02:41:37.450127', 'step': 13471, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 128], 'flops': 3797092544000}, 'timestamp': '2025-09-10 02:41:37.479827', 'step': 13471, 'epoch': 2} {'type': 'loss', 'content': 0.041840046644210815, 'timestamp': '2025-09-10 02:41:37.503546', 'step': 13472, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 144], 'flops': 4271696270016}, 'timestamp': '2025-09-10 02:41:37.534688', 'step': 13472, 'epoch': 2} {'type': 'loss', 'content': 0.13396960496902466, 'timestamp': '2025-09-10 02:41:37.537004', 'step': 13473, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 2847885091968}, 'timestamp': '2025-09-10 02:41:37.567125', 'step': 13473, 'epoch': 2} {'type': 'loss', 'content': 0.12341398745775223, 'timestamp': '2025-09-10 02:41:37.569555', 'step': 13474, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 176], 'flops': 5220903722048}, 'timestamp': '2025-09-10 02:41:37.599793', 'step': 13474, 'epoch': 2} {'type': 'loss', 'content': 0.06481023132801056, 'timestamp': '2025-09-10 02:41:37.603940', 'step': 13475, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 3322488817984}, 'timestamp': '2025-09-10 02:41:37.634884', 'step': 13475, 'epoch': 2} {'type': 'loss', 'content': 0.1272180825471878, 'timestamp': '2025-09-10 02:41:37.658484', 'step': 13476, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 3322488817984}, 'timestamp': '2025-09-10 02:41:37.689358', 'step': 13476, 'epoch': 2} {'type': 'loss', 'content': 0.09070219844579697, 'timestamp': '2025-09-10 02:41:37.691551', 'step': 13477, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 3322488817984}, 'timestamp': '2025-09-10 02:41:37.721767', 'step': 13477, 'epoch': 2} {'type': 'loss', 'content': 0.04755738377571106, 'timestamp': '2025-09-10 02:41:37.724807', 'step': 13478, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 2847885091968}, 'timestamp': '2025-09-10 02:41:37.756772', 'step': 13478, 'epoch': 2} {'type': 'loss', 'content': 0.05429975315928459, 'timestamp': '2025-09-10 02:41:37.759391', 'step': 13479, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 3322488817984}, 'timestamp': '2025-09-10 02:41:37.790696', 'step': 13479, 'epoch': 2} {'type': 'loss', 'content': 0.14754600822925568, 'timestamp': '2025-09-10 02:41:37.814468', 'step': 13480, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 2847885091968}, 'timestamp': '2025-09-10 02:41:37.845786', 'step': 13480, 'epoch': 2} {'type': 'loss', 'content': 0.1047833040356636, 'timestamp': '2025-09-10 02:41:37.847961', 'step': 13481, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 2847885091968}, 'timestamp': '2025-09-10 02:41:37.877932', 'step': 13481, 'epoch': 2} {'type': 'loss', 'content': 0.07513991743326187, 'timestamp': '2025-09-10 02:41:37.880313', 'step': 13482, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 2847885091968}, 'timestamp': '2025-09-10 02:41:37.912295', 'step': 13482, 'epoch': 2} {'type': 'loss', 'content': 0.07126364856958389, 'timestamp': '2025-09-10 02:41:37.914658', 'step': 13483, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 2847885091968}, 'timestamp': '2025-09-10 02:41:37.944935', 'step': 13483, 'epoch': 2} {'type': 'loss', 'content': 0.14406631886959076, 'timestamp': '2025-09-10 02:41:37.968919', 'step': 13484, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 2847885091968}, 'timestamp': '2025-09-10 02:41:37.998602', 'step': 13484, 'epoch': 2} {'type': 'loss', 'content': 0.06391295790672302, 'timestamp': '2025-09-10 02:41:38.000772', 'step': 13485, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 2847885091968}, 'timestamp': '2025-09-10 02:41:38.030823', 'step': 13485, 'epoch': 2} {'type': 'loss', 'content': 0.09506765007972717, 'timestamp': '2025-09-10 02:41:38.033614', 'step': 13486, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 3322488817984}, 'timestamp': '2025-09-10 02:41:38.063649', 'step': 13486, 'epoch': 2} {'type': 'loss', 'content': 0.06295537203550339, 'timestamp': '2025-09-10 02:41:38.065916', 'step': 13487, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 3322488817984}, 'timestamp': '2025-09-10 02:41:38.096293', 'step': 13487, 'epoch': 2} {'type': 'loss', 'content': 0.0960218608379364, 'timestamp': '2025-09-10 02:41:38.120353', 'step': 13488, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 3322488817984}, 'timestamp': '2025-09-10 02:41:38.151030', 'step': 13488, 'epoch': 2} {'type': 'loss', 'content': 0.13789460062980652, 'timestamp': '2025-09-10 02:41:38.153422', 'step': 13489, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 2847885091968}, 'timestamp': '2025-09-10 02:41:38.183769', 'step': 13489, 'epoch': 2} {'type': 'loss', 'content': 0.08486142009496689, 'timestamp': '2025-09-10 02:41:38.186146', 'step': 13490, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 3322488817984}, 'timestamp': '2025-09-10 02:41:38.215935', 'step': 13490, 'epoch': 2} {'type': 'loss', 'content': 0.07696632295846939, 'timestamp': '2025-09-10 02:41:38.218253', 'step': 13491, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 128], 'flops': 3797092544000}, 'timestamp': '2025-09-10 02:41:38.248294', 'step': 13491, 'epoch': 2} {'type': 'loss', 'content': 0.09105627238750458, 'timestamp': '2025-09-10 02:41:38.271856', 'step': 13492, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 3322488817984}, 'timestamp': '2025-09-10 02:41:38.301932', 'step': 13492, 'epoch': 2} {'type': 'loss', 'content': 0.10255839675664902, 'timestamp': '2025-09-10 02:41:38.304392', 'step': 13493, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 3322488817984}, 'timestamp': '2025-09-10 02:41:38.335008', 'step': 13493, 'epoch': 2} {'type': 'loss', 'content': 0.10931751877069473, 'timestamp': '2025-09-10 02:41:38.337352', 'step': 13494, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 144], 'flops': 4271696270016}, 'timestamp': '2025-09-10 02:41:38.367617', 'step': 13494, 'epoch': 2} {'type': 'loss', 'content': 0.17040260136127472, 'timestamp': '2025-09-10 02:41:38.370445', 'step': 13495, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 3322488817984}, 'timestamp': '2025-09-10 02:41:38.401199', 'step': 13495, 'epoch': 2} {'type': 'loss', 'content': 0.19659289717674255, 'timestamp': '2025-09-10 02:41:38.424556', 'step': 13496, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 2847885091968}, 'timestamp': '2025-09-10 02:41:38.455676', 'step': 13496, 'epoch': 2} {'type': 'loss', 'content': 0.08245474100112915, 'timestamp': '2025-09-10 02:41:38.458148', 'step': 13497, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 2847885091968}, 'timestamp': '2025-09-10 02:41:38.487981', 'step': 13497, 'epoch': 2} {'type': 'loss', 'content': 0.10834082216024399, 'timestamp': '2025-09-10 02:41:38.490713', 'step': 13498, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 3322488817984}, 'timestamp': '2025-09-10 02:41:38.521198', 'step': 13498, 'epoch': 2} {'type': 'loss', 'content': 0.10332735627889633, 'timestamp': '2025-09-10 02:41:38.523706', 'step': 13499, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 3322488817984}, 'timestamp': '2025-09-10 02:41:38.554248', 'step': 13499, 'epoch': 2} {'type': 'loss', 'content': 0.08197367936372757, 'timestamp': '2025-09-10 02:41:38.578136', 'step': 13500, 'epoch': 2} {'type': 'info', 'content': 'Checkpoint saved at step 13500', 'timestamp': '2025-09-10 02:41:45.044840', 'step': 13500, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 128], 'flops': 3797092544000}, 'timestamp': '2025-09-10 02:41:45.089639', 'step': 13500, 'epoch': 2} {'type': 'loss', 'content': 0.15339873731136322, 'timestamp': '2025-09-10 02:41:45.092825', 'step': 13501, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 3322488817984}, 'timestamp': '2025-09-10 02:41:45.126133', 'step': 13501, 'epoch': 2} {'type': 'loss', 'content': 0.07004602253437042, 'timestamp': '2025-09-10 02:41:45.128809', 'step': 13502, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 2847885091968}, 'timestamp': '2025-09-10 02:41:45.159021', 'step': 13502, 'epoch': 2} {'type': 'loss', 'content': 0.11594976484775543, 'timestamp': '2025-09-10 02:41:45.161418', 'step': 13503, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 3322488817984}, 'timestamp': '2025-09-10 02:41:45.191315', 'step': 13503, 'epoch': 2} {'type': 'loss', 'content': 0.08716508746147156, 'timestamp': '2025-09-10 02:41:45.214914', 'step': 13504, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 2847885091968}, 'timestamp': '2025-09-10 02:41:45.245798', 'step': 13504, 'epoch': 2} {'type': 'loss', 'content': 0.11848441511392593, 'timestamp': '2025-09-10 02:41:45.248242', 'step': 13505, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 128], 'flops': 3797092544000}, 'timestamp': '2025-09-10 02:41:45.278357', 'step': 13505, 'epoch': 2} {'type': 'loss', 'content': 0.10341060161590576, 'timestamp': '2025-09-10 02:41:45.281120', 'step': 13506, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 3322488817984}, 'timestamp': '2025-09-10 02:41:45.311815', 'step': 13506, 'epoch': 2} {'type': 'loss', 'content': 0.05529549717903137, 'timestamp': '2025-09-10 02:41:45.314250', 'step': 13507, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 3322488817984}, 'timestamp': '2025-09-10 02:41:45.343865', 'step': 13507, 'epoch': 2} {'type': 'loss', 'content': 0.04765240103006363, 'timestamp': '2025-09-10 02:41:45.367588', 'step': 13508, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 3322488817984}, 'timestamp': '2025-09-10 02:41:45.411395', 'step': 13508, 'epoch': 2} {'type': 'loss', 'content': 0.07272659987211227, 'timestamp': '2025-09-10 02:41:45.414277', 'step': 13509, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 128], 'flops': 3797092544000}, 'timestamp': '2025-09-10 02:41:45.449812', 'step': 13509, 'epoch': 2} {'type': 'loss', 'content': 0.07810205966234207, 'timestamp': '2025-09-10 02:41:45.452238', 'step': 13510, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 160], 'flops': 4746299996032}, 'timestamp': '2025-09-10 02:41:45.482575', 'step': 13510, 'epoch': 2} {'type': 'loss', 'content': 0.1659448891878128, 'timestamp': '2025-09-10 02:41:45.487067', 'step': 13511, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 3322488817984}, 'timestamp': '2025-09-10 02:41:45.521210', 'step': 13511, 'epoch': 2} {'type': 'loss', 'content': 0.06673900783061981, 'timestamp': '2025-09-10 02:41:45.545029', 'step': 13512, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 160], 'flops': 4746299996032}, 'timestamp': '2025-09-10 02:41:45.574912', 'step': 13512, 'epoch': 2} {'type': 'loss', 'content': 0.06321341544389725, 'timestamp': '2025-09-10 02:41:45.577252', 'step': 13513, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 2847885091968}, 'timestamp': '2025-09-10 02:41:45.608242', 'step': 13513, 'epoch': 2} {'type': 'loss', 'content': 0.04790296405553818, 'timestamp': '2025-09-10 02:41:45.610636', 'step': 13514, 'epoch': 2} {'type': 'flops', 'content': [{'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1582003754624}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1898404492032}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1898404492032}, {'type': 'perplexity', 'in_batch_dim': [8, 144], 'batch_size': 8, 'flops': 2847606704256}, {'type': 'perplexity', 'in_batch_dim': [8, 64], 'batch_size': 8, 'flops': 1265603017216}, {'type': 'perplexity', 'in_batch_dim': [8, 112], 'batch_size': 8, 'flops': 2214805229440}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1898404492032}, {'type': 'perplexity', 'in_batch_dim': [8, 112], 'batch_size': 8, 'flops': 2214805229440}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1898404492032}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1898404492032}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1898404492032}, {'type': 'perplexity', 'in_batch_dim': [8, 112], 'batch_size': 8, 'flops': 2214805229440}, {'type': 'perplexity', 'in_batch_dim': [8, 112], 'batch_size': 8, 'flops': 2214805229440}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1582003754624}, {'type': 'perplexity', 'in_batch_dim': [8, 144], 'batch_size': 8, 'flops': 2847606704256}, {'type': 'perplexity', 'in_batch_dim': [8, 112], 'batch_size': 8, 'flops': 2214805229440}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1582003754624}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1898404492032}, {'type': 'perplexity', 'in_batch_dim': [8, 64], 'batch_size': 8, 'flops': 1265603017216}, {'type': 'perplexity', 'in_batch_dim': [8, 64], 'batch_size': 8, 'flops': 1265603017216}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1898404492032}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1582003754624}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1582003754624}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1582003754624}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1582003754624}, {'type': 'perplexity', 'in_batch_dim': [8, 64], 'batch_size': 8, 'flops': 1265603017216}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1898404492032}, {'type': 'perplexity', 'in_batch_dim': [8, 64], 'batch_size': 8, 'flops': 1265603017216}, {'type': 'perplexity', 'in_batch_dim': [8, 64], 'batch_size': 8, 'flops': 1265603017216}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1582003754624}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1582003754624}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1582003754624}, {'type': 'perplexity', 'in_batch_dim': [8, 112], 'batch_size': 8, 'flops': 2214805229440}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1582003754624}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1582003754624}, {'type': 'perplexity', 'in_batch_dim': [8, 160], 'batch_size': 8, 'flops': 3164007441664}, {'type': 'perplexity', 'in_batch_dim': [8, 64], 'batch_size': 8, 'flops': 1265603017216}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1898404492032}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1898404492032}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1898404492032}, {'type': 'perplexity', 'in_batch_dim': [8, 64], 'batch_size': 8, 'flops': 1265603017216}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1582003754624}, {'type': 'perplexity', 'in_batch_dim': [8, 64], 'batch_size': 8, 'flops': 1265603017216}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1582003754624}, {'type': 'perplexity', 'in_batch_dim': [8, 64], 'batch_size': 8, 'flops': 1265603017216}, {'type': 'perplexity', 'in_batch_dim': [8, 112], 'batch_size': 8, 'flops': 2214805229440}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1582003754624}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1898404492032}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1898404492032}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1898404492032}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1898404492032}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1898404492032}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1898404492032}, {'type': 'perplexity', 'in_batch_dim': [8, 64], 'batch_size': 8, 'flops': 1265603017216}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1898404492032}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1582003754624}, {'type': 'perplexity', 'in_batch_dim': [8, 64], 'batch_size': 8, 'flops': 1265603017216}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1898404492032}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1898404492032}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1898404492032}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1898404492032}, {'type': 'perplexity', 'in_batch_dim': [8, 128], 'batch_size': 8, 'flops': 2531205966848}, {'type': 'perplexity', 'in_batch_dim': [8, 112], 'batch_size': 8, 'flops': 2214805229440}, {'type': 'perplexity', 'in_batch_dim': [8, 64], 'batch_size': 8, 'flops': 1265603017216}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1582003754624}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1898404492032}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1582003754624}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1898404492032}, {'type': 'perplexity', 'in_batch_dim': [8, 64], 'batch_size': 8, 'flops': 1265603017216}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1898404492032}, {'type': 'perplexity', 'in_batch_dim': [8, 112], 'batch_size': 8, 'flops': 2214805229440}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1898404492032}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1582003754624}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1582003754624}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1582003754624}, {'type': 'perplexity', 'in_batch_dim': [8, 64], 'batch_size': 8, 'flops': 1265603017216}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1582003754624}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1898404492032}, {'type': 'perplexity', 'in_batch_dim': [8, 64], 'batch_size': 8, 'flops': 1265603017216}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1582003754624}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1898404492032}, {'type': 'perplexity', 'in_batch_dim': [8, 64], 'batch_size': 8, 'flops': 1265603017216}, {'type': 'perplexity', 'in_batch_dim': [8, 112], 'batch_size': 8, 'flops': 2214805229440}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1582003754624}, {'type': 'perplexity', 'in_batch_dim': [8, 144], 'batch_size': 8, 'flops': 2847606704256}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1582003754624}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1582003754624}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1582003754624}, {'type': 'perplexity', 'in_batch_dim': [8, 64], 'batch_size': 8, 'flops': 1265603017216}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1898404492032}, {'type': 'perplexity', 'in_batch_dim': [8, 112], 'batch_size': 8, 'flops': 2214805229440}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1898404492032}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1582003754624}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1582003754624}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1582003754624}, {'type': 'perplexity', 'in_batch_dim': [8, 112], 'batch_size': 8, 'flops': 2214805229440}, {'type': 'perplexity', 'in_batch_dim': [8, 64], 'batch_size': 8, 'flops': 1265603017216}, {'type': 'perplexity', 'in_batch_dim': [8, 112], 'batch_size': 8, 'flops': 2214805229440}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1582003754624}, {'type': 'perplexity', 'in_batch_dim': [8, 64], 'batch_size': 8, 'flops': 1265603017216}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1582003754624}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1582003754624}, {'type': 'perplexity', 'in_batch_dim': [8, 64], 'batch_size': 8, 'flops': 1265603017216}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1582003754624}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1582003754624}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1582003754624}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1898404492032}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1582003754624}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1582003754624}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1898404492032}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1898404492032}, {'type': 'perplexity', 'in_batch_dim': [8, 112], 'batch_size': 8, 'flops': 2214805229440}, {'type': 'perplexity', 'in_batch_dim': [8, 64], 'batch_size': 8, 'flops': 1265603017216}, {'type': 'perplexity', 'in_batch_dim': [8, 112], 'batch_size': 8, 'flops': 2214805229440}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1898404492032}, {'type': 'perplexity', 'in_batch_dim': [8, 112], 'batch_size': 8, 'flops': 2214805229440}, {'type': 'perplexity', 'in_batch_dim': [8, 128], 'batch_size': 8, 'flops': 2531205966848}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1582003754624}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1898404492032}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1898404492032}, {'type': 'perplexity', 'in_batch_dim': [8, 192], 'batch_size': 8, 'flops': 3796808916480}, {'type': 'perplexity', 'in_batch_dim': [8, 64], 'batch_size': 8, 'flops': 1265603017216}, {'type': 'perplexity', 'in_batch_dim': [8, 144], 'batch_size': 8, 'flops': 2847606704256}, {'type': 'perplexity', 'in_batch_dim': [8, 64], 'batch_size': 8, 'flops': 1265603017216}, {'type': 'perplexity', 'in_batch_dim': [8, 144], 'batch_size': 8, 'flops': 2847606704256}, {'type': 'perplexity', 'in_batch_dim': [8, 64], 'batch_size': 8, 'flops': 1265603017216}, {'type': 'perplexity', 'in_batch_dim': [8, 64], 'batch_size': 8, 'flops': 1265603017216}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1898404492032}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1898404492032}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1582003754624}, {'type': 'perplexity', 'in_batch_dim': [8, 128], 'batch_size': 8, 'flops': 2531205966848}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1898404492032}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1898404492032}, {'type': 'perplexity', 'in_batch_dim': [8, 112], 'batch_size': 8, 'flops': 2214805229440}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1582003754624}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1582003754624}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1898404492032}, {'type': 'perplexity', 'in_batch_dim': [8, 64], 'batch_size': 8, 'flops': 1265603017216}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1898404492032}, {'type': 'perplexity', 'in_batch_dim': [8, 112], 'batch_size': 8, 'flops': 2214805229440}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1898404492032}, {'type': 'perplexity', 'in_batch_dim': [8, 64], 'batch_size': 8, 'flops': 1265603017216}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1582003754624}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1898404492032}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1898404492032}, {'type': 'perplexity', 'in_batch_dim': [8, 64], 'batch_size': 8, 'flops': 1265603017216}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1898404492032}, {'type': 'perplexity', 'in_batch_dim': [8, 64], 'batch_size': 8, 'flops': 1265603017216}, {'type': 'perplexity', 'in_batch_dim': [8, 112], 'batch_size': 8, 'flops': 2214805229440}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1582003754624}, {'type': 'perplexity', 'in_batch_dim': [8, 128], 'batch_size': 8, 'flops': 2531205966848}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1582003754624}, {'type': 'perplexity', 'in_batch_dim': [8, 112], 'batch_size': 8, 'flops': 2214805229440}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1898404492032}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1582003754624}, {'type': 'perplexity', 'in_batch_dim': [8, 112], 'batch_size': 8, 'flops': 2214805229440}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1582003754624}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1898404492032}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1898404492032}, {'type': 'perplexity', 'in_batch_dim': [8, 128], 'batch_size': 8, 'flops': 2531205966848}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1898404492032}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1898404492032}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1582003754624}, {'type': 'perplexity', 'in_batch_dim': [8, 112], 'batch_size': 8, 'flops': 2214805229440}, {'type': 'perplexity', 'in_batch_dim': [8, 128], 'batch_size': 8, 'flops': 2531205966848}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1898404492032}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1582003754624}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1582003754624}, {'type': 'perplexity', 'in_batch_dim': [8, 128], 'batch_size': 8, 'flops': 2531205966848}, {'type': 'perplexity', 'in_batch_dim': [8, 112], 'batch_size': 8, 'flops': 2214805229440}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1582003754624}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1582003754624}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1898404492032}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1582003754624}, {'type': 'perplexity', 'in_batch_dim': [8, 64], 'batch_size': 8, 'flops': 1265603017216}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1582003754624}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1582003754624}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1582003754624}, {'type': 'perplexity', 'in_batch_dim': [8, 112], 'batch_size': 8, 'flops': 2214805229440}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1582003754624}, {'type': 'perplexity', 'in_batch_dim': [8, 64], 'batch_size': 8, 'flops': 1265603017216}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1582003754624}, {'type': 'perplexity', 'in_batch_dim': [8, 128], 'batch_size': 8, 'flops': 2531205966848}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1898404492032}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1898404492032}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1582003754624}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1898404492032}, {'type': 'perplexity', 'in_batch_dim': [8, 64], 'batch_size': 8, 'flops': 1265603017216}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1898404492032}, {'type': 'perplexity', 'in_batch_dim': [8, 112], 'batch_size': 8, 'flops': 2214805229440}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1898404492032}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1582003754624}, {'type': 'perplexity', 'in_batch_dim': [8, 112], 'batch_size': 8, 'flops': 2214805229440}, {'type': 'perplexity', 'in_batch_dim': [8, 112], 'batch_size': 8, 'flops': 2214805229440}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1898404492032}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1898404492032}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1898404492032}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1898404492032}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1582003754624}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1582003754624}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1582003754624}, {'type': 'perplexity', 'in_batch_dim': [8, 112], 'batch_size': 8, 'flops': 2214805229440}, {'type': 'perplexity', 'in_batch_dim': [8, 64], 'batch_size': 8, 'flops': 1265603017216}, {'type': 'perplexity', 'in_batch_dim': [8, 144], 'batch_size': 8, 'flops': 2847606704256}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1898404492032}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1898404492032}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1898404492032}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1898404492032}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1898404492032}, {'type': 'perplexity', 'in_batch_dim': [8, 112], 'batch_size': 8, 'flops': 2214805229440}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1582003754624}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1582003754624}, {'type': 'perplexity', 'in_batch_dim': [8, 64], 'batch_size': 8, 'flops': 1265603017216}, {'type': 'perplexity', 'in_batch_dim': [8, 112], 'batch_size': 8, 'flops': 2214805229440}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1582003754624}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1898404492032}, {'type': 'perplexity', 'in_batch_dim': [8, 64], 'batch_size': 8, 'flops': 1265603017216}, {'type': 'perplexity', 'in_batch_dim': [8, 112], 'batch_size': 8, 'flops': 2214805229440}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1898404492032}, {'type': 'perplexity', 'in_batch_dim': [8, 64], 'batch_size': 8, 'flops': 1265603017216}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1582003754624}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1582003754624}, {'type': 'perplexity', 'in_batch_dim': [8, 64], 'batch_size': 8, 'flops': 1265603017216}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1898404492032}, {'type': 'perplexity', 'in_batch_dim': [8, 64], 'batch_size': 8, 'flops': 1265603017216}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1582003754624}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1898404492032}, {'type': 'perplexity', 'in_batch_dim': [8, 112], 'batch_size': 8, 'flops': 2214805229440}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1582003754624}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1898404492032}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1582003754624}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1898404492032}, {'type': 'perplexity', 'in_batch_dim': [8, 112], 'batch_size': 8, 'flops': 2214805229440}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1898404492032}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1898404492032}, {'type': 'perplexity', 'in_batch_dim': [8, 64], 'batch_size': 8, 'flops': 1265603017216}, {'type': 'perplexity', 'in_batch_dim': [8, 64], 'batch_size': 8, 'flops': 1265603017216}, {'type': 'perplexity', 'in_batch_dim': [8, 128], 'batch_size': 8, 'flops': 2531205966848}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1582003754624}, {'type': 'perplexity', 'in_batch_dim': [8, 64], 'batch_size': 8, 'flops': 1265603017216}, {'type': 'perplexity', 'in_batch_dim': [8, 112], 'batch_size': 8, 'flops': 2214805229440}, {'type': 'perplexity', 'in_batch_dim': [8, 112], 'batch_size': 8, 'flops': 2214805229440}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1898404492032}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1582003754624}, {'type': 'perplexity', 'in_batch_dim': [8, 128], 'batch_size': 8, 'flops': 2531205966848}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1898404492032}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1582003754624}, {'type': 'perplexity', 'in_batch_dim': [8, 112], 'batch_size': 8, 'flops': 2214805229440}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1898404492032}, {'type': 'perplexity', 'in_batch_dim': [8, 64], 'batch_size': 8, 'flops': 1265603017216}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1582003754624}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1898404492032}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1898404492032}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1582003754624}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1582003754624}, {'type': 'perplexity', 'in_batch_dim': [8, 64], 'batch_size': 8, 'flops': 1265603017216}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1898404492032}, {'type': 'perplexity', 'in_batch_dim': [8, 112], 'batch_size': 8, 'flops': 2214805229440}, {'type': 'perplexity', 'in_batch_dim': [8, 64], 'batch_size': 8, 'flops': 1265603017216}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1898404492032}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1582003754624}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1582003754624}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1898404492032}, {'type': 'perplexity', 'in_batch_dim': [8, 112], 'batch_size': 8, 'flops': 2214805229440}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1898404492032}, {'type': 'perplexity', 'in_batch_dim': [8, 112], 'batch_size': 8, 'flops': 2214805229440}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1582003754624}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1582003754624}, {'type': 'perplexity', 'in_batch_dim': [8, 64], 'batch_size': 8, 'flops': 1265603017216}, {'type': 'perplexity', 'in_batch_dim': [8, 112], 'batch_size': 8, 'flops': 2214805229440}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1582003754624}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1898404492032}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1582003754624}, {'type': 'perplexity', 'in_batch_dim': [8, 112], 'batch_size': 8, 'flops': 2214805229440}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1898404492032}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1898404492032}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1582003754624}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1898404492032}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1582003754624}, {'type': 'perplexity', 'in_batch_dim': [8, 112], 'batch_size': 8, 'flops': 2214805229440}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1582003754624}, {'type': 'perplexity', 'in_batch_dim': [8, 64], 'batch_size': 8, 'flops': 1265603017216}, {'type': 'perplexity', 'in_batch_dim': [8, 64], 'batch_size': 8, 'flops': 1265603017216}, {'type': 'perplexity', 'in_batch_dim': [8, 48], 'batch_size': 8, 'flops': 949202279808}, {'type': 'perplexity', 'in_batch_dim': [8, 112], 'batch_size': 8, 'flops': 2214805229440}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1582003754624}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1898404492032}, {'type': 'perplexity', 'in_batch_dim': [8, 128], 'batch_size': 8, 'flops': 2531205966848}, {'type': 'perplexity', 'in_batch_dim': [8, 112], 'batch_size': 8, 'flops': 2214805229440}, {'type': 'perplexity', 'in_batch_dim': [8, 128], 'batch_size': 8, 'flops': 2531205966848}, {'type': 'perplexity', 'in_batch_dim': [8, 112], 'batch_size': 8, 'flops': 2214805229440}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1582003754624}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1898404492032}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1582003754624}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1582003754624}, {'type': 'perplexity', 'in_batch_dim': [8, 112], 'batch_size': 8, 'flops': 2214805229440}, {'type': 'perplexity', 'in_batch_dim': [8, 112], 'batch_size': 8, 'flops': 2214805229440}, {'type': 'perplexity', 'in_batch_dim': [8, 64], 'batch_size': 8, 'flops': 1265603017216}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1898404492032}, {'type': 'perplexity', 'in_batch_dim': [8, 112], 'batch_size': 8, 'flops': 2214805229440}, {'type': 'perplexity', 'in_batch_dim': [8, 64], 'batch_size': 8, 'flops': 1265603017216}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1898404492032}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1582003754624}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1898404492032}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1898404492032}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1898404492032}, {'type': 'perplexity', 'in_batch_dim': [8, 48], 'batch_size': 8, 'flops': 949202279808}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1898404492032}, {'type': 'perplexity', 'in_batch_dim': [8, 112], 'batch_size': 8, 'flops': 2214805229440}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1582003754624}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1898404492032}, {'type': 'perplexity', 'in_batch_dim': [8, 64], 'batch_size': 8, 'flops': 1265603017216}, {'type': 'perplexity', 'in_batch_dim': [8, 112], 'batch_size': 8, 'flops': 2214805229440}, {'type': 'perplexity', 'in_batch_dim': [8, 64], 'batch_size': 8, 'flops': 1265603017216}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1582003754624}, {'type': 'perplexity', 'in_batch_dim': [8, 64], 'batch_size': 8, 'flops': 1265603017216}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1582003754624}, {'type': 'perplexity', 'in_batch_dim': [8, 112], 'batch_size': 8, 'flops': 2214805229440}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1582003754624}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1582003754624}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1582003754624}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1582003754624}, {'type': 'perplexity', 'in_batch_dim': [8, 112], 'batch_size': 8, 'flops': 2214805229440}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1582003754624}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1898404492032}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1898404492032}, {'type': 'perplexity', 'in_batch_dim': [8, 112], 'batch_size': 8, 'flops': 2214805229440}, {'type': 'perplexity', 'in_batch_dim': [8, 144], 'batch_size': 8, 'flops': 2847606704256}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1582003754624}, {'type': 'perplexity', 'in_batch_dim': [8, 64], 'batch_size': 8, 'flops': 1265603017216}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1898404492032}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1898404492032}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1582003754624}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1898404492032}, {'type': 'perplexity', 'in_batch_dim': [8, 128], 'batch_size': 8, 'flops': 2531205966848}, {'type': 'perplexity', 'in_batch_dim': [8, 112], 'batch_size': 8, 'flops': 2214805229440}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1898404492032}, {'type': 'perplexity', 'in_batch_dim': [8, 64], 'batch_size': 8, 'flops': 1265603017216}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1898404492032}, {'type': 'perplexity', 'in_batch_dim': [8, 160], 'batch_size': 8, 'flops': 3164007441664}, {'type': 'perplexity', 'in_batch_dim': [8, 64], 'batch_size': 8, 'flops': 1265603017216}, {'type': 'perplexity', 'in_batch_dim': [8, 112], 'batch_size': 8, 'flops': 2214805229440}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1898404492032}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1898404492032}, {'type': 'perplexity', 'in_batch_dim': [8, 144], 'batch_size': 8, 'flops': 2847606704256}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1582003754624}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1898404492032}, {'type': 'perplexity', 'in_batch_dim': [8, 64], 'batch_size': 8, 'flops': 1265603017216}, {'type': 'perplexity', 'in_batch_dim': [8, 64], 'batch_size': 8, 'flops': 1265603017216}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1898404492032}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1898404492032}, {'type': 'perplexity', 'in_batch_dim': [8, 112], 'batch_size': 8, 'flops': 2214805229440}, {'type': 'perplexity', 'in_batch_dim': [8, 112], 'batch_size': 8, 'flops': 2214805229440}, {'type': 'perplexity', 'in_batch_dim': [8, 128], 'batch_size': 8, 'flops': 2531205966848}, {'type': 'perplexity', 'in_batch_dim': [8, 112], 'batch_size': 8, 'flops': 2214805229440}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1582003754624}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1582003754624}, {'type': 'perplexity', 'in_batch_dim': [8, 112], 'batch_size': 8, 'flops': 2214805229440}, {'type': 'perplexity', 'in_batch_dim': [8, 128], 'batch_size': 8, 'flops': 2531205966848}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1582003754624}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1582003754624}, {'type': 'perplexity', 'in_batch_dim': [8, 64], 'batch_size': 8, 'flops': 1265603017216}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1898404492032}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1582003754624}, {'type': 'perplexity', 'in_batch_dim': [8, 112], 'batch_size': 8, 'flops': 2214805229440}, {'type': 'perplexity', 'in_batch_dim': [8, 64], 'batch_size': 8, 'flops': 1265603017216}, {'type': 'perplexity', 'in_batch_dim': [8, 64], 'batch_size': 8, 'flops': 1265603017216}, {'type': 'perplexity', 'in_batch_dim': [8, 128], 'batch_size': 8, 'flops': 2531205966848}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1898404492032}, {'type': 'perplexity', 'in_batch_dim': [8, 112], 'batch_size': 8, 'flops': 2214805229440}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1582003754624}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1582003754624}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1582003754624}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1582003754624}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1582003754624}, {'type': 'perplexity', 'in_batch_dim': [8, 112], 'batch_size': 8, 'flops': 2214805229440}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1898404492032}, {'type': 'perplexity', 'in_batch_dim': [8, 112], 'batch_size': 8, 'flops': 2214805229440}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1898404492032}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1582003754624}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1582003754624}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1898404492032}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1898404492032}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1582003754624}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1582003754624}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1582003754624}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1582003754624}, {'type': 'perplexity', 'in_batch_dim': [8, 48], 'batch_size': 8, 'flops': 949202279808}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1582003754624}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1898404492032}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1582003754624}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1898404492032}, {'type': 'perplexity', 'in_batch_dim': [8, 112], 'batch_size': 8, 'flops': 2214805229440}, {'type': 'perplexity', 'in_batch_dim': [8, 128], 'batch_size': 8, 'flops': 2531205966848}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1898404492032}, {'type': 'perplexity', 'in_batch_dim': [8, 64], 'batch_size': 8, 'flops': 1265603017216}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1898404492032}, {'type': 'perplexity', 'in_batch_dim': [8, 64], 'batch_size': 8, 'flops': 1265603017216}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1582003754624}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1582003754624}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1582003754624}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1582003754624}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1898404492032}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1582003754624}, {'type': 'perplexity', 'in_batch_dim': [8, 112], 'batch_size': 8, 'flops': 2214805229440}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1898404492032}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1582003754624}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1898404492032}, {'type': 'perplexity', 'in_batch_dim': [8, 64], 'batch_size': 8, 'flops': 1265603017216}, {'type': 'perplexity', 'in_batch_dim': [8, 64], 'batch_size': 8, 'flops': 1265603017216}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1898404492032}, {'type': 'perplexity', 'in_batch_dim': [8, 64], 'batch_size': 8, 'flops': 1265603017216}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1582003754624}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1582003754624}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1582003754624}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1582003754624}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1582003754624}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1582003754624}, {'type': 'perplexity', 'in_batch_dim': [8, 64], 'batch_size': 8, 'flops': 1265603017216}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1582003754624}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1582003754624}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1898404492032}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1898404492032}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1898404492032}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1898404492032}, {'type': 'perplexity', 'in_batch_dim': [8, 128], 'batch_size': 8, 'flops': 2531205966848}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1582003754624}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1898404492032}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1898404492032}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1898404492032}, {'type': 'perplexity', 'in_batch_dim': [8, 64], 'batch_size': 8, 'flops': 1265603017216}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1898404492032}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1898404492032}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1582003754624}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1898404492032}, {'type': 'perplexity', 'in_batch_dim': [8, 112], 'batch_size': 8, 'flops': 2214805229440}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1898404492032}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1582003754624}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1898404492032}, {'type': 'perplexity', 'in_batch_dim': [8, 64], 'batch_size': 8, 'flops': 1265603017216}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1898404492032}, {'type': 'perplexity', 'in_batch_dim': [8, 112], 'batch_size': 8, 'flops': 2214805229440}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1898404492032}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1898404492032}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1582003754624}, {'type': 'perplexity', 'in_batch_dim': [8, 64], 'batch_size': 8, 'flops': 1265603017216}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1582003754624}, {'type': 'perplexity', 'in_batch_dim': [8, 128], 'batch_size': 8, 'flops': 2531205966848}, {'type': 'perplexity', 'in_batch_dim': [8, 64], 'batch_size': 8, 'flops': 1265603017216}, {'type': 'perplexity', 'in_batch_dim': [8, 128], 'batch_size': 8, 'flops': 2531205966848}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1898404492032}, {'type': 'perplexity', 'in_batch_dim': [8, 112], 'batch_size': 8, 'flops': 2214805229440}, {'type': 'perplexity', 'in_batch_dim': [8, 112], 'batch_size': 8, 'flops': 2214805229440}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1898404492032}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1898404492032}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1582003754624}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1582003754624}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1582003754624}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1898404492032}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1582003754624}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1582003754624}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1898404492032}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1582003754624}, {'type': 'perplexity', 'in_batch_dim': [8, 64], 'batch_size': 8, 'flops': 1265603017216}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1898404492032}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1898404492032}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1898404492032}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1898404492032}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1582003754624}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1898404492032}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1582003754624}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1582003754624}, {'type': 'perplexity', 'in_batch_dim': [8, 112], 'batch_size': 8, 'flops': 2214805229440}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1582003754624}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1898404492032}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1582003754624}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1582003754624}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1582003754624}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1898404492032}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1582003754624}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1898404492032}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1582003754624}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1582003754624}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1898404492032}, {'type': 'perplexity', 'in_batch_dim': [8, 112], 'batch_size': 8, 'flops': 2214805229440}, {'type': 'perplexity', 'in_batch_dim': [8, 128], 'batch_size': 8, 'flops': 2531205966848}, {'type': 'perplexity', 'in_batch_dim': [8, 112], 'batch_size': 8, 'flops': 2214805229440}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1582003754624}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1898404492032}, {'type': 'perplexity', 'in_batch_dim': [8, 64], 'batch_size': 8, 'flops': 1265603017216}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1898404492032}, {'type': 'perplexity', 'in_batch_dim': [8, 64], 'batch_size': 8, 'flops': 1265603017216}, {'type': 'perplexity', 'in_batch_dim': [8, 144], 'batch_size': 8, 'flops': 2847606704256}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1582003754624}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1898404492032}, {'type': 'perplexity', 'in_batch_dim': [8, 112], 'batch_size': 8, 'flops': 2214805229440}, {'type': 'perplexity', 'in_batch_dim': [8, 144], 'batch_size': 8, 'flops': 2847606704256}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1582003754624}, {'type': 'perplexity', 'in_batch_dim': [8, 112], 'batch_size': 8, 'flops': 2214805229440}, {'type': 'perplexity', 'in_batch_dim': [8, 64], 'batch_size': 8, 'flops': 1265603017216}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1582003754624}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1898404492032}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1898404492032}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1582003754624}, {'type': 'perplexity', 'in_batch_dim': [8, 112], 'batch_size': 8, 'flops': 2214805229440}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1582003754624}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1582003754624}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1582003754624}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1582003754624}, {'type': 'perplexity', 'in_batch_dim': [8, 112], 'batch_size': 8, 'flops': 2214805229440}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1898404492032}, {'type': 'perplexity', 'in_batch_dim': [8, 128], 'batch_size': 8, 'flops': 2531205966848}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1582003754624}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1582003754624}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1898404492032}, {'type': 'perplexity', 'in_batch_dim': [8, 64], 'batch_size': 8, 'flops': 1265603017216}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1898404492032}, {'type': 'perplexity', 'in_batch_dim': [8, 64], 'batch_size': 8, 'flops': 1265603017216}, {'type': 'perplexity', 'in_batch_dim': [8, 112], 'batch_size': 8, 'flops': 2214805229440}, {'type': 'perplexity', 'in_batch_dim': [8, 112], 'batch_size': 8, 'flops': 2214805229440}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1582003754624}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1898404492032}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1582003754624}, {'type': 'perplexity', 'in_batch_dim': [8, 64], 'batch_size': 8, 'flops': 1265603017216}, {'type': 'perplexity', 'in_batch_dim': [8, 64], 'batch_size': 8, 'flops': 1265603017216}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1898404492032}, {'type': 'perplexity', 'in_batch_dim': [8, 64], 'batch_size': 8, 'flops': 1265603017216}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1898404492032}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1898404492032}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1582003754624}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1898404492032}, {'type': 'perplexity', 'in_batch_dim': [8, 144], 'batch_size': 8, 'flops': 2847606704256}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1898404492032}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1582003754624}, {'type': 'perplexity', 'in_batch_dim': [8, 128], 'batch_size': 8, 'flops': 2531205966848}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1898404492032}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1898404492032}, {'type': 'perplexity', 'in_batch_dim': [8, 112], 'batch_size': 8, 'flops': 2214805229440}, {'type': 'perplexity', 'in_batch_dim': [8, 112], 'batch_size': 8, 'flops': 2214805229440}, {'type': 'perplexity', 'in_batch_dim': [8, 64], 'batch_size': 8, 'flops': 1265603017216}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1582003754624}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1898404492032}, {'type': 'perplexity', 'in_batch_dim': [8, 128], 'batch_size': 8, 'flops': 2531205966848}, {'type': 'perplexity', 'in_batch_dim': [8, 144], 'batch_size': 8, 'flops': 2847606704256}, {'type': 'perplexity', 'in_batch_dim': [8, 112], 'batch_size': 8, 'flops': 2214805229440}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1582003754624}, {'type': 'perplexity', 'in_batch_dim': [8, 64], 'batch_size': 8, 'flops': 1265603017216}, {'type': 'perplexity', 'in_batch_dim': [8, 64], 'batch_size': 8, 'flops': 1265603017216}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1898404492032}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1582003754624}, {'type': 'perplexity', 'in_batch_dim': [8, 64], 'batch_size': 8, 'flops': 1265603017216}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1898404492032}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1582003754624}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1898404492032}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1898404492032}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1582003754624}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1582003754624}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1582003754624}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1582003754624}, {'type': 'perplexity', 'in_batch_dim': [3, 48], 'batch_size': 8, 'flops': 949202279808}], 'timestamp': '2025-09-10 02:41:53.905018', 'step': 13514, 'epoch': 2} {'type': 'pplx', 'content': 9497.580788752446, 'timestamp': '2025-09-10 02:41:53.908358', 'step': 13514, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 128], 'flops': 3797092544000}, 'timestamp': '2025-09-10 02:41:53.938911', 'step': 13514, 'epoch': 2} {'type': 'loss', 'content': 0.15696854889392853, 'timestamp': '2025-09-10 02:41:53.941388', 'step': 13515, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 2847885091968}, 'timestamp': '2025-09-10 02:41:53.972117', 'step': 13515, 'epoch': 2} {'type': 'loss', 'content': 0.11981809884309769, 'timestamp': '2025-09-10 02:41:53.995876', 'step': 13516, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 2847885091968}, 'timestamp': '2025-09-10 02:41:54.026969', 'step': 13516, 'epoch': 2} {'type': 'loss', 'content': 0.056378401815891266, 'timestamp': '2025-09-10 02:41:54.029617', 'step': 13517, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 3322488817984}, 'timestamp': '2025-09-10 02:41:54.059893', 'step': 13517, 'epoch': 2} {'type': 'loss', 'content': 0.10962118953466415, 'timestamp': '2025-09-10 02:41:54.062378', 'step': 13518, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 128], 'flops': 3797092544000}, 'timestamp': '2025-09-10 02:41:54.094076', 'step': 13518, 'epoch': 2} {'type': 'loss', 'content': 0.21228235960006714, 'timestamp': '2025-09-10 02:41:54.096824', 'step': 13519, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 2847885091968}, 'timestamp': '2025-09-10 02:41:54.128572', 'step': 13519, 'epoch': 2} {'type': 'loss', 'content': 0.12747898697853088, 'timestamp': '2025-09-10 02:41:54.152522', 'step': 13520, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 128], 'flops': 3797092544000}, 'timestamp': '2025-09-10 02:41:54.183217', 'step': 13520, 'epoch': 2} {'type': 'loss', 'content': 0.12771639227867126, 'timestamp': '2025-09-10 02:41:54.185776', 'step': 13521, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 144], 'flops': 4271696270016}, 'timestamp': '2025-09-10 02:41:54.215511', 'step': 13521, 'epoch': 2} {'type': 'loss', 'content': 0.14998596906661987, 'timestamp': '2025-09-10 02:41:54.217988', 'step': 13522, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 128], 'flops': 3797092544000}, 'timestamp': '2025-09-10 02:41:54.248090', 'step': 13522, 'epoch': 2} {'type': 'loss', 'content': 0.02844952791929245, 'timestamp': '2025-09-10 02:41:54.250514', 'step': 13523, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 2847885091968}, 'timestamp': '2025-09-10 02:41:54.281247', 'step': 13523, 'epoch': 2} {'type': 'loss', 'content': 0.05134234577417374, 'timestamp': '2025-09-10 02:41:54.304625', 'step': 13524, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 128], 'flops': 3797092544000}, 'timestamp': '2025-09-10 02:41:54.336078', 'step': 13524, 'epoch': 2} {'type': 'loss', 'content': 0.13168591260910034, 'timestamp': '2025-09-10 02:41:54.338743', 'step': 13525, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 2847885091968}, 'timestamp': '2025-09-10 02:41:54.370929', 'step': 13525, 'epoch': 2} {'type': 'loss', 'content': 0.042989518493413925, 'timestamp': '2025-09-10 02:41:54.373841', 'step': 13526, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 2847885091968}, 'timestamp': '2025-09-10 02:41:54.404197', 'step': 13526, 'epoch': 2} {'type': 'loss', 'content': 0.14105224609375, 'timestamp': '2025-09-10 02:41:54.406765', 'step': 13527, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 128], 'flops': 3797092544000}, 'timestamp': '2025-09-10 02:41:54.438079', 'step': 13527, 'epoch': 2} {'type': 'loss', 'content': 0.09808475524187088, 'timestamp': '2025-09-10 02:41:54.461717', 'step': 13528, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 160], 'flops': 4746299996032}, 'timestamp': '2025-09-10 02:41:54.492489', 'step': 13528, 'epoch': 2} {'type': 'loss', 'content': 0.12480189651250839, 'timestamp': '2025-09-10 02:41:54.494974', 'step': 13529, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 128], 'flops': 3797092544000}, 'timestamp': '2025-09-10 02:41:54.525885', 'step': 13529, 'epoch': 2} {'type': 'loss', 'content': 0.08613890409469604, 'timestamp': '2025-09-10 02:41:54.529096', 'step': 13530, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 128], 'flops': 3797092544000}, 'timestamp': '2025-09-10 02:41:54.559512', 'step': 13530, 'epoch': 2} {'type': 'loss', 'content': 0.15925107896327972, 'timestamp': '2025-09-10 02:41:54.561731', 'step': 13531, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 3322488817984}, 'timestamp': '2025-09-10 02:41:54.592591', 'step': 13531, 'epoch': 2} {'type': 'loss', 'content': 0.07083083689212799, 'timestamp': '2025-09-10 02:41:54.616221', 'step': 13532, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 2847885091968}, 'timestamp': '2025-09-10 02:41:54.648301', 'step': 13532, 'epoch': 2} {'type': 'loss', 'content': 0.12349262833595276, 'timestamp': '2025-09-10 02:41:54.650536', 'step': 13533, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 3322488817984}, 'timestamp': '2025-09-10 02:41:54.680819', 'step': 13533, 'epoch': 2} {'type': 'loss', 'content': 0.10182517766952515, 'timestamp': '2025-09-10 02:41:54.683260', 'step': 13534, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 3322488817984}, 'timestamp': '2025-09-10 02:41:54.713573', 'step': 13534, 'epoch': 2} {'type': 'loss', 'content': 0.10307428985834122, 'timestamp': '2025-09-10 02:41:54.715927', 'step': 13535, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 160], 'flops': 4746299996032}, 'timestamp': '2025-09-10 02:41:54.762921', 'step': 13535, 'epoch': 2} {'type': 'loss', 'content': 0.1338232010602951, 'timestamp': '2025-09-10 02:41:54.786564', 'step': 13536, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 2847885091968}, 'timestamp': '2025-09-10 02:41:54.816824', 'step': 13536, 'epoch': 2} {'type': 'loss', 'content': 0.03642471879720688, 'timestamp': '2025-09-10 02:41:54.819359', 'step': 13537, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 3322488817984}, 'timestamp': '2025-09-10 02:41:54.850439', 'step': 13537, 'epoch': 2} {'type': 'loss', 'content': 0.06750109046697617, 'timestamp': '2025-09-10 02:41:54.853029', 'step': 13538, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 3322488817984}, 'timestamp': '2025-09-10 02:41:54.882817', 'step': 13538, 'epoch': 2} {'type': 'loss', 'content': 0.09638553857803345, 'timestamp': '2025-09-10 02:41:54.886927', 'step': 13539, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 3322488817984}, 'timestamp': '2025-09-10 02:41:54.918543', 'step': 13539, 'epoch': 2} {'type': 'loss', 'content': 0.10666268318891525, 'timestamp': '2025-09-10 02:41:54.942264', 'step': 13540, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 144], 'flops': 4271696270016}, 'timestamp': '2025-09-10 02:41:54.974189', 'step': 13540, 'epoch': 2} {'type': 'loss', 'content': 0.19397631287574768, 'timestamp': '2025-09-10 02:41:54.976529', 'step': 13541, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 128], 'flops': 3797092544000}, 'timestamp': '2025-09-10 02:41:55.006598', 'step': 13541, 'epoch': 2} {'type': 'loss', 'content': 0.07138703018426895, 'timestamp': '2025-09-10 02:41:55.009236', 'step': 13542, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 128], 'flops': 3797092544000}, 'timestamp': '2025-09-10 02:41:55.039434', 'step': 13542, 'epoch': 2} {'type': 'loss', 'content': 0.13350529968738556, 'timestamp': '2025-09-10 02:41:55.041855', 'step': 13543, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 3322488817984}, 'timestamp': '2025-09-10 02:41:55.074729', 'step': 13543, 'epoch': 2} {'type': 'loss', 'content': 0.10222823172807693, 'timestamp': '2025-09-10 02:41:55.098636', 'step': 13544, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 128], 'flops': 3797092544000}, 'timestamp': '2025-09-10 02:41:55.130186', 'step': 13544, 'epoch': 2} {'type': 'loss', 'content': 0.13050296902656555, 'timestamp': '2025-09-10 02:41:55.132568', 'step': 13545, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 3322488817984}, 'timestamp': '2025-09-10 02:41:55.163227', 'step': 13545, 'epoch': 2} {'type': 'loss', 'content': 0.13414253294467926, 'timestamp': '2025-09-10 02:41:55.166873', 'step': 13546, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 128], 'flops': 3797092544000}, 'timestamp': '2025-09-10 02:41:55.199010', 'step': 13546, 'epoch': 2} {'type': 'loss', 'content': 0.19777609407901764, 'timestamp': '2025-09-10 02:41:55.201259', 'step': 13547, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 2847885091968}, 'timestamp': '2025-09-10 02:41:55.231666', 'step': 13547, 'epoch': 2} {'type': 'loss', 'content': 0.15462443232536316, 'timestamp': '2025-09-10 02:41:55.254975', 'step': 13548, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 3322488817984}, 'timestamp': '2025-09-10 02:41:55.285552', 'step': 13548, 'epoch': 2} {'type': 'loss', 'content': 0.04626927524805069, 'timestamp': '2025-09-10 02:41:55.287998', 'step': 13549, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 3322488817984}, 'timestamp': '2025-09-10 02:41:55.318930', 'step': 13549, 'epoch': 2} {'type': 'loss', 'content': 0.08204786479473114, 'timestamp': '2025-09-10 02:41:55.321527', 'step': 13550, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 2847885091968}, 'timestamp': '2025-09-10 02:41:55.352841', 'step': 13550, 'epoch': 2} {'type': 'loss', 'content': 0.10443995893001556, 'timestamp': '2025-09-10 02:41:55.356332', 'step': 13551, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 128], 'flops': 3797092544000}, 'timestamp': '2025-09-10 02:41:55.386408', 'step': 13551, 'epoch': 2} {'type': 'loss', 'content': 0.12053011357784271, 'timestamp': '2025-09-10 02:41:55.410828', 'step': 13552, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 3322488817984}, 'timestamp': '2025-09-10 02:41:55.441197', 'step': 13552, 'epoch': 2} {'type': 'loss', 'content': 0.11528786271810532, 'timestamp': '2025-09-10 02:41:55.443442', 'step': 13553, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 3322488817984}, 'timestamp': '2025-09-10 02:41:55.474045', 'step': 13553, 'epoch': 2} {'type': 'loss', 'content': 0.15504498779773712, 'timestamp': '2025-09-10 02:41:55.482435', 'step': 13554, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 3322488817984}, 'timestamp': '2025-09-10 02:41:55.517028', 'step': 13554, 'epoch': 2} {'type': 'loss', 'content': 0.16399283707141876, 'timestamp': '2025-09-10 02:41:55.519728', 'step': 13555, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 3322488817984}, 'timestamp': '2025-09-10 02:41:55.550256', 'step': 13555, 'epoch': 2} {'type': 'loss', 'content': 0.13632141053676605, 'timestamp': '2025-09-10 02:41:55.573930', 'step': 13556, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 2847885091968}, 'timestamp': '2025-09-10 02:41:55.603880', 'step': 13556, 'epoch': 2} {'type': 'loss', 'content': 0.1023382917046547, 'timestamp': '2025-09-10 02:41:55.606485', 'step': 13557, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 3322488817984}, 'timestamp': '2025-09-10 02:41:55.636772', 'step': 13557, 'epoch': 2} {'type': 'loss', 'content': 0.09733119606971741, 'timestamp': '2025-09-10 02:41:55.639389', 'step': 13558, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 2847885091968}, 'timestamp': '2025-09-10 02:41:55.669672', 'step': 13558, 'epoch': 2} {'type': 'loss', 'content': 0.0971747636795044, 'timestamp': '2025-09-10 02:41:55.672070', 'step': 13559, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 144], 'flops': 4271696270016}, 'timestamp': '2025-09-10 02:41:55.702407', 'step': 13559, 'epoch': 2} {'type': 'loss', 'content': 0.0716276690363884, 'timestamp': '2025-09-10 02:41:55.726710', 'step': 13560, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 2847885091968}, 'timestamp': '2025-09-10 02:41:55.757821', 'step': 13560, 'epoch': 2} {'type': 'loss', 'content': 0.08344651758670807, 'timestamp': '2025-09-10 02:41:55.761910', 'step': 13561, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 3322488817984}, 'timestamp': '2025-09-10 02:41:55.792873', 'step': 13561, 'epoch': 2} {'type': 'loss', 'content': 0.045056700706481934, 'timestamp': '2025-09-10 02:41:55.794884', 'step': 13562, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 3322488817984}, 'timestamp': '2025-09-10 02:41:55.825054', 'step': 13562, 'epoch': 2} {'type': 'loss', 'content': 0.12401862442493439, 'timestamp': '2025-09-10 02:41:55.827326', 'step': 13563, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 80], 'flops': 2373281365952}, 'timestamp': '2025-09-10 02:41:55.857324', 'step': 13563, 'epoch': 2} {'type': 'loss', 'content': 0.1241583377122879, 'timestamp': '2025-09-10 02:41:55.882629', 'step': 13564, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 128], 'flops': 3797092544000}, 'timestamp': '2025-09-10 02:41:55.913708', 'step': 13564, 'epoch': 2} {'type': 'loss', 'content': 0.09900685399770737, 'timestamp': '2025-09-10 02:41:55.916355', 'step': 13565, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 128], 'flops': 3797092544000}, 'timestamp': '2025-09-10 02:41:55.947363', 'step': 13565, 'epoch': 2} {'type': 'loss', 'content': 0.10193932801485062, 'timestamp': '2025-09-10 02:41:55.949868', 'step': 13566, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 128], 'flops': 3797092544000}, 'timestamp': '2025-09-10 02:41:55.980122', 'step': 13566, 'epoch': 2} {'type': 'loss', 'content': 0.18199823796749115, 'timestamp': '2025-09-10 02:41:55.982527', 'step': 13567, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 2847885091968}, 'timestamp': '2025-09-10 02:41:56.012718', 'step': 13567, 'epoch': 2} {'type': 'loss', 'content': 0.11377469450235367, 'timestamp': '2025-09-10 02:41:56.036532', 'step': 13568, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 128], 'flops': 3797092544000}, 'timestamp': '2025-09-10 02:41:56.067864', 'step': 13568, 'epoch': 2} {'type': 'loss', 'content': 0.12943750619888306, 'timestamp': '2025-09-10 02:41:56.070472', 'step': 13569, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 3322488817984}, 'timestamp': '2025-09-10 02:41:56.100813', 'step': 13569, 'epoch': 2} {'type': 'loss', 'content': 0.07754876464605331, 'timestamp': '2025-09-10 02:41:56.103276', 'step': 13570, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 3322488817984}, 'timestamp': '2025-09-10 02:41:56.133908', 'step': 13570, 'epoch': 2} {'type': 'loss', 'content': 0.11192905902862549, 'timestamp': '2025-09-10 02:41:56.136587', 'step': 13571, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 2847885091968}, 'timestamp': '2025-09-10 02:41:56.168022', 'step': 13571, 'epoch': 2} {'type': 'loss', 'content': 0.1258799284696579, 'timestamp': '2025-09-10 02:41:56.192845', 'step': 13572, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 3322488817984}, 'timestamp': '2025-09-10 02:41:56.225012', 'step': 13572, 'epoch': 2} {'type': 'loss', 'content': 0.1164003238081932, 'timestamp': '2025-09-10 02:41:56.227245', 'step': 13573, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 3322488817984}, 'timestamp': '2025-09-10 02:41:56.258614', 'step': 13573, 'epoch': 2} {'type': 'loss', 'content': 0.12001052498817444, 'timestamp': '2025-09-10 02:41:56.261180', 'step': 13574, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 128], 'flops': 3797092544000}, 'timestamp': '2025-09-10 02:41:56.294189', 'step': 13574, 'epoch': 2} {'type': 'loss', 'content': 0.06690455228090286, 'timestamp': '2025-09-10 02:41:56.297281', 'step': 13575, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 3322488817984}, 'timestamp': '2025-09-10 02:41:56.328968', 'step': 13575, 'epoch': 2} {'type': 'loss', 'content': 0.1396092176437378, 'timestamp': '2025-09-10 02:41:56.353243', 'step': 13576, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 128], 'flops': 3797092544000}, 'timestamp': '2025-09-10 02:41:56.384045', 'step': 13576, 'epoch': 2} {'type': 'loss', 'content': 0.10684267431497574, 'timestamp': '2025-09-10 02:41:56.386300', 'step': 13577, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 80], 'flops': 2373281365952}, 'timestamp': '2025-09-10 02:41:56.416650', 'step': 13577, 'epoch': 2} {'type': 'loss', 'content': 0.09441975504159927, 'timestamp': '2025-09-10 02:41:56.419049', 'step': 13578, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 144], 'flops': 4271696270016}, 'timestamp': '2025-09-10 02:41:56.449733', 'step': 13578, 'epoch': 2} {'type': 'loss', 'content': 0.08396758139133453, 'timestamp': '2025-09-10 02:41:56.452163', 'step': 13579, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 128], 'flops': 3797092544000}, 'timestamp': '2025-09-10 02:41:56.482395', 'step': 13579, 'epoch': 2} {'type': 'loss', 'content': 0.12745462357997894, 'timestamp': '2025-09-10 02:41:56.506090', 'step': 13580, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 128], 'flops': 3797092544000}, 'timestamp': '2025-09-10 02:41:56.536393', 'step': 13580, 'epoch': 2} {'type': 'loss', 'content': 0.07951928675174713, 'timestamp': '2025-09-10 02:41:56.538652', 'step': 13581, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 2847885091968}, 'timestamp': '2025-09-10 02:41:56.569130', 'step': 13581, 'epoch': 2} {'type': 'loss', 'content': 0.12942387163639069, 'timestamp': '2025-09-10 02:41:56.572189', 'step': 13582, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 3322488817984}, 'timestamp': '2025-09-10 02:41:56.602673', 'step': 13582, 'epoch': 2} {'type': 'loss', 'content': 0.13860933482646942, 'timestamp': '2025-09-10 02:41:56.606087', 'step': 13583, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 3322488817984}, 'timestamp': '2025-09-10 02:41:56.638132', 'step': 13583, 'epoch': 2} {'type': 'loss', 'content': 0.07304225862026215, 'timestamp': '2025-09-10 02:41:56.661921', 'step': 13584, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 2847885091968}, 'timestamp': '2025-09-10 02:41:56.693210', 'step': 13584, 'epoch': 2} {'type': 'loss', 'content': 0.15332812070846558, 'timestamp': '2025-09-10 02:41:56.696019', 'step': 13585, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 3322488817984}, 'timestamp': '2025-09-10 02:41:56.727631', 'step': 13585, 'epoch': 2} {'type': 'loss', 'content': 0.14043064415454865, 'timestamp': '2025-09-10 02:41:56.730073', 'step': 13586, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 144], 'flops': 4271696270016}, 'timestamp': '2025-09-10 02:41:56.761667', 'step': 13586, 'epoch': 2} {'type': 'loss', 'content': 0.06297862529754639, 'timestamp': '2025-09-10 02:41:56.764082', 'step': 13587, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 160], 'flops': 4746299996032}, 'timestamp': '2025-09-10 02:41:56.794083', 'step': 13587, 'epoch': 2} {'type': 'loss', 'content': 0.13554638624191284, 'timestamp': '2025-09-10 02:41:56.817609', 'step': 13588, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 3322488817984}, 'timestamp': '2025-09-10 02:41:56.848053', 'step': 13588, 'epoch': 2} {'type': 'loss', 'content': 0.07262467592954636, 'timestamp': '2025-09-10 02:41:56.850746', 'step': 13589, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 3322488817984}, 'timestamp': '2025-09-10 02:41:56.881289', 'step': 13589, 'epoch': 2} {'type': 'loss', 'content': 0.0856674388051033, 'timestamp': '2025-09-10 02:41:56.883806', 'step': 13590, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 3322488817984}, 'timestamp': '2025-09-10 02:41:56.914730', 'step': 13590, 'epoch': 2} {'type': 'loss', 'content': 0.0903133973479271, 'timestamp': '2025-09-10 02:41:56.917071', 'step': 13591, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 2847885091968}, 'timestamp': '2025-09-10 02:41:56.947782', 'step': 13591, 'epoch': 2} {'type': 'loss', 'content': 0.1946713775396347, 'timestamp': '2025-09-10 02:41:56.971276', 'step': 13592, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 128], 'flops': 3797092544000}, 'timestamp': '2025-09-10 02:41:57.003002', 'step': 13592, 'epoch': 2} {'type': 'loss', 'content': 0.13578423857688904, 'timestamp': '2025-09-10 02:41:57.005396', 'step': 13593, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 144], 'flops': 4271696270016}, 'timestamp': '2025-09-10 02:41:57.035419', 'step': 13593, 'epoch': 2} {'type': 'loss', 'content': 0.07347223162651062, 'timestamp': '2025-09-10 02:41:57.037728', 'step': 13594, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 2847885091968}, 'timestamp': '2025-09-10 02:41:57.067559', 'step': 13594, 'epoch': 2} {'type': 'loss', 'content': 0.14415782690048218, 'timestamp': '2025-09-10 02:41:57.069998', 'step': 13595, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 128], 'flops': 3797092544000}, 'timestamp': '2025-09-10 02:41:57.101084', 'step': 13595, 'epoch': 2} {'type': 'loss', 'content': 0.0730859711766243, 'timestamp': '2025-09-10 02:41:57.124646', 'step': 13596, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 128], 'flops': 3797092544000}, 'timestamp': '2025-09-10 02:41:57.155377', 'step': 13596, 'epoch': 2} {'type': 'loss', 'content': 0.12012115865945816, 'timestamp': '2025-09-10 02:41:57.158134', 'step': 13597, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 2847885091968}, 'timestamp': '2025-09-10 02:41:57.188280', 'step': 13597, 'epoch': 2} {'type': 'loss', 'content': 0.1952066868543625, 'timestamp': '2025-09-10 02:41:57.190980', 'step': 13598, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 3322488817984}, 'timestamp': '2025-09-10 02:41:57.222213', 'step': 13598, 'epoch': 2} {'type': 'loss', 'content': 0.17332711815834045, 'timestamp': '2025-09-10 02:41:57.224533', 'step': 13599, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 2847885091968}, 'timestamp': '2025-09-10 02:41:57.255304', 'step': 13599, 'epoch': 2} {'type': 'loss', 'content': 0.07207391411066055, 'timestamp': '2025-09-10 02:41:57.279020', 'step': 13600, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 3322488817984}, 'timestamp': '2025-09-10 02:41:57.311149', 'step': 13600, 'epoch': 2} {'type': 'loss', 'content': 0.07554148137569427, 'timestamp': '2025-09-10 02:41:57.313625', 'step': 13601, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 3322488817984}, 'timestamp': '2025-09-10 02:41:57.344363', 'step': 13601, 'epoch': 2} {'type': 'loss', 'content': 0.15958410501480103, 'timestamp': '2025-09-10 02:41:57.347688', 'step': 13602, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 2847885091968}, 'timestamp': '2025-09-10 02:41:57.378593', 'step': 13602, 'epoch': 2} {'type': 'loss', 'content': 0.0983385369181633, 'timestamp': '2025-09-10 02:41:57.381797', 'step': 13603, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 3322488817984}, 'timestamp': '2025-09-10 02:41:57.412772', 'step': 13603, 'epoch': 2} {'type': 'loss', 'content': 0.07737787067890167, 'timestamp': '2025-09-10 02:41:57.436512', 'step': 13604, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 3322488817984}, 'timestamp': '2025-09-10 02:41:57.466639', 'step': 13604, 'epoch': 2} {'type': 'loss', 'content': 0.1785830706357956, 'timestamp': '2025-09-10 02:41:57.469587', 'step': 13605, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 2847885091968}, 'timestamp': '2025-09-10 02:41:57.501004', 'step': 13605, 'epoch': 2} {'type': 'loss', 'content': 0.08283577114343643, 'timestamp': '2025-09-10 02:41:57.503573', 'step': 13606, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 2847885091968}, 'timestamp': '2025-09-10 02:41:57.534188', 'step': 13606, 'epoch': 2} {'type': 'loss', 'content': 0.12399740517139435, 'timestamp': '2025-09-10 02:41:57.536398', 'step': 13607, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 3322488817984}, 'timestamp': '2025-09-10 02:41:57.566930', 'step': 13607, 'epoch': 2} {'type': 'loss', 'content': 0.10490681976079941, 'timestamp': '2025-09-10 02:41:57.590511', 'step': 13608, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 2847885091968}, 'timestamp': '2025-09-10 02:41:57.621170', 'step': 13608, 'epoch': 2} {'type': 'loss', 'content': 0.09787771850824356, 'timestamp': '2025-09-10 02:41:57.623828', 'step': 13609, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 128], 'flops': 3797092544000}, 'timestamp': '2025-09-10 02:41:57.655398', 'step': 13609, 'epoch': 2} {'type': 'loss', 'content': 0.11320000141859055, 'timestamp': '2025-09-10 02:41:57.657658', 'step': 13610, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 3322488817984}, 'timestamp': '2025-09-10 02:41:57.687753', 'step': 13610, 'epoch': 2} {'type': 'loss', 'content': 0.09372492879629135, 'timestamp': '2025-09-10 02:41:57.690415', 'step': 13611, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 128], 'flops': 3797092544000}, 'timestamp': '2025-09-10 02:41:57.721236', 'step': 13611, 'epoch': 2} {'type': 'loss', 'content': 0.07991869002580643, 'timestamp': '2025-09-10 02:41:57.745044', 'step': 13612, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 2847885091968}, 'timestamp': '2025-09-10 02:41:57.776029', 'step': 13612, 'epoch': 2} {'type': 'loss', 'content': 0.18839211761951447, 'timestamp': '2025-09-10 02:41:57.778400', 'step': 13613, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 128], 'flops': 3797092544000}, 'timestamp': '2025-09-10 02:41:57.810122', 'step': 13613, 'epoch': 2} {'type': 'loss', 'content': 0.08844801038503647, 'timestamp': '2025-09-10 02:41:57.812433', 'step': 13614, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 2847885091968}, 'timestamp': '2025-09-10 02:41:57.842903', 'step': 13614, 'epoch': 2} {'type': 'loss', 'content': 0.11454012989997864, 'timestamp': '2025-09-10 02:41:57.845272', 'step': 13615, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 3322488817984}, 'timestamp': '2025-09-10 02:41:57.875487', 'step': 13615, 'epoch': 2} {'type': 'loss', 'content': 0.15755639970302582, 'timestamp': '2025-09-10 02:41:57.899316', 'step': 13616, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 80], 'flops': 2373281365952}, 'timestamp': '2025-09-10 02:41:57.930020', 'step': 13616, 'epoch': 2} {'type': 'loss', 'content': 0.11693556606769562, 'timestamp': '2025-09-10 02:41:57.932613', 'step': 13617, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 128], 'flops': 3797092544000}, 'timestamp': '2025-09-10 02:41:57.963229', 'step': 13617, 'epoch': 2} {'type': 'loss', 'content': 0.21022623777389526, 'timestamp': '2025-09-10 02:41:57.965842', 'step': 13618, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 2847885091968}, 'timestamp': '2025-09-10 02:41:57.996155', 'step': 13618, 'epoch': 2} {'type': 'loss', 'content': 0.10670856386423111, 'timestamp': '2025-09-10 02:41:57.998411', 'step': 13619, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 3322488817984}, 'timestamp': '2025-09-10 02:41:58.028993', 'step': 13619, 'epoch': 2} {'type': 'loss', 'content': 0.04519170895218849, 'timestamp': '2025-09-10 02:41:58.052778', 'step': 13620, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 3322488817984}, 'timestamp': '2025-09-10 02:41:58.083649', 'step': 13620, 'epoch': 2} {'type': 'loss', 'content': 0.07244999706745148, 'timestamp': '2025-09-10 02:41:58.085971', 'step': 13621, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 3322488817984}, 'timestamp': '2025-09-10 02:41:58.117856', 'step': 13621, 'epoch': 2} {'type': 'loss', 'content': 0.12384378910064697, 'timestamp': '2025-09-10 02:41:58.120333', 'step': 13622, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 2847885091968}, 'timestamp': '2025-09-10 02:41:58.151043', 'step': 13622, 'epoch': 2} {'type': 'loss', 'content': 0.07240203022956848, 'timestamp': '2025-09-10 02:41:58.154349', 'step': 13623, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 3322488817984}, 'timestamp': '2025-09-10 02:41:58.185059', 'step': 13623, 'epoch': 2} {'type': 'loss', 'content': 0.10552635788917542, 'timestamp': '2025-09-10 02:41:58.208626', 'step': 13624, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 3322488817984}, 'timestamp': '2025-09-10 02:41:58.241797', 'step': 13624, 'epoch': 2} {'type': 'loss', 'content': 0.11316198855638504, 'timestamp': '2025-09-10 02:41:58.243779', 'step': 13625, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 2847885091968}, 'timestamp': '2025-09-10 02:41:58.274912', 'step': 13625, 'epoch': 2} {'type': 'loss', 'content': 0.05876943841576576, 'timestamp': '2025-09-10 02:41:58.279396', 'step': 13626, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 2847885091968}, 'timestamp': '2025-09-10 02:41:58.310744', 'step': 13626, 'epoch': 2} {'type': 'loss', 'content': 0.1003623753786087, 'timestamp': '2025-09-10 02:41:58.312896', 'step': 13627, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 128], 'flops': 3797092544000}, 'timestamp': '2025-09-10 02:41:58.344827', 'step': 13627, 'epoch': 2} {'type': 'loss', 'content': 0.07979408651590347, 'timestamp': '2025-09-10 02:41:58.368582', 'step': 13628, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 3322488817984}, 'timestamp': '2025-09-10 02:41:58.400854', 'step': 13628, 'epoch': 2} {'type': 'loss', 'content': 0.08926229178905487, 'timestamp': '2025-09-10 02:41:58.403556', 'step': 13629, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 3322488817984}, 'timestamp': '2025-09-10 02:41:58.436642', 'step': 13629, 'epoch': 2} {'type': 'loss', 'content': 0.0767890214920044, 'timestamp': '2025-09-10 02:41:58.438511', 'step': 13630, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 128], 'flops': 3797092544000}, 'timestamp': '2025-09-10 02:41:58.469205', 'step': 13630, 'epoch': 2} {'type': 'loss', 'content': 0.09262873977422714, 'timestamp': '2025-09-10 02:41:58.475509', 'step': 13631, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 3322488817984}, 'timestamp': '2025-09-10 02:41:58.516387', 'step': 13631, 'epoch': 2} {'type': 'loss', 'content': 0.09019757807254791, 'timestamp': '2025-09-10 02:41:58.539745', 'step': 13632, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 3322488817984}, 'timestamp': '2025-09-10 02:41:58.570691', 'step': 13632, 'epoch': 2} {'type': 'loss', 'content': 0.03891913220286369, 'timestamp': '2025-09-10 02:41:58.573823', 'step': 13633, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 2847885091968}, 'timestamp': '2025-09-10 02:41:58.605172', 'step': 13633, 'epoch': 2} {'type': 'loss', 'content': 0.15188652276992798, 'timestamp': '2025-09-10 02:41:58.608314', 'step': 13634, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 2847885091968}, 'timestamp': '2025-09-10 02:41:58.644027', 'step': 13634, 'epoch': 2} {'type': 'loss', 'content': 0.08830320835113525, 'timestamp': '2025-09-10 02:41:58.646419', 'step': 13635, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 3322488817984}, 'timestamp': '2025-09-10 02:41:58.676943', 'step': 13635, 'epoch': 2} {'type': 'loss', 'content': 0.08583005517721176, 'timestamp': '2025-09-10 02:41:58.701293', 'step': 13636, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 2847885091968}, 'timestamp': '2025-09-10 02:41:58.731349', 'step': 13636, 'epoch': 2} {'type': 'loss', 'content': 0.11806810647249222, 'timestamp': '2025-09-10 02:41:58.734199', 'step': 13637, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 3322488817984}, 'timestamp': '2025-09-10 02:41:58.767640', 'step': 13637, 'epoch': 2} {'type': 'loss', 'content': 0.13367009162902832, 'timestamp': '2025-09-10 02:41:58.769720', 'step': 13638, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 2847885091968}, 'timestamp': '2025-09-10 02:41:58.800589', 'step': 13638, 'epoch': 2} {'type': 'loss', 'content': 0.07668883353471756, 'timestamp': '2025-09-10 02:41:58.803485', 'step': 13639, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 2847885091968}, 'timestamp': '2025-09-10 02:41:58.845624', 'step': 13639, 'epoch': 2} {'type': 'loss', 'content': 0.1481923609972, 'timestamp': '2025-09-10 02:41:58.869509', 'step': 13640, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 128], 'flops': 3797092544000}, 'timestamp': '2025-09-10 02:41:58.902299', 'step': 13640, 'epoch': 2} {'type': 'loss', 'content': 0.06421003490686417, 'timestamp': '2025-09-10 02:41:58.907242', 'step': 13641, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 2847885091968}, 'timestamp': '2025-09-10 02:41:58.942548', 'step': 13641, 'epoch': 2} {'type': 'loss', 'content': 0.09492193162441254, 'timestamp': '2025-09-10 02:41:58.945773', 'step': 13642, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 3322488817984}, 'timestamp': '2025-09-10 02:41:58.977460', 'step': 13642, 'epoch': 2} {'type': 'loss', 'content': 0.14760303497314453, 'timestamp': '2025-09-10 02:41:58.980302', 'step': 13643, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 3322488817984}, 'timestamp': '2025-09-10 02:41:59.027394', 'step': 13643, 'epoch': 2} {'type': 'loss', 'content': 0.11387157440185547, 'timestamp': '2025-09-10 02:41:59.051109', 'step': 13644, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 128], 'flops': 3797092544000}, 'timestamp': '2025-09-10 02:41:59.081609', 'step': 13644, 'epoch': 2} {'type': 'loss', 'content': 0.11118540167808533, 'timestamp': '2025-09-10 02:41:59.086394', 'step': 13645, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 160], 'flops': 4746299996032}, 'timestamp': '2025-09-10 02:41:59.122996', 'step': 13645, 'epoch': 2} {'type': 'loss', 'content': 0.0704311653971672, 'timestamp': '2025-09-10 02:41:59.125910', 'step': 13646, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 3322488817984}, 'timestamp': '2025-09-10 02:41:59.163325', 'step': 13646, 'epoch': 2} {'type': 'loss', 'content': 0.09930751472711563, 'timestamp': '2025-09-10 02:41:59.165971', 'step': 13647, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 3322488817984}, 'timestamp': '2025-09-10 02:41:59.196603', 'step': 13647, 'epoch': 2} {'type': 'loss', 'content': 0.06494640558958054, 'timestamp': '2025-09-10 02:41:59.220232', 'step': 13648, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 128], 'flops': 3797092544000}, 'timestamp': '2025-09-10 02:41:59.259646', 'step': 13648, 'epoch': 2} {'type': 'loss', 'content': 0.032467544078826904, 'timestamp': '2025-09-10 02:41:59.264726', 'step': 13649, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 2847885091968}, 'timestamp': '2025-09-10 02:41:59.296731', 'step': 13649, 'epoch': 2} {'type': 'loss', 'content': 0.17785508930683136, 'timestamp': '2025-09-10 02:41:59.299027', 'step': 13650, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 2847885091968}, 'timestamp': '2025-09-10 02:41:59.334141', 'step': 13650, 'epoch': 2} {'type': 'loss', 'content': 0.05505235120654106, 'timestamp': '2025-09-10 02:41:59.336392', 'step': 13651, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 144], 'flops': 4271696270016}, 'timestamp': '2025-09-10 02:41:59.367494', 'step': 13651, 'epoch': 2} {'type': 'loss', 'content': 0.17777754366397858, 'timestamp': '2025-09-10 02:41:59.391709', 'step': 13652, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 144], 'flops': 4271696270016}, 'timestamp': '2025-09-10 02:41:59.427524', 'step': 13652, 'epoch': 2} {'type': 'loss', 'content': 0.1613215059041977, 'timestamp': '2025-09-10 02:41:59.429963', 'step': 13653, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 3322488817984}, 'timestamp': '2025-09-10 02:41:59.460912', 'step': 13653, 'epoch': 2} {'type': 'loss', 'content': 0.18374544382095337, 'timestamp': '2025-09-10 02:41:59.463598', 'step': 13654, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 80], 'flops': 2373281365952}, 'timestamp': '2025-09-10 02:41:59.495124', 'step': 13654, 'epoch': 2} {'type': 'loss', 'content': 0.13838925957679749, 'timestamp': '2025-09-10 02:41:59.498696', 'step': 13655, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 3322488817984}, 'timestamp': '2025-09-10 02:41:59.529201', 'step': 13655, 'epoch': 2} {'type': 'loss', 'content': 0.08155754208564758, 'timestamp': '2025-09-10 02:41:59.552911', 'step': 13656, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 3322488817984}, 'timestamp': '2025-09-10 02:41:59.583988', 'step': 13656, 'epoch': 2} {'type': 'loss', 'content': 0.10742033272981644, 'timestamp': '2025-09-10 02:41:59.586458', 'step': 13657, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 80], 'flops': 2373281365952}, 'timestamp': '2025-09-10 02:41:59.617627', 'step': 13657, 'epoch': 2} {'type': 'loss', 'content': 0.044709689915180206, 'timestamp': '2025-09-10 02:41:59.620110', 'step': 13658, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 3322488817984}, 'timestamp': '2025-09-10 02:41:59.650625', 'step': 13658, 'epoch': 2} {'type': 'loss', 'content': 0.06744424998760223, 'timestamp': '2025-09-10 02:41:59.653473', 'step': 13659, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 128], 'flops': 3797092544000}, 'timestamp': '2025-09-10 02:41:59.685927', 'step': 13659, 'epoch': 2} {'type': 'loss', 'content': 0.06226765736937523, 'timestamp': '2025-09-10 02:41:59.709386', 'step': 13660, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 3322488817984}, 'timestamp': '2025-09-10 02:41:59.741447', 'step': 13660, 'epoch': 2} {'type': 'loss', 'content': 0.1912432461977005, 'timestamp': '2025-09-10 02:41:59.743876', 'step': 13661, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 128], 'flops': 3797092544000}, 'timestamp': '2025-09-10 02:41:59.774335', 'step': 13661, 'epoch': 2} {'type': 'loss', 'content': 0.10149761289358139, 'timestamp': '2025-09-10 02:41:59.776690', 'step': 13662, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 3322488817984}, 'timestamp': '2025-09-10 02:41:59.807008', 'step': 13662, 'epoch': 2} {'type': 'loss', 'content': 0.1113969087600708, 'timestamp': '2025-09-10 02:41:59.809585', 'step': 13663, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 2847885091968}, 'timestamp': '2025-09-10 02:41:59.842568', 'step': 13663, 'epoch': 2} {'type': 'loss', 'content': 0.12304574996232986, 'timestamp': '2025-09-10 02:41:59.865865', 'step': 13664, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 2847885091968}, 'timestamp': '2025-09-10 02:41:59.897043', 'step': 13664, 'epoch': 2} {'type': 'loss', 'content': 0.09097161144018173, 'timestamp': '2025-09-10 02:41:59.899674', 'step': 13665, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 128], 'flops': 3797092544000}, 'timestamp': '2025-09-10 02:41:59.929816', 'step': 13665, 'epoch': 2} {'type': 'loss', 'content': 0.19755566120147705, 'timestamp': '2025-09-10 02:41:59.933799', 'step': 13666, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 3322488817984}, 'timestamp': '2025-09-10 02:41:59.965972', 'step': 13666, 'epoch': 2} {'type': 'loss', 'content': 0.08604298532009125, 'timestamp': '2025-09-10 02:41:59.968092', 'step': 13667, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 80], 'flops': 2373281365952}, 'timestamp': '2025-09-10 02:41:59.998327', 'step': 13667, 'epoch': 2} {'type': 'loss', 'content': 0.0973593220114708, 'timestamp': '2025-09-10 02:42:00.021677', 'step': 13668, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 128], 'flops': 3797092544000}, 'timestamp': '2025-09-10 02:42:00.076019', 'step': 13668, 'epoch': 2} {'type': 'loss', 'content': 0.1407621204853058, 'timestamp': '2025-09-10 02:42:00.078397', 'step': 13669, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 144], 'flops': 4271696270016}, 'timestamp': '2025-09-10 02:42:00.111985', 'step': 13669, 'epoch': 2} {'type': 'loss', 'content': 0.14302833378314972, 'timestamp': '2025-09-10 02:42:00.119806', 'step': 13670, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 3322488817984}, 'timestamp': '2025-09-10 02:42:00.158137', 'step': 13670, 'epoch': 2} {'type': 'loss', 'content': 0.14146333932876587, 'timestamp': '2025-09-10 02:42:00.162299', 'step': 13671, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 128], 'flops': 3797092544000}, 'timestamp': '2025-09-10 02:42:00.195563', 'step': 13671, 'epoch': 2} {'type': 'loss', 'content': 0.0998990535736084, 'timestamp': '2025-09-10 02:42:00.221778', 'step': 13672, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 3322488817984}, 'timestamp': '2025-09-10 02:42:00.258258', 'step': 13672, 'epoch': 2} {'type': 'loss', 'content': 0.13840651512145996, 'timestamp': '2025-09-10 02:42:00.261351', 'step': 13673, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 144], 'flops': 4271696270016}, 'timestamp': '2025-09-10 02:42:00.290982', 'step': 13673, 'epoch': 2} {'type': 'loss', 'content': 0.10110539942979813, 'timestamp': '2025-09-10 02:42:00.293388', 'step': 13674, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 3322488817984}, 'timestamp': '2025-09-10 02:42:00.324545', 'step': 13674, 'epoch': 2} {'type': 'loss', 'content': 0.05979341268539429, 'timestamp': '2025-09-10 02:42:00.335669', 'step': 13675, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 2847885091968}, 'timestamp': '2025-09-10 02:42:00.370029', 'step': 13675, 'epoch': 2} {'type': 'loss', 'content': 0.08111549913883209, 'timestamp': '2025-09-10 02:42:00.394942', 'step': 13676, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 3322488817984}, 'timestamp': '2025-09-10 02:42:00.426897', 'step': 13676, 'epoch': 2} {'type': 'loss', 'content': 0.08024588227272034, 'timestamp': '2025-09-10 02:42:00.430429', 'step': 13677, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 128], 'flops': 3797092544000}, 'timestamp': '2025-09-10 02:42:00.469777', 'step': 13677, 'epoch': 2} {'type': 'loss', 'content': 0.1658155918121338, 'timestamp': '2025-09-10 02:42:00.473946', 'step': 13678, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 160], 'flops': 4746299996032}, 'timestamp': '2025-09-10 02:42:00.506226', 'step': 13678, 'epoch': 2} {'type': 'loss', 'content': 0.1625027060508728, 'timestamp': '2025-09-10 02:42:00.512749', 'step': 13679, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 160], 'flops': 4746299996032}, 'timestamp': '2025-09-10 02:42:00.548069', 'step': 13679, 'epoch': 2} {'type': 'loss', 'content': 0.11484618484973907, 'timestamp': '2025-09-10 02:42:00.572151', 'step': 13680, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 3322488817984}, 'timestamp': '2025-09-10 02:42:00.606316', 'step': 13680, 'epoch': 2} {'type': 'loss', 'content': 0.07354656606912613, 'timestamp': '2025-09-10 02:42:00.611231', 'step': 13681, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 80], 'flops': 2373281365952}, 'timestamp': '2025-09-10 02:42:00.658152', 'step': 13681, 'epoch': 2} {'type': 'loss', 'content': 0.08592382073402405, 'timestamp': '2025-09-10 02:42:00.661789', 'step': 13682, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 3322488817984}, 'timestamp': '2025-09-10 02:42:00.693040', 'step': 13682, 'epoch': 2} {'type': 'loss', 'content': 0.08078622072935104, 'timestamp': '2025-09-10 02:42:00.695598', 'step': 13683, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 3322488817984}, 'timestamp': '2025-09-10 02:42:00.726494', 'step': 13683, 'epoch': 2} {'type': 'loss', 'content': 0.09843799471855164, 'timestamp': '2025-09-10 02:42:00.750268', 'step': 13684, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 2847885091968}, 'timestamp': '2025-09-10 02:42:00.780967', 'step': 13684, 'epoch': 2} {'type': 'loss', 'content': 0.020835500210523605, 'timestamp': '2025-09-10 02:42:00.783267', 'step': 13685, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 2847885091968}, 'timestamp': '2025-09-10 02:42:00.813341', 'step': 13685, 'epoch': 2} {'type': 'loss', 'content': 0.08969929814338684, 'timestamp': '2025-09-10 02:42:00.815571', 'step': 13686, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 3322488817984}, 'timestamp': '2025-09-10 02:42:00.851777', 'step': 13686, 'epoch': 2} {'type': 'loss', 'content': 0.16170072555541992, 'timestamp': '2025-09-10 02:42:00.854101', 'step': 13687, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 2847885091968}, 'timestamp': '2025-09-10 02:42:00.884839', 'step': 13687, 'epoch': 2} {'type': 'loss', 'content': 0.17892585694789886, 'timestamp': '2025-09-10 02:42:00.908222', 'step': 13688, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 3322488817984}, 'timestamp': '2025-09-10 02:42:00.941940', 'step': 13688, 'epoch': 2} {'type': 'loss', 'content': 0.05764545127749443, 'timestamp': '2025-09-10 02:42:00.944331', 'step': 13689, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 3322488817984}, 'timestamp': '2025-09-10 02:42:00.974751', 'step': 13689, 'epoch': 2} {'type': 'loss', 'content': 0.09994716197252274, 'timestamp': '2025-09-10 02:42:00.978047', 'step': 13690, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 128], 'flops': 3797092544000}, 'timestamp': '2025-09-10 02:42:01.018348', 'step': 13690, 'epoch': 2} {'type': 'loss', 'content': 0.06584997475147247, 'timestamp': '2025-09-10 02:42:01.020871', 'step': 13691, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 2847885091968}, 'timestamp': '2025-09-10 02:42:01.051860', 'step': 13691, 'epoch': 2} {'type': 'loss', 'content': 0.19414122402668, 'timestamp': '2025-09-10 02:42:01.075797', 'step': 13692, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 3322488817984}, 'timestamp': '2025-09-10 02:42:01.106313', 'step': 13692, 'epoch': 2} {'type': 'loss', 'content': 0.06806999444961548, 'timestamp': '2025-09-10 02:42:01.108482', 'step': 13693, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 3322488817984}, 'timestamp': '2025-09-10 02:42:01.143246', 'step': 13693, 'epoch': 2} {'type': 'loss', 'content': 0.12127039581537247, 'timestamp': '2025-09-10 02:42:01.145495', 'step': 13694, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 144], 'flops': 4271696270016}, 'timestamp': '2025-09-10 02:42:01.175894', 'step': 13694, 'epoch': 2} {'type': 'loss', 'content': 0.10800353437662125, 'timestamp': '2025-09-10 02:42:01.178405', 'step': 13695, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 3322488817984}, 'timestamp': '2025-09-10 02:42:01.212350', 'step': 13695, 'epoch': 2} {'type': 'loss', 'content': 0.07874824851751328, 'timestamp': '2025-09-10 02:42:01.241021', 'step': 13696, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 2847885091968}, 'timestamp': '2025-09-10 02:42:01.286622', 'step': 13696, 'epoch': 2} {'type': 'loss', 'content': 0.0931728184223175, 'timestamp': '2025-09-10 02:42:01.292243', 'step': 13697, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 144], 'flops': 4271696270016}, 'timestamp': '2025-09-10 02:42:01.326251', 'step': 13697, 'epoch': 2} {'type': 'loss', 'content': 0.0729106068611145, 'timestamp': '2025-09-10 02:42:01.328537', 'step': 13698, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 3322488817984}, 'timestamp': '2025-09-10 02:42:01.365569', 'step': 13698, 'epoch': 2} {'type': 'loss', 'content': 0.14865998923778534, 'timestamp': '2025-09-10 02:42:01.368005', 'step': 13699, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 2847885091968}, 'timestamp': '2025-09-10 02:42:01.401732', 'step': 13699, 'epoch': 2} {'type': 'loss', 'content': 0.12395355105400085, 'timestamp': '2025-09-10 02:42:01.425983', 'step': 13700, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 144], 'flops': 4271696270016}, 'timestamp': '2025-09-10 02:42:01.461730', 'step': 13700, 'epoch': 2} {'type': 'loss', 'content': 0.09176042675971985, 'timestamp': '2025-09-10 02:42:01.466322', 'step': 13701, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 2847885091968}, 'timestamp': '2025-09-10 02:42:01.498630', 'step': 13701, 'epoch': 2} {'type': 'loss', 'content': 0.1961604505777359, 'timestamp': '2025-09-10 02:42:01.501399', 'step': 13702, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 128], 'flops': 3797092544000}, 'timestamp': '2025-09-10 02:42:01.532118', 'step': 13702, 'epoch': 2} {'type': 'loss', 'content': 0.0731605514883995, 'timestamp': '2025-09-10 02:42:01.536002', 'step': 13703, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 144], 'flops': 4271696270016}, 'timestamp': '2025-09-10 02:42:01.568209', 'step': 13703, 'epoch': 2} {'type': 'loss', 'content': 0.07665665447711945, 'timestamp': '2025-09-10 02:42:01.592333', 'step': 13704, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 2847885091968}, 'timestamp': '2025-09-10 02:42:01.643110', 'step': 13704, 'epoch': 2} {'type': 'loss', 'content': 0.03029286116361618, 'timestamp': '2025-09-10 02:42:01.646384', 'step': 13705, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 144], 'flops': 4271696270016}, 'timestamp': '2025-09-10 02:42:01.676589', 'step': 13705, 'epoch': 2} {'type': 'loss', 'content': 0.07625491172075272, 'timestamp': '2025-09-10 02:42:01.678878', 'step': 13706, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 144], 'flops': 4271696270016}, 'timestamp': '2025-09-10 02:42:01.709304', 'step': 13706, 'epoch': 2} {'type': 'loss', 'content': 0.11523596942424774, 'timestamp': '2025-09-10 02:42:01.711609', 'step': 13707, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 3322488817984}, 'timestamp': '2025-09-10 02:42:01.741713', 'step': 13707, 'epoch': 2} {'type': 'loss', 'content': 0.06346060335636139, 'timestamp': '2025-09-10 02:42:01.766589', 'step': 13708, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 128], 'flops': 3797092544000}, 'timestamp': '2025-09-10 02:42:01.798130', 'step': 13708, 'epoch': 2} {'type': 'loss', 'content': 0.10066962242126465, 'timestamp': '2025-09-10 02:42:01.800594', 'step': 13709, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 3322488817984}, 'timestamp': '2025-09-10 02:42:01.831367', 'step': 13709, 'epoch': 2} {'type': 'loss', 'content': 0.18483248353004456, 'timestamp': '2025-09-10 02:42:01.833857', 'step': 13710, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 3322488817984}, 'timestamp': '2025-09-10 02:42:01.866152', 'step': 13710, 'epoch': 2} {'type': 'loss', 'content': 0.0693647488951683, 'timestamp': '2025-09-10 02:42:01.868470', 'step': 13711, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 128], 'flops': 3797092544000}, 'timestamp': '2025-09-10 02:42:01.900866', 'step': 13711, 'epoch': 2} {'type': 'loss', 'content': 0.08476730436086655, 'timestamp': '2025-09-10 02:42:01.925855', 'step': 13712, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 2847885091968}, 'timestamp': '2025-09-10 02:42:01.956893', 'step': 13712, 'epoch': 2} {'type': 'loss', 'content': 0.04944045841693878, 'timestamp': '2025-09-10 02:42:01.959700', 'step': 13713, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 3322488817984}, 'timestamp': '2025-09-10 02:42:01.991407', 'step': 13713, 'epoch': 2} {'type': 'loss', 'content': 0.21785219013690948, 'timestamp': '2025-09-10 02:42:01.999567', 'step': 13714, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 2847885091968}, 'timestamp': '2025-09-10 02:42:02.038899', 'step': 13714, 'epoch': 2} {'type': 'loss', 'content': 0.1402987539768219, 'timestamp': '2025-09-10 02:42:02.042901', 'step': 13715, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 176], 'flops': 5220903722048}, 'timestamp': '2025-09-10 02:42:02.076173', 'step': 13715, 'epoch': 2} {'type': 'loss', 'content': 0.0811428502202034, 'timestamp': '2025-09-10 02:42:02.101104', 'step': 13716, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 128], 'flops': 3797092544000}, 'timestamp': '2025-09-10 02:42:02.134669', 'step': 13716, 'epoch': 2} {'type': 'loss', 'content': 0.10105273127555847, 'timestamp': '2025-09-10 02:42:02.136789', 'step': 13717, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 128], 'flops': 3797092544000}, 'timestamp': '2025-09-10 02:42:02.172506', 'step': 13717, 'epoch': 2} {'type': 'loss', 'content': 0.11122803390026093, 'timestamp': '2025-09-10 02:42:02.174952', 'step': 13718, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 176], 'flops': 5220903722048}, 'timestamp': '2025-09-10 02:42:02.206772', 'step': 13718, 'epoch': 2} {'type': 'loss', 'content': 0.12947452068328857, 'timestamp': '2025-09-10 02:42:02.211026', 'step': 13719, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 3322488817984}, 'timestamp': '2025-09-10 02:42:02.242572', 'step': 13719, 'epoch': 2} {'type': 'loss', 'content': 0.09969789534807205, 'timestamp': '2025-09-10 02:42:02.266166', 'step': 13720, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 3322488817984}, 'timestamp': '2025-09-10 02:42:02.296232', 'step': 13720, 'epoch': 2} {'type': 'loss', 'content': 0.03796382620930672, 'timestamp': '2025-09-10 02:42:02.298378', 'step': 13721, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 3322488817984}, 'timestamp': '2025-09-10 02:42:02.329622', 'step': 13721, 'epoch': 2} {'type': 'loss', 'content': 0.04508553445339203, 'timestamp': '2025-09-10 02:42:02.337386', 'step': 13722, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 2847885091968}, 'timestamp': '2025-09-10 02:42:02.371213', 'step': 13722, 'epoch': 2} {'type': 'loss', 'content': 0.07792533934116364, 'timestamp': '2025-09-10 02:42:02.373524', 'step': 13723, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 128], 'flops': 3797092544000}, 'timestamp': '2025-09-10 02:42:02.403971', 'step': 13723, 'epoch': 2} {'type': 'loss', 'content': 0.14557769894599915, 'timestamp': '2025-09-10 02:42:02.426806', 'step': 13724, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 3322488817984}, 'timestamp': '2025-09-10 02:42:02.468258', 'step': 13724, 'epoch': 2} {'type': 'loss', 'content': 0.1074882298707962, 'timestamp': '2025-09-10 02:42:02.470117', 'step': 13725, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 2847885091968}, 'timestamp': '2025-09-10 02:42:02.501642', 'step': 13725, 'epoch': 2} {'type': 'loss', 'content': 0.11684897541999817, 'timestamp': '2025-09-10 02:42:02.506244', 'step': 13726, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 3322488817984}, 'timestamp': '2025-09-10 02:42:02.541380', 'step': 13726, 'epoch': 2} {'type': 'loss', 'content': 0.0934339240193367, 'timestamp': '2025-09-10 02:42:02.543378', 'step': 13727, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 3322488817984}, 'timestamp': '2025-09-10 02:42:02.575567', 'step': 13727, 'epoch': 2} {'type': 'loss', 'content': 0.10868549346923828, 'timestamp': '2025-09-10 02:42:02.600447', 'step': 13728, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 3322488817984}, 'timestamp': '2025-09-10 02:42:02.635878', 'step': 13728, 'epoch': 2} {'type': 'loss', 'content': 0.161169171333313, 'timestamp': '2025-09-10 02:42:02.640392', 'step': 13729, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 144], 'flops': 4271696270016}, 'timestamp': '2025-09-10 02:42:02.676382', 'step': 13729, 'epoch': 2} {'type': 'loss', 'content': 0.09349915385246277, 'timestamp': '2025-09-10 02:42:02.678250', 'step': 13730, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 144], 'flops': 4271696270016}, 'timestamp': '2025-09-10 02:42:02.709669', 'step': 13730, 'epoch': 2} {'type': 'loss', 'content': 0.10098583996295929, 'timestamp': '2025-09-10 02:42:02.712574', 'step': 13731, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 3322488817984}, 'timestamp': '2025-09-10 02:42:02.744533', 'step': 13731, 'epoch': 2} {'type': 'loss', 'content': 0.10426218062639236, 'timestamp': '2025-09-10 02:42:02.770434', 'step': 13732, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 2847885091968}, 'timestamp': '2025-09-10 02:42:02.805932', 'step': 13732, 'epoch': 2} {'type': 'loss', 'content': 0.06252585351467133, 'timestamp': '2025-09-10 02:42:02.811306', 'step': 13733, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 144], 'flops': 4271696270016}, 'timestamp': '2025-09-10 02:42:02.844466', 'step': 13733, 'epoch': 2} {'type': 'loss', 'content': 0.12842898070812225, 'timestamp': '2025-09-10 02:42:02.850203', 'step': 13734, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 128], 'flops': 3797092544000}, 'timestamp': '2025-09-10 02:42:02.884317', 'step': 13734, 'epoch': 2} {'type': 'loss', 'content': 0.09742669016122818, 'timestamp': '2025-09-10 02:42:02.887183', 'step': 13735, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 3322488817984}, 'timestamp': '2025-09-10 02:42:02.917920', 'step': 13735, 'epoch': 2} {'type': 'loss', 'content': 0.18457214534282684, 'timestamp': '2025-09-10 02:42:02.948725', 'step': 13736, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 2847885091968}, 'timestamp': '2025-09-10 02:42:02.983404', 'step': 13736, 'epoch': 2} {'type': 'loss', 'content': 0.04183575510978699, 'timestamp': '2025-09-10 02:42:02.985584', 'step': 13737, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 3322488817984}, 'timestamp': '2025-09-10 02:42:03.015847', 'step': 13737, 'epoch': 2} {'type': 'loss', 'content': 0.1615886688232422, 'timestamp': '2025-09-10 02:42:03.018072', 'step': 13738, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 2847885091968}, 'timestamp': '2025-09-10 02:42:03.050387', 'step': 13738, 'epoch': 2} {'type': 'loss', 'content': 0.10500644147396088, 'timestamp': '2025-09-10 02:42:03.053506', 'step': 13739, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 160], 'flops': 4746299996032}, 'timestamp': '2025-09-10 02:42:03.088046', 'step': 13739, 'epoch': 2} {'type': 'loss', 'content': 0.0939381867647171, 'timestamp': '2025-09-10 02:42:03.111759', 'step': 13740, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 2847885091968}, 'timestamp': '2025-09-10 02:42:03.144268', 'step': 13740, 'epoch': 2} {'type': 'loss', 'content': 0.1278693526983261, 'timestamp': '2025-09-10 02:42:03.146739', 'step': 13741, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 3322488817984}, 'timestamp': '2025-09-10 02:42:03.179185', 'step': 13741, 'epoch': 2} {'type': 'loss', 'content': 0.031561415642499924, 'timestamp': '2025-09-10 02:42:03.181364', 'step': 13742, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 2847885091968}, 'timestamp': '2025-09-10 02:42:03.217222', 'step': 13742, 'epoch': 2} {'type': 'loss', 'content': 0.20430272817611694, 'timestamp': '2025-09-10 02:42:03.220532', 'step': 13743, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 128], 'flops': 3797092544000}, 'timestamp': '2025-09-10 02:42:03.252036', 'step': 13743, 'epoch': 2} {'type': 'loss', 'content': 0.09426932781934738, 'timestamp': '2025-09-10 02:42:03.277231', 'step': 13744, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 2847885091968}, 'timestamp': '2025-09-10 02:42:03.309457', 'step': 13744, 'epoch': 2} {'type': 'loss', 'content': 0.1397467702627182, 'timestamp': '2025-09-10 02:42:03.311595', 'step': 13745, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 2847885091968}, 'timestamp': '2025-09-10 02:42:03.343263', 'step': 13745, 'epoch': 2} {'type': 'loss', 'content': 0.08849083632230759, 'timestamp': '2025-09-10 02:42:03.345572', 'step': 13746, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 3322488817984}, 'timestamp': '2025-09-10 02:42:03.375777', 'step': 13746, 'epoch': 2} {'type': 'loss', 'content': 0.1002541109919548, 'timestamp': '2025-09-10 02:42:03.378559', 'step': 13747, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 144], 'flops': 4271696270016}, 'timestamp': '2025-09-10 02:42:03.409845', 'step': 13747, 'epoch': 2} {'type': 'loss', 'content': 0.14283354580402374, 'timestamp': '2025-09-10 02:42:03.433473', 'step': 13748, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 3322488817984}, 'timestamp': '2025-09-10 02:42:03.465414', 'step': 13748, 'epoch': 2} {'type': 'loss', 'content': 0.09850863367319107, 'timestamp': '2025-09-10 02:42:03.469518', 'step': 13749, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 3322488817984}, 'timestamp': '2025-09-10 02:42:03.500749', 'step': 13749, 'epoch': 2} {'type': 'loss', 'content': 0.12691359221935272, 'timestamp': '2025-09-10 02:42:03.513544', 'step': 13750, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 144], 'flops': 4271696270016}, 'timestamp': '2025-09-10 02:42:03.548969', 'step': 13750, 'epoch': 2} {'type': 'loss', 'content': 0.14352276921272278, 'timestamp': '2025-09-10 02:42:03.551514', 'step': 13751, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 3322488817984}, 'timestamp': '2025-09-10 02:42:03.584343', 'step': 13751, 'epoch': 2} {'type': 'loss', 'content': 0.15831929445266724, 'timestamp': '2025-09-10 02:42:03.608729', 'step': 13752, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 80], 'flops': 2373281365952}, 'timestamp': '2025-09-10 02:42:03.641793', 'step': 13752, 'epoch': 2} {'type': 'loss', 'content': 0.07570613920688629, 'timestamp': '2025-09-10 02:42:03.644494', 'step': 13753, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 3322488817984}, 'timestamp': '2025-09-10 02:42:03.680705', 'step': 13753, 'epoch': 2} {'type': 'loss', 'content': 0.23674987256526947, 'timestamp': '2025-09-10 02:42:03.686737', 'step': 13754, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 144], 'flops': 4271696270016}, 'timestamp': '2025-09-10 02:42:03.716966', 'step': 13754, 'epoch': 2} {'type': 'loss', 'content': 0.20335595309734344, 'timestamp': '2025-09-10 02:42:03.719174', 'step': 13755, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 128], 'flops': 3797092544000}, 'timestamp': '2025-09-10 02:42:03.756470', 'step': 13755, 'epoch': 2} {'type': 'loss', 'content': 0.06945475190877914, 'timestamp': '2025-09-10 02:42:03.779938', 'step': 13756, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 144], 'flops': 4271696270016}, 'timestamp': '2025-09-10 02:42:03.817026', 'step': 13756, 'epoch': 2} {'type': 'loss', 'content': 0.07261616736650467, 'timestamp': '2025-09-10 02:42:03.818903', 'step': 13757, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 128], 'flops': 3797092544000}, 'timestamp': '2025-09-10 02:42:03.851171', 'step': 13757, 'epoch': 2} {'type': 'loss', 'content': 0.12184508889913559, 'timestamp': '2025-09-10 02:42:03.853465', 'step': 13758, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 144], 'flops': 4271696270016}, 'timestamp': '2025-09-10 02:42:03.887964', 'step': 13758, 'epoch': 2} {'type': 'loss', 'content': 0.09312529116868973, 'timestamp': '2025-09-10 02:42:03.890344', 'step': 13759, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 2847885091968}, 'timestamp': '2025-09-10 02:42:03.921202', 'step': 13759, 'epoch': 2} {'type': 'loss', 'content': 0.19796743988990784, 'timestamp': '2025-09-10 02:42:03.944093', 'step': 13760, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 2847885091968}, 'timestamp': '2025-09-10 02:42:03.975202', 'step': 13760, 'epoch': 2} {'type': 'loss', 'content': 0.13029298186302185, 'timestamp': '2025-09-10 02:42:03.977338', 'step': 13761, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 2847885091968}, 'timestamp': '2025-09-10 02:42:04.009078', 'step': 13761, 'epoch': 2} {'type': 'loss', 'content': 0.08702729642391205, 'timestamp': '2025-09-10 02:42:04.011252', 'step': 13762, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 144], 'flops': 4271696270016}, 'timestamp': '2025-09-10 02:42:04.041367', 'step': 13762, 'epoch': 2} {'type': 'loss', 'content': 0.08837951719760895, 'timestamp': '2025-09-10 02:42:04.043872', 'step': 13763, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 128], 'flops': 3797092544000}, 'timestamp': '2025-09-10 02:42:04.079405', 'step': 13763, 'epoch': 2} {'type': 'loss', 'content': 0.10563850402832031, 'timestamp': '2025-09-10 02:42:04.102946', 'step': 13764, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 160], 'flops': 4746299996032}, 'timestamp': '2025-09-10 02:42:04.133525', 'step': 13764, 'epoch': 2} {'type': 'loss', 'content': 0.096225306391716, 'timestamp': '2025-09-10 02:42:04.135992', 'step': 13765, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 2847885091968}, 'timestamp': '2025-09-10 02:42:04.166818', 'step': 13765, 'epoch': 2} {'type': 'loss', 'content': 0.087142214179039, 'timestamp': '2025-09-10 02:42:04.172261', 'step': 13766, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 2847885091968}, 'timestamp': '2025-09-10 02:42:04.203031', 'step': 13766, 'epoch': 2} {'type': 'loss', 'content': 0.18303947150707245, 'timestamp': '2025-09-10 02:42:04.205250', 'step': 13767, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 192], 'flops': 5695507448064}, 'timestamp': '2025-09-10 02:42:04.235551', 'step': 13767, 'epoch': 2} {'type': 'loss', 'content': 0.0477091521024704, 'timestamp': '2025-09-10 02:42:04.260803', 'step': 13768, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 2847885091968}, 'timestamp': '2025-09-10 02:42:04.292748', 'step': 13768, 'epoch': 2} {'type': 'loss', 'content': 0.09708382189273834, 'timestamp': '2025-09-10 02:42:04.299408', 'step': 13769, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 128], 'flops': 3797092544000}, 'timestamp': '2025-09-10 02:42:04.333425', 'step': 13769, 'epoch': 2} {'type': 'loss', 'content': 0.10239056497812271, 'timestamp': '2025-09-10 02:42:04.336063', 'step': 13770, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 3322488817984}, 'timestamp': '2025-09-10 02:42:04.367042', 'step': 13770, 'epoch': 2} {'type': 'loss', 'content': 0.11291484534740448, 'timestamp': '2025-09-10 02:42:04.375730', 'step': 13771, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 3322488817984}, 'timestamp': '2025-09-10 02:42:04.407180', 'step': 13771, 'epoch': 2} {'type': 'loss', 'content': 0.02450333908200264, 'timestamp': '2025-09-10 02:42:04.430693', 'step': 13772, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 128], 'flops': 3797092544000}, 'timestamp': '2025-09-10 02:42:04.461027', 'step': 13772, 'epoch': 2} {'type': 'loss', 'content': 0.14665114879608154, 'timestamp': '2025-09-10 02:42:04.466294', 'step': 13773, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 128], 'flops': 3797092544000}, 'timestamp': '2025-09-10 02:42:04.502854', 'step': 13773, 'epoch': 2} {'type': 'loss', 'content': 0.09729015082120895, 'timestamp': '2025-09-10 02:42:04.510302', 'step': 13774, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 3322488817984}, 'timestamp': '2025-09-10 02:42:04.544692', 'step': 13774, 'epoch': 2} {'type': 'loss', 'content': 0.1478937864303589, 'timestamp': '2025-09-10 02:42:04.547345', 'step': 13775, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 2847885091968}, 'timestamp': '2025-09-10 02:42:04.577894', 'step': 13775, 'epoch': 2} {'type': 'loss', 'content': 0.14032873511314392, 'timestamp': '2025-09-10 02:42:04.603661', 'step': 13776, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 3322488817984}, 'timestamp': '2025-09-10 02:42:04.634326', 'step': 13776, 'epoch': 2} {'type': 'loss', 'content': 0.16574344038963318, 'timestamp': '2025-09-10 02:42:04.636563', 'step': 13777, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 2847885091968}, 'timestamp': '2025-09-10 02:42:04.667802', 'step': 13777, 'epoch': 2} {'type': 'loss', 'content': 0.20958687365055084, 'timestamp': '2025-09-10 02:42:04.670291', 'step': 13778, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 3322488817984}, 'timestamp': '2025-09-10 02:42:04.705700', 'step': 13778, 'epoch': 2} {'type': 'loss', 'content': 0.16088999807834625, 'timestamp': '2025-09-10 02:42:04.708257', 'step': 13779, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 144], 'flops': 4271696270016}, 'timestamp': '2025-09-10 02:42:04.738755', 'step': 13779, 'epoch': 2} {'type': 'loss', 'content': 0.09134840965270996, 'timestamp': '2025-09-10 02:42:04.762384', 'step': 13780, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 3322488817984}, 'timestamp': '2025-09-10 02:42:04.796225', 'step': 13780, 'epoch': 2} {'type': 'loss', 'content': 0.11185529083013535, 'timestamp': '2025-09-10 02:42:04.798443', 'step': 13781, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 128], 'flops': 3797092544000}, 'timestamp': '2025-09-10 02:42:04.835840', 'step': 13781, 'epoch': 2} {'type': 'loss', 'content': 0.16612786054611206, 'timestamp': '2025-09-10 02:42:04.838597', 'step': 13782, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 144], 'flops': 4271696270016}, 'timestamp': '2025-09-10 02:42:04.872750', 'step': 13782, 'epoch': 2} {'type': 'loss', 'content': 0.09693217277526855, 'timestamp': '2025-09-10 02:42:04.876480', 'step': 13783, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 3322488817984}, 'timestamp': '2025-09-10 02:42:04.906306', 'step': 13783, 'epoch': 2} {'type': 'loss', 'content': 0.09135562181472778, 'timestamp': '2025-09-10 02:42:04.929889', 'step': 13784, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 2847885091968}, 'timestamp': '2025-09-10 02:42:04.960285', 'step': 13784, 'epoch': 2} {'type': 'loss', 'content': 0.1087622120976448, 'timestamp': '2025-09-10 02:42:04.962281', 'step': 13785, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 144], 'flops': 4271696270016}, 'timestamp': '2025-09-10 02:42:04.994647', 'step': 13785, 'epoch': 2} {'type': 'loss', 'content': 0.0431959442794323, 'timestamp': '2025-09-10 02:42:04.997123', 'step': 13786, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 128], 'flops': 3797092544000}, 'timestamp': '2025-09-10 02:42:05.027008', 'step': 13786, 'epoch': 2} {'type': 'loss', 'content': 0.03797713294625282, 'timestamp': '2025-09-10 02:42:05.029617', 'step': 13787, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 3322488817984}, 'timestamp': '2025-09-10 02:42:05.060590', 'step': 13787, 'epoch': 2} {'type': 'loss', 'content': 0.05879832059144974, 'timestamp': '2025-09-10 02:42:05.084057', 'step': 13788, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 128], 'flops': 3797092544000}, 'timestamp': '2025-09-10 02:42:05.114842', 'step': 13788, 'epoch': 2} {'type': 'loss', 'content': 0.11989938467741013, 'timestamp': '2025-09-10 02:42:05.116760', 'step': 13789, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 3322488817984}, 'timestamp': '2025-09-10 02:42:05.149018', 'step': 13789, 'epoch': 2} {'type': 'loss', 'content': 0.10709287971258163, 'timestamp': '2025-09-10 02:42:05.152244', 'step': 13790, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 3322488817984}, 'timestamp': '2025-09-10 02:42:05.183890', 'step': 13790, 'epoch': 2} {'type': 'loss', 'content': 0.12379031628370285, 'timestamp': '2025-09-10 02:42:05.186091', 'step': 13791, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 3322488817984}, 'timestamp': '2025-09-10 02:42:05.215724', 'step': 13791, 'epoch': 2} {'type': 'loss', 'content': 0.07584165781736374, 'timestamp': '2025-09-10 02:42:05.239037', 'step': 13792, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 160], 'flops': 4746299996032}, 'timestamp': '2025-09-10 02:42:05.273002', 'step': 13792, 'epoch': 2} {'type': 'loss', 'content': 0.05878186225891113, 'timestamp': '2025-09-10 02:42:05.274846', 'step': 13793, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 2847885091968}, 'timestamp': '2025-09-10 02:42:05.307972', 'step': 13793, 'epoch': 2} {'type': 'loss', 'content': 0.1081736758351326, 'timestamp': '2025-09-10 02:42:05.312412', 'step': 13794, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 3322488817984}, 'timestamp': '2025-09-10 02:42:05.342869', 'step': 13794, 'epoch': 2} {'type': 'loss', 'content': 0.10345959663391113, 'timestamp': '2025-09-10 02:42:05.345122', 'step': 13795, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 3322488817984}, 'timestamp': '2025-09-10 02:42:05.376313', 'step': 13795, 'epoch': 2} {'type': 'loss', 'content': 0.1651218831539154, 'timestamp': '2025-09-10 02:42:05.399730', 'step': 13796, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 144], 'flops': 4271696270016}, 'timestamp': '2025-09-10 02:42:05.433705', 'step': 13796, 'epoch': 2} {'type': 'loss', 'content': 0.08008070290088654, 'timestamp': '2025-09-10 02:42:05.440984', 'step': 13797, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 144], 'flops': 4271696270016}, 'timestamp': '2025-09-10 02:42:05.476586', 'step': 13797, 'epoch': 2} {'type': 'loss', 'content': 0.1305488646030426, 'timestamp': '2025-09-10 02:42:05.479373', 'step': 13798, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 144], 'flops': 4271696270016}, 'timestamp': '2025-09-10 02:42:05.516593', 'step': 13798, 'epoch': 2} {'type': 'loss', 'content': 0.10625756531953812, 'timestamp': '2025-09-10 02:42:05.522450', 'step': 13799, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 3322488817984}, 'timestamp': '2025-09-10 02:42:05.559858', 'step': 13799, 'epoch': 2} {'type': 'loss', 'content': 0.09617652744054794, 'timestamp': '2025-09-10 02:42:05.583079', 'step': 13800, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 3322488817984}, 'timestamp': '2025-09-10 02:42:05.614148', 'step': 13800, 'epoch': 2} {'type': 'loss', 'content': 0.11022999882698059, 'timestamp': '2025-09-10 02:42:05.616515', 'step': 13801, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 128], 'flops': 3797092544000}, 'timestamp': '2025-09-10 02:42:05.646238', 'step': 13801, 'epoch': 2} {'type': 'loss', 'content': 0.1349174976348877, 'timestamp': '2025-09-10 02:42:05.648397', 'step': 13802, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 2847885091968}, 'timestamp': '2025-09-10 02:42:05.679740', 'step': 13802, 'epoch': 2} {'type': 'loss', 'content': 0.170446515083313, 'timestamp': '2025-09-10 02:42:05.685828', 'step': 13803, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 2847885091968}, 'timestamp': '2025-09-10 02:42:05.724039', 'step': 13803, 'epoch': 2} {'type': 'loss', 'content': 0.07782085984945297, 'timestamp': '2025-09-10 02:42:05.753813', 'step': 13804, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 128], 'flops': 3797092544000}, 'timestamp': '2025-09-10 02:42:05.785439', 'step': 13804, 'epoch': 2} {'type': 'loss', 'content': 0.16139806807041168, 'timestamp': '2025-09-10 02:42:05.789902', 'step': 13805, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 2847885091968}, 'timestamp': '2025-09-10 02:42:05.821358', 'step': 13805, 'epoch': 2} {'type': 'loss', 'content': 0.07523316890001297, 'timestamp': '2025-09-10 02:42:05.823482', 'step': 13806, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 2847885091968}, 'timestamp': '2025-09-10 02:42:05.853517', 'step': 13806, 'epoch': 2} {'type': 'loss', 'content': 0.0948006883263588, 'timestamp': '2025-09-10 02:42:05.857214', 'step': 13807, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 128], 'flops': 3797092544000}, 'timestamp': '2025-09-10 02:42:05.892018', 'step': 13807, 'epoch': 2} {'type': 'loss', 'content': 0.07387685775756836, 'timestamp': '2025-09-10 02:42:05.915682', 'step': 13808, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 3322488817984}, 'timestamp': '2025-09-10 02:42:05.946304', 'step': 13808, 'epoch': 2} {'type': 'loss', 'content': 0.08823618292808533, 'timestamp': '2025-09-10 02:42:05.949992', 'step': 13809, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 2847885091968}, 'timestamp': '2025-09-10 02:42:05.988195', 'step': 13809, 'epoch': 2} {'type': 'loss', 'content': 0.11151603609323502, 'timestamp': '2025-09-10 02:42:05.991176', 'step': 13810, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 128], 'flops': 3797092544000}, 'timestamp': '2025-09-10 02:42:06.022838', 'step': 13810, 'epoch': 2} {'type': 'loss', 'content': 0.0678086131811142, 'timestamp': '2025-09-10 02:42:06.028447', 'step': 13811, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 160], 'flops': 4746299996032}, 'timestamp': '2025-09-10 02:42:06.064965', 'step': 13811, 'epoch': 2} {'type': 'loss', 'content': 0.09239237010478973, 'timestamp': '2025-09-10 02:42:06.088626', 'step': 13812, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 2847885091968}, 'timestamp': '2025-09-10 02:42:06.120420', 'step': 13812, 'epoch': 2} {'type': 'loss', 'content': 0.1150941252708435, 'timestamp': '2025-09-10 02:42:06.123003', 'step': 13813, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 128], 'flops': 3797092544000}, 'timestamp': '2025-09-10 02:42:06.154448', 'step': 13813, 'epoch': 2} {'type': 'loss', 'content': 0.12803566455841064, 'timestamp': '2025-09-10 02:42:06.157128', 'step': 13814, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 128], 'flops': 3797092544000}, 'timestamp': '2025-09-10 02:42:06.189402', 'step': 13814, 'epoch': 2} {'type': 'loss', 'content': 0.1288764476776123, 'timestamp': '2025-09-10 02:42:06.193520', 'step': 13815, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 2847885091968}, 'timestamp': '2025-09-10 02:42:06.232287', 'step': 13815, 'epoch': 2} {'type': 'loss', 'content': 0.10597936809062958, 'timestamp': '2025-09-10 02:42:06.257299', 'step': 13816, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 160], 'flops': 4746299996032}, 'timestamp': '2025-09-10 02:42:06.288292', 'step': 13816, 'epoch': 2} {'type': 'loss', 'content': 0.09889832139015198, 'timestamp': '2025-09-10 02:42:06.291395', 'step': 13817, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 2847885091968}, 'timestamp': '2025-09-10 02:42:06.328780', 'step': 13817, 'epoch': 2} {'type': 'loss', 'content': 0.145822674036026, 'timestamp': '2025-09-10 02:42:06.330789', 'step': 13818, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 144], 'flops': 4271696270016}, 'timestamp': '2025-09-10 02:42:06.361133', 'step': 13818, 'epoch': 2} {'type': 'loss', 'content': 0.0846254900097847, 'timestamp': '2025-09-10 02:42:06.364082', 'step': 13819, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 2847885091968}, 'timestamp': '2025-09-10 02:42:06.402397', 'step': 13819, 'epoch': 2} {'type': 'loss', 'content': 0.2058447301387787, 'timestamp': '2025-09-10 02:42:06.427820', 'step': 13820, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 128], 'flops': 3797092544000}, 'timestamp': '2025-09-10 02:42:06.458446', 'step': 13820, 'epoch': 2} {'type': 'loss', 'content': 0.06930377334356308, 'timestamp': '2025-09-10 02:42:06.460965', 'step': 13821, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 2847885091968}, 'timestamp': '2025-09-10 02:42:06.491973', 'step': 13821, 'epoch': 2} {'type': 'loss', 'content': 0.09028743207454681, 'timestamp': '2025-09-10 02:42:06.494273', 'step': 13822, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 2847885091968}, 'timestamp': '2025-09-10 02:42:06.526258', 'step': 13822, 'epoch': 2} {'type': 'loss', 'content': 0.09240449965000153, 'timestamp': '2025-09-10 02:42:06.527974', 'step': 13823, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 128], 'flops': 3797092544000}, 'timestamp': '2025-09-10 02:42:06.560512', 'step': 13823, 'epoch': 2} {'type': 'loss', 'content': 0.14271490275859833, 'timestamp': '2025-09-10 02:42:06.584061', 'step': 13824, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 3322488817984}, 'timestamp': '2025-09-10 02:42:06.619208', 'step': 13824, 'epoch': 2} {'type': 'loss', 'content': 0.138319194316864, 'timestamp': '2025-09-10 02:42:06.625144', 'step': 13825, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 3322488817984}, 'timestamp': '2025-09-10 02:42:06.656135', 'step': 13825, 'epoch': 2} {'type': 'loss', 'content': 0.06996800750494003, 'timestamp': '2025-09-10 02:42:06.658383', 'step': 13826, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 2847885091968}, 'timestamp': '2025-09-10 02:42:06.689398', 'step': 13826, 'epoch': 2} {'type': 'loss', 'content': 0.11842905730009079, 'timestamp': '2025-09-10 02:42:06.691769', 'step': 13827, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 144], 'flops': 4271696270016}, 'timestamp': '2025-09-10 02:42:06.724622', 'step': 13827, 'epoch': 2} {'type': 'loss', 'content': 0.09324660897254944, 'timestamp': '2025-09-10 02:42:06.748676', 'step': 13828, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 80], 'flops': 2373281365952}, 'timestamp': '2025-09-10 02:42:06.780868', 'step': 13828, 'epoch': 2} {'type': 'loss', 'content': 0.11409562081098557, 'timestamp': '2025-09-10 02:42:06.783583', 'step': 13829, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 128], 'flops': 3797092544000}, 'timestamp': '2025-09-10 02:42:06.815475', 'step': 13829, 'epoch': 2} {'type': 'loss', 'content': 0.13531357049942017, 'timestamp': '2025-09-10 02:42:06.817813', 'step': 13830, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 3322488817984}, 'timestamp': '2025-09-10 02:42:06.848700', 'step': 13830, 'epoch': 2} {'type': 'loss', 'content': 0.08849320560693741, 'timestamp': '2025-09-10 02:42:06.850846', 'step': 13831, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 3322488817984}, 'timestamp': '2025-09-10 02:42:06.882490', 'step': 13831, 'epoch': 2} {'type': 'loss', 'content': 0.06675224751234055, 'timestamp': '2025-09-10 02:42:06.906134', 'step': 13832, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 3322488817984}, 'timestamp': '2025-09-10 02:42:06.938723', 'step': 13832, 'epoch': 2} {'type': 'loss', 'content': 0.13083292543888092, 'timestamp': '2025-09-10 02:42:06.940793', 'step': 13833, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 2847885091968}, 'timestamp': '2025-09-10 02:42:06.970841', 'step': 13833, 'epoch': 2} {'type': 'loss', 'content': 0.04842941835522652, 'timestamp': '2025-09-10 02:42:06.973263', 'step': 13834, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 2847885091968}, 'timestamp': '2025-09-10 02:42:07.003710', 'step': 13834, 'epoch': 2} {'type': 'loss', 'content': 0.0705626904964447, 'timestamp': '2025-09-10 02:42:07.006041', 'step': 13835, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 2847885091968}, 'timestamp': '2025-09-10 02:42:07.036169', 'step': 13835, 'epoch': 2} {'type': 'loss', 'content': 0.14172351360321045, 'timestamp': '2025-09-10 02:42:07.059525', 'step': 13836, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 128], 'flops': 3797092544000}, 'timestamp': '2025-09-10 02:42:07.089690', 'step': 13836, 'epoch': 2} {'type': 'loss', 'content': 0.0827428326010704, 'timestamp': '2025-09-10 02:42:07.092294', 'step': 13837, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 3322488817984}, 'timestamp': '2025-09-10 02:42:07.125158', 'step': 13837, 'epoch': 2} {'type': 'loss', 'content': 0.09803954511880875, 'timestamp': '2025-09-10 02:42:07.127708', 'step': 13838, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 80], 'flops': 2373281365952}, 'timestamp': '2025-09-10 02:42:07.158762', 'step': 13838, 'epoch': 2} {'type': 'loss', 'content': 0.11829956620931625, 'timestamp': '2025-09-10 02:42:07.160936', 'step': 13839, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 2847885091968}, 'timestamp': '2025-09-10 02:42:07.190910', 'step': 13839, 'epoch': 2} {'type': 'loss', 'content': 0.13983392715454102, 'timestamp': '2025-09-10 02:42:07.214409', 'step': 13840, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 3322488817984}, 'timestamp': '2025-09-10 02:42:07.246243', 'step': 13840, 'epoch': 2} {'type': 'loss', 'content': 0.07196863740682602, 'timestamp': '2025-09-10 02:42:07.248388', 'step': 13841, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 160], 'flops': 4746299996032}, 'timestamp': '2025-09-10 02:42:07.278404', 'step': 13841, 'epoch': 2} {'type': 'loss', 'content': 0.08860478550195694, 'timestamp': '2025-09-10 02:42:07.280994', 'step': 13842, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 2847885091968}, 'timestamp': '2025-09-10 02:42:07.311581', 'step': 13842, 'epoch': 2} {'type': 'loss', 'content': 0.05826803296804428, 'timestamp': '2025-09-10 02:42:07.314548', 'step': 13843, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 128], 'flops': 3797092544000}, 'timestamp': '2025-09-10 02:42:07.346653', 'step': 13843, 'epoch': 2} {'type': 'loss', 'content': 0.07365261763334274, 'timestamp': '2025-09-10 02:42:07.370374', 'step': 13844, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 3322488817984}, 'timestamp': '2025-09-10 02:42:07.400606', 'step': 13844, 'epoch': 2} {'type': 'loss', 'content': 0.11671897023916245, 'timestamp': '2025-09-10 02:42:07.402969', 'step': 13845, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 3322488817984}, 'timestamp': '2025-09-10 02:42:07.434578', 'step': 13845, 'epoch': 2} {'type': 'loss', 'content': 0.1722172349691391, 'timestamp': '2025-09-10 02:42:07.437032', 'step': 13846, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 144], 'flops': 4271696270016}, 'timestamp': '2025-09-10 02:42:07.467330', 'step': 13846, 'epoch': 2} {'type': 'loss', 'content': 0.1531049609184265, 'timestamp': '2025-09-10 02:42:07.469681', 'step': 13847, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 3322488817984}, 'timestamp': '2025-09-10 02:42:07.501128', 'step': 13847, 'epoch': 2} {'type': 'loss', 'content': 0.2293030321598053, 'timestamp': '2025-09-10 02:42:07.524825', 'step': 13848, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 2847885091968}, 'timestamp': '2025-09-10 02:42:07.558652', 'step': 13848, 'epoch': 2} {'type': 'loss', 'content': 0.11827898770570755, 'timestamp': '2025-09-10 02:42:07.560976', 'step': 13849, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 144], 'flops': 4271696270016}, 'timestamp': '2025-09-10 02:42:07.591131', 'step': 13849, 'epoch': 2} {'type': 'loss', 'content': 0.07522349059581757, 'timestamp': '2025-09-10 02:42:07.593568', 'step': 13850, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 2847885091968}, 'timestamp': '2025-09-10 02:42:07.623969', 'step': 13850, 'epoch': 2} {'type': 'loss', 'content': 0.09319797903299332, 'timestamp': '2025-09-10 02:42:07.626146', 'step': 13851, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 2847885091968}, 'timestamp': '2025-09-10 02:42:07.656446', 'step': 13851, 'epoch': 2} {'type': 'loss', 'content': 0.15561524033546448, 'timestamp': '2025-09-10 02:42:07.682404', 'step': 13852, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 3322488817984}, 'timestamp': '2025-09-10 02:42:07.713403', 'step': 13852, 'epoch': 2} {'type': 'loss', 'content': 0.09535034000873566, 'timestamp': '2025-09-10 02:42:07.715849', 'step': 13853, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 2847885091968}, 'timestamp': '2025-09-10 02:42:07.745794', 'step': 13853, 'epoch': 2} {'type': 'loss', 'content': 0.1549510657787323, 'timestamp': '2025-09-10 02:42:07.748344', 'step': 13854, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 2847885091968}, 'timestamp': '2025-09-10 02:42:07.778689', 'step': 13854, 'epoch': 2} {'type': 'loss', 'content': 0.07466184347867966, 'timestamp': '2025-09-10 02:42:07.780781', 'step': 13855, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 144], 'flops': 4271696270016}, 'timestamp': '2025-09-10 02:42:07.811743', 'step': 13855, 'epoch': 2} {'type': 'loss', 'content': 0.1530701369047165, 'timestamp': '2025-09-10 02:42:07.836089', 'step': 13856, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 2847885091968}, 'timestamp': '2025-09-10 02:42:07.867146', 'step': 13856, 'epoch': 2} {'type': 'loss', 'content': 0.14235664904117584, 'timestamp': '2025-09-10 02:42:07.869876', 'step': 13857, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 3322488817984}, 'timestamp': '2025-09-10 02:42:07.900922', 'step': 13857, 'epoch': 2} {'type': 'loss', 'content': 0.1125868484377861, 'timestamp': '2025-09-10 02:42:07.903075', 'step': 13858, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 2847885091968}, 'timestamp': '2025-09-10 02:42:07.933370', 'step': 13858, 'epoch': 2} {'type': 'loss', 'content': 0.10980066657066345, 'timestamp': '2025-09-10 02:42:07.935401', 'step': 13859, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 3322488817984}, 'timestamp': '2025-09-10 02:42:07.966990', 'step': 13859, 'epoch': 2} {'type': 'loss', 'content': 0.10211779177188873, 'timestamp': '2025-09-10 02:42:07.991716', 'step': 13860, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 3322488817984}, 'timestamp': '2025-09-10 02:42:08.024531', 'step': 13860, 'epoch': 2} {'type': 'loss', 'content': 0.12206804007291794, 'timestamp': '2025-09-10 02:42:08.026960', 'step': 13861, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 3322488817984}, 'timestamp': '2025-09-10 02:42:08.057625', 'step': 13861, 'epoch': 2} {'type': 'loss', 'content': 0.1377587616443634, 'timestamp': '2025-09-10 02:42:08.059990', 'step': 13862, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 3322488817984}, 'timestamp': '2025-09-10 02:42:08.090271', 'step': 13862, 'epoch': 2} {'type': 'loss', 'content': 0.14583200216293335, 'timestamp': '2025-09-10 02:42:08.092707', 'step': 13863, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 128], 'flops': 3797092544000}, 'timestamp': '2025-09-10 02:42:08.123024', 'step': 13863, 'epoch': 2} {'type': 'loss', 'content': 0.11032915860414505, 'timestamp': '2025-09-10 02:42:08.146648', 'step': 13864, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 2847885091968}, 'timestamp': '2025-09-10 02:42:08.177421', 'step': 13864, 'epoch': 2} {'type': 'loss', 'content': 0.15372346341609955, 'timestamp': '2025-09-10 02:42:08.179995', 'step': 13865, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 3322488817984}, 'timestamp': '2025-09-10 02:42:08.210424', 'step': 13865, 'epoch': 2} {'type': 'loss', 'content': 0.05196905881166458, 'timestamp': '2025-09-10 02:42:08.212527', 'step': 13866, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 80], 'flops': 2373281365952}, 'timestamp': '2025-09-10 02:42:08.245673', 'step': 13866, 'epoch': 2} {'type': 'loss', 'content': 0.11123163998126984, 'timestamp': '2025-09-10 02:42:08.248269', 'step': 13867, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 128], 'flops': 3797092544000}, 'timestamp': '2025-09-10 02:42:08.278412', 'step': 13867, 'epoch': 2} {'type': 'loss', 'content': 0.08135516941547394, 'timestamp': '2025-09-10 02:42:08.302405', 'step': 13868, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 3322488817984}, 'timestamp': '2025-09-10 02:42:08.333380', 'step': 13868, 'epoch': 2} {'type': 'loss', 'content': 0.12325490266084671, 'timestamp': '2025-09-10 02:42:08.336114', 'step': 13869, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 3322488817984}, 'timestamp': '2025-09-10 02:42:08.367466', 'step': 13869, 'epoch': 2} {'type': 'loss', 'content': 0.10461700707674026, 'timestamp': '2025-09-10 02:42:08.369783', 'step': 13870, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 3322488817984}, 'timestamp': '2025-09-10 02:42:08.401036', 'step': 13870, 'epoch': 2} {'type': 'loss', 'content': 0.05659043788909912, 'timestamp': '2025-09-10 02:42:08.403548', 'step': 13871, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 2847885091968}, 'timestamp': '2025-09-10 02:42:08.436323', 'step': 13871, 'epoch': 2} {'type': 'loss', 'content': 0.06257347017526627, 'timestamp': '2025-09-10 02:42:08.460124', 'step': 13872, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 176], 'flops': 5220903722048}, 'timestamp': '2025-09-10 02:42:08.493001', 'step': 13872, 'epoch': 2} {'type': 'loss', 'content': 0.07305484265089035, 'timestamp': '2025-09-10 02:42:08.497708', 'step': 13873, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 144], 'flops': 4271696270016}, 'timestamp': '2025-09-10 02:42:08.530613', 'step': 13873, 'epoch': 2} {'type': 'loss', 'content': 0.16893930733203888, 'timestamp': '2025-09-10 02:42:08.534447', 'step': 13874, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 128], 'flops': 3797092544000}, 'timestamp': '2025-09-10 02:42:08.577963', 'step': 13874, 'epoch': 2} {'type': 'loss', 'content': 0.09964226931333542, 'timestamp': '2025-09-10 02:42:08.580367', 'step': 13875, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 3322488817984}, 'timestamp': '2025-09-10 02:42:08.612509', 'step': 13875, 'epoch': 2} {'type': 'loss', 'content': 0.08437500894069672, 'timestamp': '2025-09-10 02:42:08.639296', 'step': 13876, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 2847885091968}, 'timestamp': '2025-09-10 02:42:08.671182', 'step': 13876, 'epoch': 2} {'type': 'loss', 'content': 0.06795428693294525, 'timestamp': '2025-09-10 02:42:08.674529', 'step': 13877, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 3322488817984}, 'timestamp': '2025-09-10 02:42:08.705685', 'step': 13877, 'epoch': 2} {'type': 'loss', 'content': 0.04135606810450554, 'timestamp': '2025-09-10 02:42:08.710358', 'step': 13878, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 3322488817984}, 'timestamp': '2025-09-10 02:42:08.742693', 'step': 13878, 'epoch': 2} {'type': 'loss', 'content': 0.1461644470691681, 'timestamp': '2025-09-10 02:42:08.745144', 'step': 13879, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 3322488817984}, 'timestamp': '2025-09-10 02:42:08.776566', 'step': 13879, 'epoch': 2} {'type': 'loss', 'content': 0.0976225733757019, 'timestamp': '2025-09-10 02:42:08.800148', 'step': 13880, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 3322488817984}, 'timestamp': '2025-09-10 02:42:08.837345', 'step': 13880, 'epoch': 2} {'type': 'loss', 'content': 0.08134564012289047, 'timestamp': '2025-09-10 02:42:08.841748', 'step': 13881, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 3322488817984}, 'timestamp': '2025-09-10 02:42:08.874780', 'step': 13881, 'epoch': 2} {'type': 'loss', 'content': 0.15938496589660645, 'timestamp': '2025-09-10 02:42:08.880391', 'step': 13882, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 2847885091968}, 'timestamp': '2025-09-10 02:42:08.914574', 'step': 13882, 'epoch': 2} {'type': 'loss', 'content': 0.11394602060317993, 'timestamp': '2025-09-10 02:42:08.916799', 'step': 13883, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 144], 'flops': 4271696270016}, 'timestamp': '2025-09-10 02:42:08.950289', 'step': 13883, 'epoch': 2} {'type': 'loss', 'content': 0.08209402859210968, 'timestamp': '2025-09-10 02:42:08.973868', 'step': 13884, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 2847885091968}, 'timestamp': '2025-09-10 02:42:09.004956', 'step': 13884, 'epoch': 2} {'type': 'loss', 'content': 0.039250146597623825, 'timestamp': '2025-09-10 02:42:09.007162', 'step': 13885, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 2847885091968}, 'timestamp': '2025-09-10 02:42:09.049119', 'step': 13885, 'epoch': 2} {'type': 'loss', 'content': 0.08841404318809509, 'timestamp': '2025-09-10 02:42:09.051980', 'step': 13886, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 2847885091968}, 'timestamp': '2025-09-10 02:42:09.082479', 'step': 13886, 'epoch': 2} {'type': 'loss', 'content': 0.10013747960329056, 'timestamp': '2025-09-10 02:42:09.085045', 'step': 13887, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 160], 'flops': 4746299996032}, 'timestamp': '2025-09-10 02:42:09.123140', 'step': 13887, 'epoch': 2} {'type': 'loss', 'content': 0.1008075550198555, 'timestamp': '2025-09-10 02:42:09.146447', 'step': 13888, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 3322488817984}, 'timestamp': '2025-09-10 02:42:09.176910', 'step': 13888, 'epoch': 2} {'type': 'loss', 'content': 0.07335460931062698, 'timestamp': '2025-09-10 02:42:09.181580', 'step': 13889, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 160], 'flops': 4746299996032}, 'timestamp': '2025-09-10 02:42:09.212911', 'step': 13889, 'epoch': 2} {'type': 'loss', 'content': 0.1599450409412384, 'timestamp': '2025-09-10 02:42:09.215809', 'step': 13890, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 3322488817984}, 'timestamp': '2025-09-10 02:42:09.246083', 'step': 13890, 'epoch': 2} {'type': 'loss', 'content': 0.09050614386796951, 'timestamp': '2025-09-10 02:42:09.248124', 'step': 13891, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 128], 'flops': 3797092544000}, 'timestamp': '2025-09-10 02:42:09.277901', 'step': 13891, 'epoch': 2} {'type': 'loss', 'content': 0.09643083065748215, 'timestamp': '2025-09-10 02:42:09.302577', 'step': 13892, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 2847885091968}, 'timestamp': '2025-09-10 02:42:09.333234', 'step': 13892, 'epoch': 2} {'type': 'loss', 'content': 0.06273508816957474, 'timestamp': '2025-09-10 02:42:09.335909', 'step': 13893, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 128], 'flops': 3797092544000}, 'timestamp': '2025-09-10 02:42:09.366649', 'step': 13893, 'epoch': 2} {'type': 'loss', 'content': 0.05666681379079819, 'timestamp': '2025-09-10 02:42:09.373058', 'step': 13894, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 2847885091968}, 'timestamp': '2025-09-10 02:42:09.404203', 'step': 13894, 'epoch': 2} {'type': 'loss', 'content': 0.12791068851947784, 'timestamp': '2025-09-10 02:42:09.406206', 'step': 13895, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 3322488817984}, 'timestamp': '2025-09-10 02:42:09.436775', 'step': 13895, 'epoch': 2} {'type': 'loss', 'content': 0.12467744201421738, 'timestamp': '2025-09-10 02:42:09.461950', 'step': 13896, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 2847885091968}, 'timestamp': '2025-09-10 02:42:09.498912', 'step': 13896, 'epoch': 2} {'type': 'loss', 'content': 0.1076783686876297, 'timestamp': '2025-09-10 02:42:09.502055', 'step': 13897, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 3322488817984}, 'timestamp': '2025-09-10 02:42:09.532531', 'step': 13897, 'epoch': 2} {'type': 'loss', 'content': 0.07588117569684982, 'timestamp': '2025-09-10 02:42:09.543209', 'step': 13898, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 3322488817984}, 'timestamp': '2025-09-10 02:42:09.576934', 'step': 13898, 'epoch': 2} {'type': 'loss', 'content': 0.10024195909500122, 'timestamp': '2025-09-10 02:42:09.579080', 'step': 13899, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 3322488817984}, 'timestamp': '2025-09-10 02:42:09.609203', 'step': 13899, 'epoch': 2} {'type': 'loss', 'content': 0.04518592357635498, 'timestamp': '2025-09-10 02:42:09.634207', 'step': 13900, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 2847885091968}, 'timestamp': '2025-09-10 02:42:09.668356', 'step': 13900, 'epoch': 2} {'type': 'loss', 'content': 0.10811126232147217, 'timestamp': '2025-09-10 02:42:09.670553', 'step': 13901, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 2847885091968}, 'timestamp': '2025-09-10 02:42:09.703752', 'step': 13901, 'epoch': 2} {'type': 'loss', 'content': 0.10364952683448792, 'timestamp': '2025-09-10 02:42:09.711331', 'step': 13902, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 2847885091968}, 'timestamp': '2025-09-10 02:42:09.744585', 'step': 13902, 'epoch': 2} {'type': 'loss', 'content': 0.06497623026371002, 'timestamp': '2025-09-10 02:42:09.746874', 'step': 13903, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 3322488817984}, 'timestamp': '2025-09-10 02:42:09.777063', 'step': 13903, 'epoch': 2} {'type': 'loss', 'content': 0.1087137833237648, 'timestamp': '2025-09-10 02:42:09.800764', 'step': 13904, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 128], 'flops': 3797092544000}, 'timestamp': '2025-09-10 02:42:09.831493', 'step': 13904, 'epoch': 2} {'type': 'loss', 'content': 0.12963992357254028, 'timestamp': '2025-09-10 02:42:09.834253', 'step': 13905, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 3322488817984}, 'timestamp': '2025-09-10 02:42:09.866676', 'step': 13905, 'epoch': 2} {'type': 'loss', 'content': 0.1467336267232895, 'timestamp': '2025-09-10 02:42:09.869035', 'step': 13906, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 128], 'flops': 3797092544000}, 'timestamp': '2025-09-10 02:42:09.899970', 'step': 13906, 'epoch': 2} {'type': 'loss', 'content': 0.1817464977502823, 'timestamp': '2025-09-10 02:42:09.904464', 'step': 13907, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 2847885091968}, 'timestamp': '2025-09-10 02:42:09.936137', 'step': 13907, 'epoch': 2} {'type': 'loss', 'content': 0.12904934585094452, 'timestamp': '2025-09-10 02:42:09.962915', 'step': 13908, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 3322488817984}, 'timestamp': '2025-09-10 02:42:10.017923', 'step': 13908, 'epoch': 2} {'type': 'loss', 'content': 0.06759385764598846, 'timestamp': '2025-09-10 02:42:10.035247', 'step': 13909, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 160], 'flops': 4746299996032}, 'timestamp': '2025-09-10 02:42:10.141006', 'step': 13909, 'epoch': 2} {'type': 'loss', 'content': 0.07919401675462723, 'timestamp': '2025-09-10 02:42:10.143609', 'step': 13910, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 3322488817984}, 'timestamp': '2025-09-10 02:42:10.183576', 'step': 13910, 'epoch': 2} {'type': 'loss', 'content': 0.038991037756204605, 'timestamp': '2025-09-10 02:42:10.202536', 'step': 13911, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 2847885091968}, 'timestamp': '2025-09-10 02:42:10.250471', 'step': 13911, 'epoch': 2} {'type': 'loss', 'content': 0.06640242040157318, 'timestamp': '2025-09-10 02:42:10.291911', 'step': 13912, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 3322488817984}, 'timestamp': '2025-09-10 02:42:10.375395', 'step': 13912, 'epoch': 2} {'type': 'loss', 'content': 0.086126908659935, 'timestamp': '2025-09-10 02:42:10.380212', 'step': 13913, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 3322488817984}, 'timestamp': '2025-09-10 02:42:10.452609', 'step': 13913, 'epoch': 2} {'type': 'loss', 'content': 0.11041445285081863, 'timestamp': '2025-09-10 02:42:10.469217', 'step': 13914, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 160], 'flops': 4746299996032}, 'timestamp': '2025-09-10 02:42:10.511806', 'step': 13914, 'epoch': 2} {'type': 'loss', 'content': 0.09185829758644104, 'timestamp': '2025-09-10 02:42:10.519143', 'step': 13915, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 80], 'flops': 2373281365952}, 'timestamp': '2025-09-10 02:42:10.575915', 'step': 13915, 'epoch': 2} {'type': 'loss', 'content': 0.10313570499420166, 'timestamp': '2025-09-10 02:42:10.601420', 'step': 13916, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 2847885091968}, 'timestamp': '2025-09-10 02:42:10.638193', 'step': 13916, 'epoch': 2} {'type': 'loss', 'content': 0.09038986265659332, 'timestamp': '2025-09-10 02:42:10.646657', 'step': 13917, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 128], 'flops': 3797092544000}, 'timestamp': '2025-09-10 02:42:10.685686', 'step': 13917, 'epoch': 2} {'type': 'loss', 'content': 0.05425393208861351, 'timestamp': '2025-09-10 02:42:10.696224', 'step': 13918, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 3322488817984}, 'timestamp': '2025-09-10 02:42:10.747229', 'step': 13918, 'epoch': 2} {'type': 'loss', 'content': 0.1505081206560135, 'timestamp': '2025-09-10 02:42:10.750529', 'step': 13919, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 128], 'flops': 3797092544000}, 'timestamp': '2025-09-10 02:42:10.784625', 'step': 13919, 'epoch': 2} {'type': 'loss', 'content': 0.07268256694078445, 'timestamp': '2025-09-10 02:42:10.807994', 'step': 13920, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 3322488817984}, 'timestamp': '2025-09-10 02:42:10.839401', 'step': 13920, 'epoch': 2} {'type': 'loss', 'content': 0.08458124101161957, 'timestamp': '2025-09-10 02:42:10.841928', 'step': 13921, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 144], 'flops': 4271696270016}, 'timestamp': '2025-09-10 02:42:10.877368', 'step': 13921, 'epoch': 2} {'type': 'loss', 'content': 0.053603898733854294, 'timestamp': '2025-09-10 02:42:10.880599', 'step': 13922, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 144], 'flops': 4271696270016}, 'timestamp': '2025-09-10 02:42:10.919626', 'step': 13922, 'epoch': 2} {'type': 'loss', 'content': 0.1479881852865219, 'timestamp': '2025-09-10 02:42:10.922931', 'step': 13923, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 128], 'flops': 3797092544000}, 'timestamp': '2025-09-10 02:42:10.963559', 'step': 13923, 'epoch': 2} {'type': 'loss', 'content': 0.15383943915367126, 'timestamp': '2025-09-10 02:42:10.987862', 'step': 13924, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 3322488817984}, 'timestamp': '2025-09-10 02:42:11.020977', 'step': 13924, 'epoch': 2} {'type': 'loss', 'content': 0.1303456425666809, 'timestamp': '2025-09-10 02:42:11.024990', 'step': 13925, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 3322488817984}, 'timestamp': '2025-09-10 02:42:11.061511', 'step': 13925, 'epoch': 2} {'type': 'loss', 'content': 0.04351316764950752, 'timestamp': '2025-09-10 02:42:11.065749', 'step': 13926, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 2847885091968}, 'timestamp': '2025-09-10 02:42:11.099891', 'step': 13926, 'epoch': 2} {'type': 'loss', 'content': 0.06171935424208641, 'timestamp': '2025-09-10 02:42:11.102805', 'step': 13927, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 3322488817984}, 'timestamp': '2025-09-10 02:42:11.139464', 'step': 13927, 'epoch': 2} {'type': 'loss', 'content': 0.13880352675914764, 'timestamp': '2025-09-10 02:42:11.166883', 'step': 13928, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 3322488817984}, 'timestamp': '2025-09-10 02:42:11.198595', 'step': 13928, 'epoch': 2} {'type': 'loss', 'content': 0.0829518660902977, 'timestamp': '2025-09-10 02:42:11.200682', 'step': 13929, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 3322488817984}, 'timestamp': '2025-09-10 02:42:11.232290', 'step': 13929, 'epoch': 2} {'type': 'loss', 'content': 0.05239564925432205, 'timestamp': '2025-09-10 02:42:11.234892', 'step': 13930, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 2847885091968}, 'timestamp': '2025-09-10 02:42:11.271208', 'step': 13930, 'epoch': 2} {'type': 'loss', 'content': 0.09939982742071152, 'timestamp': '2025-09-10 02:42:11.274264', 'step': 13931, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 3322488817984}, 'timestamp': '2025-09-10 02:42:11.307745', 'step': 13931, 'epoch': 2} {'type': 'loss', 'content': 0.052524954080581665, 'timestamp': '2025-09-10 02:42:11.332166', 'step': 13932, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 3322488817984}, 'timestamp': '2025-09-10 02:42:11.364232', 'step': 13932, 'epoch': 2} {'type': 'loss', 'content': 0.08633561432361603, 'timestamp': '2025-09-10 02:42:11.366431', 'step': 13933, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 2847885091968}, 'timestamp': '2025-09-10 02:42:11.396385', 'step': 13933, 'epoch': 2} {'type': 'loss', 'content': 0.06989726424217224, 'timestamp': '2025-09-10 02:42:11.401157', 'step': 13934, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 144], 'flops': 4271696270016}, 'timestamp': '2025-09-10 02:42:11.431789', 'step': 13934, 'epoch': 2} {'type': 'loss', 'content': 0.11424124240875244, 'timestamp': '2025-09-10 02:42:11.435133', 'step': 13935, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 2847885091968}, 'timestamp': '2025-09-10 02:42:11.476902', 'step': 13935, 'epoch': 2} {'type': 'loss', 'content': 0.04845980554819107, 'timestamp': '2025-09-10 02:42:11.500530', 'step': 13936, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 3322488817984}, 'timestamp': '2025-09-10 02:42:11.538494', 'step': 13936, 'epoch': 2} {'type': 'loss', 'content': 0.07692132890224457, 'timestamp': '2025-09-10 02:42:11.541148', 'step': 13937, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 3322488817984}, 'timestamp': '2025-09-10 02:42:11.574716', 'step': 13937, 'epoch': 2} {'type': 'loss', 'content': 0.09939105808734894, 'timestamp': '2025-09-10 02:42:11.578475', 'step': 13938, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 3322488817984}, 'timestamp': '2025-09-10 02:42:11.618917', 'step': 13938, 'epoch': 2} {'type': 'loss', 'content': 0.11740867048501968, 'timestamp': '2025-09-10 02:42:11.623130', 'step': 13939, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 3322488817984}, 'timestamp': '2025-09-10 02:42:11.659530', 'step': 13939, 'epoch': 2} {'type': 'loss', 'content': 0.10611271113157272, 'timestamp': '2025-09-10 02:42:11.687479', 'step': 13940, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 3322488817984}, 'timestamp': '2025-09-10 02:42:11.730499', 'step': 13940, 'epoch': 2} {'type': 'loss', 'content': 0.135670006275177, 'timestamp': '2025-09-10 02:42:11.732986', 'step': 13941, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 2847885091968}, 'timestamp': '2025-09-10 02:42:11.764573', 'step': 13941, 'epoch': 2} {'type': 'loss', 'content': 0.11880319565534592, 'timestamp': '2025-09-10 02:42:11.766599', 'step': 13942, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 128], 'flops': 3797092544000}, 'timestamp': '2025-09-10 02:42:11.799916', 'step': 13942, 'epoch': 2} {'type': 'loss', 'content': 0.10288656502962112, 'timestamp': '2025-09-10 02:42:11.802685', 'step': 13943, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 3322488817984}, 'timestamp': '2025-09-10 02:42:11.838564', 'step': 13943, 'epoch': 2} {'type': 'loss', 'content': 0.09467699378728867, 'timestamp': '2025-09-10 02:42:11.863634', 'step': 13944, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 2847885091968}, 'timestamp': '2025-09-10 02:42:11.893847', 'step': 13944, 'epoch': 2} {'type': 'loss', 'content': 0.13573960959911346, 'timestamp': '2025-09-10 02:42:11.899267', 'step': 13945, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 160], 'flops': 4746299996032}, 'timestamp': '2025-09-10 02:42:11.934966', 'step': 13945, 'epoch': 2} {'type': 'loss', 'content': 0.09014588594436646, 'timestamp': '2025-09-10 02:42:11.936827', 'step': 13946, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 3322488817984}, 'timestamp': '2025-09-10 02:42:11.968433', 'step': 13946, 'epoch': 2} {'type': 'loss', 'content': 0.11649824678897858, 'timestamp': '2025-09-10 02:42:11.970498', 'step': 13947, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 80], 'flops': 2373281365952}, 'timestamp': '2025-09-10 02:42:12.007691', 'step': 13947, 'epoch': 2} {'type': 'loss', 'content': 0.07778982073068619, 'timestamp': '2025-09-10 02:42:12.031808', 'step': 13948, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 128], 'flops': 3797092544000}, 'timestamp': '2025-09-10 02:42:12.074853', 'step': 13948, 'epoch': 2} {'type': 'loss', 'content': 0.10423865169286728, 'timestamp': '2025-09-10 02:42:12.076998', 'step': 13949, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 2847885091968}, 'timestamp': '2025-09-10 02:42:12.110393', 'step': 13949, 'epoch': 2} {'type': 'loss', 'content': 0.09661076962947845, 'timestamp': '2025-09-10 02:42:12.112678', 'step': 13950, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 2847885091968}, 'timestamp': '2025-09-10 02:42:12.147296', 'step': 13950, 'epoch': 2} {'type': 'loss', 'content': 0.08465784788131714, 'timestamp': '2025-09-10 02:42:12.150018', 'step': 13951, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 144], 'flops': 4271696270016}, 'timestamp': '2025-09-10 02:42:12.182248', 'step': 13951, 'epoch': 2} {'type': 'loss', 'content': 0.09481760114431381, 'timestamp': '2025-09-10 02:42:12.206292', 'step': 13952, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 3322488817984}, 'timestamp': '2025-09-10 02:42:12.242270', 'step': 13952, 'epoch': 2} {'type': 'loss', 'content': 0.13108131289482117, 'timestamp': '2025-09-10 02:42:12.245298', 'step': 13953, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 2847885091968}, 'timestamp': '2025-09-10 02:42:12.278864', 'step': 13953, 'epoch': 2} {'type': 'loss', 'content': 0.0842154249548912, 'timestamp': '2025-09-10 02:42:12.281329', 'step': 13954, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 128], 'flops': 3797092544000}, 'timestamp': '2025-09-10 02:42:12.319522', 'step': 13954, 'epoch': 2} {'type': 'loss', 'content': 0.1231454387307167, 'timestamp': '2025-09-10 02:42:12.322411', 'step': 13955, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 3322488817984}, 'timestamp': '2025-09-10 02:42:12.354000', 'step': 13955, 'epoch': 2} {'type': 'loss', 'content': 0.09493476152420044, 'timestamp': '2025-09-10 02:42:12.377531', 'step': 13956, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 2847885091968}, 'timestamp': '2025-09-10 02:42:12.410457', 'step': 13956, 'epoch': 2} {'type': 'loss', 'content': 0.08882425725460052, 'timestamp': '2025-09-10 02:42:12.412857', 'step': 13957, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 128], 'flops': 3797092544000}, 'timestamp': '2025-09-10 02:42:12.445045', 'step': 13957, 'epoch': 2} {'type': 'loss', 'content': 0.17276957631111145, 'timestamp': '2025-09-10 02:42:12.447835', 'step': 13958, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 2847885091968}, 'timestamp': '2025-09-10 02:42:12.480175', 'step': 13958, 'epoch': 2} {'type': 'loss', 'content': 0.1287011057138443, 'timestamp': '2025-09-10 02:42:12.485456', 'step': 13959, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 144], 'flops': 4271696270016}, 'timestamp': '2025-09-10 02:42:12.524602', 'step': 13959, 'epoch': 2} {'type': 'loss', 'content': 0.1813051998615265, 'timestamp': '2025-09-10 02:42:12.557811', 'step': 13960, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 3322488817984}, 'timestamp': '2025-09-10 02:42:12.593217', 'step': 13960, 'epoch': 2} {'type': 'loss', 'content': 0.14107193052768707, 'timestamp': '2025-09-10 02:42:12.596726', 'step': 13961, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 144], 'flops': 4271696270016}, 'timestamp': '2025-09-10 02:42:12.633809', 'step': 13961, 'epoch': 2} {'type': 'loss', 'content': 0.12850314378738403, 'timestamp': '2025-09-10 02:42:12.637136', 'step': 13962, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 2847885091968}, 'timestamp': '2025-09-10 02:42:12.671730', 'step': 13962, 'epoch': 2} {'type': 'loss', 'content': 0.10571098327636719, 'timestamp': '2025-09-10 02:42:12.676479', 'step': 13963, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 128], 'flops': 3797092544000}, 'timestamp': '2025-09-10 02:42:12.710951', 'step': 13963, 'epoch': 2} {'type': 'loss', 'content': 0.14573365449905396, 'timestamp': '2025-09-10 02:42:12.738083', 'step': 13964, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 2847885091968}, 'timestamp': '2025-09-10 02:42:12.768458', 'step': 13964, 'epoch': 2} {'type': 'loss', 'content': 0.0810432955622673, 'timestamp': '2025-09-10 02:42:12.771303', 'step': 13965, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 2847885091968}, 'timestamp': '2025-09-10 02:42:12.804888', 'step': 13965, 'epoch': 2} {'type': 'loss', 'content': 0.06007716432213783, 'timestamp': '2025-09-10 02:42:12.810977', 'step': 13966, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 3322488817984}, 'timestamp': '2025-09-10 02:42:12.846614', 'step': 13966, 'epoch': 2} {'type': 'loss', 'content': 0.1186138242483139, 'timestamp': '2025-09-10 02:42:12.849644', 'step': 13967, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 3322488817984}, 'timestamp': '2025-09-10 02:42:12.880618', 'step': 13967, 'epoch': 2} {'type': 'loss', 'content': 0.1284869909286499, 'timestamp': '2025-09-10 02:42:12.904639', 'step': 13968, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 3322488817984}, 'timestamp': '2025-09-10 02:42:12.937279', 'step': 13968, 'epoch': 2} {'type': 'loss', 'content': 0.12427745014429092, 'timestamp': '2025-09-10 02:42:12.939655', 'step': 13969, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 2847885091968}, 'timestamp': '2025-09-10 02:42:12.971100', 'step': 13969, 'epoch': 2} {'type': 'loss', 'content': 0.05819877237081528, 'timestamp': '2025-09-10 02:42:12.975295', 'step': 13970, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 2847885091968}, 'timestamp': '2025-09-10 02:42:13.011208', 'step': 13970, 'epoch': 2} {'type': 'loss', 'content': 0.1169886589050293, 'timestamp': '2025-09-10 02:42:13.013573', 'step': 13971, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 2847885091968}, 'timestamp': '2025-09-10 02:42:13.045396', 'step': 13971, 'epoch': 2} {'type': 'loss', 'content': 0.14289315044879913, 'timestamp': '2025-09-10 02:42:13.070927', 'step': 13972, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 128], 'flops': 3797092544000}, 'timestamp': '2025-09-10 02:42:13.101210', 'step': 13972, 'epoch': 2} {'type': 'loss', 'content': 0.05755999684333801, 'timestamp': '2025-09-10 02:42:13.104611', 'step': 13973, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 2847885091968}, 'timestamp': '2025-09-10 02:42:13.139165', 'step': 13973, 'epoch': 2} {'type': 'loss', 'content': 0.10870900005102158, 'timestamp': '2025-09-10 02:42:13.141616', 'step': 13974, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 128], 'flops': 3797092544000}, 'timestamp': '2025-09-10 02:42:13.172732', 'step': 13974, 'epoch': 2} {'type': 'loss', 'content': 0.11956055462360382, 'timestamp': '2025-09-10 02:42:13.175049', 'step': 13975, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 2847885091968}, 'timestamp': '2025-09-10 02:42:13.205404', 'step': 13975, 'epoch': 2} {'type': 'loss', 'content': 0.1815933734178543, 'timestamp': '2025-09-10 02:42:13.229134', 'step': 13976, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 2847885091968}, 'timestamp': '2025-09-10 02:42:13.260897', 'step': 13976, 'epoch': 2} {'type': 'loss', 'content': 0.07621290534734726, 'timestamp': '2025-09-10 02:42:13.264093', 'step': 13977, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 3322488817984}, 'timestamp': '2025-09-10 02:42:13.299711', 'step': 13977, 'epoch': 2} {'type': 'loss', 'content': 0.11762519925832748, 'timestamp': '2025-09-10 02:42:13.302263', 'step': 13978, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 2847885091968}, 'timestamp': '2025-09-10 02:42:13.333975', 'step': 13978, 'epoch': 2} {'type': 'loss', 'content': 0.13517266511917114, 'timestamp': '2025-09-10 02:42:13.336139', 'step': 13979, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 3322488817984}, 'timestamp': '2025-09-10 02:42:13.366707', 'step': 13979, 'epoch': 2} {'type': 'loss', 'content': 0.07885921746492386, 'timestamp': '2025-09-10 02:42:13.389755', 'step': 13980, 'epoch': 2} {'type': 'flops', 'content': [{'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1582003754624}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1898404492032}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1898404492032}, {'type': 'perplexity', 'in_batch_dim': [8, 144], 'batch_size': 8, 'flops': 2847606704256}, {'type': 'perplexity', 'in_batch_dim': [8, 64], 'batch_size': 8, 'flops': 1265603017216}, {'type': 'perplexity', 'in_batch_dim': [8, 112], 'batch_size': 8, 'flops': 2214805229440}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1898404492032}, {'type': 'perplexity', 'in_batch_dim': [8, 112], 'batch_size': 8, 'flops': 2214805229440}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1898404492032}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1898404492032}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1898404492032}, {'type': 'perplexity', 'in_batch_dim': [8, 112], 'batch_size': 8, 'flops': 2214805229440}, {'type': 'perplexity', 'in_batch_dim': [8, 112], 'batch_size': 8, 'flops': 2214805229440}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1582003754624}, {'type': 'perplexity', 'in_batch_dim': [8, 144], 'batch_size': 8, 'flops': 2847606704256}, {'type': 'perplexity', 'in_batch_dim': [8, 112], 'batch_size': 8, 'flops': 2214805229440}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1582003754624}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1898404492032}, {'type': 'perplexity', 'in_batch_dim': [8, 64], 'batch_size': 8, 'flops': 1265603017216}, {'type': 'perplexity', 'in_batch_dim': [8, 64], 'batch_size': 8, 'flops': 1265603017216}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1898404492032}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1582003754624}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1582003754624}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1582003754624}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1582003754624}, {'type': 'perplexity', 'in_batch_dim': [8, 64], 'batch_size': 8, 'flops': 1265603017216}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1898404492032}, {'type': 'perplexity', 'in_batch_dim': [8, 64], 'batch_size': 8, 'flops': 1265603017216}, {'type': 'perplexity', 'in_batch_dim': [8, 64], 'batch_size': 8, 'flops': 1265603017216}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1582003754624}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1582003754624}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1582003754624}, {'type': 'perplexity', 'in_batch_dim': [8, 112], 'batch_size': 8, 'flops': 2214805229440}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1582003754624}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1582003754624}, {'type': 'perplexity', 'in_batch_dim': [8, 160], 'batch_size': 8, 'flops': 3164007441664}, {'type': 'perplexity', 'in_batch_dim': [8, 64], 'batch_size': 8, 'flops': 1265603017216}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1898404492032}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1898404492032}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1898404492032}, {'type': 'perplexity', 'in_batch_dim': [8, 64], 'batch_size': 8, 'flops': 1265603017216}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1582003754624}, {'type': 'perplexity', 'in_batch_dim': [8, 64], 'batch_size': 8, 'flops': 1265603017216}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1582003754624}, {'type': 'perplexity', 'in_batch_dim': [8, 64], 'batch_size': 8, 'flops': 1265603017216}, {'type': 'perplexity', 'in_batch_dim': [8, 112], 'batch_size': 8, 'flops': 2214805229440}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1582003754624}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1898404492032}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1898404492032}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1898404492032}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1898404492032}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1898404492032}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1898404492032}, {'type': 'perplexity', 'in_batch_dim': [8, 64], 'batch_size': 8, 'flops': 1265603017216}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1898404492032}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1582003754624}, {'type': 'perplexity', 'in_batch_dim': [8, 64], 'batch_size': 8, 'flops': 1265603017216}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1898404492032}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1898404492032}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1898404492032}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1898404492032}, {'type': 'perplexity', 'in_batch_dim': [8, 128], 'batch_size': 8, 'flops': 2531205966848}, {'type': 'perplexity', 'in_batch_dim': [8, 112], 'batch_size': 8, 'flops': 2214805229440}, {'type': 'perplexity', 'in_batch_dim': [8, 64], 'batch_size': 8, 'flops': 1265603017216}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1582003754624}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1898404492032}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1582003754624}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1898404492032}, {'type': 'perplexity', 'in_batch_dim': [8, 64], 'batch_size': 8, 'flops': 1265603017216}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1898404492032}, {'type': 'perplexity', 'in_batch_dim': [8, 112], 'batch_size': 8, 'flops': 2214805229440}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1898404492032}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1582003754624}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1582003754624}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1582003754624}, {'type': 'perplexity', 'in_batch_dim': [8, 64], 'batch_size': 8, 'flops': 1265603017216}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1582003754624}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1898404492032}, {'type': 'perplexity', 'in_batch_dim': [8, 64], 'batch_size': 8, 'flops': 1265603017216}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1582003754624}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1898404492032}, {'type': 'perplexity', 'in_batch_dim': [8, 64], 'batch_size': 8, 'flops': 1265603017216}, {'type': 'perplexity', 'in_batch_dim': [8, 112], 'batch_size': 8, 'flops': 2214805229440}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1582003754624}, {'type': 'perplexity', 'in_batch_dim': [8, 144], 'batch_size': 8, 'flops': 2847606704256}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1582003754624}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1582003754624}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1582003754624}, {'type': 'perplexity', 'in_batch_dim': [8, 64], 'batch_size': 8, 'flops': 1265603017216}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1898404492032}, {'type': 'perplexity', 'in_batch_dim': [8, 112], 'batch_size': 8, 'flops': 2214805229440}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1898404492032}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1582003754624}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1582003754624}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1582003754624}, {'type': 'perplexity', 'in_batch_dim': [8, 112], 'batch_size': 8, 'flops': 2214805229440}, {'type': 'perplexity', 'in_batch_dim': [8, 64], 'batch_size': 8, 'flops': 1265603017216}, {'type': 'perplexity', 'in_batch_dim': [8, 112], 'batch_size': 8, 'flops': 2214805229440}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1582003754624}, {'type': 'perplexity', 'in_batch_dim': [8, 64], 'batch_size': 8, 'flops': 1265603017216}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1582003754624}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1582003754624}, {'type': 'perplexity', 'in_batch_dim': [8, 64], 'batch_size': 8, 'flops': 1265603017216}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1582003754624}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1582003754624}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1582003754624}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1898404492032}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1582003754624}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1582003754624}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1898404492032}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1898404492032}, {'type': 'perplexity', 'in_batch_dim': [8, 112], 'batch_size': 8, 'flops': 2214805229440}, {'type': 'perplexity', 'in_batch_dim': [8, 64], 'batch_size': 8, 'flops': 1265603017216}, {'type': 'perplexity', 'in_batch_dim': [8, 112], 'batch_size': 8, 'flops': 2214805229440}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1898404492032}, {'type': 'perplexity', 'in_batch_dim': [8, 112], 'batch_size': 8, 'flops': 2214805229440}, {'type': 'perplexity', 'in_batch_dim': [8, 128], 'batch_size': 8, 'flops': 2531205966848}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1582003754624}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1898404492032}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1898404492032}, {'type': 'perplexity', 'in_batch_dim': [8, 192], 'batch_size': 8, 'flops': 3796808916480}, {'type': 'perplexity', 'in_batch_dim': [8, 64], 'batch_size': 8, 'flops': 1265603017216}, {'type': 'perplexity', 'in_batch_dim': [8, 144], 'batch_size': 8, 'flops': 2847606704256}, {'type': 'perplexity', 'in_batch_dim': [8, 64], 'batch_size': 8, 'flops': 1265603017216}, {'type': 'perplexity', 'in_batch_dim': [8, 144], 'batch_size': 8, 'flops': 2847606704256}, {'type': 'perplexity', 'in_batch_dim': [8, 64], 'batch_size': 8, 'flops': 1265603017216}, {'type': 'perplexity', 'in_batch_dim': [8, 64], 'batch_size': 8, 'flops': 1265603017216}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1898404492032}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1898404492032}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1582003754624}, {'type': 'perplexity', 'in_batch_dim': [8, 128], 'batch_size': 8, 'flops': 2531205966848}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1898404492032}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1898404492032}, {'type': 'perplexity', 'in_batch_dim': [8, 112], 'batch_size': 8, 'flops': 2214805229440}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1582003754624}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1582003754624}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1898404492032}, {'type': 'perplexity', 'in_batch_dim': [8, 64], 'batch_size': 8, 'flops': 1265603017216}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1898404492032}, {'type': 'perplexity', 'in_batch_dim': [8, 112], 'batch_size': 8, 'flops': 2214805229440}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1898404492032}, {'type': 'perplexity', 'in_batch_dim': [8, 64], 'batch_size': 8, 'flops': 1265603017216}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1582003754624}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1898404492032}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1898404492032}, {'type': 'perplexity', 'in_batch_dim': [8, 64], 'batch_size': 8, 'flops': 1265603017216}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1898404492032}, {'type': 'perplexity', 'in_batch_dim': [8, 64], 'batch_size': 8, 'flops': 1265603017216}, {'type': 'perplexity', 'in_batch_dim': [8, 112], 'batch_size': 8, 'flops': 2214805229440}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1582003754624}, {'type': 'perplexity', 'in_batch_dim': [8, 128], 'batch_size': 8, 'flops': 2531205966848}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1582003754624}, {'type': 'perplexity', 'in_batch_dim': [8, 112], 'batch_size': 8, 'flops': 2214805229440}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1898404492032}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1582003754624}, {'type': 'perplexity', 'in_batch_dim': [8, 112], 'batch_size': 8, 'flops': 2214805229440}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1582003754624}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1898404492032}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1898404492032}, {'type': 'perplexity', 'in_batch_dim': [8, 128], 'batch_size': 8, 'flops': 2531205966848}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1898404492032}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1898404492032}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1582003754624}, {'type': 'perplexity', 'in_batch_dim': [8, 112], 'batch_size': 8, 'flops': 2214805229440}, {'type': 'perplexity', 'in_batch_dim': [8, 128], 'batch_size': 8, 'flops': 2531205966848}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1898404492032}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1582003754624}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1582003754624}, {'type': 'perplexity', 'in_batch_dim': [8, 128], 'batch_size': 8, 'flops': 2531205966848}, {'type': 'perplexity', 'in_batch_dim': [8, 112], 'batch_size': 8, 'flops': 2214805229440}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1582003754624}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1582003754624}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1898404492032}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1582003754624}, {'type': 'perplexity', 'in_batch_dim': [8, 64], 'batch_size': 8, 'flops': 1265603017216}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1582003754624}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1582003754624}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1582003754624}, {'type': 'perplexity', 'in_batch_dim': [8, 112], 'batch_size': 8, 'flops': 2214805229440}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1582003754624}, {'type': 'perplexity', 'in_batch_dim': [8, 64], 'batch_size': 8, 'flops': 1265603017216}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1582003754624}, {'type': 'perplexity', 'in_batch_dim': [8, 128], 'batch_size': 8, 'flops': 2531205966848}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1898404492032}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1898404492032}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1582003754624}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1898404492032}, {'type': 'perplexity', 'in_batch_dim': [8, 64], 'batch_size': 8, 'flops': 1265603017216}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1898404492032}, {'type': 'perplexity', 'in_batch_dim': [8, 112], 'batch_size': 8, 'flops': 2214805229440}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1898404492032}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1582003754624}, {'type': 'perplexity', 'in_batch_dim': [8, 112], 'batch_size': 8, 'flops': 2214805229440}, {'type': 'perplexity', 'in_batch_dim': [8, 112], 'batch_size': 8, 'flops': 2214805229440}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1898404492032}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1898404492032}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1898404492032}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1898404492032}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1582003754624}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1582003754624}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1582003754624}, {'type': 'perplexity', 'in_batch_dim': [8, 112], 'batch_size': 8, 'flops': 2214805229440}, {'type': 'perplexity', 'in_batch_dim': [8, 64], 'batch_size': 8, 'flops': 1265603017216}, {'type': 'perplexity', 'in_batch_dim': [8, 144], 'batch_size': 8, 'flops': 2847606704256}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1898404492032}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1898404492032}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1898404492032}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1898404492032}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1898404492032}, {'type': 'perplexity', 'in_batch_dim': [8, 112], 'batch_size': 8, 'flops': 2214805229440}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1582003754624}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1582003754624}, {'type': 'perplexity', 'in_batch_dim': [8, 64], 'batch_size': 8, 'flops': 1265603017216}, {'type': 'perplexity', 'in_batch_dim': [8, 112], 'batch_size': 8, 'flops': 2214805229440}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1582003754624}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1898404492032}, {'type': 'perplexity', 'in_batch_dim': [8, 64], 'batch_size': 8, 'flops': 1265603017216}, {'type': 'perplexity', 'in_batch_dim': [8, 112], 'batch_size': 8, 'flops': 2214805229440}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1898404492032}, {'type': 'perplexity', 'in_batch_dim': [8, 64], 'batch_size': 8, 'flops': 1265603017216}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1582003754624}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1582003754624}, {'type': 'perplexity', 'in_batch_dim': [8, 64], 'batch_size': 8, 'flops': 1265603017216}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1898404492032}, {'type': 'perplexity', 'in_batch_dim': [8, 64], 'batch_size': 8, 'flops': 1265603017216}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1582003754624}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1898404492032}, {'type': 'perplexity', 'in_batch_dim': [8, 112], 'batch_size': 8, 'flops': 2214805229440}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1582003754624}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1898404492032}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1582003754624}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1898404492032}, {'type': 'perplexity', 'in_batch_dim': [8, 112], 'batch_size': 8, 'flops': 2214805229440}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1898404492032}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1898404492032}, {'type': 'perplexity', 'in_batch_dim': [8, 64], 'batch_size': 8, 'flops': 1265603017216}, {'type': 'perplexity', 'in_batch_dim': [8, 64], 'batch_size': 8, 'flops': 1265603017216}, {'type': 'perplexity', 'in_batch_dim': [8, 128], 'batch_size': 8, 'flops': 2531205966848}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1582003754624}, {'type': 'perplexity', 'in_batch_dim': [8, 64], 'batch_size': 8, 'flops': 1265603017216}, {'type': 'perplexity', 'in_batch_dim': [8, 112], 'batch_size': 8, 'flops': 2214805229440}, {'type': 'perplexity', 'in_batch_dim': [8, 112], 'batch_size': 8, 'flops': 2214805229440}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1898404492032}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1582003754624}, {'type': 'perplexity', 'in_batch_dim': [8, 128], 'batch_size': 8, 'flops': 2531205966848}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1898404492032}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1582003754624}, {'type': 'perplexity', 'in_batch_dim': [8, 112], 'batch_size': 8, 'flops': 2214805229440}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1898404492032}, {'type': 'perplexity', 'in_batch_dim': [8, 64], 'batch_size': 8, 'flops': 1265603017216}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1582003754624}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1898404492032}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1898404492032}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1582003754624}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1582003754624}, {'type': 'perplexity', 'in_batch_dim': [8, 64], 'batch_size': 8, 'flops': 1265603017216}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1898404492032}, {'type': 'perplexity', 'in_batch_dim': [8, 112], 'batch_size': 8, 'flops': 2214805229440}, {'type': 'perplexity', 'in_batch_dim': [8, 64], 'batch_size': 8, 'flops': 1265603017216}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1898404492032}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1582003754624}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1582003754624}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1898404492032}, {'type': 'perplexity', 'in_batch_dim': [8, 112], 'batch_size': 8, 'flops': 2214805229440}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1898404492032}, {'type': 'perplexity', 'in_batch_dim': [8, 112], 'batch_size': 8, 'flops': 2214805229440}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1582003754624}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1582003754624}, {'type': 'perplexity', 'in_batch_dim': [8, 64], 'batch_size': 8, 'flops': 1265603017216}, {'type': 'perplexity', 'in_batch_dim': [8, 112], 'batch_size': 8, 'flops': 2214805229440}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1582003754624}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1898404492032}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1582003754624}, {'type': 'perplexity', 'in_batch_dim': [8, 112], 'batch_size': 8, 'flops': 2214805229440}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1898404492032}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1898404492032}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1582003754624}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1898404492032}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1582003754624}, {'type': 'perplexity', 'in_batch_dim': [8, 112], 'batch_size': 8, 'flops': 2214805229440}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1582003754624}, {'type': 'perplexity', 'in_batch_dim': [8, 64], 'batch_size': 8, 'flops': 1265603017216}, {'type': 'perplexity', 'in_batch_dim': [8, 64], 'batch_size': 8, 'flops': 1265603017216}, {'type': 'perplexity', 'in_batch_dim': [8, 48], 'batch_size': 8, 'flops': 949202279808}, {'type': 'perplexity', 'in_batch_dim': [8, 112], 'batch_size': 8, 'flops': 2214805229440}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1582003754624}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1898404492032}, {'type': 'perplexity', 'in_batch_dim': [8, 128], 'batch_size': 8, 'flops': 2531205966848}, {'type': 'perplexity', 'in_batch_dim': [8, 112], 'batch_size': 8, 'flops': 2214805229440}, {'type': 'perplexity', 'in_batch_dim': [8, 128], 'batch_size': 8, 'flops': 2531205966848}, {'type': 'perplexity', 'in_batch_dim': [8, 112], 'batch_size': 8, 'flops': 2214805229440}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1582003754624}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1898404492032}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1582003754624}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1582003754624}, {'type': 'perplexity', 'in_batch_dim': [8, 112], 'batch_size': 8, 'flops': 2214805229440}, {'type': 'perplexity', 'in_batch_dim': [8, 112], 'batch_size': 8, 'flops': 2214805229440}, {'type': 'perplexity', 'in_batch_dim': [8, 64], 'batch_size': 8, 'flops': 1265603017216}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1898404492032}, {'type': 'perplexity', 'in_batch_dim': [8, 112], 'batch_size': 8, 'flops': 2214805229440}, {'type': 'perplexity', 'in_batch_dim': [8, 64], 'batch_size': 8, 'flops': 1265603017216}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1898404492032}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1582003754624}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1898404492032}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1898404492032}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1898404492032}, {'type': 'perplexity', 'in_batch_dim': [8, 48], 'batch_size': 8, 'flops': 949202279808}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1898404492032}, {'type': 'perplexity', 'in_batch_dim': [8, 112], 'batch_size': 8, 'flops': 2214805229440}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1582003754624}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1898404492032}, {'type': 'perplexity', 'in_batch_dim': [8, 64], 'batch_size': 8, 'flops': 1265603017216}, {'type': 'perplexity', 'in_batch_dim': [8, 112], 'batch_size': 8, 'flops': 2214805229440}, {'type': 'perplexity', 'in_batch_dim': [8, 64], 'batch_size': 8, 'flops': 1265603017216}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1582003754624}, {'type': 'perplexity', 'in_batch_dim': [8, 64], 'batch_size': 8, 'flops': 1265603017216}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1582003754624}, {'type': 'perplexity', 'in_batch_dim': [8, 112], 'batch_size': 8, 'flops': 2214805229440}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1582003754624}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1582003754624}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1582003754624}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1582003754624}, {'type': 'perplexity', 'in_batch_dim': [8, 112], 'batch_size': 8, 'flops': 2214805229440}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1582003754624}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1898404492032}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1898404492032}, {'type': 'perplexity', 'in_batch_dim': [8, 112], 'batch_size': 8, 'flops': 2214805229440}, {'type': 'perplexity', 'in_batch_dim': [8, 144], 'batch_size': 8, 'flops': 2847606704256}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1582003754624}, {'type': 'perplexity', 'in_batch_dim': [8, 64], 'batch_size': 8, 'flops': 1265603017216}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1898404492032}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1898404492032}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1582003754624}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1898404492032}, {'type': 'perplexity', 'in_batch_dim': [8, 128], 'batch_size': 8, 'flops': 2531205966848}, {'type': 'perplexity', 'in_batch_dim': [8, 112], 'batch_size': 8, 'flops': 2214805229440}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1898404492032}, {'type': 'perplexity', 'in_batch_dim': [8, 64], 'batch_size': 8, 'flops': 1265603017216}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1898404492032}, {'type': 'perplexity', 'in_batch_dim': [8, 160], 'batch_size': 8, 'flops': 3164007441664}, {'type': 'perplexity', 'in_batch_dim': [8, 64], 'batch_size': 8, 'flops': 1265603017216}, {'type': 'perplexity', 'in_batch_dim': [8, 112], 'batch_size': 8, 'flops': 2214805229440}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1898404492032}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1898404492032}, {'type': 'perplexity', 'in_batch_dim': [8, 144], 'batch_size': 8, 'flops': 2847606704256}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1582003754624}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1898404492032}, {'type': 'perplexity', 'in_batch_dim': [8, 64], 'batch_size': 8, 'flops': 1265603017216}, {'type': 'perplexity', 'in_batch_dim': [8, 64], 'batch_size': 8, 'flops': 1265603017216}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1898404492032}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1898404492032}, {'type': 'perplexity', 'in_batch_dim': [8, 112], 'batch_size': 8, 'flops': 2214805229440}, {'type': 'perplexity', 'in_batch_dim': [8, 112], 'batch_size': 8, 'flops': 2214805229440}, {'type': 'perplexity', 'in_batch_dim': [8, 128], 'batch_size': 8, 'flops': 2531205966848}, {'type': 'perplexity', 'in_batch_dim': [8, 112], 'batch_size': 8, 'flops': 2214805229440}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1582003754624}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1582003754624}, {'type': 'perplexity', 'in_batch_dim': [8, 112], 'batch_size': 8, 'flops': 2214805229440}, {'type': 'perplexity', 'in_batch_dim': [8, 128], 'batch_size': 8, 'flops': 2531205966848}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1582003754624}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1582003754624}, {'type': 'perplexity', 'in_batch_dim': [8, 64], 'batch_size': 8, 'flops': 1265603017216}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1898404492032}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1582003754624}, {'type': 'perplexity', 'in_batch_dim': [8, 112], 'batch_size': 8, 'flops': 2214805229440}, {'type': 'perplexity', 'in_batch_dim': [8, 64], 'batch_size': 8, 'flops': 1265603017216}, {'type': 'perplexity', 'in_batch_dim': [8, 64], 'batch_size': 8, 'flops': 1265603017216}, {'type': 'perplexity', 'in_batch_dim': [8, 128], 'batch_size': 8, 'flops': 2531205966848}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1898404492032}, {'type': 'perplexity', 'in_batch_dim': [8, 112], 'batch_size': 8, 'flops': 2214805229440}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1582003754624}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1582003754624}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1582003754624}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1582003754624}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1582003754624}, {'type': 'perplexity', 'in_batch_dim': [8, 112], 'batch_size': 8, 'flops': 2214805229440}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1898404492032}, {'type': 'perplexity', 'in_batch_dim': [8, 112], 'batch_size': 8, 'flops': 2214805229440}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1898404492032}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1582003754624}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1582003754624}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1898404492032}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1898404492032}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1582003754624}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1582003754624}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1582003754624}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1582003754624}, {'type': 'perplexity', 'in_batch_dim': [8, 48], 'batch_size': 8, 'flops': 949202279808}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1582003754624}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1898404492032}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1582003754624}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1898404492032}, {'type': 'perplexity', 'in_batch_dim': [8, 112], 'batch_size': 8, 'flops': 2214805229440}, {'type': 'perplexity', 'in_batch_dim': [8, 128], 'batch_size': 8, 'flops': 2531205966848}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1898404492032}, {'type': 'perplexity', 'in_batch_dim': [8, 64], 'batch_size': 8, 'flops': 1265603017216}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1898404492032}, {'type': 'perplexity', 'in_batch_dim': [8, 64], 'batch_size': 8, 'flops': 1265603017216}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1582003754624}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1582003754624}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1582003754624}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1582003754624}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1898404492032}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1582003754624}, {'type': 'perplexity', 'in_batch_dim': [8, 112], 'batch_size': 8, 'flops': 2214805229440}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1898404492032}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1582003754624}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1898404492032}, {'type': 'perplexity', 'in_batch_dim': [8, 64], 'batch_size': 8, 'flops': 1265603017216}, {'type': 'perplexity', 'in_batch_dim': [8, 64], 'batch_size': 8, 'flops': 1265603017216}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1898404492032}, {'type': 'perplexity', 'in_batch_dim': [8, 64], 'batch_size': 8, 'flops': 1265603017216}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1582003754624}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1582003754624}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1582003754624}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1582003754624}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1582003754624}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1582003754624}, {'type': 'perplexity', 'in_batch_dim': [8, 64], 'batch_size': 8, 'flops': 1265603017216}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1582003754624}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1582003754624}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1898404492032}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1898404492032}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1898404492032}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1898404492032}, {'type': 'perplexity', 'in_batch_dim': [8, 128], 'batch_size': 8, 'flops': 2531205966848}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1582003754624}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1898404492032}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1898404492032}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1898404492032}, {'type': 'perplexity', 'in_batch_dim': [8, 64], 'batch_size': 8, 'flops': 1265603017216}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1898404492032}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1898404492032}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1582003754624}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1898404492032}, {'type': 'perplexity', 'in_batch_dim': [8, 112], 'batch_size': 8, 'flops': 2214805229440}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1898404492032}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1582003754624}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1898404492032}, {'type': 'perplexity', 'in_batch_dim': [8, 64], 'batch_size': 8, 'flops': 1265603017216}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1898404492032}, {'type': 'perplexity', 'in_batch_dim': [8, 112], 'batch_size': 8, 'flops': 2214805229440}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1898404492032}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1898404492032}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1582003754624}, {'type': 'perplexity', 'in_batch_dim': [8, 64], 'batch_size': 8, 'flops': 1265603017216}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1582003754624}, {'type': 'perplexity', 'in_batch_dim': [8, 128], 'batch_size': 8, 'flops': 2531205966848}, {'type': 'perplexity', 'in_batch_dim': [8, 64], 'batch_size': 8, 'flops': 1265603017216}, {'type': 'perplexity', 'in_batch_dim': [8, 128], 'batch_size': 8, 'flops': 2531205966848}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1898404492032}, {'type': 'perplexity', 'in_batch_dim': [8, 112], 'batch_size': 8, 'flops': 2214805229440}, {'type': 'perplexity', 'in_batch_dim': [8, 112], 'batch_size': 8, 'flops': 2214805229440}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1898404492032}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1898404492032}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1582003754624}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1582003754624}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1582003754624}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1898404492032}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1582003754624}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1582003754624}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1898404492032}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1582003754624}, {'type': 'perplexity', 'in_batch_dim': [8, 64], 'batch_size': 8, 'flops': 1265603017216}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1898404492032}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1898404492032}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1898404492032}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1898404492032}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1582003754624}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1898404492032}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1582003754624}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1582003754624}, {'type': 'perplexity', 'in_batch_dim': [8, 112], 'batch_size': 8, 'flops': 2214805229440}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1582003754624}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1898404492032}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1582003754624}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1582003754624}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1582003754624}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1898404492032}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1582003754624}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1898404492032}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1582003754624}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1582003754624}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1898404492032}, {'type': 'perplexity', 'in_batch_dim': [8, 112], 'batch_size': 8, 'flops': 2214805229440}, {'type': 'perplexity', 'in_batch_dim': [8, 128], 'batch_size': 8, 'flops': 2531205966848}, {'type': 'perplexity', 'in_batch_dim': [8, 112], 'batch_size': 8, 'flops': 2214805229440}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1582003754624}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1898404492032}, {'type': 'perplexity', 'in_batch_dim': [8, 64], 'batch_size': 8, 'flops': 1265603017216}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1898404492032}, {'type': 'perplexity', 'in_batch_dim': [8, 64], 'batch_size': 8, 'flops': 1265603017216}, {'type': 'perplexity', 'in_batch_dim': [8, 144], 'batch_size': 8, 'flops': 2847606704256}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1582003754624}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1898404492032}, {'type': 'perplexity', 'in_batch_dim': [8, 112], 'batch_size': 8, 'flops': 2214805229440}, {'type': 'perplexity', 'in_batch_dim': [8, 144], 'batch_size': 8, 'flops': 2847606704256}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1582003754624}, {'type': 'perplexity', 'in_batch_dim': [8, 112], 'batch_size': 8, 'flops': 2214805229440}, {'type': 'perplexity', 'in_batch_dim': [8, 64], 'batch_size': 8, 'flops': 1265603017216}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1582003754624}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1898404492032}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1898404492032}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1582003754624}, {'type': 'perplexity', 'in_batch_dim': [8, 112], 'batch_size': 8, 'flops': 2214805229440}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1582003754624}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1582003754624}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1582003754624}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1582003754624}, {'type': 'perplexity', 'in_batch_dim': [8, 112], 'batch_size': 8, 'flops': 2214805229440}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1898404492032}, {'type': 'perplexity', 'in_batch_dim': [8, 128], 'batch_size': 8, 'flops': 2531205966848}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1582003754624}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1582003754624}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1898404492032}, {'type': 'perplexity', 'in_batch_dim': [8, 64], 'batch_size': 8, 'flops': 1265603017216}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1898404492032}, {'type': 'perplexity', 'in_batch_dim': [8, 64], 'batch_size': 8, 'flops': 1265603017216}, {'type': 'perplexity', 'in_batch_dim': [8, 112], 'batch_size': 8, 'flops': 2214805229440}, {'type': 'perplexity', 'in_batch_dim': [8, 112], 'batch_size': 8, 'flops': 2214805229440}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1582003754624}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1898404492032}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1582003754624}, {'type': 'perplexity', 'in_batch_dim': [8, 64], 'batch_size': 8, 'flops': 1265603017216}, {'type': 'perplexity', 'in_batch_dim': [8, 64], 'batch_size': 8, 'flops': 1265603017216}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1898404492032}, {'type': 'perplexity', 'in_batch_dim': [8, 64], 'batch_size': 8, 'flops': 1265603017216}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1898404492032}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1898404492032}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1582003754624}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1898404492032}, {'type': 'perplexity', 'in_batch_dim': [8, 144], 'batch_size': 8, 'flops': 2847606704256}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1898404492032}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1582003754624}, {'type': 'perplexity', 'in_batch_dim': [8, 128], 'batch_size': 8, 'flops': 2531205966848}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1898404492032}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1898404492032}, {'type': 'perplexity', 'in_batch_dim': [8, 112], 'batch_size': 8, 'flops': 2214805229440}, {'type': 'perplexity', 'in_batch_dim': [8, 112], 'batch_size': 8, 'flops': 2214805229440}, {'type': 'perplexity', 'in_batch_dim': [8, 64], 'batch_size': 8, 'flops': 1265603017216}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1582003754624}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1898404492032}, {'type': 'perplexity', 'in_batch_dim': [8, 128], 'batch_size': 8, 'flops': 2531205966848}, {'type': 'perplexity', 'in_batch_dim': [8, 144], 'batch_size': 8, 'flops': 2847606704256}, {'type': 'perplexity', 'in_batch_dim': [8, 112], 'batch_size': 8, 'flops': 2214805229440}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1582003754624}, {'type': 'perplexity', 'in_batch_dim': [8, 64], 'batch_size': 8, 'flops': 1265603017216}, {'type': 'perplexity', 'in_batch_dim': [8, 64], 'batch_size': 8, 'flops': 1265603017216}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1898404492032}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1582003754624}, {'type': 'perplexity', 'in_batch_dim': [8, 64], 'batch_size': 8, 'flops': 1265603017216}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1898404492032}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1582003754624}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1898404492032}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1898404492032}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1582003754624}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1582003754624}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1582003754624}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1582003754624}, {'type': 'perplexity', 'in_batch_dim': [3, 48], 'batch_size': 8, 'flops': 949202279808}], 'timestamp': '2025-09-10 02:42:21.908714', 'step': 13980, 'epoch': 2} {'type': 'pplx', 'content': 7972.2378058233635, 'timestamp': '2025-09-10 02:42:21.911559', 'step': 13980, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 3322488817984}, 'timestamp': '2025-09-10 02:42:21.941340', 'step': 13980, 'epoch': 2} {'type': 'loss', 'content': 0.05039085075259209, 'timestamp': '2025-09-10 02:42:21.961322', 'step': 13981, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 128], 'flops': 3797092544000}, 'timestamp': '2025-09-10 02:42:22.027697', 'step': 13981, 'epoch': 2} {'type': 'loss', 'content': 0.10848087817430496, 'timestamp': '2025-09-10 02:42:22.032315', 'step': 13982, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 2847885091968}, 'timestamp': '2025-09-10 02:42:22.108847', 'step': 13982, 'epoch': 2} {'type': 'loss', 'content': 0.08861879259347916, 'timestamp': '2025-09-10 02:42:22.113804', 'step': 13983, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 3322488817984}, 'timestamp': '2025-09-10 02:42:22.183609', 'step': 13983, 'epoch': 2} {'type': 'loss', 'content': 0.12985572218894958, 'timestamp': '2025-09-10 02:42:22.208559', 'step': 13984, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 3322488817984}, 'timestamp': '2025-09-10 02:42:22.243139', 'step': 13984, 'epoch': 2} {'type': 'loss', 'content': 0.09597781300544739, 'timestamp': '2025-09-10 02:42:22.248616', 'step': 13985, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 3322488817984}, 'timestamp': '2025-09-10 02:42:22.282978', 'step': 13985, 'epoch': 2} {'type': 'loss', 'content': 0.06497915089130402, 'timestamp': '2025-09-10 02:42:22.287566', 'step': 13986, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 128], 'flops': 3797092544000}, 'timestamp': '2025-09-10 02:42:22.322896', 'step': 13986, 'epoch': 2} {'type': 'loss', 'content': 0.15318827331066132, 'timestamp': '2025-09-10 02:42:22.327098', 'step': 13987, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 3322488817984}, 'timestamp': '2025-09-10 02:42:22.359501', 'step': 13987, 'epoch': 2} {'type': 'loss', 'content': 0.0845898985862732, 'timestamp': '2025-09-10 02:42:22.386112', 'step': 13988, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 2847885091968}, 'timestamp': '2025-09-10 02:42:22.428549', 'step': 13988, 'epoch': 2} {'type': 'loss', 'content': 0.09416016191244125, 'timestamp': '2025-09-10 02:42:22.444093', 'step': 13989, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 3322488817984}, 'timestamp': '2025-09-10 02:42:22.504786', 'step': 13989, 'epoch': 2} {'type': 'loss', 'content': 0.17630635201931, 'timestamp': '2025-09-10 02:42:22.507257', 'step': 13990, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 128], 'flops': 3797092544000}, 'timestamp': '2025-09-10 02:42:22.539475', 'step': 13990, 'epoch': 2} {'type': 'loss', 'content': 0.1443612277507782, 'timestamp': '2025-09-10 02:42:22.542437', 'step': 13991, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 2847885091968}, 'timestamp': '2025-09-10 02:42:22.575356', 'step': 13991, 'epoch': 2} {'type': 'loss', 'content': 0.09855857491493225, 'timestamp': '2025-09-10 02:42:22.599432', 'step': 13992, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 144], 'flops': 4271696270016}, 'timestamp': '2025-09-10 02:42:22.631302', 'step': 13992, 'epoch': 2} {'type': 'loss', 'content': 0.0753418505191803, 'timestamp': '2025-09-10 02:42:22.633980', 'step': 13993, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 3322488817984}, 'timestamp': '2025-09-10 02:42:22.663936', 'step': 13993, 'epoch': 2} {'type': 'loss', 'content': 0.13216735422611237, 'timestamp': '2025-09-10 02:42:22.676338', 'step': 13994, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 2847885091968}, 'timestamp': '2025-09-10 02:42:22.769528', 'step': 13994, 'epoch': 2} {'type': 'loss', 'content': 0.03238334879279137, 'timestamp': '2025-09-10 02:42:22.789381', 'step': 13995, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 3322488817984}, 'timestamp': '2025-09-10 02:42:22.869574', 'step': 13995, 'epoch': 2} {'type': 'loss', 'content': 0.051611535251140594, 'timestamp': '2025-09-10 02:42:22.907877', 'step': 13996, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 2847885091968}, 'timestamp': '2025-09-10 02:42:22.990321', 'step': 13996, 'epoch': 2} {'type': 'loss', 'content': 0.021094508469104767, 'timestamp': '2025-09-10 02:42:23.000319', 'step': 13997, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 3322488817984}, 'timestamp': '2025-09-10 02:42:23.042671', 'step': 13997, 'epoch': 2} {'type': 'loss', 'content': 0.08990221470594406, 'timestamp': '2025-09-10 02:42:23.045878', 'step': 13998, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 3322488817984}, 'timestamp': '2025-09-10 02:42:23.084575', 'step': 13998, 'epoch': 2} {'type': 'loss', 'content': 0.11331948637962341, 'timestamp': '2025-09-10 02:42:23.093245', 'step': 13999, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 3322488817984}, 'timestamp': '2025-09-10 02:42:23.132364', 'step': 13999, 'epoch': 2} {'type': 'loss', 'content': 0.10637829452753067, 'timestamp': '2025-09-10 02:42:23.158006', 'step': 14000, 'epoch': 2} {'type': 'info', 'content': 'Checkpoint saved at step 14000', 'timestamp': '2025-09-10 02:42:29.485632', 'step': 14000, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 3322488817984}, 'timestamp': '2025-09-10 02:42:29.541044', 'step': 14000, 'epoch': 2} {'type': 'loss', 'content': 0.1061067134141922, 'timestamp': '2025-09-10 02:42:29.543607', 'step': 14001, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 128], 'flops': 3797092544000}, 'timestamp': '2025-09-10 02:42:29.574343', 'step': 14001, 'epoch': 2} {'type': 'loss', 'content': 0.09410139173269272, 'timestamp': '2025-09-10 02:42:29.576306', 'step': 14002, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 80], 'flops': 2373281365952}, 'timestamp': '2025-09-10 02:42:29.605945', 'step': 14002, 'epoch': 2} {'type': 'loss', 'content': 0.10242660343647003, 'timestamp': '2025-09-10 02:42:29.608105', 'step': 14003, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 2847885091968}, 'timestamp': '2025-09-10 02:42:29.638142', 'step': 14003, 'epoch': 2} {'type': 'loss', 'content': 0.09499018639326096, 'timestamp': '2025-09-10 02:42:29.661613', 'step': 14004, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 2847885091968}, 'timestamp': '2025-09-10 02:42:29.697028', 'step': 14004, 'epoch': 2} {'type': 'loss', 'content': 0.04403351992368698, 'timestamp': '2025-09-10 02:42:29.700886', 'step': 14005, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 144], 'flops': 4271696270016}, 'timestamp': '2025-09-10 02:42:29.734689', 'step': 14005, 'epoch': 2} {'type': 'loss', 'content': 0.06491374969482422, 'timestamp': '2025-09-10 02:42:29.739591', 'step': 14006, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 2847885091968}, 'timestamp': '2025-09-10 02:42:29.776605', 'step': 14006, 'epoch': 2} {'type': 'loss', 'content': 0.0402417927980423, 'timestamp': '2025-09-10 02:42:29.781238', 'step': 14007, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 3322488817984}, 'timestamp': '2025-09-10 02:42:29.821247', 'step': 14007, 'epoch': 2} {'type': 'loss', 'content': 0.09603631496429443, 'timestamp': '2025-09-10 02:42:29.844592', 'step': 14008, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 3322488817984}, 'timestamp': '2025-09-10 02:42:29.875053', 'step': 14008, 'epoch': 2} {'type': 'loss', 'content': 0.10777847468852997, 'timestamp': '2025-09-10 02:42:29.877142', 'step': 14009, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 2847885091968}, 'timestamp': '2025-09-10 02:42:29.907858', 'step': 14009, 'epoch': 2} {'type': 'loss', 'content': 0.11477746069431305, 'timestamp': '2025-09-10 02:42:29.910113', 'step': 14010, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 128], 'flops': 3797092544000}, 'timestamp': '2025-09-10 02:42:29.940029', 'step': 14010, 'epoch': 2} {'type': 'loss', 'content': 0.13905398547649384, 'timestamp': '2025-09-10 02:42:29.942150', 'step': 14011, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 128], 'flops': 3797092544000}, 'timestamp': '2025-09-10 02:42:29.971537', 'step': 14011, 'epoch': 2} {'type': 'loss', 'content': 0.10564615577459335, 'timestamp': '2025-09-10 02:42:29.995022', 'step': 14012, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 128], 'flops': 3797092544000}, 'timestamp': '2025-09-10 02:42:30.026394', 'step': 14012, 'epoch': 2} {'type': 'loss', 'content': 0.09784319996833801, 'timestamp': '2025-09-10 02:42:30.028397', 'step': 14013, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 128], 'flops': 3797092544000}, 'timestamp': '2025-09-10 02:42:30.057453', 'step': 14013, 'epoch': 2} {'type': 'loss', 'content': 0.021158237010240555, 'timestamp': '2025-09-10 02:42:30.059903', 'step': 14014, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 3322488817984}, 'timestamp': '2025-09-10 02:42:30.091782', 'step': 14014, 'epoch': 2} {'type': 'loss', 'content': 0.1288013607263565, 'timestamp': '2025-09-10 02:42:30.093802', 'step': 14015, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 144], 'flops': 4271696270016}, 'timestamp': '2025-09-10 02:42:30.123268', 'step': 14015, 'epoch': 2} {'type': 'loss', 'content': 0.16543647646903992, 'timestamp': '2025-09-10 02:42:30.146982', 'step': 14016, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 144], 'flops': 4271696270016}, 'timestamp': '2025-09-10 02:42:30.177368', 'step': 14016, 'epoch': 2} {'type': 'loss', 'content': 0.09055004268884659, 'timestamp': '2025-09-10 02:42:30.179354', 'step': 14017, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 176], 'flops': 5220903722048}, 'timestamp': '2025-09-10 02:42:30.209971', 'step': 14017, 'epoch': 2} {'type': 'loss', 'content': 0.0809551253914833, 'timestamp': '2025-09-10 02:42:30.214344', 'step': 14018, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 128], 'flops': 3797092544000}, 'timestamp': '2025-09-10 02:42:30.244496', 'step': 14018, 'epoch': 2} {'type': 'loss', 'content': 0.11827388405799866, 'timestamp': '2025-09-10 02:42:30.246679', 'step': 14019, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 3322488817984}, 'timestamp': '2025-09-10 02:42:30.276654', 'step': 14019, 'epoch': 2} {'type': 'loss', 'content': 0.08570995181798935, 'timestamp': '2025-09-10 02:42:30.300230', 'step': 14020, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 3322488817984}, 'timestamp': '2025-09-10 02:42:30.330200', 'step': 14020, 'epoch': 2} {'type': 'loss', 'content': 0.1015341728925705, 'timestamp': '2025-09-10 02:42:30.333566', 'step': 14021, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 3322488817984}, 'timestamp': '2025-09-10 02:42:30.363596', 'step': 14021, 'epoch': 2} {'type': 'loss', 'content': 0.12413116544485092, 'timestamp': '2025-09-10 02:42:30.366985', 'step': 14022, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 3322488817984}, 'timestamp': '2025-09-10 02:42:30.396854', 'step': 14022, 'epoch': 2} {'type': 'loss', 'content': 0.1482372134923935, 'timestamp': '2025-09-10 02:42:30.398811', 'step': 14023, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 128], 'flops': 3797092544000}, 'timestamp': '2025-09-10 02:42:30.428244', 'step': 14023, 'epoch': 2} {'type': 'loss', 'content': 0.12870758771896362, 'timestamp': '2025-09-10 02:42:30.452058', 'step': 14024, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 80], 'flops': 2373281365952}, 'timestamp': '2025-09-10 02:42:30.481958', 'step': 14024, 'epoch': 2} {'type': 'loss', 'content': 0.10854338109493256, 'timestamp': '2025-09-10 02:42:30.484030', 'step': 14025, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 3322488817984}, 'timestamp': '2025-09-10 02:42:30.513921', 'step': 14025, 'epoch': 2} {'type': 'loss', 'content': 0.10888490080833435, 'timestamp': '2025-09-10 02:42:30.515838', 'step': 14026, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 3322488817984}, 'timestamp': '2025-09-10 02:42:30.545695', 'step': 14026, 'epoch': 2} {'type': 'loss', 'content': 0.09000980854034424, 'timestamp': '2025-09-10 02:42:30.548154', 'step': 14027, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 3322488817984}, 'timestamp': '2025-09-10 02:42:30.578677', 'step': 14027, 'epoch': 2} {'type': 'loss', 'content': 0.10620459169149399, 'timestamp': '2025-09-10 02:42:30.602172', 'step': 14028, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 3322488817984}, 'timestamp': '2025-09-10 02:42:30.632923', 'step': 14028, 'epoch': 2} {'type': 'loss', 'content': 0.1099296510219574, 'timestamp': '2025-09-10 02:42:30.635281', 'step': 14029, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 160], 'flops': 4746299996032}, 'timestamp': '2025-09-10 02:42:30.665127', 'step': 14029, 'epoch': 2} {'type': 'loss', 'content': 0.156824991106987, 'timestamp': '2025-09-10 02:42:30.667860', 'step': 14030, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 144], 'flops': 4271696270016}, 'timestamp': '2025-09-10 02:42:30.700380', 'step': 14030, 'epoch': 2} {'type': 'loss', 'content': 0.10356931388378143, 'timestamp': '2025-09-10 02:42:30.702912', 'step': 14031, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 144], 'flops': 4271696270016}, 'timestamp': '2025-09-10 02:42:30.733185', 'step': 14031, 'epoch': 2} {'type': 'loss', 'content': 0.11050236225128174, 'timestamp': '2025-09-10 02:42:30.756447', 'step': 14032, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 3322488817984}, 'timestamp': '2025-09-10 02:42:30.787034', 'step': 14032, 'epoch': 2} {'type': 'loss', 'content': 0.06204367056488991, 'timestamp': '2025-09-10 02:42:30.789179', 'step': 14033, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 128], 'flops': 3797092544000}, 'timestamp': '2025-09-10 02:42:30.819012', 'step': 14033, 'epoch': 2} {'type': 'loss', 'content': 0.11521418392658234, 'timestamp': '2025-09-10 02:42:30.821187', 'step': 14034, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 3322488817984}, 'timestamp': '2025-09-10 02:42:30.851438', 'step': 14034, 'epoch': 2} {'type': 'loss', 'content': 0.15062329173088074, 'timestamp': '2025-09-10 02:42:30.853735', 'step': 14035, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 144], 'flops': 4271696270016}, 'timestamp': '2025-09-10 02:42:30.884721', 'step': 14035, 'epoch': 2} {'type': 'loss', 'content': 0.10252426564693451, 'timestamp': '2025-09-10 02:42:30.907992', 'step': 14036, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 80], 'flops': 2373281365952}, 'timestamp': '2025-09-10 02:42:30.937886', 'step': 14036, 'epoch': 2} {'type': 'loss', 'content': 0.09743056446313858, 'timestamp': '2025-09-10 02:42:30.939727', 'step': 14037, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 3322488817984}, 'timestamp': '2025-09-10 02:42:30.969567', 'step': 14037, 'epoch': 2} {'type': 'loss', 'content': 0.07979637384414673, 'timestamp': '2025-09-10 02:42:30.971625', 'step': 14038, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 2847885091968}, 'timestamp': '2025-09-10 02:42:31.003137', 'step': 14038, 'epoch': 2} {'type': 'loss', 'content': 0.08965381234884262, 'timestamp': '2025-09-10 02:42:31.005029', 'step': 14039, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 2847885091968}, 'timestamp': '2025-09-10 02:42:31.034400', 'step': 14039, 'epoch': 2} {'type': 'loss', 'content': 0.1364794671535492, 'timestamp': '2025-09-10 02:42:31.058032', 'step': 14040, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 3322488817984}, 'timestamp': '2025-09-10 02:42:31.087955', 'step': 14040, 'epoch': 2} {'type': 'loss', 'content': 0.15000884234905243, 'timestamp': '2025-09-10 02:42:31.090085', 'step': 14041, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 128], 'flops': 3797092544000}, 'timestamp': '2025-09-10 02:42:31.120939', 'step': 14041, 'epoch': 2} {'type': 'loss', 'content': 0.06362277269363403, 'timestamp': '2025-09-10 02:42:31.123169', 'step': 14042, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 2847885091968}, 'timestamp': '2025-09-10 02:42:31.153529', 'step': 14042, 'epoch': 2} {'type': 'loss', 'content': 0.08348917961120605, 'timestamp': '2025-09-10 02:42:31.155688', 'step': 14043, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 128], 'flops': 3797092544000}, 'timestamp': '2025-09-10 02:42:31.185081', 'step': 14043, 'epoch': 2} {'type': 'loss', 'content': 0.1073809489607811, 'timestamp': '2025-09-10 02:42:31.208681', 'step': 14044, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 144], 'flops': 4271696270016}, 'timestamp': '2025-09-10 02:42:31.239242', 'step': 14044, 'epoch': 2} {'type': 'loss', 'content': 0.16090692579746246, 'timestamp': '2025-09-10 02:42:31.241343', 'step': 14045, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 3322488817984}, 'timestamp': '2025-09-10 02:42:31.270881', 'step': 14045, 'epoch': 2} {'type': 'loss', 'content': 0.04396788030862808, 'timestamp': '2025-09-10 02:42:31.273015', 'step': 14046, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 80], 'flops': 2373281365952}, 'timestamp': '2025-09-10 02:42:31.304129', 'step': 14046, 'epoch': 2} {'type': 'loss', 'content': 0.09869939833879471, 'timestamp': '2025-09-10 02:42:31.306771', 'step': 14047, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 3322488817984}, 'timestamp': '2025-09-10 02:42:31.337808', 'step': 14047, 'epoch': 2} {'type': 'loss', 'content': 0.13165001571178436, 'timestamp': '2025-09-10 02:42:31.361761', 'step': 14048, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 2847885091968}, 'timestamp': '2025-09-10 02:42:31.391981', 'step': 14048, 'epoch': 2} {'type': 'loss', 'content': 0.11240027844905853, 'timestamp': '2025-09-10 02:42:31.394089', 'step': 14049, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 144], 'flops': 4271696270016}, 'timestamp': '2025-09-10 02:42:31.424717', 'step': 14049, 'epoch': 2} {'type': 'loss', 'content': 0.10469932854175568, 'timestamp': '2025-09-10 02:42:31.426827', 'step': 14050, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 80], 'flops': 2373281365952}, 'timestamp': '2025-09-10 02:42:31.456344', 'step': 14050, 'epoch': 2} {'type': 'loss', 'content': 0.08572474122047424, 'timestamp': '2025-09-10 02:42:31.458185', 'step': 14051, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 144], 'flops': 4271696270016}, 'timestamp': '2025-09-10 02:42:31.487483', 'step': 14051, 'epoch': 2} {'type': 'loss', 'content': 0.17146803438663483, 'timestamp': '2025-09-10 02:42:31.510760', 'step': 14052, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 3322488817984}, 'timestamp': '2025-09-10 02:42:31.541153', 'step': 14052, 'epoch': 2} {'type': 'loss', 'content': 0.0833154171705246, 'timestamp': '2025-09-10 02:42:31.543204', 'step': 14053, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 3322488817984}, 'timestamp': '2025-09-10 02:42:31.573358', 'step': 14053, 'epoch': 2} {'type': 'loss', 'content': 0.08877839148044586, 'timestamp': '2025-09-10 02:42:31.575598', 'step': 14054, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 128], 'flops': 3797092544000}, 'timestamp': '2025-09-10 02:42:31.606977', 'step': 14054, 'epoch': 2} {'type': 'loss', 'content': 0.061183005571365356, 'timestamp': '2025-09-10 02:42:31.610093', 'step': 14055, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 144], 'flops': 4271696270016}, 'timestamp': '2025-09-10 02:42:31.641254', 'step': 14055, 'epoch': 2} {'type': 'loss', 'content': 0.07982020080089569, 'timestamp': '2025-09-10 02:42:31.664976', 'step': 14056, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 2847885091968}, 'timestamp': '2025-09-10 02:42:31.695379', 'step': 14056, 'epoch': 2} {'type': 'loss', 'content': 0.11727654188871384, 'timestamp': '2025-09-10 02:42:31.697852', 'step': 14057, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 128], 'flops': 3797092544000}, 'timestamp': '2025-09-10 02:42:31.728381', 'step': 14057, 'epoch': 2} {'type': 'loss', 'content': 0.024372365325689316, 'timestamp': '2025-09-10 02:42:31.730507', 'step': 14058, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 80], 'flops': 2373281365952}, 'timestamp': '2025-09-10 02:42:31.760713', 'step': 14058, 'epoch': 2} {'type': 'loss', 'content': 0.058642249554395676, 'timestamp': '2025-09-10 02:42:31.763102', 'step': 14059, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 2847885091968}, 'timestamp': '2025-09-10 02:42:31.793192', 'step': 14059, 'epoch': 2} {'type': 'loss', 'content': 0.07451241463422775, 'timestamp': '2025-09-10 02:42:31.816480', 'step': 14060, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 128], 'flops': 3797092544000}, 'timestamp': '2025-09-10 02:42:31.846658', 'step': 14060, 'epoch': 2} {'type': 'loss', 'content': 0.059124890714883804, 'timestamp': '2025-09-10 02:42:31.848841', 'step': 14061, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 3322488817984}, 'timestamp': '2025-09-10 02:42:31.880696', 'step': 14061, 'epoch': 2} {'type': 'loss', 'content': 0.0803069919347763, 'timestamp': '2025-09-10 02:42:31.882689', 'step': 14062, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 144], 'flops': 4271696270016}, 'timestamp': '2025-09-10 02:42:31.914612', 'step': 14062, 'epoch': 2} {'type': 'loss', 'content': 0.11937450617551804, 'timestamp': '2025-09-10 02:42:31.916886', 'step': 14063, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 3322488817984}, 'timestamp': '2025-09-10 02:42:31.947292', 'step': 14063, 'epoch': 2} {'type': 'loss', 'content': 0.18165399134159088, 'timestamp': '2025-09-10 02:42:31.970643', 'step': 14064, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 128], 'flops': 3797092544000}, 'timestamp': '2025-09-10 02:42:32.001162', 'step': 14064, 'epoch': 2} {'type': 'loss', 'content': 0.041454534977674484, 'timestamp': '2025-09-10 02:42:32.004822', 'step': 14065, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 3322488817984}, 'timestamp': '2025-09-10 02:42:32.034354', 'step': 14065, 'epoch': 2} {'type': 'loss', 'content': 0.09009594470262527, 'timestamp': '2025-09-10 02:42:32.036289', 'step': 14066, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 160], 'flops': 4746299996032}, 'timestamp': '2025-09-10 02:42:32.066320', 'step': 14066, 'epoch': 2} {'type': 'loss', 'content': 0.06853920221328735, 'timestamp': '2025-09-10 02:42:32.069020', 'step': 14067, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 3322488817984}, 'timestamp': '2025-09-10 02:42:32.100061', 'step': 14067, 'epoch': 2} {'type': 'loss', 'content': 0.08222490549087524, 'timestamp': '2025-09-10 02:42:32.123799', 'step': 14068, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 2847885091968}, 'timestamp': '2025-09-10 02:42:32.153373', 'step': 14068, 'epoch': 2} {'type': 'loss', 'content': 0.12200111895799637, 'timestamp': '2025-09-10 02:42:32.155625', 'step': 14069, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 2847885091968}, 'timestamp': '2025-09-10 02:42:32.186027', 'step': 14069, 'epoch': 2} {'type': 'loss', 'content': 0.15872247517108917, 'timestamp': '2025-09-10 02:42:32.188114', 'step': 14070, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 2847885091968}, 'timestamp': '2025-09-10 02:42:32.219970', 'step': 14070, 'epoch': 2} {'type': 'loss', 'content': 0.11344476789236069, 'timestamp': '2025-09-10 02:42:32.222194', 'step': 14071, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 3322488817984}, 'timestamp': '2025-09-10 02:42:32.252030', 'step': 14071, 'epoch': 2} {'type': 'loss', 'content': 0.09891603142023087, 'timestamp': '2025-09-10 02:42:32.275083', 'step': 14072, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 3322488817984}, 'timestamp': '2025-09-10 02:42:32.306232', 'step': 14072, 'epoch': 2} {'type': 'loss', 'content': 0.07921943068504333, 'timestamp': '2025-09-10 02:42:32.308047', 'step': 14073, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 3322488817984}, 'timestamp': '2025-09-10 02:42:32.337473', 'step': 14073, 'epoch': 2} {'type': 'loss', 'content': 0.11614380776882172, 'timestamp': '2025-09-10 02:42:32.339589', 'step': 14074, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 3322488817984}, 'timestamp': '2025-09-10 02:42:32.371682', 'step': 14074, 'epoch': 2} {'type': 'loss', 'content': 0.05516817420721054, 'timestamp': '2025-09-10 02:42:32.373626', 'step': 14075, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 3322488817984}, 'timestamp': '2025-09-10 02:42:32.403332', 'step': 14075, 'epoch': 2} {'type': 'loss', 'content': 0.13295595347881317, 'timestamp': '2025-09-10 02:42:32.426650', 'step': 14076, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 128], 'flops': 3797092544000}, 'timestamp': '2025-09-10 02:42:32.456977', 'step': 14076, 'epoch': 2} {'type': 'loss', 'content': 0.08509837090969086, 'timestamp': '2025-09-10 02:42:32.458937', 'step': 14077, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 128], 'flops': 3797092544000}, 'timestamp': '2025-09-10 02:42:32.488509', 'step': 14077, 'epoch': 2} {'type': 'loss', 'content': 0.028937458992004395, 'timestamp': '2025-09-10 02:42:32.490661', 'step': 14078, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 3322488817984}, 'timestamp': '2025-09-10 02:42:32.522111', 'step': 14078, 'epoch': 2} {'type': 'loss', 'content': 0.07643266022205353, 'timestamp': '2025-09-10 02:42:32.524000', 'step': 14079, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 3322488817984}, 'timestamp': '2025-09-10 02:42:32.554151', 'step': 14079, 'epoch': 2} {'type': 'loss', 'content': 0.07786102592945099, 'timestamp': '2025-09-10 02:42:32.577455', 'step': 14080, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 2847885091968}, 'timestamp': '2025-09-10 02:42:32.607295', 'step': 14080, 'epoch': 2} {'type': 'loss', 'content': 0.07781938463449478, 'timestamp': '2025-09-10 02:42:32.615470', 'step': 14081, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 128], 'flops': 3797092544000}, 'timestamp': '2025-09-10 02:42:32.655197', 'step': 14081, 'epoch': 2} {'type': 'loss', 'content': 0.1182398870587349, 'timestamp': '2025-09-10 02:42:32.657677', 'step': 14082, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 3322488817984}, 'timestamp': '2025-09-10 02:42:32.688011', 'step': 14082, 'epoch': 2} {'type': 'loss', 'content': 0.08416957408189774, 'timestamp': '2025-09-10 02:42:32.690306', 'step': 14083, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 3322488817984}, 'timestamp': '2025-09-10 02:42:32.721421', 'step': 14083, 'epoch': 2} {'type': 'loss', 'content': 0.07704249024391174, 'timestamp': '2025-09-10 02:42:32.745139', 'step': 14084, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 128], 'flops': 3797092544000}, 'timestamp': '2025-09-10 02:42:32.776031', 'step': 14084, 'epoch': 2} {'type': 'loss', 'content': 0.14996595680713654, 'timestamp': '2025-09-10 02:42:32.777930', 'step': 14085, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 3322488817984}, 'timestamp': '2025-09-10 02:42:32.807398', 'step': 14085, 'epoch': 2} {'type': 'loss', 'content': 0.09128764271736145, 'timestamp': '2025-09-10 02:42:32.809177', 'step': 14086, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 2847885091968}, 'timestamp': '2025-09-10 02:42:32.839290', 'step': 14086, 'epoch': 2} {'type': 'loss', 'content': 0.03755783662199974, 'timestamp': '2025-09-10 02:42:32.841511', 'step': 14087, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 2847885091968}, 'timestamp': '2025-09-10 02:42:32.871074', 'step': 14087, 'epoch': 2} {'type': 'loss', 'content': 0.11425807327032089, 'timestamp': '2025-09-10 02:42:32.894344', 'step': 14088, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 3322488817984}, 'timestamp': '2025-09-10 02:42:32.925730', 'step': 14088, 'epoch': 2} {'type': 'loss', 'content': 0.15791991353034973, 'timestamp': '2025-09-10 02:42:32.927685', 'step': 14089, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 3322488817984}, 'timestamp': '2025-09-10 02:42:32.957176', 'step': 14089, 'epoch': 2} {'type': 'loss', 'content': 0.07557067275047302, 'timestamp': '2025-09-10 02:42:32.960812', 'step': 14090, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 2847885091968}, 'timestamp': '2025-09-10 02:42:32.993700', 'step': 14090, 'epoch': 2} {'type': 'loss', 'content': 0.0893552154302597, 'timestamp': '2025-09-10 02:42:32.996123', 'step': 14091, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 3322488817984}, 'timestamp': '2025-09-10 02:42:33.027673', 'step': 14091, 'epoch': 2} {'type': 'loss', 'content': 0.08193459361791611, 'timestamp': '2025-09-10 02:42:33.051126', 'step': 14092, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 2847885091968}, 'timestamp': '2025-09-10 02:42:33.083754', 'step': 14092, 'epoch': 2} {'type': 'loss', 'content': 0.07933485507965088, 'timestamp': '2025-09-10 02:42:33.086884', 'step': 14093, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 128], 'flops': 3797092544000}, 'timestamp': '2025-09-10 02:42:33.127147', 'step': 14093, 'epoch': 2} {'type': 'loss', 'content': 0.07997202128171921, 'timestamp': '2025-09-10 02:42:33.129608', 'step': 14094, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 3322488817984}, 'timestamp': '2025-09-10 02:42:33.160853', 'step': 14094, 'epoch': 2} {'type': 'loss', 'content': 0.1394040733575821, 'timestamp': '2025-09-10 02:42:33.162943', 'step': 14095, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 3322488817984}, 'timestamp': '2025-09-10 02:42:33.196109', 'step': 14095, 'epoch': 2} {'type': 'loss', 'content': 0.09141656756401062, 'timestamp': '2025-09-10 02:42:33.220246', 'step': 14096, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 2847885091968}, 'timestamp': '2025-09-10 02:42:33.251111', 'step': 14096, 'epoch': 2} {'type': 'loss', 'content': 0.020776407793164253, 'timestamp': '2025-09-10 02:42:33.253421', 'step': 14097, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 128], 'flops': 3797092544000}, 'timestamp': '2025-09-10 02:42:33.283775', 'step': 14097, 'epoch': 2} {'type': 'loss', 'content': 0.08234984427690506, 'timestamp': '2025-09-10 02:42:33.289168', 'step': 14098, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 128], 'flops': 3797092544000}, 'timestamp': '2025-09-10 02:42:33.322534', 'step': 14098, 'epoch': 2} {'type': 'loss', 'content': 0.10100387781858444, 'timestamp': '2025-09-10 02:42:33.325571', 'step': 14099, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 2847885091968}, 'timestamp': '2025-09-10 02:42:33.355143', 'step': 14099, 'epoch': 2} {'type': 'loss', 'content': 0.12389994412660599, 'timestamp': '2025-09-10 02:42:33.378336', 'step': 14100, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 128], 'flops': 3797092544000}, 'timestamp': '2025-09-10 02:42:33.410358', 'step': 14100, 'epoch': 2} {'type': 'loss', 'content': 0.17788726091384888, 'timestamp': '2025-09-10 02:42:33.412393', 'step': 14101, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 3322488817984}, 'timestamp': '2025-09-10 02:42:33.441975', 'step': 14101, 'epoch': 2} {'type': 'loss', 'content': 0.03692431002855301, 'timestamp': '2025-09-10 02:42:33.443986', 'step': 14102, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 176], 'flops': 5220903722048}, 'timestamp': '2025-09-10 02:42:33.473576', 'step': 14102, 'epoch': 2} {'type': 'loss', 'content': 0.1877763569355011, 'timestamp': '2025-09-10 02:42:33.477860', 'step': 14103, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 3322488817984}, 'timestamp': '2025-09-10 02:42:33.511237', 'step': 14103, 'epoch': 2} {'type': 'loss', 'content': 0.1011657640337944, 'timestamp': '2025-09-10 02:42:33.534303', 'step': 14104, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 2847885091968}, 'timestamp': '2025-09-10 02:42:33.565147', 'step': 14104, 'epoch': 2} {'type': 'loss', 'content': 0.04942503944039345, 'timestamp': '2025-09-10 02:42:33.567271', 'step': 14105, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 2847885091968}, 'timestamp': '2025-09-10 02:42:33.596855', 'step': 14105, 'epoch': 2} {'type': 'loss', 'content': 0.037647951394319534, 'timestamp': '2025-09-10 02:42:33.599298', 'step': 14106, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 80], 'flops': 2373281365952}, 'timestamp': '2025-09-10 02:42:33.629722', 'step': 14106, 'epoch': 2} {'type': 'loss', 'content': 0.041250910609960556, 'timestamp': '2025-09-10 02:42:33.631734', 'step': 14107, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 2847885091968}, 'timestamp': '2025-09-10 02:42:33.663524', 'step': 14107, 'epoch': 2} {'type': 'loss', 'content': 0.05632602795958519, 'timestamp': '2025-09-10 02:42:33.687089', 'step': 14108, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 160], 'flops': 4746299996032}, 'timestamp': '2025-09-10 02:42:33.718385', 'step': 14108, 'epoch': 2} {'type': 'loss', 'content': 0.17239893972873688, 'timestamp': '2025-09-10 02:42:33.720568', 'step': 14109, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 3322488817984}, 'timestamp': '2025-09-10 02:42:33.751131', 'step': 14109, 'epoch': 2} {'type': 'loss', 'content': 0.09315256029367447, 'timestamp': '2025-09-10 02:42:33.753330', 'step': 14110, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 2847885091968}, 'timestamp': '2025-09-10 02:42:33.784151', 'step': 14110, 'epoch': 2} {'type': 'loss', 'content': 0.050842106342315674, 'timestamp': '2025-09-10 02:42:33.788613', 'step': 14111, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 80], 'flops': 2373281365952}, 'timestamp': '2025-09-10 02:42:33.820191', 'step': 14111, 'epoch': 2} {'type': 'loss', 'content': 0.12308603525161743, 'timestamp': '2025-09-10 02:42:33.843765', 'step': 14112, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 3322488817984}, 'timestamp': '2025-09-10 02:42:33.873607', 'step': 14112, 'epoch': 2} {'type': 'loss', 'content': 0.08256759494543076, 'timestamp': '2025-09-10 02:42:33.875686', 'step': 14113, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 128], 'flops': 3797092544000}, 'timestamp': '2025-09-10 02:42:33.905855', 'step': 14113, 'epoch': 2} {'type': 'loss', 'content': 0.07850909978151321, 'timestamp': '2025-09-10 02:42:33.907794', 'step': 14114, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 2847885091968}, 'timestamp': '2025-09-10 02:42:33.939210', 'step': 14114, 'epoch': 2} {'type': 'loss', 'content': 0.1477225422859192, 'timestamp': '2025-09-10 02:42:33.941398', 'step': 14115, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 3322488817984}, 'timestamp': '2025-09-10 02:42:33.971354', 'step': 14115, 'epoch': 2} {'type': 'loss', 'content': 0.0781756341457367, 'timestamp': '2025-09-10 02:42:33.994499', 'step': 14116, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 128], 'flops': 3797092544000}, 'timestamp': '2025-09-10 02:42:34.024148', 'step': 14116, 'epoch': 2} {'type': 'loss', 'content': 0.08229454606771469, 'timestamp': '2025-09-10 02:42:34.025960', 'step': 14117, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 144], 'flops': 4271696270016}, 'timestamp': '2025-09-10 02:42:34.057209', 'step': 14117, 'epoch': 2} {'type': 'loss', 'content': 0.16900768876075745, 'timestamp': '2025-09-10 02:42:34.059619', 'step': 14118, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 2847885091968}, 'timestamp': '2025-09-10 02:42:34.089847', 'step': 14118, 'epoch': 2} {'type': 'loss', 'content': 0.042044635862112045, 'timestamp': '2025-09-10 02:42:34.093615', 'step': 14119, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 3322488817984}, 'timestamp': '2025-09-10 02:42:34.126296', 'step': 14119, 'epoch': 2} {'type': 'loss', 'content': 0.11877891421318054, 'timestamp': '2025-09-10 02:42:34.150007', 'step': 14120, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 2847885091968}, 'timestamp': '2025-09-10 02:42:34.179900', 'step': 14120, 'epoch': 2} {'type': 'loss', 'content': 0.027831345796585083, 'timestamp': '2025-09-10 02:42:34.182075', 'step': 14121, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 144], 'flops': 4271696270016}, 'timestamp': '2025-09-10 02:42:34.212136', 'step': 14121, 'epoch': 2} {'type': 'loss', 'content': 0.07274828106164932, 'timestamp': '2025-09-10 02:42:34.214420', 'step': 14122, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 2847885091968}, 'timestamp': '2025-09-10 02:42:34.245955', 'step': 14122, 'epoch': 2} {'type': 'loss', 'content': 0.040201637893915176, 'timestamp': '2025-09-10 02:42:34.247550', 'step': 14123, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 3322488817984}, 'timestamp': '2025-09-10 02:42:34.278351', 'step': 14123, 'epoch': 2} {'type': 'loss', 'content': 0.07696954905986786, 'timestamp': '2025-09-10 02:42:34.301613', 'step': 14124, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 2847885091968}, 'timestamp': '2025-09-10 02:42:34.330697', 'step': 14124, 'epoch': 2} {'type': 'loss', 'content': 0.03933560848236084, 'timestamp': '2025-09-10 02:42:34.334100', 'step': 14125, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 128], 'flops': 3797092544000}, 'timestamp': '2025-09-10 02:42:34.367089', 'step': 14125, 'epoch': 2} {'type': 'loss', 'content': 0.04956758767366409, 'timestamp': '2025-09-10 02:42:34.369414', 'step': 14126, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 128], 'flops': 3797092544000}, 'timestamp': '2025-09-10 02:42:34.399885', 'step': 14126, 'epoch': 2} {'type': 'loss', 'content': 0.2837482690811157, 'timestamp': '2025-09-10 02:42:34.401970', 'step': 14127, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 3322488817984}, 'timestamp': '2025-09-10 02:42:34.431572', 'step': 14127, 'epoch': 2} {'type': 'loss', 'content': 0.07734939455986023, 'timestamp': '2025-09-10 02:42:34.454849', 'step': 14128, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 128], 'flops': 3797092544000}, 'timestamp': '2025-09-10 02:42:34.485031', 'step': 14128, 'epoch': 2} {'type': 'loss', 'content': 0.08284242451190948, 'timestamp': '2025-09-10 02:42:34.487397', 'step': 14129, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 3322488817984}, 'timestamp': '2025-09-10 02:42:34.517653', 'step': 14129, 'epoch': 2} {'type': 'loss', 'content': 0.08842651546001434, 'timestamp': '2025-09-10 02:42:34.521197', 'step': 14130, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 2847885091968}, 'timestamp': '2025-09-10 02:42:34.551264', 'step': 14130, 'epoch': 2} {'type': 'loss', 'content': 0.012738474644720554, 'timestamp': '2025-09-10 02:42:34.553423', 'step': 14131, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 3322488817984}, 'timestamp': '2025-09-10 02:42:34.583066', 'step': 14131, 'epoch': 2} {'type': 'loss', 'content': 0.05976353585720062, 'timestamp': '2025-09-10 02:42:34.606595', 'step': 14132, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 128], 'flops': 3797092544000}, 'timestamp': '2025-09-10 02:42:34.646847', 'step': 14132, 'epoch': 2} {'type': 'loss', 'content': 0.0952434167265892, 'timestamp': '2025-09-10 02:42:34.649198', 'step': 14133, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 128], 'flops': 3797092544000}, 'timestamp': '2025-09-10 02:42:34.678478', 'step': 14133, 'epoch': 2} {'type': 'loss', 'content': 0.16760389506816864, 'timestamp': '2025-09-10 02:42:34.680678', 'step': 14134, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 128], 'flops': 3797092544000}, 'timestamp': '2025-09-10 02:42:34.711383', 'step': 14134, 'epoch': 2} {'type': 'loss', 'content': 0.16142937541007996, 'timestamp': '2025-09-10 02:42:34.713399', 'step': 14135, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 128], 'flops': 3797092544000}, 'timestamp': '2025-09-10 02:42:34.742900', 'step': 14135, 'epoch': 2} {'type': 'loss', 'content': 0.036419570446014404, 'timestamp': '2025-09-10 02:42:34.766115', 'step': 14136, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 80], 'flops': 2373281365952}, 'timestamp': '2025-09-10 02:42:34.795233', 'step': 14136, 'epoch': 2} {'type': 'loss', 'content': 0.05889907851815224, 'timestamp': '2025-09-10 02:42:34.797360', 'step': 14137, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 3322488817984}, 'timestamp': '2025-09-10 02:42:34.826901', 'step': 14137, 'epoch': 2} {'type': 'loss', 'content': 0.11084257811307907, 'timestamp': '2025-09-10 02:42:34.828916', 'step': 14138, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 3322488817984}, 'timestamp': '2025-09-10 02:42:34.858254', 'step': 14138, 'epoch': 2} {'type': 'loss', 'content': 0.20841526985168457, 'timestamp': '2025-09-10 02:42:34.860289', 'step': 14139, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 144], 'flops': 4271696270016}, 'timestamp': '2025-09-10 02:42:34.890073', 'step': 14139, 'epoch': 2} {'type': 'loss', 'content': 0.10226943343877792, 'timestamp': '2025-09-10 02:42:34.913110', 'step': 14140, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 128], 'flops': 3797092544000}, 'timestamp': '2025-09-10 02:42:34.943189', 'step': 14140, 'epoch': 2} {'type': 'loss', 'content': 0.0711192712187767, 'timestamp': '2025-09-10 02:42:34.945382', 'step': 14141, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 3322488817984}, 'timestamp': '2025-09-10 02:42:34.975136', 'step': 14141, 'epoch': 2} {'type': 'loss', 'content': 0.09075034409761429, 'timestamp': '2025-09-10 02:42:34.977377', 'step': 14142, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 2847885091968}, 'timestamp': '2025-09-10 02:42:35.007786', 'step': 14142, 'epoch': 2} {'type': 'loss', 'content': 0.16324646770954132, 'timestamp': '2025-09-10 02:42:35.010251', 'step': 14143, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 128], 'flops': 3797092544000}, 'timestamp': '2025-09-10 02:42:35.042581', 'step': 14143, 'epoch': 2} {'type': 'loss', 'content': 0.13796843588352203, 'timestamp': '2025-09-10 02:42:35.065917', 'step': 14144, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 80], 'flops': 2373281365952}, 'timestamp': '2025-09-10 02:42:35.095825', 'step': 14144, 'epoch': 2} {'type': 'loss', 'content': 0.1603100299835205, 'timestamp': '2025-09-10 02:42:35.098218', 'step': 14145, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 3322488817984}, 'timestamp': '2025-09-10 02:42:35.128679', 'step': 14145, 'epoch': 2} {'type': 'loss', 'content': 0.047661248594522476, 'timestamp': '2025-09-10 02:42:35.131093', 'step': 14146, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 3322488817984}, 'timestamp': '2025-09-10 02:42:35.162655', 'step': 14146, 'epoch': 2} {'type': 'loss', 'content': 0.061056364327669144, 'timestamp': '2025-09-10 02:42:35.164977', 'step': 14147, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 3322488817984}, 'timestamp': '2025-09-10 02:42:35.194807', 'step': 14147, 'epoch': 2} {'type': 'loss', 'content': 0.09522315859794617, 'timestamp': '2025-09-10 02:42:35.218602', 'step': 14148, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 3322488817984}, 'timestamp': '2025-09-10 02:42:35.249034', 'step': 14148, 'epoch': 2} {'type': 'loss', 'content': 0.19180110096931458, 'timestamp': '2025-09-10 02:42:35.251309', 'step': 14149, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 144], 'flops': 4271696270016}, 'timestamp': '2025-09-10 02:42:35.281574', 'step': 14149, 'epoch': 2} {'type': 'loss', 'content': 0.183134526014328, 'timestamp': '2025-09-10 02:42:35.283888', 'step': 14150, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 2847885091968}, 'timestamp': '2025-09-10 02:42:35.313284', 'step': 14150, 'epoch': 2} {'type': 'loss', 'content': 0.12709635496139526, 'timestamp': '2025-09-10 02:42:35.315469', 'step': 14151, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 3322488817984}, 'timestamp': '2025-09-10 02:42:35.345323', 'step': 14151, 'epoch': 2} {'type': 'loss', 'content': 0.05517803877592087, 'timestamp': '2025-09-10 02:42:35.368998', 'step': 14152, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 3322488817984}, 'timestamp': '2025-09-10 02:42:35.399878', 'step': 14152, 'epoch': 2} {'type': 'loss', 'content': 0.17014789581298828, 'timestamp': '2025-09-10 02:42:35.402248', 'step': 14153, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 128], 'flops': 3797092544000}, 'timestamp': '2025-09-10 02:42:35.432312', 'step': 14153, 'epoch': 2} {'type': 'loss', 'content': 0.08653504401445389, 'timestamp': '2025-09-10 02:42:35.435165', 'step': 14154, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 2847885091968}, 'timestamp': '2025-09-10 02:42:35.465196', 'step': 14154, 'epoch': 2} {'type': 'loss', 'content': 0.07405411452054977, 'timestamp': '2025-09-10 02:42:35.467561', 'step': 14155, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 80], 'flops': 2373281365952}, 'timestamp': '2025-09-10 02:42:35.498219', 'step': 14155, 'epoch': 2} {'type': 'loss', 'content': 0.11290088295936584, 'timestamp': '2025-09-10 02:42:35.521862', 'step': 14156, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 2847885091968}, 'timestamp': '2025-09-10 02:42:35.552035', 'step': 14156, 'epoch': 2} {'type': 'loss', 'content': 0.11487869918346405, 'timestamp': '2025-09-10 02:42:35.557238', 'step': 14157, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 128], 'flops': 3797092544000}, 'timestamp': '2025-09-10 02:42:35.594818', 'step': 14157, 'epoch': 2} {'type': 'loss', 'content': 0.1302245557308197, 'timestamp': '2025-09-10 02:42:35.598619', 'step': 14158, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 144], 'flops': 4271696270016}, 'timestamp': '2025-09-10 02:42:35.635319', 'step': 14158, 'epoch': 2} {'type': 'loss', 'content': 0.10812056064605713, 'timestamp': '2025-09-10 02:42:35.637606', 'step': 14159, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 128], 'flops': 3797092544000}, 'timestamp': '2025-09-10 02:42:35.673959', 'step': 14159, 'epoch': 2} {'type': 'loss', 'content': 0.0777190625667572, 'timestamp': '2025-09-10 02:42:35.699555', 'step': 14160, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 2847885091968}, 'timestamp': '2025-09-10 02:42:35.737876', 'step': 14160, 'epoch': 2} {'type': 'loss', 'content': 0.10697685182094574, 'timestamp': '2025-09-10 02:42:35.740098', 'step': 14161, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 128], 'flops': 3797092544000}, 'timestamp': '2025-09-10 02:42:35.770543', 'step': 14161, 'epoch': 2} {'type': 'loss', 'content': 0.10068884491920471, 'timestamp': '2025-09-10 02:42:35.772967', 'step': 14162, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 144], 'flops': 4271696270016}, 'timestamp': '2025-09-10 02:42:35.803450', 'step': 14162, 'epoch': 2} {'type': 'loss', 'content': 0.07620053738355637, 'timestamp': '2025-09-10 02:42:35.805802', 'step': 14163, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 3322488817984}, 'timestamp': '2025-09-10 02:42:35.835429', 'step': 14163, 'epoch': 2} {'type': 'loss', 'content': 0.08126799017190933, 'timestamp': '2025-09-10 02:42:35.860209', 'step': 14164, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 2847885091968}, 'timestamp': '2025-09-10 02:42:35.889898', 'step': 14164, 'epoch': 2} {'type': 'loss', 'content': 0.16190427541732788, 'timestamp': '2025-09-10 02:42:35.892098', 'step': 14165, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 176], 'flops': 5220903722048}, 'timestamp': '2025-09-10 02:42:35.923137', 'step': 14165, 'epoch': 2} {'type': 'loss', 'content': 0.1297391951084137, 'timestamp': '2025-09-10 02:42:35.927394', 'step': 14166, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 2847885091968}, 'timestamp': '2025-09-10 02:42:35.958980', 'step': 14166, 'epoch': 2} {'type': 'loss', 'content': 0.09360848367214203, 'timestamp': '2025-09-10 02:42:35.961620', 'step': 14167, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 128], 'flops': 3797092544000}, 'timestamp': '2025-09-10 02:42:35.992190', 'step': 14167, 'epoch': 2} {'type': 'loss', 'content': 0.11523586511611938, 'timestamp': '2025-09-10 02:42:36.016099', 'step': 14168, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 128], 'flops': 3797092544000}, 'timestamp': '2025-09-10 02:42:36.048421', 'step': 14168, 'epoch': 2} {'type': 'loss', 'content': 0.03931332379579544, 'timestamp': '2025-09-10 02:42:36.050871', 'step': 14169, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 128], 'flops': 3797092544000}, 'timestamp': '2025-09-10 02:42:36.079992', 'step': 14169, 'epoch': 2} {'type': 'loss', 'content': 0.14005717635154724, 'timestamp': '2025-09-10 02:42:36.082578', 'step': 14170, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 80], 'flops': 2373281365952}, 'timestamp': '2025-09-10 02:42:36.112785', 'step': 14170, 'epoch': 2} {'type': 'loss', 'content': 0.1842087358236313, 'timestamp': '2025-09-10 02:42:36.114995', 'step': 14171, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 3322488817984}, 'timestamp': '2025-09-10 02:42:36.144692', 'step': 14171, 'epoch': 2} {'type': 'loss', 'content': 0.10734321922063828, 'timestamp': '2025-09-10 02:42:36.168173', 'step': 14172, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 128], 'flops': 3797092544000}, 'timestamp': '2025-09-10 02:42:36.199214', 'step': 14172, 'epoch': 2} {'type': 'loss', 'content': 0.05959044024348259, 'timestamp': '2025-09-10 02:42:36.201649', 'step': 14173, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 2847885091968}, 'timestamp': '2025-09-10 02:42:36.232463', 'step': 14173, 'epoch': 2} {'type': 'loss', 'content': 0.16484133899211884, 'timestamp': '2025-09-10 02:42:36.234513', 'step': 14174, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 144], 'flops': 4271696270016}, 'timestamp': '2025-09-10 02:42:36.264327', 'step': 14174, 'epoch': 2} {'type': 'loss', 'content': 0.08810406923294067, 'timestamp': '2025-09-10 02:42:36.266758', 'step': 14175, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 3322488817984}, 'timestamp': '2025-09-10 02:42:36.296621', 'step': 14175, 'epoch': 2} {'type': 'loss', 'content': 0.14740659296512604, 'timestamp': '2025-09-10 02:42:36.320135', 'step': 14176, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 224], 'flops': 6644714900096}, 'timestamp': '2025-09-10 02:42:36.350517', 'step': 14176, 'epoch': 2} {'type': 'loss', 'content': 0.08369984477758408, 'timestamp': '2025-09-10 02:42:36.355575', 'step': 14177, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 3322488817984}, 'timestamp': '2025-09-10 02:42:36.385681', 'step': 14177, 'epoch': 2} {'type': 'loss', 'content': 0.11514974385499954, 'timestamp': '2025-09-10 02:42:36.387937', 'step': 14178, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 144], 'flops': 4271696270016}, 'timestamp': '2025-09-10 02:42:36.417809', 'step': 14178, 'epoch': 2} {'type': 'loss', 'content': 0.10987407714128494, 'timestamp': '2025-09-10 02:42:36.420153', 'step': 14179, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 2847885091968}, 'timestamp': '2025-09-10 02:42:36.451249', 'step': 14179, 'epoch': 2} {'type': 'loss', 'content': 0.0834936574101448, 'timestamp': '2025-09-10 02:42:36.474524', 'step': 14180, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 2847885091968}, 'timestamp': '2025-09-10 02:42:36.504847', 'step': 14180, 'epoch': 2} {'type': 'loss', 'content': 0.09762868285179138, 'timestamp': '2025-09-10 02:42:36.507080', 'step': 14181, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 128], 'flops': 3797092544000}, 'timestamp': '2025-09-10 02:42:36.536551', 'step': 14181, 'epoch': 2} {'type': 'loss', 'content': 0.1866992712020874, 'timestamp': '2025-09-10 02:42:36.538607', 'step': 14182, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 3322488817984}, 'timestamp': '2025-09-10 02:42:36.571538', 'step': 14182, 'epoch': 2} {'type': 'loss', 'content': 0.13653679192066193, 'timestamp': '2025-09-10 02:42:36.574127', 'step': 14183, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 2847885091968}, 'timestamp': '2025-09-10 02:42:36.604254', 'step': 14183, 'epoch': 2} {'type': 'loss', 'content': 0.08441979438066483, 'timestamp': '2025-09-10 02:42:36.627847', 'step': 14184, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 2847885091968}, 'timestamp': '2025-09-10 02:42:36.658038', 'step': 14184, 'epoch': 2} {'type': 'loss', 'content': 0.042666152119636536, 'timestamp': '2025-09-10 02:42:36.660143', 'step': 14185, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 3322488817984}, 'timestamp': '2025-09-10 02:42:36.690219', 'step': 14185, 'epoch': 2} {'type': 'loss', 'content': 0.03476058319211006, 'timestamp': '2025-09-10 02:42:36.692325', 'step': 14186, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 128], 'flops': 3797092544000}, 'timestamp': '2025-09-10 02:42:36.722147', 'step': 14186, 'epoch': 2} {'type': 'loss', 'content': 0.07976683974266052, 'timestamp': '2025-09-10 02:42:36.724480', 'step': 14187, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 2847885091968}, 'timestamp': '2025-09-10 02:42:36.754246', 'step': 14187, 'epoch': 2} {'type': 'loss', 'content': 0.07498635351657867, 'timestamp': '2025-09-10 02:42:36.777640', 'step': 14188, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 128], 'flops': 3797092544000}, 'timestamp': '2025-09-10 02:42:36.817031', 'step': 14188, 'epoch': 2} {'type': 'loss', 'content': 0.09969045966863632, 'timestamp': '2025-09-10 02:42:36.819183', 'step': 14189, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 2847885091968}, 'timestamp': '2025-09-10 02:42:36.849257', 'step': 14189, 'epoch': 2} {'type': 'loss', 'content': 0.14803989231586456, 'timestamp': '2025-09-10 02:42:36.851945', 'step': 14190, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 128], 'flops': 3797092544000}, 'timestamp': '2025-09-10 02:42:36.882122', 'step': 14190, 'epoch': 2} {'type': 'loss', 'content': 0.17562422156333923, 'timestamp': '2025-09-10 02:42:36.884425', 'step': 14191, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 2847885091968}, 'timestamp': '2025-09-10 02:42:36.914193', 'step': 14191, 'epoch': 2} {'type': 'loss', 'content': 0.12970516085624695, 'timestamp': '2025-09-10 02:42:36.937444', 'step': 14192, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 2847885091968}, 'timestamp': '2025-09-10 02:42:36.967577', 'step': 14192, 'epoch': 2} {'type': 'loss', 'content': 0.05620742589235306, 'timestamp': '2025-09-10 02:42:36.969952', 'step': 14193, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 3322488817984}, 'timestamp': '2025-09-10 02:42:36.999606', 'step': 14193, 'epoch': 2} {'type': 'loss', 'content': 0.1018347293138504, 'timestamp': '2025-09-10 02:42:37.001893', 'step': 14194, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 3322488817984}, 'timestamp': '2025-09-10 02:42:37.031582', 'step': 14194, 'epoch': 2} {'type': 'loss', 'content': 0.060292985290288925, 'timestamp': '2025-09-10 02:42:37.033778', 'step': 14195, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 3322488817984}, 'timestamp': '2025-09-10 02:42:37.063982', 'step': 14195, 'epoch': 2} {'type': 'loss', 'content': 0.10253073275089264, 'timestamp': '2025-09-10 02:42:37.087417', 'step': 14196, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 3322488817984}, 'timestamp': '2025-09-10 02:42:37.118004', 'step': 14196, 'epoch': 2} {'type': 'loss', 'content': 0.12544475495815277, 'timestamp': '2025-09-10 02:42:37.120241', 'step': 14197, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 3322488817984}, 'timestamp': '2025-09-10 02:42:37.150134', 'step': 14197, 'epoch': 2} {'type': 'loss', 'content': 0.16282916069030762, 'timestamp': '2025-09-10 02:42:37.152468', 'step': 14198, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 2847885091968}, 'timestamp': '2025-09-10 02:42:37.184043', 'step': 14198, 'epoch': 2} {'type': 'loss', 'content': 0.09337670356035233, 'timestamp': '2025-09-10 02:42:37.186666', 'step': 14199, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 128], 'flops': 3797092544000}, 'timestamp': '2025-09-10 02:42:37.217572', 'step': 14199, 'epoch': 2} {'type': 'loss', 'content': 0.1161915734410286, 'timestamp': '2025-09-10 02:42:37.241301', 'step': 14200, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 3322488817984}, 'timestamp': '2025-09-10 02:42:37.271672', 'step': 14200, 'epoch': 2} {'type': 'loss', 'content': 0.13157057762145996, 'timestamp': '2025-09-10 02:42:37.273845', 'step': 14201, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 160], 'flops': 4746299996032}, 'timestamp': '2025-09-10 02:42:37.303743', 'step': 14201, 'epoch': 2} {'type': 'loss', 'content': 0.05737488344311714, 'timestamp': '2025-09-10 02:42:37.306368', 'step': 14202, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 128], 'flops': 3797092544000}, 'timestamp': '2025-09-10 02:42:37.336222', 'step': 14202, 'epoch': 2} {'type': 'loss', 'content': 0.11727021634578705, 'timestamp': '2025-09-10 02:42:37.338336', 'step': 14203, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 3322488817984}, 'timestamp': '2025-09-10 02:42:37.368778', 'step': 14203, 'epoch': 2} {'type': 'loss', 'content': 0.09623946249485016, 'timestamp': '2025-09-10 02:42:37.392358', 'step': 14204, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 128], 'flops': 3797092544000}, 'timestamp': '2025-09-10 02:42:37.423673', 'step': 14204, 'epoch': 2} {'type': 'loss', 'content': 0.08732850849628448, 'timestamp': '2025-09-10 02:42:37.426071', 'step': 14205, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 3322488817984}, 'timestamp': '2025-09-10 02:42:37.456088', 'step': 14205, 'epoch': 2} {'type': 'loss', 'content': 0.110286645591259, 'timestamp': '2025-09-10 02:42:37.459658', 'step': 14206, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 128], 'flops': 3797092544000}, 'timestamp': '2025-09-10 02:42:37.490613', 'step': 14206, 'epoch': 2} {'type': 'loss', 'content': 0.1361517459154129, 'timestamp': '2025-09-10 02:42:37.493191', 'step': 14207, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 128], 'flops': 3797092544000}, 'timestamp': '2025-09-10 02:42:37.523134', 'step': 14207, 'epoch': 2} {'type': 'loss', 'content': 0.14579640328884125, 'timestamp': '2025-09-10 02:42:37.547910', 'step': 14208, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 3322488817984}, 'timestamp': '2025-09-10 02:42:37.578370', 'step': 14208, 'epoch': 2} {'type': 'loss', 'content': 0.0586567185819149, 'timestamp': '2025-09-10 02:42:37.580796', 'step': 14209, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 128], 'flops': 3797092544000}, 'timestamp': '2025-09-10 02:42:37.610412', 'step': 14209, 'epoch': 2} {'type': 'loss', 'content': 0.1724282056093216, 'timestamp': '2025-09-10 02:42:37.612333', 'step': 14210, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 3322488817984}, 'timestamp': '2025-09-10 02:42:37.643471', 'step': 14210, 'epoch': 2} {'type': 'loss', 'content': 0.05156972259283066, 'timestamp': '2025-09-10 02:42:37.645450', 'step': 14211, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 3322488817984}, 'timestamp': '2025-09-10 02:42:37.675884', 'step': 14211, 'epoch': 2} {'type': 'loss', 'content': 0.10025735944509506, 'timestamp': '2025-09-10 02:42:37.699515', 'step': 14212, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 128], 'flops': 3797092544000}, 'timestamp': '2025-09-10 02:42:37.729507', 'step': 14212, 'epoch': 2} {'type': 'loss', 'content': 0.12679201364517212, 'timestamp': '2025-09-10 02:42:37.732242', 'step': 14213, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 128], 'flops': 3797092544000}, 'timestamp': '2025-09-10 02:42:37.762088', 'step': 14213, 'epoch': 2} {'type': 'loss', 'content': 0.08653553575277328, 'timestamp': '2025-09-10 02:42:37.764565', 'step': 14214, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 3322488817984}, 'timestamp': '2025-09-10 02:42:37.795636', 'step': 14214, 'epoch': 2} {'type': 'loss', 'content': 0.09805282205343246, 'timestamp': '2025-09-10 02:42:37.797890', 'step': 14215, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 80], 'flops': 2373281365952}, 'timestamp': '2025-09-10 02:42:37.828197', 'step': 14215, 'epoch': 2} {'type': 'loss', 'content': 0.12581095099449158, 'timestamp': '2025-09-10 02:42:37.851692', 'step': 14216, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 2847885091968}, 'timestamp': '2025-09-10 02:42:37.882283', 'step': 14216, 'epoch': 2} {'type': 'loss', 'content': 0.0898515060544014, 'timestamp': '2025-09-10 02:42:37.884277', 'step': 14217, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 2847885091968}, 'timestamp': '2025-09-10 02:42:37.914912', 'step': 14217, 'epoch': 2} {'type': 'loss', 'content': 0.04270651564002037, 'timestamp': '2025-09-10 02:42:37.917125', 'step': 14218, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 2847885091968}, 'timestamp': '2025-09-10 02:42:37.947111', 'step': 14218, 'epoch': 2} {'type': 'loss', 'content': 0.09303530305624008, 'timestamp': '2025-09-10 02:42:37.950137', 'step': 14219, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 2847885091968}, 'timestamp': '2025-09-10 02:42:37.980205', 'step': 14219, 'epoch': 2} {'type': 'loss', 'content': 0.06527672708034515, 'timestamp': '2025-09-10 02:42:38.003697', 'step': 14220, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 3322488817984}, 'timestamp': '2025-09-10 02:42:38.034203', 'step': 14220, 'epoch': 2} {'type': 'loss', 'content': 0.1286427527666092, 'timestamp': '2025-09-10 02:42:38.036640', 'step': 14221, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 2847885091968}, 'timestamp': '2025-09-10 02:42:38.066613', 'step': 14221, 'epoch': 2} {'type': 'loss', 'content': 0.1353071928024292, 'timestamp': '2025-09-10 02:42:38.068841', 'step': 14222, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 144], 'flops': 4271696270016}, 'timestamp': '2025-09-10 02:42:38.098280', 'step': 14222, 'epoch': 2} {'type': 'loss', 'content': 0.07797245681285858, 'timestamp': '2025-09-10 02:42:38.100892', 'step': 14223, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 3322488817984}, 'timestamp': '2025-09-10 02:42:38.131160', 'step': 14223, 'epoch': 2} {'type': 'loss', 'content': 0.07213927805423737, 'timestamp': '2025-09-10 02:42:38.154798', 'step': 14224, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 3322488817984}, 'timestamp': '2025-09-10 02:42:38.185130', 'step': 14224, 'epoch': 2} {'type': 'loss', 'content': 0.1262737363576889, 'timestamp': '2025-09-10 02:42:38.187249', 'step': 14225, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 128], 'flops': 3797092544000}, 'timestamp': '2025-09-10 02:42:38.216760', 'step': 14225, 'epoch': 2} {'type': 'loss', 'content': 0.11854907870292664, 'timestamp': '2025-09-10 02:42:38.219320', 'step': 14226, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 3322488817984}, 'timestamp': '2025-09-10 02:42:38.248625', 'step': 14226, 'epoch': 2} {'type': 'loss', 'content': 0.11942347884178162, 'timestamp': '2025-09-10 02:42:38.250771', 'step': 14227, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 3322488817984}, 'timestamp': '2025-09-10 02:42:38.280735', 'step': 14227, 'epoch': 2} {'type': 'loss', 'content': 0.15819378197193146, 'timestamp': '2025-09-10 02:42:38.304441', 'step': 14228, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 3322488817984}, 'timestamp': '2025-09-10 02:42:38.334819', 'step': 14228, 'epoch': 2} {'type': 'loss', 'content': 0.038631632924079895, 'timestamp': '2025-09-10 02:42:38.337084', 'step': 14229, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 3322488817984}, 'timestamp': '2025-09-10 02:42:38.365984', 'step': 14229, 'epoch': 2} {'type': 'loss', 'content': 0.17847900092601776, 'timestamp': '2025-09-10 02:42:38.369549', 'step': 14230, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 144], 'flops': 4271696270016}, 'timestamp': '2025-09-10 02:42:38.400347', 'step': 14230, 'epoch': 2} {'type': 'loss', 'content': 0.08136316388845444, 'timestamp': '2025-09-10 02:42:38.402638', 'step': 14231, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 2847885091968}, 'timestamp': '2025-09-10 02:42:38.432599', 'step': 14231, 'epoch': 2} {'type': 'loss', 'content': 0.12907254695892334, 'timestamp': '2025-09-10 02:42:38.456029', 'step': 14232, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 160], 'flops': 4746299996032}, 'timestamp': '2025-09-10 02:42:38.485986', 'step': 14232, 'epoch': 2} {'type': 'loss', 'content': 0.08019943535327911, 'timestamp': '2025-09-10 02:42:38.488669', 'step': 14233, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 128], 'flops': 3797092544000}, 'timestamp': '2025-09-10 02:42:38.520414', 'step': 14233, 'epoch': 2} {'type': 'loss', 'content': 0.14576900005340576, 'timestamp': '2025-09-10 02:42:38.522373', 'step': 14234, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 128], 'flops': 3797092544000}, 'timestamp': '2025-09-10 02:42:38.552132', 'step': 14234, 'epoch': 2} {'type': 'loss', 'content': 0.032120805233716965, 'timestamp': '2025-09-10 02:42:38.554286', 'step': 14235, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 128], 'flops': 3797092544000}, 'timestamp': '2025-09-10 02:42:38.583780', 'step': 14235, 'epoch': 2} {'type': 'loss', 'content': 0.06632709503173828, 'timestamp': '2025-09-10 02:42:38.607313', 'step': 14236, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 3322488817984}, 'timestamp': '2025-09-10 02:42:38.638460', 'step': 14236, 'epoch': 2} {'type': 'loss', 'content': 0.04417562857270241, 'timestamp': '2025-09-10 02:42:38.640125', 'step': 14237, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 3322488817984}, 'timestamp': '2025-09-10 02:42:38.670033', 'step': 14237, 'epoch': 2} {'type': 'loss', 'content': 0.11716262996196747, 'timestamp': '2025-09-10 02:42:38.673413', 'step': 14238, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 128], 'flops': 3797092544000}, 'timestamp': '2025-09-10 02:42:38.703035', 'step': 14238, 'epoch': 2} {'type': 'loss', 'content': 0.11795080453157425, 'timestamp': '2025-09-10 02:42:38.705353', 'step': 14239, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 3322488817984}, 'timestamp': '2025-09-10 02:42:38.734676', 'step': 14239, 'epoch': 2} {'type': 'loss', 'content': 0.11565545201301575, 'timestamp': '2025-09-10 02:42:38.758333', 'step': 14240, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 2847885091968}, 'timestamp': '2025-09-10 02:42:38.788776', 'step': 14240, 'epoch': 2} {'type': 'loss', 'content': 0.037961434572935104, 'timestamp': '2025-09-10 02:42:38.792710', 'step': 14241, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 2847885091968}, 'timestamp': '2025-09-10 02:42:38.825675', 'step': 14241, 'epoch': 2} {'type': 'loss', 'content': 0.06395294517278671, 'timestamp': '2025-09-10 02:42:38.828132', 'step': 14242, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 128], 'flops': 3797092544000}, 'timestamp': '2025-09-10 02:42:38.858413', 'step': 14242, 'epoch': 2} {'type': 'loss', 'content': 0.16846592724323273, 'timestamp': '2025-09-10 02:42:38.860481', 'step': 14243, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 3322488817984}, 'timestamp': '2025-09-10 02:42:38.890969', 'step': 14243, 'epoch': 2} {'type': 'loss', 'content': 0.0857577696442604, 'timestamp': '2025-09-10 02:42:38.914534', 'step': 14244, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 2847885091968}, 'timestamp': '2025-09-10 02:42:38.945189', 'step': 14244, 'epoch': 2} {'type': 'loss', 'content': 0.10560666769742966, 'timestamp': '2025-09-10 02:42:38.947217', 'step': 14245, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 2847885091968}, 'timestamp': '2025-09-10 02:42:38.976544', 'step': 14245, 'epoch': 2} {'type': 'loss', 'content': 0.07088454812765121, 'timestamp': '2025-09-10 02:42:38.981079', 'step': 14246, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 128], 'flops': 3797092544000}, 'timestamp': '2025-09-10 02:42:39.020777', 'step': 14246, 'epoch': 2} {'type': 'loss', 'content': 0.19227758049964905, 'timestamp': '2025-09-10 02:42:39.030902', 'step': 14247, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 3322488817984}, 'timestamp': '2025-09-10 02:42:39.076089', 'step': 14247, 'epoch': 2} {'type': 'loss', 'content': 0.1076318547129631, 'timestamp': '2025-09-10 02:42:39.100863', 'step': 14248, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 2847885091968}, 'timestamp': '2025-09-10 02:42:39.130775', 'step': 14248, 'epoch': 2} {'type': 'loss', 'content': 0.08986129611730576, 'timestamp': '2025-09-10 02:42:39.132837', 'step': 14249, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 3322488817984}, 'timestamp': '2025-09-10 02:42:39.162517', 'step': 14249, 'epoch': 2} {'type': 'loss', 'content': 0.11348080635070801, 'timestamp': '2025-09-10 02:42:39.164641', 'step': 14250, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 3322488817984}, 'timestamp': '2025-09-10 02:42:39.194473', 'step': 14250, 'epoch': 2} {'type': 'loss', 'content': 0.09398729354143143, 'timestamp': '2025-09-10 02:42:39.196796', 'step': 14251, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 128], 'flops': 3797092544000}, 'timestamp': '2025-09-10 02:42:39.227117', 'step': 14251, 'epoch': 2} {'type': 'loss', 'content': 0.08029475808143616, 'timestamp': '2025-09-10 02:42:39.250604', 'step': 14252, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 144], 'flops': 4271696270016}, 'timestamp': '2025-09-10 02:42:39.281599', 'step': 14252, 'epoch': 2} {'type': 'loss', 'content': 0.08087556809186935, 'timestamp': '2025-09-10 02:42:39.283689', 'step': 14253, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 80], 'flops': 2373281365952}, 'timestamp': '2025-09-10 02:42:39.313269', 'step': 14253, 'epoch': 2} {'type': 'loss', 'content': 0.09676294028759003, 'timestamp': '2025-09-10 02:42:39.315634', 'step': 14254, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 2847885091968}, 'timestamp': '2025-09-10 02:42:39.345934', 'step': 14254, 'epoch': 2} {'type': 'loss', 'content': 0.09018001705408096, 'timestamp': '2025-09-10 02:42:39.348155', 'step': 14255, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 3322488817984}, 'timestamp': '2025-09-10 02:42:39.378545', 'step': 14255, 'epoch': 2} {'type': 'loss', 'content': 0.1270526647567749, 'timestamp': '2025-09-10 02:42:39.402126', 'step': 14256, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 2847885091968}, 'timestamp': '2025-09-10 02:42:39.432997', 'step': 14256, 'epoch': 2} {'type': 'loss', 'content': 0.10528375208377838, 'timestamp': '2025-09-10 02:42:39.435105', 'step': 14257, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 3322488817984}, 'timestamp': '2025-09-10 02:42:39.465245', 'step': 14257, 'epoch': 2} {'type': 'loss', 'content': 0.16665413975715637, 'timestamp': '2025-09-10 02:42:39.467227', 'step': 14258, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 3322488817984}, 'timestamp': '2025-09-10 02:42:39.498033', 'step': 14258, 'epoch': 2} {'type': 'loss', 'content': 0.0967344418168068, 'timestamp': '2025-09-10 02:42:39.500351', 'step': 14259, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 128], 'flops': 3797092544000}, 'timestamp': '2025-09-10 02:42:39.533021', 'step': 14259, 'epoch': 2} {'type': 'loss', 'content': 0.1282985657453537, 'timestamp': '2025-09-10 02:42:39.558674', 'step': 14260, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 3322488817984}, 'timestamp': '2025-09-10 02:42:39.595314', 'step': 14260, 'epoch': 2} {'type': 'loss', 'content': 0.12424934655427933, 'timestamp': '2025-09-10 02:42:39.599369', 'step': 14261, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 2847885091968}, 'timestamp': '2025-09-10 02:42:39.635249', 'step': 14261, 'epoch': 2} {'type': 'loss', 'content': 0.15915343165397644, 'timestamp': '2025-09-10 02:42:39.645876', 'step': 14262, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 3322488817984}, 'timestamp': '2025-09-10 02:42:39.685992', 'step': 14262, 'epoch': 2} {'type': 'loss', 'content': 0.15613949298858643, 'timestamp': '2025-09-10 02:42:39.691460', 'step': 14263, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 3322488817984}, 'timestamp': '2025-09-10 02:42:39.726895', 'step': 14263, 'epoch': 2} {'type': 'loss', 'content': 0.07775922864675522, 'timestamp': '2025-09-10 02:42:39.752634', 'step': 14264, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 2847885091968}, 'timestamp': '2025-09-10 02:42:39.790838', 'step': 14264, 'epoch': 2} {'type': 'loss', 'content': 0.1425165981054306, 'timestamp': '2025-09-10 02:42:39.795345', 'step': 14265, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 128], 'flops': 3797092544000}, 'timestamp': '2025-09-10 02:42:39.833182', 'step': 14265, 'epoch': 2} {'type': 'loss', 'content': 0.0836612805724144, 'timestamp': '2025-09-10 02:42:39.835350', 'step': 14266, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 3322488817984}, 'timestamp': '2025-09-10 02:42:39.865320', 'step': 14266, 'epoch': 2} {'type': 'loss', 'content': 0.05092228576540947, 'timestamp': '2025-09-10 02:42:39.867645', 'step': 14267, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 128], 'flops': 3797092544000}, 'timestamp': '2025-09-10 02:42:39.898528', 'step': 14267, 'epoch': 2} {'type': 'loss', 'content': 0.1380537897348404, 'timestamp': '2025-09-10 02:42:39.922090', 'step': 14268, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 128], 'flops': 3797092544000}, 'timestamp': '2025-09-10 02:42:39.951472', 'step': 14268, 'epoch': 2} {'type': 'loss', 'content': 0.08825568854808807, 'timestamp': '2025-09-10 02:42:39.953564', 'step': 14269, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 2847885091968}, 'timestamp': '2025-09-10 02:42:39.984055', 'step': 14269, 'epoch': 2} {'type': 'loss', 'content': 0.13943737745285034, 'timestamp': '2025-09-10 02:42:39.986321', 'step': 14270, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 2847885091968}, 'timestamp': '2025-09-10 02:42:40.017084', 'step': 14270, 'epoch': 2} {'type': 'loss', 'content': 0.06231369823217392, 'timestamp': '2025-09-10 02:42:40.019797', 'step': 14271, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 128], 'flops': 3797092544000}, 'timestamp': '2025-09-10 02:42:40.049840', 'step': 14271, 'epoch': 2} {'type': 'loss', 'content': 0.13233524560928345, 'timestamp': '2025-09-10 02:42:40.075503', 'step': 14272, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 3322488817984}, 'timestamp': '2025-09-10 02:42:40.105570', 'step': 14272, 'epoch': 2} {'type': 'loss', 'content': 0.04249531403183937, 'timestamp': '2025-09-10 02:42:40.107852', 'step': 14273, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 3322488817984}, 'timestamp': '2025-09-10 02:42:40.137945', 'step': 14273, 'epoch': 2} {'type': 'loss', 'content': 0.06960561126470566, 'timestamp': '2025-09-10 02:42:40.140079', 'step': 14274, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 3322488817984}, 'timestamp': '2025-09-10 02:42:40.170172', 'step': 14274, 'epoch': 2} {'type': 'loss', 'content': 0.08125443756580353, 'timestamp': '2025-09-10 02:42:40.172434', 'step': 14275, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 3322488817984}, 'timestamp': '2025-09-10 02:42:40.202235', 'step': 14275, 'epoch': 2} {'type': 'loss', 'content': 0.07295846939086914, 'timestamp': '2025-09-10 02:42:40.225691', 'step': 14276, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 3322488817984}, 'timestamp': '2025-09-10 02:42:40.258507', 'step': 14276, 'epoch': 2} {'type': 'loss', 'content': 0.08512268960475922, 'timestamp': '2025-09-10 02:42:40.260859', 'step': 14277, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 3322488817984}, 'timestamp': '2025-09-10 02:42:40.290567', 'step': 14277, 'epoch': 2} {'type': 'loss', 'content': 0.12078297883272171, 'timestamp': '2025-09-10 02:42:40.294259', 'step': 14278, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 2847885091968}, 'timestamp': '2025-09-10 02:42:40.323899', 'step': 14278, 'epoch': 2} {'type': 'loss', 'content': 0.12554197013378143, 'timestamp': '2025-09-10 02:42:40.326239', 'step': 14279, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 3322488817984}, 'timestamp': '2025-09-10 02:42:40.355878', 'step': 14279, 'epoch': 2} {'type': 'loss', 'content': 0.05615780130028725, 'timestamp': '2025-09-10 02:42:40.379431', 'step': 14280, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 3322488817984}, 'timestamp': '2025-09-10 02:42:40.410367', 'step': 14280, 'epoch': 2} {'type': 'loss', 'content': 0.10805162042379379, 'timestamp': '2025-09-10 02:42:40.412596', 'step': 14281, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 128], 'flops': 3797092544000}, 'timestamp': '2025-09-10 02:42:40.443028', 'step': 14281, 'epoch': 2} {'type': 'loss', 'content': 0.07995444536209106, 'timestamp': '2025-09-10 02:42:40.445245', 'step': 14282, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 160], 'flops': 4746299996032}, 'timestamp': '2025-09-10 02:42:40.475540', 'step': 14282, 'epoch': 2} {'type': 'loss', 'content': 0.08694788813591003, 'timestamp': '2025-09-10 02:42:40.478329', 'step': 14283, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 144], 'flops': 4271696270016}, 'timestamp': '2025-09-10 02:42:40.509550', 'step': 14283, 'epoch': 2} {'type': 'loss', 'content': 0.10016992688179016, 'timestamp': '2025-09-10 02:42:40.533179', 'step': 14284, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 2847885091968}, 'timestamp': '2025-09-10 02:42:40.562972', 'step': 14284, 'epoch': 2} {'type': 'loss', 'content': 0.070224329829216, 'timestamp': '2025-09-10 02:42:40.565620', 'step': 14285, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 128], 'flops': 3797092544000}, 'timestamp': '2025-09-10 02:42:40.595943', 'step': 14285, 'epoch': 2} {'type': 'loss', 'content': 0.15163253247737885, 'timestamp': '2025-09-10 02:42:40.598958', 'step': 14286, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 3322488817984}, 'timestamp': '2025-09-10 02:42:40.628762', 'step': 14286, 'epoch': 2} {'type': 'loss', 'content': 0.1342853605747223, 'timestamp': '2025-09-10 02:42:40.634335', 'step': 14287, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 144], 'flops': 4271696270016}, 'timestamp': '2025-09-10 02:42:40.668977', 'step': 14287, 'epoch': 2} {'type': 'loss', 'content': 0.059091560542583466, 'timestamp': '2025-09-10 02:42:40.692938', 'step': 14288, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 3322488817984}, 'timestamp': '2025-09-10 02:42:40.727724', 'step': 14288, 'epoch': 2} {'type': 'loss', 'content': 0.08699259161949158, 'timestamp': '2025-09-10 02:42:40.730867', 'step': 14289, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 160], 'flops': 4746299996032}, 'timestamp': '2025-09-10 02:42:40.762644', 'step': 14289, 'epoch': 2} {'type': 'loss', 'content': 0.08576574921607971, 'timestamp': '2025-09-10 02:42:40.765421', 'step': 14290, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 2847885091968}, 'timestamp': '2025-09-10 02:42:40.797186', 'step': 14290, 'epoch': 2} {'type': 'loss', 'content': 0.10553686320781708, 'timestamp': '2025-09-10 02:42:40.801444', 'step': 14291, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 3322488817984}, 'timestamp': '2025-09-10 02:42:40.832975', 'step': 14291, 'epoch': 2} {'type': 'loss', 'content': 0.05716688558459282, 'timestamp': '2025-09-10 02:42:40.856785', 'step': 14292, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 128], 'flops': 3797092544000}, 'timestamp': '2025-09-10 02:42:40.886361', 'step': 14292, 'epoch': 2} {'type': 'loss', 'content': 0.1437285840511322, 'timestamp': '2025-09-10 02:42:40.888729', 'step': 14293, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 144], 'flops': 4271696270016}, 'timestamp': '2025-09-10 02:42:40.918677', 'step': 14293, 'epoch': 2} {'type': 'loss', 'content': 0.10622362047433853, 'timestamp': '2025-09-10 02:42:40.920957', 'step': 14294, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 128], 'flops': 3797092544000}, 'timestamp': '2025-09-10 02:42:40.951548', 'step': 14294, 'epoch': 2} {'type': 'loss', 'content': 0.06475669890642166, 'timestamp': '2025-09-10 02:42:40.953542', 'step': 14295, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 2847885091968}, 'timestamp': '2025-09-10 02:42:40.983709', 'step': 14295, 'epoch': 2} {'type': 'loss', 'content': 0.06173699349164963, 'timestamp': '2025-09-10 02:42:41.007167', 'step': 14296, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 160], 'flops': 4746299996032}, 'timestamp': '2025-09-10 02:42:41.037845', 'step': 14296, 'epoch': 2} {'type': 'loss', 'content': 0.1448737382888794, 'timestamp': '2025-09-10 02:42:41.040240', 'step': 14297, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 128], 'flops': 3797092544000}, 'timestamp': '2025-09-10 02:42:41.070001', 'step': 14297, 'epoch': 2} {'type': 'loss', 'content': 0.09196734428405762, 'timestamp': '2025-09-10 02:42:41.072402', 'step': 14298, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 144], 'flops': 4271696270016}, 'timestamp': '2025-09-10 02:42:41.103427', 'step': 14298, 'epoch': 2} {'type': 'loss', 'content': 0.10031769424676895, 'timestamp': '2025-09-10 02:42:41.106889', 'step': 14299, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 128], 'flops': 3797092544000}, 'timestamp': '2025-09-10 02:42:41.136443', 'step': 14299, 'epoch': 2} {'type': 'loss', 'content': 0.07297278195619583, 'timestamp': '2025-09-10 02:42:41.160011', 'step': 14300, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 144], 'flops': 4271696270016}, 'timestamp': '2025-09-10 02:42:41.190196', 'step': 14300, 'epoch': 2} {'type': 'loss', 'content': 0.10638182610273361, 'timestamp': '2025-09-10 02:42:41.192670', 'step': 14301, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 3322488817984}, 'timestamp': '2025-09-10 02:42:41.223147', 'step': 14301, 'epoch': 2} {'type': 'loss', 'content': 0.06367461383342743, 'timestamp': '2025-09-10 02:42:41.225282', 'step': 14302, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 128], 'flops': 3797092544000}, 'timestamp': '2025-09-10 02:42:41.255228', 'step': 14302, 'epoch': 2} {'type': 'loss', 'content': 0.1434945911169052, 'timestamp': '2025-09-10 02:42:41.257510', 'step': 14303, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 3322488817984}, 'timestamp': '2025-09-10 02:42:41.287446', 'step': 14303, 'epoch': 2} {'type': 'loss', 'content': 0.08269777148962021, 'timestamp': '2025-09-10 02:42:41.311056', 'step': 14304, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 2847885091968}, 'timestamp': '2025-09-10 02:42:41.342133', 'step': 14304, 'epoch': 2} {'type': 'loss', 'content': 0.1298309713602066, 'timestamp': '2025-09-10 02:42:41.344323', 'step': 14305, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 128], 'flops': 3797092544000}, 'timestamp': '2025-09-10 02:42:41.374030', 'step': 14305, 'epoch': 2} {'type': 'loss', 'content': 0.07503064721822739, 'timestamp': '2025-09-10 02:42:41.376263', 'step': 14306, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 2847885091968}, 'timestamp': '2025-09-10 02:42:41.405919', 'step': 14306, 'epoch': 2} {'type': 'loss', 'content': 0.04084057733416557, 'timestamp': '2025-09-10 02:42:41.409388', 'step': 14307, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 3322488817984}, 'timestamp': '2025-09-10 02:42:41.439875', 'step': 14307, 'epoch': 2} {'type': 'loss', 'content': 0.09983458369970322, 'timestamp': '2025-09-10 02:42:41.463230', 'step': 14308, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 3322488817984}, 'timestamp': '2025-09-10 02:42:41.495855', 'step': 14308, 'epoch': 2} {'type': 'loss', 'content': 0.09661681205034256, 'timestamp': '2025-09-10 02:42:41.498042', 'step': 14309, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 2847885091968}, 'timestamp': '2025-09-10 02:42:41.528603', 'step': 14309, 'epoch': 2} {'type': 'loss', 'content': 0.08912186324596405, 'timestamp': '2025-09-10 02:42:41.530700', 'step': 14310, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 3322488817984}, 'timestamp': '2025-09-10 02:42:41.561732', 'step': 14310, 'epoch': 2} {'type': 'loss', 'content': 0.045777030289173126, 'timestamp': '2025-09-10 02:42:41.564223', 'step': 14311, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 3322488817984}, 'timestamp': '2025-09-10 02:42:41.599444', 'step': 14311, 'epoch': 2} {'type': 'loss', 'content': 0.0681959018111229, 'timestamp': '2025-09-10 02:42:41.622952', 'step': 14312, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 3322488817984}, 'timestamp': '2025-09-10 02:42:41.654837', 'step': 14312, 'epoch': 2} {'type': 'loss', 'content': 0.092414990067482, 'timestamp': '2025-09-10 02:42:41.658149', 'step': 14313, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 144], 'flops': 4271696270016}, 'timestamp': '2025-09-10 02:42:41.692489', 'step': 14313, 'epoch': 2} {'type': 'loss', 'content': 0.11624199897050858, 'timestamp': '2025-09-10 02:42:41.694909', 'step': 14314, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 3322488817984}, 'timestamp': '2025-09-10 02:42:41.725335', 'step': 14314, 'epoch': 2} {'type': 'loss', 'content': 0.13084490597248077, 'timestamp': '2025-09-10 02:42:41.727722', 'step': 14315, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 2847885091968}, 'timestamp': '2025-09-10 02:42:41.758103', 'step': 14315, 'epoch': 2} {'type': 'loss', 'content': 0.07763376086950302, 'timestamp': '2025-09-10 02:42:41.782164', 'step': 14316, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 2847885091968}, 'timestamp': '2025-09-10 02:42:41.817188', 'step': 14316, 'epoch': 2} {'type': 'loss', 'content': 0.028200585395097733, 'timestamp': '2025-09-10 02:42:41.820869', 'step': 14317, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 128], 'flops': 3797092544000}, 'timestamp': '2025-09-10 02:42:41.854586', 'step': 14317, 'epoch': 2} {'type': 'loss', 'content': 0.06532701849937439, 'timestamp': '2025-09-10 02:42:41.856844', 'step': 14318, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 80], 'flops': 2373281365952}, 'timestamp': '2025-09-10 02:42:41.896061', 'step': 14318, 'epoch': 2} {'type': 'loss', 'content': 0.11313480883836746, 'timestamp': '2025-09-10 02:42:41.899607', 'step': 14319, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 3322488817984}, 'timestamp': '2025-09-10 02:42:41.931112', 'step': 14319, 'epoch': 2} {'type': 'loss', 'content': 0.0887107253074646, 'timestamp': '2025-09-10 02:42:41.955447', 'step': 14320, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 2847885091968}, 'timestamp': '2025-09-10 02:42:41.986731', 'step': 14320, 'epoch': 2} {'type': 'loss', 'content': 0.10192107409238815, 'timestamp': '2025-09-10 02:42:41.989245', 'step': 14321, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 2847885091968}, 'timestamp': '2025-09-10 02:42:42.021994', 'step': 14321, 'epoch': 2} {'type': 'loss', 'content': 0.10393770784139633, 'timestamp': '2025-09-10 02:42:42.024904', 'step': 14322, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 128], 'flops': 3797092544000}, 'timestamp': '2025-09-10 02:42:42.055595', 'step': 14322, 'epoch': 2} {'type': 'loss', 'content': 0.20470671355724335, 'timestamp': '2025-09-10 02:42:42.058246', 'step': 14323, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 128], 'flops': 3797092544000}, 'timestamp': '2025-09-10 02:42:42.088549', 'step': 14323, 'epoch': 2} {'type': 'loss', 'content': 0.1621711552143097, 'timestamp': '2025-09-10 02:42:42.112668', 'step': 14324, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 3322488817984}, 'timestamp': '2025-09-10 02:42:42.145028', 'step': 14324, 'epoch': 2} {'type': 'loss', 'content': 0.06267137080430984, 'timestamp': '2025-09-10 02:42:42.149544', 'step': 14325, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 3322488817984}, 'timestamp': '2025-09-10 02:42:42.181017', 'step': 14325, 'epoch': 2} {'type': 'loss', 'content': 0.06418593972921371, 'timestamp': '2025-09-10 02:42:42.183439', 'step': 14326, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 3322488817984}, 'timestamp': '2025-09-10 02:42:42.215867', 'step': 14326, 'epoch': 2} {'type': 'loss', 'content': 0.1268025040626526, 'timestamp': '2025-09-10 02:42:42.218212', 'step': 14327, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 2847885091968}, 'timestamp': '2025-09-10 02:42:42.249293', 'step': 14327, 'epoch': 2} {'type': 'loss', 'content': 0.08943858742713928, 'timestamp': '2025-09-10 02:42:42.274689', 'step': 14328, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 2847885091968}, 'timestamp': '2025-09-10 02:42:42.310725', 'step': 14328, 'epoch': 2} {'type': 'loss', 'content': 0.13690060377120972, 'timestamp': '2025-09-10 02:42:42.314499', 'step': 14329, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 2847885091968}, 'timestamp': '2025-09-10 02:42:42.358042', 'step': 14329, 'epoch': 2} {'type': 'loss', 'content': 0.0517146922647953, 'timestamp': '2025-09-10 02:42:42.362649', 'step': 14330, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 128], 'flops': 3797092544000}, 'timestamp': '2025-09-10 02:42:42.394787', 'step': 14330, 'epoch': 2} {'type': 'loss', 'content': 0.1405891329050064, 'timestamp': '2025-09-10 02:42:42.397137', 'step': 14331, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 128], 'flops': 3797092544000}, 'timestamp': '2025-09-10 02:42:42.426992', 'step': 14331, 'epoch': 2} {'type': 'loss', 'content': 0.13474468886852264, 'timestamp': '2025-09-10 02:42:42.450732', 'step': 14332, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 2847885091968}, 'timestamp': '2025-09-10 02:42:42.481300', 'step': 14332, 'epoch': 2} {'type': 'loss', 'content': 0.1081743910908699, 'timestamp': '2025-09-10 02:42:42.483569', 'step': 14333, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 144], 'flops': 4271696270016}, 'timestamp': '2025-09-10 02:42:42.515112', 'step': 14333, 'epoch': 2} {'type': 'loss', 'content': 0.12055224180221558, 'timestamp': '2025-09-10 02:42:42.517734', 'step': 14334, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 3322488817984}, 'timestamp': '2025-09-10 02:42:42.550035', 'step': 14334, 'epoch': 2} {'type': 'loss', 'content': 0.16650381684303284, 'timestamp': '2025-09-10 02:42:42.552310', 'step': 14335, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 144], 'flops': 4271696270016}, 'timestamp': '2025-09-10 02:42:42.583088', 'step': 14335, 'epoch': 2} {'type': 'loss', 'content': 0.08795510977506638, 'timestamp': '2025-09-10 02:42:42.606691', 'step': 14336, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 128], 'flops': 3797092544000}, 'timestamp': '2025-09-10 02:42:42.637904', 'step': 14336, 'epoch': 2} {'type': 'loss', 'content': 0.11602026969194412, 'timestamp': '2025-09-10 02:42:42.645108', 'step': 14337, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 3322488817984}, 'timestamp': '2025-09-10 02:42:42.688427', 'step': 14337, 'epoch': 2} {'type': 'loss', 'content': 0.09045726805925369, 'timestamp': '2025-09-10 02:42:42.690569', 'step': 14338, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 2847885091968}, 'timestamp': '2025-09-10 02:42:42.721724', 'step': 14338, 'epoch': 2} {'type': 'loss', 'content': 0.09811794012784958, 'timestamp': '2025-09-10 02:42:42.725531', 'step': 14339, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 2847885091968}, 'timestamp': '2025-09-10 02:42:42.757009', 'step': 14339, 'epoch': 2} {'type': 'loss', 'content': 0.050804588943719864, 'timestamp': '2025-09-10 02:42:42.780990', 'step': 14340, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 160], 'flops': 4746299996032}, 'timestamp': '2025-09-10 02:42:42.814530', 'step': 14340, 'epoch': 2} {'type': 'loss', 'content': 0.08386994898319244, 'timestamp': '2025-09-10 02:42:42.816767', 'step': 14341, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 128], 'flops': 3797092544000}, 'timestamp': '2025-09-10 02:42:42.847217', 'step': 14341, 'epoch': 2} {'type': 'loss', 'content': 0.17304497957229614, 'timestamp': '2025-09-10 02:42:42.850079', 'step': 14342, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 3322488817984}, 'timestamp': '2025-09-10 02:42:42.881035', 'step': 14342, 'epoch': 2} {'type': 'loss', 'content': 0.06168312951922417, 'timestamp': '2025-09-10 02:42:42.883885', 'step': 14343, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 160], 'flops': 4746299996032}, 'timestamp': '2025-09-10 02:42:42.915564', 'step': 14343, 'epoch': 2} {'type': 'loss', 'content': 0.1441459208726883, 'timestamp': '2025-09-10 02:42:42.939147', 'step': 14344, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 2847885091968}, 'timestamp': '2025-09-10 02:42:42.991149', 'step': 14344, 'epoch': 2} {'type': 'loss', 'content': 0.0945729911327362, 'timestamp': '2025-09-10 02:42:42.995121', 'step': 14345, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 3322488817984}, 'timestamp': '2025-09-10 02:42:43.041353', 'step': 14345, 'epoch': 2} {'type': 'loss', 'content': 0.11128897219896317, 'timestamp': '2025-09-10 02:42:43.054443', 'step': 14346, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 2847885091968}, 'timestamp': '2025-09-10 02:42:43.090235', 'step': 14346, 'epoch': 2} {'type': 'loss', 'content': 0.06305614858865738, 'timestamp': '2025-09-10 02:42:43.112915', 'step': 14347, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 176], 'flops': 5220903722048}, 'timestamp': '2025-09-10 02:42:43.152933', 'step': 14347, 'epoch': 2} {'type': 'loss', 'content': 0.055063072592020035, 'timestamp': '2025-09-10 02:42:43.179051', 'step': 14348, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 128], 'flops': 3797092544000}, 'timestamp': '2025-09-10 02:42:43.213866', 'step': 14348, 'epoch': 2} {'type': 'loss', 'content': 0.07468418031930923, 'timestamp': '2025-09-10 02:42:43.217556', 'step': 14349, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 128], 'flops': 3797092544000}, 'timestamp': '2025-09-10 02:42:43.253806', 'step': 14349, 'epoch': 2} {'type': 'loss', 'content': 0.08266487717628479, 'timestamp': '2025-09-10 02:42:43.255925', 'step': 14350, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 3322488817984}, 'timestamp': '2025-09-10 02:42:43.287812', 'step': 14350, 'epoch': 2} {'type': 'loss', 'content': 0.10297746956348419, 'timestamp': '2025-09-10 02:42:43.298221', 'step': 14351, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 2847885091968}, 'timestamp': '2025-09-10 02:42:43.336537', 'step': 14351, 'epoch': 2} {'type': 'loss', 'content': 0.10178633779287338, 'timestamp': '2025-09-10 02:42:43.360562', 'step': 14352, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 128], 'flops': 3797092544000}, 'timestamp': '2025-09-10 02:42:43.403260', 'step': 14352, 'epoch': 2} {'type': 'loss', 'content': 0.15272018313407898, 'timestamp': '2025-09-10 02:42:43.407219', 'step': 14353, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 3322488817984}, 'timestamp': '2025-09-10 02:42:43.456250', 'step': 14353, 'epoch': 2} {'type': 'loss', 'content': 0.07371091842651367, 'timestamp': '2025-09-10 02:42:43.460794', 'step': 14354, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 2847885091968}, 'timestamp': '2025-09-10 02:42:43.503940', 'step': 14354, 'epoch': 2} {'type': 'loss', 'content': 0.10689598321914673, 'timestamp': '2025-09-10 02:42:43.509124', 'step': 14355, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 128], 'flops': 3797092544000}, 'timestamp': '2025-09-10 02:42:43.565604', 'step': 14355, 'epoch': 2} {'type': 'loss', 'content': 0.07836596667766571, 'timestamp': '2025-09-10 02:42:43.598631', 'step': 14356, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 3322488817984}, 'timestamp': '2025-09-10 02:42:43.659467', 'step': 14356, 'epoch': 2} {'type': 'loss', 'content': 0.10659909248352051, 'timestamp': '2025-09-10 02:42:43.667219', 'step': 14357, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 3322488817984}, 'timestamp': '2025-09-10 02:42:43.704253', 'step': 14357, 'epoch': 2} {'type': 'loss', 'content': 0.08998209983110428, 'timestamp': '2025-09-10 02:42:43.716109', 'step': 14358, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 128], 'flops': 3797092544000}, 'timestamp': '2025-09-10 02:42:43.759936', 'step': 14358, 'epoch': 2} {'type': 'loss', 'content': 0.11992453783750534, 'timestamp': '2025-09-10 02:42:43.776215', 'step': 14359, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 144], 'flops': 4271696270016}, 'timestamp': '2025-09-10 02:42:44.081286', 'step': 14359, 'epoch': 2} {'type': 'loss', 'content': 0.0959196612238884, 'timestamp': '2025-09-10 02:42:44.109839', 'step': 14360, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 80], 'flops': 2373281365952}, 'timestamp': '2025-09-10 02:42:44.167663', 'step': 14360, 'epoch': 2} {'type': 'loss', 'content': 0.07983072102069855, 'timestamp': '2025-09-10 02:42:44.174771', 'step': 14361, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 128], 'flops': 3797092544000}, 'timestamp': '2025-09-10 02:42:44.222353', 'step': 14361, 'epoch': 2} {'type': 'loss', 'content': 0.18703781068325043, 'timestamp': '2025-09-10 02:42:44.227935', 'step': 14362, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 160], 'flops': 4746299996032}, 'timestamp': '2025-09-10 02:42:44.285956', 'step': 14362, 'epoch': 2} {'type': 'loss', 'content': 0.1439754217863083, 'timestamp': '2025-09-10 02:42:44.293467', 'step': 14363, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 128], 'flops': 3797092544000}, 'timestamp': '2025-09-10 02:42:44.338584', 'step': 14363, 'epoch': 2} {'type': 'loss', 'content': 0.06669073551893234, 'timestamp': '2025-09-10 02:42:44.364244', 'step': 14364, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 2847885091968}, 'timestamp': '2025-09-10 02:42:44.426827', 'step': 14364, 'epoch': 2} {'type': 'loss', 'content': 0.13801752030849457, 'timestamp': '2025-09-10 02:42:44.430377', 'step': 14365, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 128], 'flops': 3797092544000}, 'timestamp': '2025-09-10 02:42:44.475196', 'step': 14365, 'epoch': 2} {'type': 'loss', 'content': 0.1559179574251175, 'timestamp': '2025-09-10 02:42:44.487589', 'step': 14366, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 160], 'flops': 4746299996032}, 'timestamp': '2025-09-10 02:42:44.526507', 'step': 14366, 'epoch': 2} {'type': 'loss', 'content': 0.10181321203708649, 'timestamp': '2025-09-10 02:42:44.535225', 'step': 14367, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 2847885091968}, 'timestamp': '2025-09-10 02:42:44.581284', 'step': 14367, 'epoch': 2} {'type': 'loss', 'content': 0.07446913421154022, 'timestamp': '2025-09-10 02:42:44.612452', 'step': 14368, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 3322488817984}, 'timestamp': '2025-09-10 02:42:44.665921', 'step': 14368, 'epoch': 2} {'type': 'loss', 'content': 0.10100927948951721, 'timestamp': '2025-09-10 02:42:44.678614', 'step': 14369, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 128], 'flops': 3797092544000}, 'timestamp': '2025-09-10 02:42:44.735524', 'step': 14369, 'epoch': 2} {'type': 'loss', 'content': 0.14775018393993378, 'timestamp': '2025-09-10 02:42:44.738001', 'step': 14370, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 3322488817984}, 'timestamp': '2025-09-10 02:42:44.773537', 'step': 14370, 'epoch': 2} {'type': 'loss', 'content': 0.07975030690431595, 'timestamp': '2025-09-10 02:42:44.779623', 'step': 14371, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 3322488817984}, 'timestamp': '2025-09-10 02:42:44.818978', 'step': 14371, 'epoch': 2} {'type': 'loss', 'content': 0.11552589386701584, 'timestamp': '2025-09-10 02:42:44.846025', 'step': 14372, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 3322488817984}, 'timestamp': '2025-09-10 02:42:44.903164', 'step': 14372, 'epoch': 2} {'type': 'loss', 'content': 0.05943872407078743, 'timestamp': '2025-09-10 02:42:44.911782', 'step': 14373, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 128], 'flops': 3797092544000}, 'timestamp': '2025-09-10 02:42:44.961616', 'step': 14373, 'epoch': 2} {'type': 'loss', 'content': 0.08861304819583893, 'timestamp': '2025-09-10 02:42:44.964726', 'step': 14374, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 128], 'flops': 3797092544000}, 'timestamp': '2025-09-10 02:42:44.998280', 'step': 14374, 'epoch': 2} {'type': 'loss', 'content': 0.13709741830825806, 'timestamp': '2025-09-10 02:42:45.005179', 'step': 14375, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 3322488817984}, 'timestamp': '2025-09-10 02:42:45.050471', 'step': 14375, 'epoch': 2} {'type': 'loss', 'content': 0.07852933555841446, 'timestamp': '2025-09-10 02:42:45.080640', 'step': 14376, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 144], 'flops': 4271696270016}, 'timestamp': '2025-09-10 02:42:45.143911', 'step': 14376, 'epoch': 2} {'type': 'loss', 'content': 0.05933790281414986, 'timestamp': '2025-09-10 02:42:45.150692', 'step': 14377, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 2847885091968}, 'timestamp': '2025-09-10 02:42:45.224391', 'step': 14377, 'epoch': 2} {'type': 'loss', 'content': 0.035695042461156845, 'timestamp': '2025-09-10 02:42:45.236841', 'step': 14378, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 80], 'flops': 2373281365952}, 'timestamp': '2025-09-10 02:42:45.269149', 'step': 14378, 'epoch': 2} {'type': 'loss', 'content': 0.09743934124708176, 'timestamp': '2025-09-10 02:42:45.271720', 'step': 14379, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 3322488817984}, 'timestamp': '2025-09-10 02:42:45.303947', 'step': 14379, 'epoch': 2} {'type': 'loss', 'content': 0.055706147104501724, 'timestamp': '2025-09-10 02:42:45.329107', 'step': 14380, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 2847885091968}, 'timestamp': '2025-09-10 02:42:45.363602', 'step': 14380, 'epoch': 2} {'type': 'loss', 'content': 0.07040846347808838, 'timestamp': '2025-09-10 02:42:45.366742', 'step': 14381, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 3322488817984}, 'timestamp': '2025-09-10 02:42:45.412209', 'step': 14381, 'epoch': 2} {'type': 'loss', 'content': 0.08531695604324341, 'timestamp': '2025-09-10 02:42:45.415668', 'step': 14382, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 2847885091968}, 'timestamp': '2025-09-10 02:42:45.456040', 'step': 14382, 'epoch': 2} {'type': 'loss', 'content': 0.1062200590968132, 'timestamp': '2025-09-10 02:42:45.462797', 'step': 14383, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 128], 'flops': 3797092544000}, 'timestamp': '2025-09-10 02:42:45.512627', 'step': 14383, 'epoch': 2} {'type': 'loss', 'content': 0.0860595703125, 'timestamp': '2025-09-10 02:42:45.542491', 'step': 14384, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 128], 'flops': 3797092544000}, 'timestamp': '2025-09-10 02:42:45.578533', 'step': 14384, 'epoch': 2} {'type': 'loss', 'content': 0.09486082941293716, 'timestamp': '2025-09-10 02:42:45.583933', 'step': 14385, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 176], 'flops': 5220903722048}, 'timestamp': '2025-09-10 02:42:45.626792', 'step': 14385, 'epoch': 2} {'type': 'loss', 'content': 0.09511486440896988, 'timestamp': '2025-09-10 02:42:45.630952', 'step': 14386, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 2847885091968}, 'timestamp': '2025-09-10 02:42:45.661633', 'step': 14386, 'epoch': 2} {'type': 'loss', 'content': 0.09154834598302841, 'timestamp': '2025-09-10 02:42:45.663925', 'step': 14387, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 128], 'flops': 3797092544000}, 'timestamp': '2025-09-10 02:42:45.696288', 'step': 14387, 'epoch': 2} {'type': 'loss', 'content': 0.12251880019903183, 'timestamp': '2025-09-10 02:42:45.721877', 'step': 14388, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 128], 'flops': 3797092544000}, 'timestamp': '2025-09-10 02:42:45.754905', 'step': 14388, 'epoch': 2} {'type': 'loss', 'content': 0.1326126754283905, 'timestamp': '2025-09-10 02:42:45.766109', 'step': 14389, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 128], 'flops': 3797092544000}, 'timestamp': '2025-09-10 02:42:45.839689', 'step': 14389, 'epoch': 2} {'type': 'loss', 'content': 0.11547303199768066, 'timestamp': '2025-09-10 02:42:45.862154', 'step': 14390, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 128], 'flops': 3797092544000}, 'timestamp': '2025-09-10 02:42:45.916665', 'step': 14390, 'epoch': 2} {'type': 'loss', 'content': 0.05071000009775162, 'timestamp': '2025-09-10 02:42:45.926311', 'step': 14391, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 2847885091968}, 'timestamp': '2025-09-10 02:42:45.973934', 'step': 14391, 'epoch': 2} {'type': 'loss', 'content': 0.03544877842068672, 'timestamp': '2025-09-10 02:42:46.001981', 'step': 14392, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 144], 'flops': 4271696270016}, 'timestamp': '2025-09-10 02:42:46.062980', 'step': 14392, 'epoch': 2} {'type': 'loss', 'content': 0.04784440994262695, 'timestamp': '2025-09-10 02:42:46.069544', 'step': 14393, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 2847885091968}, 'timestamp': '2025-09-10 02:42:46.108745', 'step': 14393, 'epoch': 2} {'type': 'loss', 'content': 0.09782489389181137, 'timestamp': '2025-09-10 02:42:46.111041', 'step': 14394, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 128], 'flops': 3797092544000}, 'timestamp': '2025-09-10 02:42:46.141914', 'step': 14394, 'epoch': 2} {'type': 'loss', 'content': 0.07843980193138123, 'timestamp': '2025-09-10 02:42:46.144161', 'step': 14395, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 2847885091968}, 'timestamp': '2025-09-10 02:42:46.182510', 'step': 14395, 'epoch': 2} {'type': 'loss', 'content': 0.10354103893041611, 'timestamp': '2025-09-10 02:42:46.206189', 'step': 14396, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 3322488817984}, 'timestamp': '2025-09-10 02:42:46.238212', 'step': 14396, 'epoch': 2} {'type': 'loss', 'content': 0.06606452912092209, 'timestamp': '2025-09-10 02:42:46.240131', 'step': 14397, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 128], 'flops': 3797092544000}, 'timestamp': '2025-09-10 02:42:46.272946', 'step': 14397, 'epoch': 2} {'type': 'loss', 'content': 0.0615701898932457, 'timestamp': '2025-09-10 02:42:46.276481', 'step': 14398, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 192], 'flops': 5695507448064}, 'timestamp': '2025-09-10 02:42:46.309121', 'step': 14398, 'epoch': 2} {'type': 'loss', 'content': 0.07566103339195251, 'timestamp': '2025-09-10 02:42:46.312377', 'step': 14399, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 3322488817984}, 'timestamp': '2025-09-10 02:42:46.344513', 'step': 14399, 'epoch': 2} {'type': 'loss', 'content': 0.11130206286907196, 'timestamp': '2025-09-10 02:42:46.368050', 'step': 14400, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 3322488817984}, 'timestamp': '2025-09-10 02:42:46.415122', 'step': 14400, 'epoch': 2} {'type': 'loss', 'content': 0.13628512620925903, 'timestamp': '2025-09-10 02:42:46.421529', 'step': 14401, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 3322488817984}, 'timestamp': '2025-09-10 02:42:46.465241', 'step': 14401, 'epoch': 2} {'type': 'loss', 'content': 0.12389707565307617, 'timestamp': '2025-09-10 02:42:46.495873', 'step': 14402, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 3322488817984}, 'timestamp': '2025-09-10 02:42:46.535230', 'step': 14402, 'epoch': 2} {'type': 'loss', 'content': 0.12199484556913376, 'timestamp': '2025-09-10 02:42:46.540308', 'step': 14403, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 128], 'flops': 3797092544000}, 'timestamp': '2025-09-10 02:42:46.585532', 'step': 14403, 'epoch': 2} {'type': 'loss', 'content': 0.0818011537194252, 'timestamp': '2025-09-10 02:42:46.613042', 'step': 14404, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 3322488817984}, 'timestamp': '2025-09-10 02:42:46.720115', 'step': 14404, 'epoch': 2} {'type': 'loss', 'content': 0.04693767800927162, 'timestamp': '2025-09-10 02:42:46.722295', 'step': 14405, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 3322488817984}, 'timestamp': '2025-09-10 02:42:46.753405', 'step': 14405, 'epoch': 2} {'type': 'loss', 'content': 0.1003037765622139, 'timestamp': '2025-09-10 02:42:46.755845', 'step': 14406, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 3322488817984}, 'timestamp': '2025-09-10 02:42:46.786520', 'step': 14406, 'epoch': 2} {'type': 'loss', 'content': 0.09233183413743973, 'timestamp': '2025-09-10 02:42:46.799480', 'step': 14407, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 144], 'flops': 4271696270016}, 'timestamp': '2025-09-10 02:42:46.853858', 'step': 14407, 'epoch': 2} {'type': 'loss', 'content': 0.022421084344387054, 'timestamp': '2025-09-10 02:42:46.886567', 'step': 14408, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 3322488817984}, 'timestamp': '2025-09-10 02:42:46.937635', 'step': 14408, 'epoch': 2} {'type': 'loss', 'content': 0.09596863389015198, 'timestamp': '2025-09-10 02:42:46.941920', 'step': 14409, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 80], 'flops': 2373281365952}, 'timestamp': '2025-09-10 02:42:46.984342', 'step': 14409, 'epoch': 2} {'type': 'loss', 'content': 0.16269251704216003, 'timestamp': '2025-09-10 02:42:46.987454', 'step': 14410, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 128], 'flops': 3797092544000}, 'timestamp': '2025-09-10 02:42:47.043893', 'step': 14410, 'epoch': 2} {'type': 'loss', 'content': 0.08660100400447845, 'timestamp': '2025-09-10 02:42:47.051375', 'step': 14411, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 2847885091968}, 'timestamp': '2025-09-10 02:42:47.101036', 'step': 14411, 'epoch': 2} {'type': 'loss', 'content': 0.10230901092290878, 'timestamp': '2025-09-10 02:42:47.140022', 'step': 14412, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 3322488817984}, 'timestamp': '2025-09-10 02:42:47.194463', 'step': 14412, 'epoch': 2} {'type': 'loss', 'content': 0.1346910446882248, 'timestamp': '2025-09-10 02:42:47.196782', 'step': 14413, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 2847885091968}, 'timestamp': '2025-09-10 02:42:47.228528', 'step': 14413, 'epoch': 2} {'type': 'loss', 'content': 0.050872351974248886, 'timestamp': '2025-09-10 02:42:47.230505', 'step': 14414, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 144], 'flops': 4271696270016}, 'timestamp': '2025-09-10 02:42:47.262825', 'step': 14414, 'epoch': 2} {'type': 'loss', 'content': 0.17390885949134827, 'timestamp': '2025-09-10 02:42:47.265332', 'step': 14415, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 2847885091968}, 'timestamp': '2025-09-10 02:42:47.298489', 'step': 14415, 'epoch': 2} {'type': 'loss', 'content': 0.055323947221040726, 'timestamp': '2025-09-10 02:42:47.322299', 'step': 14416, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 3322488817984}, 'timestamp': '2025-09-10 02:42:47.354694', 'step': 14416, 'epoch': 2} {'type': 'loss', 'content': 0.22061973810195923, 'timestamp': '2025-09-10 02:42:47.357220', 'step': 14417, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 2847885091968}, 'timestamp': '2025-09-10 02:42:47.388419', 'step': 14417, 'epoch': 2} {'type': 'loss', 'content': 0.0709892138838768, 'timestamp': '2025-09-10 02:42:47.391986', 'step': 14418, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 3322488817984}, 'timestamp': '2025-09-10 02:42:47.425418', 'step': 14418, 'epoch': 2} {'type': 'loss', 'content': 0.17976373434066772, 'timestamp': '2025-09-10 02:42:47.435021', 'step': 14419, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 144], 'flops': 4271696270016}, 'timestamp': '2025-09-10 02:42:47.476948', 'step': 14419, 'epoch': 2} {'type': 'loss', 'content': 0.11260505020618439, 'timestamp': '2025-09-10 02:42:47.502363', 'step': 14420, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 2847885091968}, 'timestamp': '2025-09-10 02:42:47.543246', 'step': 14420, 'epoch': 2} {'type': 'loss', 'content': 0.1479245126247406, 'timestamp': '2025-09-10 02:42:47.546096', 'step': 14421, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 3322488817984}, 'timestamp': '2025-09-10 02:42:47.586483', 'step': 14421, 'epoch': 2} {'type': 'loss', 'content': 0.10853835195302963, 'timestamp': '2025-09-10 02:42:47.592533', 'step': 14422, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 2847885091968}, 'timestamp': '2025-09-10 02:42:47.644437', 'step': 14422, 'epoch': 2} {'type': 'loss', 'content': 0.08759257942438126, 'timestamp': '2025-09-10 02:42:47.658921', 'step': 14423, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 2847885091968}, 'timestamp': '2025-09-10 02:42:47.718969', 'step': 14423, 'epoch': 2} {'type': 'loss', 'content': 0.03360572084784508, 'timestamp': '2025-09-10 02:42:47.746862', 'step': 14424, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 128], 'flops': 3797092544000}, 'timestamp': '2025-09-10 02:42:47.801854', 'step': 14424, 'epoch': 2} {'type': 'loss', 'content': 0.11379740387201309, 'timestamp': '2025-09-10 02:42:47.810526', 'step': 14425, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 2847885091968}, 'timestamp': '2025-09-10 02:42:47.850844', 'step': 14425, 'epoch': 2} {'type': 'loss', 'content': 0.04397840425372124, 'timestamp': '2025-09-10 02:42:47.854564', 'step': 14426, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 128], 'flops': 3797092544000}, 'timestamp': '2025-09-10 02:42:47.893139', 'step': 14426, 'epoch': 2} {'type': 'loss', 'content': 0.07583601772785187, 'timestamp': '2025-09-10 02:42:47.897189', 'step': 14427, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 3322488817984}, 'timestamp': '2025-09-10 02:42:47.927493', 'step': 14427, 'epoch': 2} {'type': 'loss', 'content': 0.12557077407836914, 'timestamp': '2025-09-10 02:42:47.951164', 'step': 14428, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 128], 'flops': 3797092544000}, 'timestamp': '2025-09-10 02:42:47.985720', 'step': 14428, 'epoch': 2} {'type': 'loss', 'content': 0.02051970362663269, 'timestamp': '2025-09-10 02:42:48.012991', 'step': 14429, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 2847885091968}, 'timestamp': '2025-09-10 02:42:48.069936', 'step': 14429, 'epoch': 2} {'type': 'loss', 'content': 0.10283859819173813, 'timestamp': '2025-09-10 02:42:48.077733', 'step': 14430, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 128], 'flops': 3797092544000}, 'timestamp': '2025-09-10 02:42:48.128884', 'step': 14430, 'epoch': 2} {'type': 'loss', 'content': 0.12391021102666855, 'timestamp': '2025-09-10 02:42:48.144772', 'step': 14431, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 2847885091968}, 'timestamp': '2025-09-10 02:42:48.200404', 'step': 14431, 'epoch': 2} {'type': 'loss', 'content': 0.09661222249269485, 'timestamp': '2025-09-10 02:42:48.225277', 'step': 14432, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 144], 'flops': 4271696270016}, 'timestamp': '2025-09-10 02:42:48.279694', 'step': 14432, 'epoch': 2} {'type': 'loss', 'content': 0.1979786604642868, 'timestamp': '2025-09-10 02:42:48.293810', 'step': 14433, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 3322488817984}, 'timestamp': '2025-09-10 02:42:48.336675', 'step': 14433, 'epoch': 2} {'type': 'loss', 'content': 0.14108806848526, 'timestamp': '2025-09-10 02:42:48.347892', 'step': 14434, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 3322488817984}, 'timestamp': '2025-09-10 02:42:48.391939', 'step': 14434, 'epoch': 2} {'type': 'loss', 'content': 0.13666005432605743, 'timestamp': '2025-09-10 02:42:48.399439', 'step': 14435, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 2847885091968}, 'timestamp': '2025-09-10 02:42:48.447915', 'step': 14435, 'epoch': 2} {'type': 'loss', 'content': 0.09453478455543518, 'timestamp': '2025-09-10 02:42:48.489963', 'step': 14436, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 2847885091968}, 'timestamp': '2025-09-10 02:42:48.534189', 'step': 14436, 'epoch': 2} {'type': 'loss', 'content': 0.10453528165817261, 'timestamp': '2025-09-10 02:42:48.536566', 'step': 14437, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 2847885091968}, 'timestamp': '2025-09-10 02:42:48.567445', 'step': 14437, 'epoch': 2} {'type': 'loss', 'content': 0.2142724245786667, 'timestamp': '2025-09-10 02:42:48.569856', 'step': 14438, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 2847885091968}, 'timestamp': '2025-09-10 02:42:48.611759', 'step': 14438, 'epoch': 2} {'type': 'loss', 'content': 0.07004157453775406, 'timestamp': '2025-09-10 02:42:48.618392', 'step': 14439, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 128], 'flops': 3797092544000}, 'timestamp': '2025-09-10 02:42:48.658595', 'step': 14439, 'epoch': 2} {'type': 'loss', 'content': 0.11645784974098206, 'timestamp': '2025-09-10 02:42:48.686360', 'step': 14440, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 128], 'flops': 3797092544000}, 'timestamp': '2025-09-10 02:42:48.730091', 'step': 14440, 'epoch': 2} {'type': 'loss', 'content': 0.10814813524484634, 'timestamp': '2025-09-10 02:42:48.743030', 'step': 14441, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 128], 'flops': 3797092544000}, 'timestamp': '2025-09-10 02:42:48.783406', 'step': 14441, 'epoch': 2} {'type': 'loss', 'content': 0.04600049555301666, 'timestamp': '2025-09-10 02:42:48.787616', 'step': 14442, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 2847885091968}, 'timestamp': '2025-09-10 02:42:48.832701', 'step': 14442, 'epoch': 2} {'type': 'loss', 'content': 0.18101079761981964, 'timestamp': '2025-09-10 02:42:48.838094', 'step': 14443, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 128], 'flops': 3797092544000}, 'timestamp': '2025-09-10 02:42:48.880612', 'step': 14443, 'epoch': 2} {'type': 'loss', 'content': 0.055779244750738144, 'timestamp': '2025-09-10 02:42:48.909793', 'step': 14444, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 128], 'flops': 3797092544000}, 'timestamp': '2025-09-10 02:42:48.949418', 'step': 14444, 'epoch': 2} {'type': 'loss', 'content': 0.03977727144956589, 'timestamp': '2025-09-10 02:42:49.203205', 'step': 14445, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 144], 'flops': 4271696270016}, 'timestamp': '2025-09-10 02:42:49.234449', 'step': 14445, 'epoch': 2} {'type': 'loss', 'content': 0.18105784058570862, 'timestamp': '2025-09-10 02:42:49.236573', 'step': 14446, 'epoch': 2} {'type': 'flops', 'content': [{'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1582003754624}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1898404492032}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1898404492032}, {'type': 'perplexity', 'in_batch_dim': [8, 144], 'batch_size': 8, 'flops': 2847606704256}, {'type': 'perplexity', 'in_batch_dim': [8, 64], 'batch_size': 8, 'flops': 1265603017216}, {'type': 'perplexity', 'in_batch_dim': [8, 112], 'batch_size': 8, 'flops': 2214805229440}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1898404492032}, {'type': 'perplexity', 'in_batch_dim': [8, 112], 'batch_size': 8, 'flops': 2214805229440}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1898404492032}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1898404492032}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1898404492032}, {'type': 'perplexity', 'in_batch_dim': [8, 112], 'batch_size': 8, 'flops': 2214805229440}, {'type': 'perplexity', 'in_batch_dim': [8, 112], 'batch_size': 8, 'flops': 2214805229440}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1582003754624}, {'type': 'perplexity', 'in_batch_dim': [8, 144], 'batch_size': 8, 'flops': 2847606704256}, {'type': 'perplexity', 'in_batch_dim': [8, 112], 'batch_size': 8, 'flops': 2214805229440}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1582003754624}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1898404492032}, {'type': 'perplexity', 'in_batch_dim': [8, 64], 'batch_size': 8, 'flops': 1265603017216}, {'type': 'perplexity', 'in_batch_dim': [8, 64], 'batch_size': 8, 'flops': 1265603017216}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1898404492032}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1582003754624}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1582003754624}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1582003754624}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1582003754624}, {'type': 'perplexity', 'in_batch_dim': [8, 64], 'batch_size': 8, 'flops': 1265603017216}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1898404492032}, {'type': 'perplexity', 'in_batch_dim': [8, 64], 'batch_size': 8, 'flops': 1265603017216}, {'type': 'perplexity', 'in_batch_dim': [8, 64], 'batch_size': 8, 'flops': 1265603017216}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1582003754624}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1582003754624}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1582003754624}, {'type': 'perplexity', 'in_batch_dim': [8, 112], 'batch_size': 8, 'flops': 2214805229440}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1582003754624}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1582003754624}, {'type': 'perplexity', 'in_batch_dim': [8, 160], 'batch_size': 8, 'flops': 3164007441664}, {'type': 'perplexity', 'in_batch_dim': [8, 64], 'batch_size': 8, 'flops': 1265603017216}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1898404492032}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1898404492032}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1898404492032}, {'type': 'perplexity', 'in_batch_dim': [8, 64], 'batch_size': 8, 'flops': 1265603017216}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1582003754624}, {'type': 'perplexity', 'in_batch_dim': [8, 64], 'batch_size': 8, 'flops': 1265603017216}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1582003754624}, {'type': 'perplexity', 'in_batch_dim': [8, 64], 'batch_size': 8, 'flops': 1265603017216}, {'type': 'perplexity', 'in_batch_dim': [8, 112], 'batch_size': 8, 'flops': 2214805229440}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1582003754624}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1898404492032}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1898404492032}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1898404492032}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1898404492032}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1898404492032}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1898404492032}, {'type': 'perplexity', 'in_batch_dim': [8, 64], 'batch_size': 8, 'flops': 1265603017216}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1898404492032}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1582003754624}, {'type': 'perplexity', 'in_batch_dim': [8, 64], 'batch_size': 8, 'flops': 1265603017216}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1898404492032}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1898404492032}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1898404492032}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1898404492032}, {'type': 'perplexity', 'in_batch_dim': [8, 128], 'batch_size': 8, 'flops': 2531205966848}, {'type': 'perplexity', 'in_batch_dim': [8, 112], 'batch_size': 8, 'flops': 2214805229440}, {'type': 'perplexity', 'in_batch_dim': [8, 64], 'batch_size': 8, 'flops': 1265603017216}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1582003754624}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1898404492032}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1582003754624}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1898404492032}, {'type': 'perplexity', 'in_batch_dim': [8, 64], 'batch_size': 8, 'flops': 1265603017216}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1898404492032}, {'type': 'perplexity', 'in_batch_dim': [8, 112], 'batch_size': 8, 'flops': 2214805229440}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1898404492032}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1582003754624}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1582003754624}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1582003754624}, {'type': 'perplexity', 'in_batch_dim': [8, 64], 'batch_size': 8, 'flops': 1265603017216}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1582003754624}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1898404492032}, {'type': 'perplexity', 'in_batch_dim': [8, 64], 'batch_size': 8, 'flops': 1265603017216}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1582003754624}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1898404492032}, {'type': 'perplexity', 'in_batch_dim': [8, 64], 'batch_size': 8, 'flops': 1265603017216}, {'type': 'perplexity', 'in_batch_dim': [8, 112], 'batch_size': 8, 'flops': 2214805229440}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1582003754624}, {'type': 'perplexity', 'in_batch_dim': [8, 144], 'batch_size': 8, 'flops': 2847606704256}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1582003754624}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1582003754624}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1582003754624}, {'type': 'perplexity', 'in_batch_dim': [8, 64], 'batch_size': 8, 'flops': 1265603017216}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1898404492032}, {'type': 'perplexity', 'in_batch_dim': [8, 112], 'batch_size': 8, 'flops': 2214805229440}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1898404492032}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1582003754624}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1582003754624}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1582003754624}, {'type': 'perplexity', 'in_batch_dim': [8, 112], 'batch_size': 8, 'flops': 2214805229440}, {'type': 'perplexity', 'in_batch_dim': [8, 64], 'batch_size': 8, 'flops': 1265603017216}, {'type': 'perplexity', 'in_batch_dim': [8, 112], 'batch_size': 8, 'flops': 2214805229440}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1582003754624}, {'type': 'perplexity', 'in_batch_dim': [8, 64], 'batch_size': 8, 'flops': 1265603017216}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1582003754624}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1582003754624}, {'type': 'perplexity', 'in_batch_dim': [8, 64], 'batch_size': 8, 'flops': 1265603017216}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1582003754624}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1582003754624}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1582003754624}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1898404492032}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1582003754624}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1582003754624}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1898404492032}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1898404492032}, {'type': 'perplexity', 'in_batch_dim': [8, 112], 'batch_size': 8, 'flops': 2214805229440}, {'type': 'perplexity', 'in_batch_dim': [8, 64], 'batch_size': 8, 'flops': 1265603017216}, {'type': 'perplexity', 'in_batch_dim': [8, 112], 'batch_size': 8, 'flops': 2214805229440}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1898404492032}, {'type': 'perplexity', 'in_batch_dim': [8, 112], 'batch_size': 8, 'flops': 2214805229440}, {'type': 'perplexity', 'in_batch_dim': [8, 128], 'batch_size': 8, 'flops': 2531205966848}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1582003754624}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1898404492032}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1898404492032}, {'type': 'perplexity', 'in_batch_dim': [8, 192], 'batch_size': 8, 'flops': 3796808916480}, {'type': 'perplexity', 'in_batch_dim': [8, 64], 'batch_size': 8, 'flops': 1265603017216}, {'type': 'perplexity', 'in_batch_dim': [8, 144], 'batch_size': 8, 'flops': 2847606704256}, {'type': 'perplexity', 'in_batch_dim': [8, 64], 'batch_size': 8, 'flops': 1265603017216}, {'type': 'perplexity', 'in_batch_dim': [8, 144], 'batch_size': 8, 'flops': 2847606704256}, {'type': 'perplexity', 'in_batch_dim': [8, 64], 'batch_size': 8, 'flops': 1265603017216}, {'type': 'perplexity', 'in_batch_dim': [8, 64], 'batch_size': 8, 'flops': 1265603017216}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1898404492032}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1898404492032}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1582003754624}, {'type': 'perplexity', 'in_batch_dim': [8, 128], 'batch_size': 8, 'flops': 2531205966848}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1898404492032}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1898404492032}, {'type': 'perplexity', 'in_batch_dim': [8, 112], 'batch_size': 8, 'flops': 2214805229440}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1582003754624}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1582003754624}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1898404492032}, {'type': 'perplexity', 'in_batch_dim': [8, 64], 'batch_size': 8, 'flops': 1265603017216}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1898404492032}, {'type': 'perplexity', 'in_batch_dim': [8, 112], 'batch_size': 8, 'flops': 2214805229440}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1898404492032}, {'type': 'perplexity', 'in_batch_dim': [8, 64], 'batch_size': 8, 'flops': 1265603017216}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1582003754624}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1898404492032}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1898404492032}, {'type': 'perplexity', 'in_batch_dim': [8, 64], 'batch_size': 8, 'flops': 1265603017216}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1898404492032}, {'type': 'perplexity', 'in_batch_dim': [8, 64], 'batch_size': 8, 'flops': 1265603017216}, {'type': 'perplexity', 'in_batch_dim': [8, 112], 'batch_size': 8, 'flops': 2214805229440}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1582003754624}, {'type': 'perplexity', 'in_batch_dim': [8, 128], 'batch_size': 8, 'flops': 2531205966848}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1582003754624}, {'type': 'perplexity', 'in_batch_dim': [8, 112], 'batch_size': 8, 'flops': 2214805229440}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1898404492032}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1582003754624}, {'type': 'perplexity', 'in_batch_dim': [8, 112], 'batch_size': 8, 'flops': 2214805229440}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1582003754624}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1898404492032}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1898404492032}, {'type': 'perplexity', 'in_batch_dim': [8, 128], 'batch_size': 8, 'flops': 2531205966848}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1898404492032}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1898404492032}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1582003754624}, {'type': 'perplexity', 'in_batch_dim': [8, 112], 'batch_size': 8, 'flops': 2214805229440}, {'type': 'perplexity', 'in_batch_dim': [8, 128], 'batch_size': 8, 'flops': 2531205966848}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1898404492032}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1582003754624}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1582003754624}, {'type': 'perplexity', 'in_batch_dim': [8, 128], 'batch_size': 8, 'flops': 2531205966848}, {'type': 'perplexity', 'in_batch_dim': [8, 112], 'batch_size': 8, 'flops': 2214805229440}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1582003754624}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1582003754624}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1898404492032}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1582003754624}, {'type': 'perplexity', 'in_batch_dim': [8, 64], 'batch_size': 8, 'flops': 1265603017216}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1582003754624}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1582003754624}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1582003754624}, {'type': 'perplexity', 'in_batch_dim': [8, 112], 'batch_size': 8, 'flops': 2214805229440}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1582003754624}, {'type': 'perplexity', 'in_batch_dim': [8, 64], 'batch_size': 8, 'flops': 1265603017216}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1582003754624}, {'type': 'perplexity', 'in_batch_dim': [8, 128], 'batch_size': 8, 'flops': 2531205966848}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1898404492032}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1898404492032}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1582003754624}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1898404492032}, {'type': 'perplexity', 'in_batch_dim': [8, 64], 'batch_size': 8, 'flops': 1265603017216}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1898404492032}, {'type': 'perplexity', 'in_batch_dim': [8, 112], 'batch_size': 8, 'flops': 2214805229440}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1898404492032}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1582003754624}, {'type': 'perplexity', 'in_batch_dim': [8, 112], 'batch_size': 8, 'flops': 2214805229440}, {'type': 'perplexity', 'in_batch_dim': [8, 112], 'batch_size': 8, 'flops': 2214805229440}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1898404492032}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1898404492032}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1898404492032}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1898404492032}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1582003754624}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1582003754624}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1582003754624}, {'type': 'perplexity', 'in_batch_dim': [8, 112], 'batch_size': 8, 'flops': 2214805229440}, {'type': 'perplexity', 'in_batch_dim': [8, 64], 'batch_size': 8, 'flops': 1265603017216}, {'type': 'perplexity', 'in_batch_dim': [8, 144], 'batch_size': 8, 'flops': 2847606704256}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1898404492032}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1898404492032}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1898404492032}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1898404492032}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1898404492032}, {'type': 'perplexity', 'in_batch_dim': [8, 112], 'batch_size': 8, 'flops': 2214805229440}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1582003754624}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1582003754624}, {'type': 'perplexity', 'in_batch_dim': [8, 64], 'batch_size': 8, 'flops': 1265603017216}, {'type': 'perplexity', 'in_batch_dim': [8, 112], 'batch_size': 8, 'flops': 2214805229440}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1582003754624}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1898404492032}, {'type': 'perplexity', 'in_batch_dim': [8, 64], 'batch_size': 8, 'flops': 1265603017216}, {'type': 'perplexity', 'in_batch_dim': [8, 112], 'batch_size': 8, 'flops': 2214805229440}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1898404492032}, {'type': 'perplexity', 'in_batch_dim': [8, 64], 'batch_size': 8, 'flops': 1265603017216}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1582003754624}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1582003754624}, {'type': 'perplexity', 'in_batch_dim': [8, 64], 'batch_size': 8, 'flops': 1265603017216}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1898404492032}, {'type': 'perplexity', 'in_batch_dim': [8, 64], 'batch_size': 8, 'flops': 1265603017216}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1582003754624}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1898404492032}, {'type': 'perplexity', 'in_batch_dim': [8, 112], 'batch_size': 8, 'flops': 2214805229440}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1582003754624}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1898404492032}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1582003754624}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1898404492032}, {'type': 'perplexity', 'in_batch_dim': [8, 112], 'batch_size': 8, 'flops': 2214805229440}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1898404492032}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1898404492032}, {'type': 'perplexity', 'in_batch_dim': [8, 64], 'batch_size': 8, 'flops': 1265603017216}, {'type': 'perplexity', 'in_batch_dim': [8, 64], 'batch_size': 8, 'flops': 1265603017216}, {'type': 'perplexity', 'in_batch_dim': [8, 128], 'batch_size': 8, 'flops': 2531205966848}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1582003754624}, {'type': 'perplexity', 'in_batch_dim': [8, 64], 'batch_size': 8, 'flops': 1265603017216}, {'type': 'perplexity', 'in_batch_dim': [8, 112], 'batch_size': 8, 'flops': 2214805229440}, {'type': 'perplexity', 'in_batch_dim': [8, 112], 'batch_size': 8, 'flops': 2214805229440}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1898404492032}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1582003754624}, {'type': 'perplexity', 'in_batch_dim': [8, 128], 'batch_size': 8, 'flops': 2531205966848}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1898404492032}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1582003754624}, {'type': 'perplexity', 'in_batch_dim': [8, 112], 'batch_size': 8, 'flops': 2214805229440}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1898404492032}, {'type': 'perplexity', 'in_batch_dim': [8, 64], 'batch_size': 8, 'flops': 1265603017216}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1582003754624}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1898404492032}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1898404492032}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1582003754624}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1582003754624}, {'type': 'perplexity', 'in_batch_dim': [8, 64], 'batch_size': 8, 'flops': 1265603017216}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1898404492032}, {'type': 'perplexity', 'in_batch_dim': [8, 112], 'batch_size': 8, 'flops': 2214805229440}, {'type': 'perplexity', 'in_batch_dim': [8, 64], 'batch_size': 8, 'flops': 1265603017216}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1898404492032}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1582003754624}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1582003754624}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1898404492032}, {'type': 'perplexity', 'in_batch_dim': [8, 112], 'batch_size': 8, 'flops': 2214805229440}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1898404492032}, {'type': 'perplexity', 'in_batch_dim': [8, 112], 'batch_size': 8, 'flops': 2214805229440}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1582003754624}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1582003754624}, {'type': 'perplexity', 'in_batch_dim': [8, 64], 'batch_size': 8, 'flops': 1265603017216}, {'type': 'perplexity', 'in_batch_dim': [8, 112], 'batch_size': 8, 'flops': 2214805229440}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1582003754624}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1898404492032}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1582003754624}, {'type': 'perplexity', 'in_batch_dim': [8, 112], 'batch_size': 8, 'flops': 2214805229440}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1898404492032}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1898404492032}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1582003754624}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1898404492032}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1582003754624}, {'type': 'perplexity', 'in_batch_dim': [8, 112], 'batch_size': 8, 'flops': 2214805229440}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1582003754624}, {'type': 'perplexity', 'in_batch_dim': [8, 64], 'batch_size': 8, 'flops': 1265603017216}, {'type': 'perplexity', 'in_batch_dim': [8, 64], 'batch_size': 8, 'flops': 1265603017216}, {'type': 'perplexity', 'in_batch_dim': [8, 48], 'batch_size': 8, 'flops': 949202279808}, {'type': 'perplexity', 'in_batch_dim': [8, 112], 'batch_size': 8, 'flops': 2214805229440}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1582003754624}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1898404492032}, {'type': 'perplexity', 'in_batch_dim': [8, 128], 'batch_size': 8, 'flops': 2531205966848}, {'type': 'perplexity', 'in_batch_dim': [8, 112], 'batch_size': 8, 'flops': 2214805229440}, {'type': 'perplexity', 'in_batch_dim': [8, 128], 'batch_size': 8, 'flops': 2531205966848}, {'type': 'perplexity', 'in_batch_dim': [8, 112], 'batch_size': 8, 'flops': 2214805229440}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1582003754624}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1898404492032}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1582003754624}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1582003754624}, {'type': 'perplexity', 'in_batch_dim': [8, 112], 'batch_size': 8, 'flops': 2214805229440}, {'type': 'perplexity', 'in_batch_dim': [8, 112], 'batch_size': 8, 'flops': 2214805229440}, {'type': 'perplexity', 'in_batch_dim': [8, 64], 'batch_size': 8, 'flops': 1265603017216}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1898404492032}, {'type': 'perplexity', 'in_batch_dim': [8, 112], 'batch_size': 8, 'flops': 2214805229440}, {'type': 'perplexity', 'in_batch_dim': [8, 64], 'batch_size': 8, 'flops': 1265603017216}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1898404492032}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1582003754624}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1898404492032}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1898404492032}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1898404492032}, {'type': 'perplexity', 'in_batch_dim': [8, 48], 'batch_size': 8, 'flops': 949202279808}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1898404492032}, {'type': 'perplexity', 'in_batch_dim': [8, 112], 'batch_size': 8, 'flops': 2214805229440}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1582003754624}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1898404492032}, {'type': 'perplexity', 'in_batch_dim': [8, 64], 'batch_size': 8, 'flops': 1265603017216}, {'type': 'perplexity', 'in_batch_dim': [8, 112], 'batch_size': 8, 'flops': 2214805229440}, {'type': 'perplexity', 'in_batch_dim': [8, 64], 'batch_size': 8, 'flops': 1265603017216}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1582003754624}, {'type': 'perplexity', 'in_batch_dim': [8, 64], 'batch_size': 8, 'flops': 1265603017216}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1582003754624}, {'type': 'perplexity', 'in_batch_dim': [8, 112], 'batch_size': 8, 'flops': 2214805229440}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1582003754624}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1582003754624}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1582003754624}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1582003754624}, {'type': 'perplexity', 'in_batch_dim': [8, 112], 'batch_size': 8, 'flops': 2214805229440}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1582003754624}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1898404492032}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1898404492032}, {'type': 'perplexity', 'in_batch_dim': [8, 112], 'batch_size': 8, 'flops': 2214805229440}, {'type': 'perplexity', 'in_batch_dim': [8, 144], 'batch_size': 8, 'flops': 2847606704256}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1582003754624}, {'type': 'perplexity', 'in_batch_dim': [8, 64], 'batch_size': 8, 'flops': 1265603017216}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1898404492032}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1898404492032}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1582003754624}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1898404492032}, {'type': 'perplexity', 'in_batch_dim': [8, 128], 'batch_size': 8, 'flops': 2531205966848}, {'type': 'perplexity', 'in_batch_dim': [8, 112], 'batch_size': 8, 'flops': 2214805229440}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1898404492032}, {'type': 'perplexity', 'in_batch_dim': [8, 64], 'batch_size': 8, 'flops': 1265603017216}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1898404492032}, {'type': 'perplexity', 'in_batch_dim': [8, 160], 'batch_size': 8, 'flops': 3164007441664}, {'type': 'perplexity', 'in_batch_dim': [8, 64], 'batch_size': 8, 'flops': 1265603017216}, {'type': 'perplexity', 'in_batch_dim': [8, 112], 'batch_size': 8, 'flops': 2214805229440}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1898404492032}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1898404492032}, {'type': 'perplexity', 'in_batch_dim': [8, 144], 'batch_size': 8, 'flops': 2847606704256}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1582003754624}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1898404492032}, {'type': 'perplexity', 'in_batch_dim': [8, 64], 'batch_size': 8, 'flops': 1265603017216}, {'type': 'perplexity', 'in_batch_dim': [8, 64], 'batch_size': 8, 'flops': 1265603017216}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1898404492032}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1898404492032}, {'type': 'perplexity', 'in_batch_dim': [8, 112], 'batch_size': 8, 'flops': 2214805229440}, {'type': 'perplexity', 'in_batch_dim': [8, 112], 'batch_size': 8, 'flops': 2214805229440}, {'type': 'perplexity', 'in_batch_dim': [8, 128], 'batch_size': 8, 'flops': 2531205966848}, {'type': 'perplexity', 'in_batch_dim': [8, 112], 'batch_size': 8, 'flops': 2214805229440}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1582003754624}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1582003754624}, {'type': 'perplexity', 'in_batch_dim': [8, 112], 'batch_size': 8, 'flops': 2214805229440}, {'type': 'perplexity', 'in_batch_dim': [8, 128], 'batch_size': 8, 'flops': 2531205966848}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1582003754624}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1582003754624}, {'type': 'perplexity', 'in_batch_dim': [8, 64], 'batch_size': 8, 'flops': 1265603017216}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1898404492032}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1582003754624}, {'type': 'perplexity', 'in_batch_dim': [8, 112], 'batch_size': 8, 'flops': 2214805229440}, {'type': 'perplexity', 'in_batch_dim': [8, 64], 'batch_size': 8, 'flops': 1265603017216}, {'type': 'perplexity', 'in_batch_dim': [8, 64], 'batch_size': 8, 'flops': 1265603017216}, {'type': 'perplexity', 'in_batch_dim': [8, 128], 'batch_size': 8, 'flops': 2531205966848}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1898404492032}, {'type': 'perplexity', 'in_batch_dim': [8, 112], 'batch_size': 8, 'flops': 2214805229440}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1582003754624}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1582003754624}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1582003754624}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1582003754624}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1582003754624}, {'type': 'perplexity', 'in_batch_dim': [8, 112], 'batch_size': 8, 'flops': 2214805229440}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1898404492032}, {'type': 'perplexity', 'in_batch_dim': [8, 112], 'batch_size': 8, 'flops': 2214805229440}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1898404492032}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1582003754624}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1582003754624}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1898404492032}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1898404492032}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1582003754624}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1582003754624}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1582003754624}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1582003754624}, {'type': 'perplexity', 'in_batch_dim': [8, 48], 'batch_size': 8, 'flops': 949202279808}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1582003754624}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1898404492032}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1582003754624}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1898404492032}, {'type': 'perplexity', 'in_batch_dim': [8, 112], 'batch_size': 8, 'flops': 2214805229440}, {'type': 'perplexity', 'in_batch_dim': [8, 128], 'batch_size': 8, 'flops': 2531205966848}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1898404492032}, {'type': 'perplexity', 'in_batch_dim': [8, 64], 'batch_size': 8, 'flops': 1265603017216}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1898404492032}, {'type': 'perplexity', 'in_batch_dim': [8, 64], 'batch_size': 8, 'flops': 1265603017216}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1582003754624}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1582003754624}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1582003754624}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1582003754624}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1898404492032}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1582003754624}, {'type': 'perplexity', 'in_batch_dim': [8, 112], 'batch_size': 8, 'flops': 2214805229440}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1898404492032}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1582003754624}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1898404492032}, {'type': 'perplexity', 'in_batch_dim': [8, 64], 'batch_size': 8, 'flops': 1265603017216}, {'type': 'perplexity', 'in_batch_dim': [8, 64], 'batch_size': 8, 'flops': 1265603017216}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1898404492032}, {'type': 'perplexity', 'in_batch_dim': [8, 64], 'batch_size': 8, 'flops': 1265603017216}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1582003754624}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1582003754624}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1582003754624}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1582003754624}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1582003754624}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1582003754624}, {'type': 'perplexity', 'in_batch_dim': [8, 64], 'batch_size': 8, 'flops': 1265603017216}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1582003754624}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1582003754624}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1898404492032}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1898404492032}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1898404492032}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1898404492032}, {'type': 'perplexity', 'in_batch_dim': [8, 128], 'batch_size': 8, 'flops': 2531205966848}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1582003754624}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1898404492032}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1898404492032}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1898404492032}, {'type': 'perplexity', 'in_batch_dim': [8, 64], 'batch_size': 8, 'flops': 1265603017216}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1898404492032}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1898404492032}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1582003754624}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1898404492032}, {'type': 'perplexity', 'in_batch_dim': [8, 112], 'batch_size': 8, 'flops': 2214805229440}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1898404492032}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1582003754624}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1898404492032}, {'type': 'perplexity', 'in_batch_dim': [8, 64], 'batch_size': 8, 'flops': 1265603017216}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1898404492032}, {'type': 'perplexity', 'in_batch_dim': [8, 112], 'batch_size': 8, 'flops': 2214805229440}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1898404492032}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1898404492032}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1582003754624}, {'type': 'perplexity', 'in_batch_dim': [8, 64], 'batch_size': 8, 'flops': 1265603017216}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1582003754624}, {'type': 'perplexity', 'in_batch_dim': [8, 128], 'batch_size': 8, 'flops': 2531205966848}, {'type': 'perplexity', 'in_batch_dim': [8, 64], 'batch_size': 8, 'flops': 1265603017216}, {'type': 'perplexity', 'in_batch_dim': [8, 128], 'batch_size': 8, 'flops': 2531205966848}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1898404492032}, {'type': 'perplexity', 'in_batch_dim': [8, 112], 'batch_size': 8, 'flops': 2214805229440}, {'type': 'perplexity', 'in_batch_dim': [8, 112], 'batch_size': 8, 'flops': 2214805229440}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1898404492032}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1898404492032}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1582003754624}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1582003754624}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1582003754624}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1898404492032}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1582003754624}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1582003754624}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1898404492032}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1582003754624}, {'type': 'perplexity', 'in_batch_dim': [8, 64], 'batch_size': 8, 'flops': 1265603017216}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1898404492032}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1898404492032}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1898404492032}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1898404492032}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1582003754624}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1898404492032}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1582003754624}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1582003754624}, {'type': 'perplexity', 'in_batch_dim': [8, 112], 'batch_size': 8, 'flops': 2214805229440}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1582003754624}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1898404492032}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1582003754624}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1582003754624}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1582003754624}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1898404492032}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1582003754624}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1898404492032}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1582003754624}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1582003754624}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1898404492032}, {'type': 'perplexity', 'in_batch_dim': [8, 112], 'batch_size': 8, 'flops': 2214805229440}, {'type': 'perplexity', 'in_batch_dim': [8, 128], 'batch_size': 8, 'flops': 2531205966848}, {'type': 'perplexity', 'in_batch_dim': [8, 112], 'batch_size': 8, 'flops': 2214805229440}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1582003754624}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1898404492032}, {'type': 'perplexity', 'in_batch_dim': [8, 64], 'batch_size': 8, 'flops': 1265603017216}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1898404492032}, {'type': 'perplexity', 'in_batch_dim': [8, 64], 'batch_size': 8, 'flops': 1265603017216}, {'type': 'perplexity', 'in_batch_dim': [8, 144], 'batch_size': 8, 'flops': 2847606704256}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1582003754624}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1898404492032}, {'type': 'perplexity', 'in_batch_dim': [8, 112], 'batch_size': 8, 'flops': 2214805229440}, {'type': 'perplexity', 'in_batch_dim': [8, 144], 'batch_size': 8, 'flops': 2847606704256}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1582003754624}, {'type': 'perplexity', 'in_batch_dim': [8, 112], 'batch_size': 8, 'flops': 2214805229440}, {'type': 'perplexity', 'in_batch_dim': [8, 64], 'batch_size': 8, 'flops': 1265603017216}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1582003754624}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1898404492032}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1898404492032}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1582003754624}, {'type': 'perplexity', 'in_batch_dim': [8, 112], 'batch_size': 8, 'flops': 2214805229440}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1582003754624}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1582003754624}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1582003754624}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1582003754624}, {'type': 'perplexity', 'in_batch_dim': [8, 112], 'batch_size': 8, 'flops': 2214805229440}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1898404492032}, {'type': 'perplexity', 'in_batch_dim': [8, 128], 'batch_size': 8, 'flops': 2531205966848}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1582003754624}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1582003754624}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1898404492032}, {'type': 'perplexity', 'in_batch_dim': [8, 64], 'batch_size': 8, 'flops': 1265603017216}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1898404492032}, {'type': 'perplexity', 'in_batch_dim': [8, 64], 'batch_size': 8, 'flops': 1265603017216}, {'type': 'perplexity', 'in_batch_dim': [8, 112], 'batch_size': 8, 'flops': 2214805229440}, {'type': 'perplexity', 'in_batch_dim': [8, 112], 'batch_size': 8, 'flops': 2214805229440}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1582003754624}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1898404492032}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1582003754624}, {'type': 'perplexity', 'in_batch_dim': [8, 64], 'batch_size': 8, 'flops': 1265603017216}, {'type': 'perplexity', 'in_batch_dim': [8, 64], 'batch_size': 8, 'flops': 1265603017216}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1898404492032}, {'type': 'perplexity', 'in_batch_dim': [8, 64], 'batch_size': 8, 'flops': 1265603017216}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1898404492032}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1898404492032}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1582003754624}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1898404492032}, {'type': 'perplexity', 'in_batch_dim': [8, 144], 'batch_size': 8, 'flops': 2847606704256}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1898404492032}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1582003754624}, {'type': 'perplexity', 'in_batch_dim': [8, 128], 'batch_size': 8, 'flops': 2531205966848}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1898404492032}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1898404492032}, {'type': 'perplexity', 'in_batch_dim': [8, 112], 'batch_size': 8, 'flops': 2214805229440}, {'type': 'perplexity', 'in_batch_dim': [8, 112], 'batch_size': 8, 'flops': 2214805229440}, {'type': 'perplexity', 'in_batch_dim': [8, 64], 'batch_size': 8, 'flops': 1265603017216}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1582003754624}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1898404492032}, {'type': 'perplexity', 'in_batch_dim': [8, 128], 'batch_size': 8, 'flops': 2531205966848}, {'type': 'perplexity', 'in_batch_dim': [8, 144], 'batch_size': 8, 'flops': 2847606704256}, {'type': 'perplexity', 'in_batch_dim': [8, 112], 'batch_size': 8, 'flops': 2214805229440}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1582003754624}, {'type': 'perplexity', 'in_batch_dim': [8, 64], 'batch_size': 8, 'flops': 1265603017216}, {'type': 'perplexity', 'in_batch_dim': [8, 64], 'batch_size': 8, 'flops': 1265603017216}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1898404492032}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1582003754624}, {'type': 'perplexity', 'in_batch_dim': [8, 64], 'batch_size': 8, 'flops': 1265603017216}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1898404492032}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1582003754624}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1898404492032}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1898404492032}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1582003754624}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1582003754624}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1582003754624}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1582003754624}, {'type': 'perplexity', 'in_batch_dim': [3, 48], 'batch_size': 8, 'flops': 949202279808}], 'timestamp': '2025-09-10 02:43:03.789439', 'step': 14446, 'epoch': 2} {'type': 'pplx', 'content': 8773.582710908446, 'timestamp': '2025-09-10 02:43:03.792711', 'step': 14446, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 2847885091968}, 'timestamp': '2025-09-10 02:43:03.822795', 'step': 14446, 'epoch': 2} {'type': 'loss', 'content': 0.15403532981872559, 'timestamp': '2025-09-10 02:43:03.824965', 'step': 14447, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 128], 'flops': 3797092544000}, 'timestamp': '2025-09-10 02:43:03.856212', 'step': 14447, 'epoch': 2} {'type': 'loss', 'content': 0.09020351618528366, 'timestamp': '2025-09-10 02:43:03.879943', 'step': 14448, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 128], 'flops': 3797092544000}, 'timestamp': '2025-09-10 02:43:03.910072', 'step': 14448, 'epoch': 2} {'type': 'loss', 'content': 0.04445920139551163, 'timestamp': '2025-09-10 02:43:03.914948', 'step': 14449, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 2847885091968}, 'timestamp': '2025-09-10 02:43:03.944640', 'step': 14449, 'epoch': 2} {'type': 'loss', 'content': 0.11800051480531693, 'timestamp': '2025-09-10 02:43:03.947482', 'step': 14450, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 2847885091968}, 'timestamp': '2025-09-10 02:43:03.977794', 'step': 14450, 'epoch': 2} {'type': 'loss', 'content': 0.12790735065937042, 'timestamp': '2025-09-10 02:43:03.980225', 'step': 14451, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 2847885091968}, 'timestamp': '2025-09-10 02:43:04.010496', 'step': 14451, 'epoch': 2} {'type': 'loss', 'content': 0.09020520001649857, 'timestamp': '2025-09-10 02:43:04.036249', 'step': 14452, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 3322488817984}, 'timestamp': '2025-09-10 02:43:04.067396', 'step': 14452, 'epoch': 2} {'type': 'loss', 'content': 0.13251836597919464, 'timestamp': '2025-09-10 02:43:04.069628', 'step': 14453, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 144], 'flops': 4271696270016}, 'timestamp': '2025-09-10 02:43:04.101341', 'step': 14453, 'epoch': 2} {'type': 'loss', 'content': 0.06129473075270653, 'timestamp': '2025-09-10 02:43:04.103645', 'step': 14454, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 3322488817984}, 'timestamp': '2025-09-10 02:43:04.134339', 'step': 14454, 'epoch': 2} {'type': 'loss', 'content': 0.18202053010463715, 'timestamp': '2025-09-10 02:43:04.136278', 'step': 14455, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 2847885091968}, 'timestamp': '2025-09-10 02:43:04.166521', 'step': 14455, 'epoch': 2} {'type': 'loss', 'content': 0.05977685749530792, 'timestamp': '2025-09-10 02:43:04.191102', 'step': 14456, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 144], 'flops': 4271696270016}, 'timestamp': '2025-09-10 02:43:04.221431', 'step': 14456, 'epoch': 2} {'type': 'loss', 'content': 0.09107192605733871, 'timestamp': '2025-09-10 02:43:04.223532', 'step': 14457, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 128], 'flops': 3797092544000}, 'timestamp': '2025-09-10 02:43:04.254038', 'step': 14457, 'epoch': 2} {'type': 'loss', 'content': 0.16345196962356567, 'timestamp': '2025-09-10 02:43:04.255986', 'step': 14458, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 128], 'flops': 3797092544000}, 'timestamp': '2025-09-10 02:43:04.285641', 'step': 14458, 'epoch': 2} {'type': 'loss', 'content': 0.10624772310256958, 'timestamp': '2025-09-10 02:43:04.288015', 'step': 14459, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 2847885091968}, 'timestamp': '2025-09-10 02:43:04.319031', 'step': 14459, 'epoch': 2} {'type': 'loss', 'content': 0.189459428191185, 'timestamp': '2025-09-10 02:43:04.342501', 'step': 14460, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 2847885091968}, 'timestamp': '2025-09-10 02:43:04.371629', 'step': 14460, 'epoch': 2} {'type': 'loss', 'content': 0.018288681283593178, 'timestamp': '2025-09-10 02:43:04.374065', 'step': 14461, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 3322488817984}, 'timestamp': '2025-09-10 02:43:04.404766', 'step': 14461, 'epoch': 2} {'type': 'loss', 'content': 0.09061811119318008, 'timestamp': '2025-09-10 02:43:04.407113', 'step': 14462, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 3322488817984}, 'timestamp': '2025-09-10 02:43:04.438908', 'step': 14462, 'epoch': 2} {'type': 'loss', 'content': 0.15044422447681427, 'timestamp': '2025-09-10 02:43:04.441394', 'step': 14463, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 128], 'flops': 3797092544000}, 'timestamp': '2025-09-10 02:43:04.471614', 'step': 14463, 'epoch': 2} {'type': 'loss', 'content': 0.1130853220820427, 'timestamp': '2025-09-10 02:43:04.494835', 'step': 14464, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 2847885091968}, 'timestamp': '2025-09-10 02:43:04.525637', 'step': 14464, 'epoch': 2} {'type': 'loss', 'content': 0.08118745684623718, 'timestamp': '2025-09-10 02:43:04.527783', 'step': 14465, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 2847885091968}, 'timestamp': '2025-09-10 02:43:04.558595', 'step': 14465, 'epoch': 2} {'type': 'loss', 'content': 0.08605637401342392, 'timestamp': '2025-09-10 02:43:04.560508', 'step': 14466, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 2847885091968}, 'timestamp': '2025-09-10 02:43:04.590444', 'step': 14466, 'epoch': 2} {'type': 'loss', 'content': 0.11217435449361801, 'timestamp': '2025-09-10 02:43:04.592780', 'step': 14467, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 128], 'flops': 3797092544000}, 'timestamp': '2025-09-10 02:43:04.623300', 'step': 14467, 'epoch': 2} {'type': 'loss', 'content': 0.0658031553030014, 'timestamp': '2025-09-10 02:43:04.646959', 'step': 14468, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 128], 'flops': 3797092544000}, 'timestamp': '2025-09-10 02:43:04.677126', 'step': 14468, 'epoch': 2} {'type': 'loss', 'content': 0.13446390628814697, 'timestamp': '2025-09-10 02:43:04.679815', 'step': 14469, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 128], 'flops': 3797092544000}, 'timestamp': '2025-09-10 02:43:04.710024', 'step': 14469, 'epoch': 2} {'type': 'loss', 'content': 0.06623359024524689, 'timestamp': '2025-09-10 02:43:04.712185', 'step': 14470, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 2847885091968}, 'timestamp': '2025-09-10 02:43:04.777716', 'step': 14470, 'epoch': 2} {'type': 'loss', 'content': 0.10994549840688705, 'timestamp': '2025-09-10 02:43:04.779906', 'step': 14471, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 3322488817984}, 'timestamp': '2025-09-10 02:43:04.809885', 'step': 14471, 'epoch': 2} {'type': 'loss', 'content': 0.11106924712657928, 'timestamp': '2025-09-10 02:43:04.833589', 'step': 14472, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 128], 'flops': 3797092544000}, 'timestamp': '2025-09-10 02:43:04.864937', 'step': 14472, 'epoch': 2} {'type': 'loss', 'content': 0.1465023159980774, 'timestamp': '2025-09-10 02:43:04.868103', 'step': 14473, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 2847885091968}, 'timestamp': '2025-09-10 02:43:04.899581', 'step': 14473, 'epoch': 2} {'type': 'loss', 'content': 0.17480169236660004, 'timestamp': '2025-09-10 02:43:04.901930', 'step': 14474, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 128], 'flops': 3797092544000}, 'timestamp': '2025-09-10 02:43:04.931961', 'step': 14474, 'epoch': 2} {'type': 'loss', 'content': 0.10048291087150574, 'timestamp': '2025-09-10 02:43:04.934544', 'step': 14475, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 2847885091968}, 'timestamp': '2025-09-10 02:43:04.965583', 'step': 14475, 'epoch': 2} {'type': 'loss', 'content': 0.13912980258464813, 'timestamp': '2025-09-10 02:43:04.989345', 'step': 14476, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 3322488817984}, 'timestamp': '2025-09-10 02:43:05.020033', 'step': 14476, 'epoch': 2} {'type': 'loss', 'content': 0.16715627908706665, 'timestamp': '2025-09-10 02:43:05.022304', 'step': 14477, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 3322488817984}, 'timestamp': '2025-09-10 02:43:05.055555', 'step': 14477, 'epoch': 2} {'type': 'loss', 'content': 0.16127093136310577, 'timestamp': '2025-09-10 02:43:05.059188', 'step': 14478, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 3322488817984}, 'timestamp': '2025-09-10 02:43:05.093016', 'step': 14478, 'epoch': 2} {'type': 'loss', 'content': 0.10934679210186005, 'timestamp': '2025-09-10 02:43:05.095276', 'step': 14479, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 2847885091968}, 'timestamp': '2025-09-10 02:43:05.126143', 'step': 14479, 'epoch': 2} {'type': 'loss', 'content': 0.047811977565288544, 'timestamp': '2025-09-10 02:43:05.151198', 'step': 14480, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 128], 'flops': 3797092544000}, 'timestamp': '2025-09-10 02:43:05.182160', 'step': 14480, 'epoch': 2} {'type': 'loss', 'content': 0.11720465123653412, 'timestamp': '2025-09-10 02:43:05.185102', 'step': 14481, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 128], 'flops': 3797092544000}, 'timestamp': '2025-09-10 02:43:05.215162', 'step': 14481, 'epoch': 2} {'type': 'loss', 'content': 0.08994024246931076, 'timestamp': '2025-09-10 02:43:05.217441', 'step': 14482, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 144], 'flops': 4271696270016}, 'timestamp': '2025-09-10 02:43:05.247333', 'step': 14482, 'epoch': 2} {'type': 'loss', 'content': 0.09702755510807037, 'timestamp': '2025-09-10 02:43:05.249714', 'step': 14483, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 3322488817984}, 'timestamp': '2025-09-10 02:43:05.279901', 'step': 14483, 'epoch': 2} {'type': 'loss', 'content': 0.05826644226908684, 'timestamp': '2025-09-10 02:43:05.303439', 'step': 14484, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 3322488817984}, 'timestamp': '2025-09-10 02:43:05.334884', 'step': 14484, 'epoch': 2} {'type': 'loss', 'content': 0.1340472400188446, 'timestamp': '2025-09-10 02:43:05.337876', 'step': 14485, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 2847885091968}, 'timestamp': '2025-09-10 02:43:05.367898', 'step': 14485, 'epoch': 2} {'type': 'loss', 'content': 0.16829180717468262, 'timestamp': '2025-09-10 02:43:05.370125', 'step': 14486, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 3322488817984}, 'timestamp': '2025-09-10 02:43:05.400335', 'step': 14486, 'epoch': 2} {'type': 'loss', 'content': 0.12472302466630936, 'timestamp': '2025-09-10 02:43:05.403504', 'step': 14487, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 3322488817984}, 'timestamp': '2025-09-10 02:43:05.435405', 'step': 14487, 'epoch': 2} {'type': 'loss', 'content': 0.0745072066783905, 'timestamp': '2025-09-10 02:43:05.458804', 'step': 14488, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 176], 'flops': 5220903722048}, 'timestamp': '2025-09-10 02:43:05.489693', 'step': 14488, 'epoch': 2} {'type': 'loss', 'content': 0.06678076833486557, 'timestamp': '2025-09-10 02:43:05.492239', 'step': 14489, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 2847885091968}, 'timestamp': '2025-09-10 02:43:05.522518', 'step': 14489, 'epoch': 2} {'type': 'loss', 'content': 0.13264460861682892, 'timestamp': '2025-09-10 02:43:05.524854', 'step': 14490, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 2847885091968}, 'timestamp': '2025-09-10 02:43:05.554860', 'step': 14490, 'epoch': 2} {'type': 'loss', 'content': 0.05194186791777611, 'timestamp': '2025-09-10 02:43:05.557194', 'step': 14491, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 144], 'flops': 4271696270016}, 'timestamp': '2025-09-10 02:43:05.587169', 'step': 14491, 'epoch': 2} {'type': 'loss', 'content': 0.07661781460046768, 'timestamp': '2025-09-10 02:43:05.610560', 'step': 14492, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 2847885091968}, 'timestamp': '2025-09-10 02:43:05.642034', 'step': 14492, 'epoch': 2} {'type': 'loss', 'content': 0.09154870361089706, 'timestamp': '2025-09-10 02:43:05.644010', 'step': 14493, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 3322488817984}, 'timestamp': '2025-09-10 02:43:05.674289', 'step': 14493, 'epoch': 2} {'type': 'loss', 'content': 0.11595262587070465, 'timestamp': '2025-09-10 02:43:05.676811', 'step': 14494, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 2847885091968}, 'timestamp': '2025-09-10 02:43:05.707682', 'step': 14494, 'epoch': 2} {'type': 'loss', 'content': 0.0668460875749588, 'timestamp': '2025-09-10 02:43:05.709914', 'step': 14495, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 2847885091968}, 'timestamp': '2025-09-10 02:43:05.744565', 'step': 14495, 'epoch': 2} {'type': 'loss', 'content': 0.07499686628580093, 'timestamp': '2025-09-10 02:43:05.768233', 'step': 14496, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 160], 'flops': 4746299996032}, 'timestamp': '2025-09-10 02:43:05.799640', 'step': 14496, 'epoch': 2} {'type': 'loss', 'content': 0.11364103853702545, 'timestamp': '2025-09-10 02:43:05.802124', 'step': 14497, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 3322488817984}, 'timestamp': '2025-09-10 02:43:05.832488', 'step': 14497, 'epoch': 2} {'type': 'loss', 'content': 0.07838119566440582, 'timestamp': '2025-09-10 02:43:05.835394', 'step': 14498, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 144], 'flops': 4271696270016}, 'timestamp': '2025-09-10 02:43:05.867719', 'step': 14498, 'epoch': 2} {'type': 'loss', 'content': 0.13256581127643585, 'timestamp': '2025-09-10 02:43:05.870913', 'step': 14499, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 3322488817984}, 'timestamp': '2025-09-10 02:43:05.901121', 'step': 14499, 'epoch': 2} {'type': 'loss', 'content': 0.12750525772571564, 'timestamp': '2025-09-10 02:43:05.924650', 'step': 14500, 'epoch': 2} {'type': 'info', 'content': 'Checkpoint saved at step 14500', 'timestamp': '2025-09-10 02:43:12.323344', 'step': 14500, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 3322488817984}, 'timestamp': '2025-09-10 02:43:12.375764', 'step': 14500, 'epoch': 2} {'type': 'loss', 'content': 0.05554128438234329, 'timestamp': '2025-09-10 02:43:12.378110', 'step': 14501, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 3322488817984}, 'timestamp': '2025-09-10 02:43:12.409120', 'step': 14501, 'epoch': 2} {'type': 'loss', 'content': 0.14693397283554077, 'timestamp': '2025-09-10 02:43:12.411455', 'step': 14502, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 3322488817984}, 'timestamp': '2025-09-10 02:43:12.441189', 'step': 14502, 'epoch': 2} {'type': 'loss', 'content': 0.08096324652433395, 'timestamp': '2025-09-10 02:43:12.443145', 'step': 14503, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 2847885091968}, 'timestamp': '2025-09-10 02:43:12.472705', 'step': 14503, 'epoch': 2} {'type': 'loss', 'content': 0.09728311747312546, 'timestamp': '2025-09-10 02:43:12.496481', 'step': 14504, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 2847885091968}, 'timestamp': '2025-09-10 02:43:12.528029', 'step': 14504, 'epoch': 2} {'type': 'loss', 'content': 0.10128262639045715, 'timestamp': '2025-09-10 02:43:12.530406', 'step': 14505, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 3322488817984}, 'timestamp': '2025-09-10 02:43:12.563024', 'step': 14505, 'epoch': 2} {'type': 'loss', 'content': 0.07137354463338852, 'timestamp': '2025-09-10 02:43:12.565431', 'step': 14506, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 3322488817984}, 'timestamp': '2025-09-10 02:43:12.595582', 'step': 14506, 'epoch': 2} {'type': 'loss', 'content': 0.12268517166376114, 'timestamp': '2025-09-10 02:43:12.598094', 'step': 14507, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 3322488817984}, 'timestamp': '2025-09-10 02:43:12.628114', 'step': 14507, 'epoch': 2} {'type': 'loss', 'content': 0.11798865348100662, 'timestamp': '2025-09-10 02:43:12.651825', 'step': 14508, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 3322488817984}, 'timestamp': '2025-09-10 02:43:12.682462', 'step': 14508, 'epoch': 2} {'type': 'loss', 'content': 0.07562775909900665, 'timestamp': '2025-09-10 02:43:12.685005', 'step': 14509, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 3322488817984}, 'timestamp': '2025-09-10 02:43:12.714769', 'step': 14509, 'epoch': 2} {'type': 'loss', 'content': 0.08979729562997818, 'timestamp': '2025-09-10 02:43:12.717520', 'step': 14510, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 3322488817984}, 'timestamp': '2025-09-10 02:43:12.749466', 'step': 14510, 'epoch': 2} {'type': 'loss', 'content': 0.1330501288175583, 'timestamp': '2025-09-10 02:43:12.752889', 'step': 14511, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 144], 'flops': 4271696270016}, 'timestamp': '2025-09-10 02:43:12.782938', 'step': 14511, 'epoch': 2} {'type': 'loss', 'content': 0.17008477449417114, 'timestamp': '2025-09-10 02:43:12.806504', 'step': 14512, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 3322488817984}, 'timestamp': '2025-09-10 02:43:12.837267', 'step': 14512, 'epoch': 2} {'type': 'loss', 'content': 0.12262654304504395, 'timestamp': '2025-09-10 02:43:12.839380', 'step': 14513, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 144], 'flops': 4271696270016}, 'timestamp': '2025-09-10 02:43:12.870115', 'step': 14513, 'epoch': 2} {'type': 'loss', 'content': 0.09906243532896042, 'timestamp': '2025-09-10 02:43:12.872404', 'step': 14514, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 2847885091968}, 'timestamp': '2025-09-10 02:43:12.902907', 'step': 14514, 'epoch': 2} {'type': 'loss', 'content': 0.1283033788204193, 'timestamp': '2025-09-10 02:43:12.905388', 'step': 14515, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 3322488817984}, 'timestamp': '2025-09-10 02:43:12.934703', 'step': 14515, 'epoch': 2} {'type': 'loss', 'content': 0.07456006854772568, 'timestamp': '2025-09-10 02:43:12.958324', 'step': 14516, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 128], 'flops': 3797092544000}, 'timestamp': '2025-09-10 02:43:12.999915', 'step': 14516, 'epoch': 2} {'type': 'loss', 'content': 0.16523806750774384, 'timestamp': '2025-09-10 02:43:13.002201', 'step': 14517, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 144], 'flops': 4271696270016}, 'timestamp': '2025-09-10 02:43:13.033225', 'step': 14517, 'epoch': 2} {'type': 'loss', 'content': 0.1484667956829071, 'timestamp': '2025-09-10 02:43:13.035518', 'step': 14518, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 160], 'flops': 4746299996032}, 'timestamp': '2025-09-10 02:43:13.065701', 'step': 14518, 'epoch': 2} {'type': 'loss', 'content': 0.1948714405298233, 'timestamp': '2025-09-10 02:43:13.068504', 'step': 14519, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 80], 'flops': 2373281365952}, 'timestamp': '2025-09-10 02:43:13.100374', 'step': 14519, 'epoch': 2} {'type': 'loss', 'content': 0.25778907537460327, 'timestamp': '2025-09-10 02:43:13.124002', 'step': 14520, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 144], 'flops': 4271696270016}, 'timestamp': '2025-09-10 02:43:13.157927', 'step': 14520, 'epoch': 2} {'type': 'loss', 'content': 0.11273597180843353, 'timestamp': '2025-09-10 02:43:13.160402', 'step': 14521, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 128], 'flops': 3797092544000}, 'timestamp': '2025-09-10 02:43:13.190694', 'step': 14521, 'epoch': 2} {'type': 'loss', 'content': 0.18446582555770874, 'timestamp': '2025-09-10 02:43:13.192917', 'step': 14522, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 3322488817984}, 'timestamp': '2025-09-10 02:43:13.222638', 'step': 14522, 'epoch': 2} {'type': 'loss', 'content': 0.160700261592865, 'timestamp': '2025-09-10 02:43:13.224748', 'step': 14523, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 144], 'flops': 4271696270016}, 'timestamp': '2025-09-10 02:43:13.257228', 'step': 14523, 'epoch': 2} {'type': 'loss', 'content': 0.13293598592281342, 'timestamp': '2025-09-10 02:43:13.281073', 'step': 14524, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 3322488817984}, 'timestamp': '2025-09-10 02:43:13.311369', 'step': 14524, 'epoch': 2} {'type': 'loss', 'content': 0.09349088370800018, 'timestamp': '2025-09-10 02:43:13.313620', 'step': 14525, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 3322488817984}, 'timestamp': '2025-09-10 02:43:13.343768', 'step': 14525, 'epoch': 2} {'type': 'loss', 'content': 0.1605556607246399, 'timestamp': '2025-09-10 02:43:13.346467', 'step': 14526, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 176], 'flops': 5220903722048}, 'timestamp': '2025-09-10 02:43:13.376917', 'step': 14526, 'epoch': 2} {'type': 'loss', 'content': 0.14656184613704681, 'timestamp': '2025-09-10 02:43:13.381141', 'step': 14527, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 3322488817984}, 'timestamp': '2025-09-10 02:43:13.411529', 'step': 14527, 'epoch': 2} {'type': 'loss', 'content': 0.03362279385328293, 'timestamp': '2025-09-10 02:43:13.435105', 'step': 14528, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 3322488817984}, 'timestamp': '2025-09-10 02:43:13.465429', 'step': 14528, 'epoch': 2} {'type': 'loss', 'content': 0.07456620782613754, 'timestamp': '2025-09-10 02:43:13.467404', 'step': 14529, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 2847885091968}, 'timestamp': '2025-09-10 02:43:13.497325', 'step': 14529, 'epoch': 2} {'type': 'loss', 'content': 0.09504581242799759, 'timestamp': '2025-09-10 02:43:13.499668', 'step': 14530, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 2847885091968}, 'timestamp': '2025-09-10 02:43:13.530313', 'step': 14530, 'epoch': 2} {'type': 'loss', 'content': 0.10755044221878052, 'timestamp': '2025-09-10 02:43:13.535271', 'step': 14531, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 2847885091968}, 'timestamp': '2025-09-10 02:43:13.566324', 'step': 14531, 'epoch': 2} {'type': 'loss', 'content': 0.1228862777352333, 'timestamp': '2025-09-10 02:43:13.589822', 'step': 14532, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 2847885091968}, 'timestamp': '2025-09-10 02:43:13.620846', 'step': 14532, 'epoch': 2} {'type': 'loss', 'content': 0.07739472389221191, 'timestamp': '2025-09-10 02:43:13.623409', 'step': 14533, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 3322488817984}, 'timestamp': '2025-09-10 02:43:13.653133', 'step': 14533, 'epoch': 2} {'type': 'loss', 'content': 0.09644965827465057, 'timestamp': '2025-09-10 02:43:13.655413', 'step': 14534, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 2847885091968}, 'timestamp': '2025-09-10 02:43:13.686716', 'step': 14534, 'epoch': 2} {'type': 'loss', 'content': 0.06117705628275871, 'timestamp': '2025-09-10 02:43:13.688912', 'step': 14535, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 3322488817984}, 'timestamp': '2025-09-10 02:43:13.718914', 'step': 14535, 'epoch': 2} {'type': 'loss', 'content': 0.09286879748106003, 'timestamp': '2025-09-10 02:43:13.742855', 'step': 14536, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 2847885091968}, 'timestamp': '2025-09-10 02:43:13.779750', 'step': 14536, 'epoch': 2} {'type': 'loss', 'content': 0.06070280820131302, 'timestamp': '2025-09-10 02:43:13.782340', 'step': 14537, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 80], 'flops': 2373281365952}, 'timestamp': '2025-09-10 02:43:13.814431', 'step': 14537, 'epoch': 2} {'type': 'loss', 'content': 0.1272563487291336, 'timestamp': '2025-09-10 02:43:13.816514', 'step': 14538, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 3322488817984}, 'timestamp': '2025-09-10 02:43:13.846508', 'step': 14538, 'epoch': 2} {'type': 'loss', 'content': 0.0847548395395279, 'timestamp': '2025-09-10 02:43:13.848839', 'step': 14539, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 2847885091968}, 'timestamp': '2025-09-10 02:43:13.879120', 'step': 14539, 'epoch': 2} {'type': 'loss', 'content': 0.08487871289253235, 'timestamp': '2025-09-10 02:43:13.903043', 'step': 14540, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 3322488817984}, 'timestamp': '2025-09-10 02:43:13.933531', 'step': 14540, 'epoch': 2} {'type': 'loss', 'content': 0.1509573608636856, 'timestamp': '2025-09-10 02:43:13.936602', 'step': 14541, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 3322488817984}, 'timestamp': '2025-09-10 02:43:13.976864', 'step': 14541, 'epoch': 2} {'type': 'loss', 'content': 0.08760366588830948, 'timestamp': '2025-09-10 02:43:13.979125', 'step': 14542, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 2847885091968}, 'timestamp': '2025-09-10 02:43:14.012638', 'step': 14542, 'epoch': 2} {'type': 'loss', 'content': 0.10511404275894165, 'timestamp': '2025-09-10 02:43:14.014987', 'step': 14543, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 3322488817984}, 'timestamp': '2025-09-10 02:43:14.044802', 'step': 14543, 'epoch': 2} {'type': 'loss', 'content': 0.12248364835977554, 'timestamp': '2025-09-10 02:43:14.067953', 'step': 14544, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 128], 'flops': 3797092544000}, 'timestamp': '2025-09-10 02:43:14.099286', 'step': 14544, 'epoch': 2} {'type': 'loss', 'content': 0.15847954154014587, 'timestamp': '2025-09-10 02:43:14.101444', 'step': 14545, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 3322488817984}, 'timestamp': '2025-09-10 02:43:14.136380', 'step': 14545, 'epoch': 2} {'type': 'loss', 'content': 0.1235528215765953, 'timestamp': '2025-09-10 02:43:14.141089', 'step': 14546, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 3322488817984}, 'timestamp': '2025-09-10 02:43:14.175679', 'step': 14546, 'epoch': 2} {'type': 'loss', 'content': 0.11659909784793854, 'timestamp': '2025-09-10 02:43:14.178090', 'step': 14547, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 3322488817984}, 'timestamp': '2025-09-10 02:43:14.208126', 'step': 14547, 'epoch': 2} {'type': 'loss', 'content': 0.10362903773784637, 'timestamp': '2025-09-10 02:43:14.231557', 'step': 14548, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 3322488817984}, 'timestamp': '2025-09-10 02:43:14.262867', 'step': 14548, 'epoch': 2} {'type': 'loss', 'content': 0.09182590246200562, 'timestamp': '2025-09-10 02:43:14.265175', 'step': 14549, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 128], 'flops': 3797092544000}, 'timestamp': '2025-09-10 02:43:14.295465', 'step': 14549, 'epoch': 2} {'type': 'loss', 'content': 0.04192204028367996, 'timestamp': '2025-09-10 02:43:14.297757', 'step': 14550, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 3322488817984}, 'timestamp': '2025-09-10 02:43:14.328035', 'step': 14550, 'epoch': 2} {'type': 'loss', 'content': 0.0738372877240181, 'timestamp': '2025-09-10 02:43:14.330329', 'step': 14551, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 2847885091968}, 'timestamp': '2025-09-10 02:43:14.359896', 'step': 14551, 'epoch': 2} {'type': 'loss', 'content': 0.05433619022369385, 'timestamp': '2025-09-10 02:43:14.386040', 'step': 14552, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 80], 'flops': 2373281365952}, 'timestamp': '2025-09-10 02:43:14.420575', 'step': 14552, 'epoch': 2} {'type': 'loss', 'content': 0.07837124913930893, 'timestamp': '2025-09-10 02:43:14.423091', 'step': 14553, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 2847885091968}, 'timestamp': '2025-09-10 02:43:14.453189', 'step': 14553, 'epoch': 2} {'type': 'loss', 'content': 0.0357072614133358, 'timestamp': '2025-09-10 02:43:14.455598', 'step': 14554, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 3322488817984}, 'timestamp': '2025-09-10 02:43:14.486322', 'step': 14554, 'epoch': 2} {'type': 'loss', 'content': 0.17188239097595215, 'timestamp': '2025-09-10 02:43:14.488759', 'step': 14555, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 144], 'flops': 4271696270016}, 'timestamp': '2025-09-10 02:43:14.519180', 'step': 14555, 'epoch': 2} {'type': 'loss', 'content': 0.11791734397411346, 'timestamp': '2025-09-10 02:43:14.542629', 'step': 14556, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 3322488817984}, 'timestamp': '2025-09-10 02:43:14.579639', 'step': 14556, 'epoch': 2} {'type': 'loss', 'content': 0.09670230746269226, 'timestamp': '2025-09-10 02:43:14.582240', 'step': 14557, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 128], 'flops': 3797092544000}, 'timestamp': '2025-09-10 02:43:14.612156', 'step': 14557, 'epoch': 2} {'type': 'loss', 'content': 0.12035298347473145, 'timestamp': '2025-09-10 02:43:14.616028', 'step': 14558, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 2847885091968}, 'timestamp': '2025-09-10 02:43:14.647540', 'step': 14558, 'epoch': 2} {'type': 'loss', 'content': 0.06771639734506607, 'timestamp': '2025-09-10 02:43:14.650956', 'step': 14559, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 2847885091968}, 'timestamp': '2025-09-10 02:43:14.684623', 'step': 14559, 'epoch': 2} {'type': 'loss', 'content': 0.07222945988178253, 'timestamp': '2025-09-10 02:43:14.709459', 'step': 14560, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 3322488817984}, 'timestamp': '2025-09-10 02:43:14.740632', 'step': 14560, 'epoch': 2} {'type': 'loss', 'content': 0.1463354378938675, 'timestamp': '2025-09-10 02:43:14.743314', 'step': 14561, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 128], 'flops': 3797092544000}, 'timestamp': '2025-09-10 02:43:14.776247', 'step': 14561, 'epoch': 2} {'type': 'loss', 'content': 0.1674414873123169, 'timestamp': '2025-09-10 02:43:14.778975', 'step': 14562, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 3322488817984}, 'timestamp': '2025-09-10 02:43:14.808890', 'step': 14562, 'epoch': 2} {'type': 'loss', 'content': 0.07859130203723907, 'timestamp': '2025-09-10 02:43:14.811392', 'step': 14563, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 80], 'flops': 2373281365952}, 'timestamp': '2025-09-10 02:43:14.841580', 'step': 14563, 'epoch': 2} {'type': 'loss', 'content': 0.19210809469223022, 'timestamp': '2025-09-10 02:43:14.865325', 'step': 14564, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 3322488817984}, 'timestamp': '2025-09-10 02:43:14.896152', 'step': 14564, 'epoch': 2} {'type': 'loss', 'content': 0.08442579954862595, 'timestamp': '2025-09-10 02:43:14.898528', 'step': 14565, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 128], 'flops': 3797092544000}, 'timestamp': '2025-09-10 02:43:14.930220', 'step': 14565, 'epoch': 2} {'type': 'loss', 'content': 0.14484712481498718, 'timestamp': '2025-09-10 02:43:14.932653', 'step': 14566, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 2847885091968}, 'timestamp': '2025-09-10 02:43:14.970642', 'step': 14566, 'epoch': 2} {'type': 'loss', 'content': 0.050106342881917953, 'timestamp': '2025-09-10 02:43:14.975973', 'step': 14567, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 176], 'flops': 5220903722048}, 'timestamp': '2025-09-10 02:43:15.012875', 'step': 14567, 'epoch': 2} {'type': 'loss', 'content': 0.08989645540714264, 'timestamp': '2025-09-10 02:43:15.037868', 'step': 14568, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 128], 'flops': 3797092544000}, 'timestamp': '2025-09-10 02:43:15.068118', 'step': 14568, 'epoch': 2} {'type': 'loss', 'content': 0.034047357738018036, 'timestamp': '2025-09-10 02:43:15.070521', 'step': 14569, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 144], 'flops': 4271696270016}, 'timestamp': '2025-09-10 02:43:15.101201', 'step': 14569, 'epoch': 2} {'type': 'loss', 'content': 0.0981208086013794, 'timestamp': '2025-09-10 02:43:15.103654', 'step': 14570, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 144], 'flops': 4271696270016}, 'timestamp': '2025-09-10 02:43:15.134215', 'step': 14570, 'epoch': 2} {'type': 'loss', 'content': 0.09742794930934906, 'timestamp': '2025-09-10 02:43:15.137256', 'step': 14571, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 128], 'flops': 3797092544000}, 'timestamp': '2025-09-10 02:43:15.168058', 'step': 14571, 'epoch': 2} {'type': 'loss', 'content': 0.18674162030220032, 'timestamp': '2025-09-10 02:43:15.193502', 'step': 14572, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 3322488817984}, 'timestamp': '2025-09-10 02:43:15.224284', 'step': 14572, 'epoch': 2} {'type': 'loss', 'content': 0.0953691378235817, 'timestamp': '2025-09-10 02:43:15.228907', 'step': 14573, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 3322488817984}, 'timestamp': '2025-09-10 02:43:15.265256', 'step': 14573, 'epoch': 2} {'type': 'loss', 'content': 0.049267761409282684, 'timestamp': '2025-09-10 02:43:15.269645', 'step': 14574, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 3322488817984}, 'timestamp': '2025-09-10 02:43:15.306261', 'step': 14574, 'epoch': 2} {'type': 'loss', 'content': 0.12163259088993073, 'timestamp': '2025-09-10 02:43:15.309350', 'step': 14575, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 2847885091968}, 'timestamp': '2025-09-10 02:43:15.342954', 'step': 14575, 'epoch': 2} {'type': 'loss', 'content': 0.058467935770750046, 'timestamp': '2025-09-10 02:43:15.366907', 'step': 14576, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 128], 'flops': 3797092544000}, 'timestamp': '2025-09-10 02:43:15.397746', 'step': 14576, 'epoch': 2} {'type': 'loss', 'content': 0.15735416114330292, 'timestamp': '2025-09-10 02:43:15.400453', 'step': 14577, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 128], 'flops': 3797092544000}, 'timestamp': '2025-09-10 02:43:15.431852', 'step': 14577, 'epoch': 2} {'type': 'loss', 'content': 0.22821585834026337, 'timestamp': '2025-09-10 02:43:15.433980', 'step': 14578, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 3322488817984}, 'timestamp': '2025-09-10 02:43:15.465184', 'step': 14578, 'epoch': 2} {'type': 'loss', 'content': 0.044791094958782196, 'timestamp': '2025-09-10 02:43:15.467759', 'step': 14579, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 128], 'flops': 3797092544000}, 'timestamp': '2025-09-10 02:43:15.498707', 'step': 14579, 'epoch': 2} {'type': 'loss', 'content': 0.09493757039308548, 'timestamp': '2025-09-10 02:43:15.522466', 'step': 14580, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 3322488817984}, 'timestamp': '2025-09-10 02:43:15.552769', 'step': 14580, 'epoch': 2} {'type': 'loss', 'content': 0.1434180587530136, 'timestamp': '2025-09-10 02:43:15.555091', 'step': 14581, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 128], 'flops': 3797092544000}, 'timestamp': '2025-09-10 02:43:15.587001', 'step': 14581, 'epoch': 2} {'type': 'loss', 'content': 0.0629965141415596, 'timestamp': '2025-09-10 02:43:15.589896', 'step': 14582, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 2847885091968}, 'timestamp': '2025-09-10 02:43:15.622642', 'step': 14582, 'epoch': 2} {'type': 'loss', 'content': 0.0769822895526886, 'timestamp': '2025-09-10 02:43:15.625259', 'step': 14583, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 2847885091968}, 'timestamp': '2025-09-10 02:43:15.654976', 'step': 14583, 'epoch': 2} {'type': 'loss', 'content': 0.07728433609008789, 'timestamp': '2025-09-10 02:43:15.678742', 'step': 14584, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 2847885091968}, 'timestamp': '2025-09-10 02:43:15.710125', 'step': 14584, 'epoch': 2} {'type': 'loss', 'content': 0.057739123702049255, 'timestamp': '2025-09-10 02:43:15.712590', 'step': 14585, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 128], 'flops': 3797092544000}, 'timestamp': '2025-09-10 02:43:15.743246', 'step': 14585, 'epoch': 2} {'type': 'loss', 'content': 0.07466785609722137, 'timestamp': '2025-09-10 02:43:15.745559', 'step': 14586, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 2847885091968}, 'timestamp': '2025-09-10 02:43:15.776849', 'step': 14586, 'epoch': 2} {'type': 'loss', 'content': 0.05599140003323555, 'timestamp': '2025-09-10 02:43:15.779462', 'step': 14587, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 128], 'flops': 3797092544000}, 'timestamp': '2025-09-10 02:43:15.809278', 'step': 14587, 'epoch': 2} {'type': 'loss', 'content': 0.15918485820293427, 'timestamp': '2025-09-10 02:43:15.832766', 'step': 14588, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 144], 'flops': 4271696270016}, 'timestamp': '2025-09-10 02:43:15.863208', 'step': 14588, 'epoch': 2} {'type': 'loss', 'content': 0.08419372141361237, 'timestamp': '2025-09-10 02:43:15.867078', 'step': 14589, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 2847885091968}, 'timestamp': '2025-09-10 02:43:15.897060', 'step': 14589, 'epoch': 2} {'type': 'loss', 'content': 0.1480441689491272, 'timestamp': '2025-09-10 02:43:15.899413', 'step': 14590, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 2847885091968}, 'timestamp': '2025-09-10 02:43:15.929953', 'step': 14590, 'epoch': 2} {'type': 'loss', 'content': 0.08345229178667068, 'timestamp': '2025-09-10 02:43:15.932625', 'step': 14591, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 2847885091968}, 'timestamp': '2025-09-10 02:43:15.962641', 'step': 14591, 'epoch': 2} {'type': 'loss', 'content': 0.06592836230993271, 'timestamp': '2025-09-10 02:43:15.988121', 'step': 14592, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 3322488817984}, 'timestamp': '2025-09-10 02:43:16.024332', 'step': 14592, 'epoch': 2} {'type': 'loss', 'content': 0.10963179916143417, 'timestamp': '2025-09-10 02:43:16.027792', 'step': 14593, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 128], 'flops': 3797092544000}, 'timestamp': '2025-09-10 02:43:16.064739', 'step': 14593, 'epoch': 2} {'type': 'loss', 'content': 0.056557659059762955, 'timestamp': '2025-09-10 02:43:16.067476', 'step': 14594, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 128], 'flops': 3797092544000}, 'timestamp': '2025-09-10 02:43:16.098382', 'step': 14594, 'epoch': 2} {'type': 'loss', 'content': 0.09961508214473724, 'timestamp': '2025-09-10 02:43:16.100521', 'step': 14595, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 3322488817984}, 'timestamp': '2025-09-10 02:43:16.130809', 'step': 14595, 'epoch': 2} {'type': 'loss', 'content': 0.11399218440055847, 'timestamp': '2025-09-10 02:43:16.154923', 'step': 14596, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 3322488817984}, 'timestamp': '2025-09-10 02:43:16.187194', 'step': 14596, 'epoch': 2} {'type': 'loss', 'content': 0.03421986848115921, 'timestamp': '2025-09-10 02:43:16.189608', 'step': 14597, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 144], 'flops': 4271696270016}, 'timestamp': '2025-09-10 02:43:16.220228', 'step': 14597, 'epoch': 2} {'type': 'loss', 'content': 0.06268046796321869, 'timestamp': '2025-09-10 02:43:16.222509', 'step': 14598, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 80], 'flops': 2373281365952}, 'timestamp': '2025-09-10 02:43:16.253209', 'step': 14598, 'epoch': 2} {'type': 'loss', 'content': 0.04381534084677696, 'timestamp': '2025-09-10 02:43:16.255425', 'step': 14599, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 3322488817984}, 'timestamp': '2025-09-10 02:43:16.285655', 'step': 14599, 'epoch': 2} {'type': 'loss', 'content': 0.07068787515163422, 'timestamp': '2025-09-10 02:43:16.309085', 'step': 14600, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 2847885091968}, 'timestamp': '2025-09-10 02:43:16.340026', 'step': 14600, 'epoch': 2} {'type': 'loss', 'content': 0.07875728607177734, 'timestamp': '2025-09-10 02:43:16.342083', 'step': 14601, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 3322488817984}, 'timestamp': '2025-09-10 02:43:16.371673', 'step': 14601, 'epoch': 2} {'type': 'loss', 'content': 0.056192416697740555, 'timestamp': '2025-09-10 02:43:16.374612', 'step': 14602, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 128], 'flops': 3797092544000}, 'timestamp': '2025-09-10 02:43:16.405595', 'step': 14602, 'epoch': 2} {'type': 'loss', 'content': 0.09745848923921585, 'timestamp': '2025-09-10 02:43:16.407803', 'step': 14603, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 2847885091968}, 'timestamp': '2025-09-10 02:43:16.438720', 'step': 14603, 'epoch': 2} {'type': 'loss', 'content': 0.14419935643672943, 'timestamp': '2025-09-10 02:43:16.462882', 'step': 14604, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 128], 'flops': 3797092544000}, 'timestamp': '2025-09-10 02:43:16.493384', 'step': 14604, 'epoch': 2} {'type': 'loss', 'content': 0.044362980872392654, 'timestamp': '2025-09-10 02:43:16.497099', 'step': 14605, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 144], 'flops': 4271696270016}, 'timestamp': '2025-09-10 02:43:16.527927', 'step': 14605, 'epoch': 2} {'type': 'loss', 'content': 0.10934888571500778, 'timestamp': '2025-09-10 02:43:16.530495', 'step': 14606, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 144], 'flops': 4271696270016}, 'timestamp': '2025-09-10 02:43:16.561383', 'step': 14606, 'epoch': 2} {'type': 'loss', 'content': 0.0535648837685585, 'timestamp': '2025-09-10 02:43:16.563920', 'step': 14607, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 3322488817984}, 'timestamp': '2025-09-10 02:43:16.593587', 'step': 14607, 'epoch': 2} {'type': 'loss', 'content': 0.07974172383546829, 'timestamp': '2025-09-10 02:43:16.617501', 'step': 14608, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 144], 'flops': 4271696270016}, 'timestamp': '2025-09-10 02:43:16.650033', 'step': 14608, 'epoch': 2} {'type': 'loss', 'content': 0.1383969932794571, 'timestamp': '2025-09-10 02:43:16.652232', 'step': 14609, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 3322488817984}, 'timestamp': '2025-09-10 02:43:16.682043', 'step': 14609, 'epoch': 2} {'type': 'loss', 'content': 0.08142092078924179, 'timestamp': '2025-09-10 02:43:16.684523', 'step': 14610, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 3322488817984}, 'timestamp': '2025-09-10 02:43:16.714465', 'step': 14610, 'epoch': 2} {'type': 'loss', 'content': 0.058515895158052444, 'timestamp': '2025-09-10 02:43:16.716924', 'step': 14611, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 2847885091968}, 'timestamp': '2025-09-10 02:43:16.748641', 'step': 14611, 'epoch': 2} {'type': 'loss', 'content': 0.15831094980239868, 'timestamp': '2025-09-10 02:43:16.772197', 'step': 14612, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 3322488817984}, 'timestamp': '2025-09-10 02:43:16.804061', 'step': 14612, 'epoch': 2} {'type': 'loss', 'content': 0.06091402471065521, 'timestamp': '2025-09-10 02:43:16.806161', 'step': 14613, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 144], 'flops': 4271696270016}, 'timestamp': '2025-09-10 02:43:16.836884', 'step': 14613, 'epoch': 2} {'type': 'loss', 'content': 0.09198255836963654, 'timestamp': '2025-09-10 02:43:16.839326', 'step': 14614, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 144], 'flops': 4271696270016}, 'timestamp': '2025-09-10 02:43:16.869602', 'step': 14614, 'epoch': 2} {'type': 'loss', 'content': 0.07481976598501205, 'timestamp': '2025-09-10 02:43:16.872016', 'step': 14615, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 3322488817984}, 'timestamp': '2025-09-10 02:43:16.903920', 'step': 14615, 'epoch': 2} {'type': 'loss', 'content': 0.09727192670106888, 'timestamp': '2025-09-10 02:43:16.927384', 'step': 14616, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 128], 'flops': 3797092544000}, 'timestamp': '2025-09-10 02:43:16.958170', 'step': 14616, 'epoch': 2} {'type': 'loss', 'content': 0.061162225902080536, 'timestamp': '2025-09-10 02:43:16.960275', 'step': 14617, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 192], 'flops': 5695507448064}, 'timestamp': '2025-09-10 02:43:16.990385', 'step': 14617, 'epoch': 2} {'type': 'loss', 'content': 0.13849644362926483, 'timestamp': '2025-09-10 02:43:16.994947', 'step': 14618, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 80], 'flops': 2373281365952}, 'timestamp': '2025-09-10 02:43:17.025312', 'step': 14618, 'epoch': 2} {'type': 'loss', 'content': 0.1858573704957962, 'timestamp': '2025-09-10 02:43:17.027450', 'step': 14619, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 2847885091968}, 'timestamp': '2025-09-10 02:43:17.058362', 'step': 14619, 'epoch': 2} {'type': 'loss', 'content': 0.036693867295980453, 'timestamp': '2025-09-10 02:43:17.081824', 'step': 14620, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 2847885091968}, 'timestamp': '2025-09-10 02:43:17.112311', 'step': 14620, 'epoch': 2} {'type': 'loss', 'content': 0.19820812344551086, 'timestamp': '2025-09-10 02:43:17.114475', 'step': 14621, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 128], 'flops': 3797092544000}, 'timestamp': '2025-09-10 02:43:17.144871', 'step': 14621, 'epoch': 2} {'type': 'loss', 'content': 0.12591210007667542, 'timestamp': '2025-09-10 02:43:17.147268', 'step': 14622, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 2847885091968}, 'timestamp': '2025-09-10 02:43:17.177216', 'step': 14622, 'epoch': 2} {'type': 'loss', 'content': 0.1129225343465805, 'timestamp': '2025-09-10 02:43:17.179562', 'step': 14623, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 128], 'flops': 3797092544000}, 'timestamp': '2025-09-10 02:43:17.211944', 'step': 14623, 'epoch': 2} {'type': 'loss', 'content': 0.09690447151660919, 'timestamp': '2025-09-10 02:43:17.235763', 'step': 14624, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 3322488817984}, 'timestamp': '2025-09-10 02:43:17.265989', 'step': 14624, 'epoch': 2} {'type': 'loss', 'content': 0.10262444615364075, 'timestamp': '2025-09-10 02:43:17.268170', 'step': 14625, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 3322488817984}, 'timestamp': '2025-09-10 02:43:17.299348', 'step': 14625, 'epoch': 2} {'type': 'loss', 'content': 0.06737653911113739, 'timestamp': '2025-09-10 02:43:17.301432', 'step': 14626, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 3322488817984}, 'timestamp': '2025-09-10 02:43:17.331325', 'step': 14626, 'epoch': 2} {'type': 'loss', 'content': 0.1897125542163849, 'timestamp': '2025-09-10 02:43:17.333698', 'step': 14627, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 160], 'flops': 4746299996032}, 'timestamp': '2025-09-10 02:43:17.365278', 'step': 14627, 'epoch': 2} {'type': 'loss', 'content': 0.06231902539730072, 'timestamp': '2025-09-10 02:43:17.388780', 'step': 14628, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 144], 'flops': 4271696270016}, 'timestamp': '2025-09-10 02:43:17.419362', 'step': 14628, 'epoch': 2} {'type': 'loss', 'content': 0.10924430191516876, 'timestamp': '2025-09-10 02:43:17.421369', 'step': 14629, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 80], 'flops': 2373281365952}, 'timestamp': '2025-09-10 02:43:17.452270', 'step': 14629, 'epoch': 2} {'type': 'loss', 'content': 0.06378708779811859, 'timestamp': '2025-09-10 02:43:17.454509', 'step': 14630, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 128], 'flops': 3797092544000}, 'timestamp': '2025-09-10 02:43:17.484183', 'step': 14630, 'epoch': 2} {'type': 'loss', 'content': 0.1282503604888916, 'timestamp': '2025-09-10 02:43:17.487602', 'step': 14631, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 128], 'flops': 3797092544000}, 'timestamp': '2025-09-10 02:43:17.517880', 'step': 14631, 'epoch': 2} {'type': 'loss', 'content': 0.11698223650455475, 'timestamp': '2025-09-10 02:43:17.554011', 'step': 14632, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 3322488817984}, 'timestamp': '2025-09-10 02:43:17.637850', 'step': 14632, 'epoch': 2} {'type': 'loss', 'content': 0.08372370898723602, 'timestamp': '2025-09-10 02:43:17.657902', 'step': 14633, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 2847885091968}, 'timestamp': '2025-09-10 02:43:17.733846', 'step': 14633, 'epoch': 2} {'type': 'loss', 'content': 0.08241871744394302, 'timestamp': '2025-09-10 02:43:17.741910', 'step': 14634, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 3322488817984}, 'timestamp': '2025-09-10 02:43:17.798587', 'step': 14634, 'epoch': 2} {'type': 'loss', 'content': 0.08098902553319931, 'timestamp': '2025-09-10 02:43:17.816463', 'step': 14635, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 3322488817984}, 'timestamp': '2025-09-10 02:43:17.865872', 'step': 14635, 'epoch': 2} {'type': 'loss', 'content': 0.10477851331233978, 'timestamp': '2025-09-10 02:43:17.892485', 'step': 14636, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 128], 'flops': 3797092544000}, 'timestamp': '2025-09-10 02:43:17.972407', 'step': 14636, 'epoch': 2} {'type': 'loss', 'content': 0.14765016734600067, 'timestamp': '2025-09-10 02:43:17.978071', 'step': 14637, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 128], 'flops': 3797092544000}, 'timestamp': '2025-09-10 02:43:18.049665', 'step': 14637, 'epoch': 2} {'type': 'loss', 'content': 0.06574170291423798, 'timestamp': '2025-09-10 02:43:18.059933', 'step': 14638, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 3322488817984}, 'timestamp': '2025-09-10 02:43:18.108148', 'step': 14638, 'epoch': 2} {'type': 'loss', 'content': 0.051850561052560806, 'timestamp': '2025-09-10 02:43:18.125747', 'step': 14639, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 128], 'flops': 3797092544000}, 'timestamp': '2025-09-10 02:43:18.217124', 'step': 14639, 'epoch': 2} {'type': 'loss', 'content': 0.09493031352758408, 'timestamp': '2025-09-10 02:43:18.261080', 'step': 14640, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 3322488817984}, 'timestamp': '2025-09-10 02:43:18.338268', 'step': 14640, 'epoch': 2} {'type': 'loss', 'content': 0.14617660641670227, 'timestamp': '2025-09-10 02:43:18.358433', 'step': 14641, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 2847885091968}, 'timestamp': '2025-09-10 02:43:18.434975', 'step': 14641, 'epoch': 2} {'type': 'loss', 'content': 0.11623336374759674, 'timestamp': '2025-09-10 02:43:18.442877', 'step': 14642, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 3322488817984}, 'timestamp': '2025-09-10 02:43:18.496735', 'step': 14642, 'epoch': 2} {'type': 'loss', 'content': 0.07537676393985748, 'timestamp': '2025-09-10 02:43:18.516111', 'step': 14643, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 144], 'flops': 4271696270016}, 'timestamp': '2025-09-10 02:43:18.568032', 'step': 14643, 'epoch': 2} {'type': 'loss', 'content': 0.16166682541370392, 'timestamp': '2025-09-10 02:43:18.597848', 'step': 14644, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 128], 'flops': 3797092544000}, 'timestamp': '2025-09-10 02:43:18.639721', 'step': 14644, 'epoch': 2} {'type': 'loss', 'content': 0.14660818874835968, 'timestamp': '2025-09-10 02:43:18.642302', 'step': 14645, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 128], 'flops': 3797092544000}, 'timestamp': '2025-09-10 02:43:18.676811', 'step': 14645, 'epoch': 2} {'type': 'loss', 'content': 0.07154721021652222, 'timestamp': '2025-09-10 02:43:18.679544', 'step': 14646, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 128], 'flops': 3797092544000}, 'timestamp': '2025-09-10 02:43:18.713323', 'step': 14646, 'epoch': 2} {'type': 'loss', 'content': 0.1476498246192932, 'timestamp': '2025-09-10 02:43:18.716471', 'step': 14647, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 128], 'flops': 3797092544000}, 'timestamp': '2025-09-10 02:43:18.751473', 'step': 14647, 'epoch': 2} {'type': 'loss', 'content': 0.08611578494310379, 'timestamp': '2025-09-10 02:43:18.775648', 'step': 14648, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 3322488817984}, 'timestamp': '2025-09-10 02:43:18.809538', 'step': 14648, 'epoch': 2} {'type': 'loss', 'content': 0.12812110781669617, 'timestamp': '2025-09-10 02:43:18.813574', 'step': 14649, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 2847885091968}, 'timestamp': '2025-09-10 02:43:18.848946', 'step': 14649, 'epoch': 2} {'type': 'loss', 'content': 0.08156805485486984, 'timestamp': '2025-09-10 02:43:18.851117', 'step': 14650, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 128], 'flops': 3797092544000}, 'timestamp': '2025-09-10 02:43:18.881321', 'step': 14650, 'epoch': 2} {'type': 'loss', 'content': 0.12456521391868591, 'timestamp': '2025-09-10 02:43:18.883729', 'step': 14651, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 128], 'flops': 3797092544000}, 'timestamp': '2025-09-10 02:43:18.915076', 'step': 14651, 'epoch': 2} {'type': 'loss', 'content': 0.1628435254096985, 'timestamp': '2025-09-10 02:43:18.939258', 'step': 14652, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 144], 'flops': 4271696270016}, 'timestamp': '2025-09-10 02:43:18.970224', 'step': 14652, 'epoch': 2} {'type': 'loss', 'content': 0.16929787397384644, 'timestamp': '2025-09-10 02:43:18.972181', 'step': 14653, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 128], 'flops': 3797092544000}, 'timestamp': '2025-09-10 02:43:19.002425', 'step': 14653, 'epoch': 2} {'type': 'loss', 'content': 0.12713849544525146, 'timestamp': '2025-09-10 02:43:19.005625', 'step': 14654, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 3322488817984}, 'timestamp': '2025-09-10 02:43:19.035977', 'step': 14654, 'epoch': 2} {'type': 'loss', 'content': 0.14470796287059784, 'timestamp': '2025-09-10 02:43:19.038589', 'step': 14655, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 80], 'flops': 2373281365952}, 'timestamp': '2025-09-10 02:43:19.070248', 'step': 14655, 'epoch': 2} {'type': 'loss', 'content': 0.10256734490394592, 'timestamp': '2025-09-10 02:43:19.093820', 'step': 14656, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 2847885091968}, 'timestamp': '2025-09-10 02:43:19.124115', 'step': 14656, 'epoch': 2} {'type': 'loss', 'content': 0.11644113808870316, 'timestamp': '2025-09-10 02:43:19.126661', 'step': 14657, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 128], 'flops': 3797092544000}, 'timestamp': '2025-09-10 02:43:19.157416', 'step': 14657, 'epoch': 2} {'type': 'loss', 'content': 0.05104602500796318, 'timestamp': '2025-09-10 02:43:19.159525', 'step': 14658, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 144], 'flops': 4271696270016}, 'timestamp': '2025-09-10 02:43:19.189718', 'step': 14658, 'epoch': 2} {'type': 'loss', 'content': 0.07142273336648941, 'timestamp': '2025-09-10 02:43:19.193054', 'step': 14659, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 3322488817984}, 'timestamp': '2025-09-10 02:43:19.227306', 'step': 14659, 'epoch': 2} {'type': 'loss', 'content': 0.07047004997730255, 'timestamp': '2025-09-10 02:43:19.251412', 'step': 14660, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 2847885091968}, 'timestamp': '2025-09-10 02:43:19.287623', 'step': 14660, 'epoch': 2} {'type': 'loss', 'content': 0.03999217972159386, 'timestamp': '2025-09-10 02:43:19.292584', 'step': 14661, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 144], 'flops': 4271696270016}, 'timestamp': '2025-09-10 02:43:19.323369', 'step': 14661, 'epoch': 2} {'type': 'loss', 'content': 0.1109824851155281, 'timestamp': '2025-09-10 02:43:19.328914', 'step': 14662, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 3322488817984}, 'timestamp': '2025-09-10 02:43:19.361031', 'step': 14662, 'epoch': 2} {'type': 'loss', 'content': 0.20619137585163116, 'timestamp': '2025-09-10 02:43:19.366340', 'step': 14663, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 128], 'flops': 3797092544000}, 'timestamp': '2025-09-10 02:43:19.398926', 'step': 14663, 'epoch': 2} {'type': 'loss', 'content': 0.10617729276418686, 'timestamp': '2025-09-10 02:43:19.422487', 'step': 14664, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 2847885091968}, 'timestamp': '2025-09-10 02:43:19.455825', 'step': 14664, 'epoch': 2} {'type': 'loss', 'content': 0.10817497968673706, 'timestamp': '2025-09-10 02:43:19.460524', 'step': 14665, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 2847885091968}, 'timestamp': '2025-09-10 02:43:19.494026', 'step': 14665, 'epoch': 2} {'type': 'loss', 'content': 0.0965668186545372, 'timestamp': '2025-09-10 02:43:19.496621', 'step': 14666, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 3322488817984}, 'timestamp': '2025-09-10 02:43:19.530772', 'step': 14666, 'epoch': 2} {'type': 'loss', 'content': 0.19243161380290985, 'timestamp': '2025-09-10 02:43:19.533561', 'step': 14667, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 3322488817984}, 'timestamp': '2025-09-10 02:43:19.564542', 'step': 14667, 'epoch': 2} {'type': 'loss', 'content': 0.10968427360057831, 'timestamp': '2025-09-10 02:43:19.588134', 'step': 14668, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 144], 'flops': 4271696270016}, 'timestamp': '2025-09-10 02:43:19.619791', 'step': 14668, 'epoch': 2} {'type': 'loss', 'content': 0.08226604014635086, 'timestamp': '2025-09-10 02:43:19.623682', 'step': 14669, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 176], 'flops': 5220903722048}, 'timestamp': '2025-09-10 02:43:19.657636', 'step': 14669, 'epoch': 2} {'type': 'loss', 'content': 0.08226919174194336, 'timestamp': '2025-09-10 02:43:19.662010', 'step': 14670, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 3322488817984}, 'timestamp': '2025-09-10 02:43:19.691840', 'step': 14670, 'epoch': 2} {'type': 'loss', 'content': 0.04455525055527687, 'timestamp': '2025-09-10 02:43:19.694559', 'step': 14671, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 128], 'flops': 3797092544000}, 'timestamp': '2025-09-10 02:43:19.726782', 'step': 14671, 'epoch': 2} {'type': 'loss', 'content': 0.08116070181131363, 'timestamp': '2025-09-10 02:43:19.750101', 'step': 14672, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 3322488817984}, 'timestamp': '2025-09-10 02:43:19.782522', 'step': 14672, 'epoch': 2} {'type': 'loss', 'content': 0.09578534215688705, 'timestamp': '2025-09-10 02:43:19.785198', 'step': 14673, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 2847885091968}, 'timestamp': '2025-09-10 02:43:19.815587', 'step': 14673, 'epoch': 2} {'type': 'loss', 'content': 0.17480860650539398, 'timestamp': '2025-09-10 02:43:19.818499', 'step': 14674, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 3322488817984}, 'timestamp': '2025-09-10 02:43:19.850935', 'step': 14674, 'epoch': 2} {'type': 'loss', 'content': 0.06802119314670563, 'timestamp': '2025-09-10 02:43:19.853281', 'step': 14675, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 3322488817984}, 'timestamp': '2025-09-10 02:43:19.882743', 'step': 14675, 'epoch': 2} {'type': 'loss', 'content': 0.10448389500379562, 'timestamp': '2025-09-10 02:43:19.906598', 'step': 14676, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 2847885091968}, 'timestamp': '2025-09-10 02:43:19.941248', 'step': 14676, 'epoch': 2} {'type': 'loss', 'content': 0.11247995495796204, 'timestamp': '2025-09-10 02:43:19.943827', 'step': 14677, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 3322488817984}, 'timestamp': '2025-09-10 02:43:19.977375', 'step': 14677, 'epoch': 2} {'type': 'loss', 'content': 0.065033458173275, 'timestamp': '2025-09-10 02:43:19.984644', 'step': 14678, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 2847885091968}, 'timestamp': '2025-09-10 02:43:20.017356', 'step': 14678, 'epoch': 2} {'type': 'loss', 'content': 0.05453713610768318, 'timestamp': '2025-09-10 02:43:20.020337', 'step': 14679, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 160], 'flops': 4746299996032}, 'timestamp': '2025-09-10 02:43:20.054361', 'step': 14679, 'epoch': 2} {'type': 'loss', 'content': 0.11345401406288147, 'timestamp': '2025-09-10 02:43:20.077911', 'step': 14680, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 2847885091968}, 'timestamp': '2025-09-10 02:43:20.115244', 'step': 14680, 'epoch': 2} {'type': 'loss', 'content': 0.08305457979440689, 'timestamp': '2025-09-10 02:43:20.117636', 'step': 14681, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 128], 'flops': 3797092544000}, 'timestamp': '2025-09-10 02:43:20.148686', 'step': 14681, 'epoch': 2} {'type': 'loss', 'content': 0.08572859317064285, 'timestamp': '2025-09-10 02:43:20.150948', 'step': 14682, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 3322488817984}, 'timestamp': '2025-09-10 02:43:20.181070', 'step': 14682, 'epoch': 2} {'type': 'loss', 'content': 0.1060982272028923, 'timestamp': '2025-09-10 02:43:20.183674', 'step': 14683, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 80], 'flops': 2373281365952}, 'timestamp': '2025-09-10 02:43:20.215353', 'step': 14683, 'epoch': 2} {'type': 'loss', 'content': 0.09335669875144958, 'timestamp': '2025-09-10 02:43:20.238914', 'step': 14684, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 128], 'flops': 3797092544000}, 'timestamp': '2025-09-10 02:43:20.276386', 'step': 14684, 'epoch': 2} {'type': 'loss', 'content': 0.12935087084770203, 'timestamp': '2025-09-10 02:43:20.278698', 'step': 14685, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 128], 'flops': 3797092544000}, 'timestamp': '2025-09-10 02:43:20.308434', 'step': 14685, 'epoch': 2} {'type': 'loss', 'content': 0.12672096490859985, 'timestamp': '2025-09-10 02:43:20.310619', 'step': 14686, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 128], 'flops': 3797092544000}, 'timestamp': '2025-09-10 02:43:20.341785', 'step': 14686, 'epoch': 2} {'type': 'loss', 'content': 0.060825858265161514, 'timestamp': '2025-09-10 02:43:20.344642', 'step': 14687, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 2847885091968}, 'timestamp': '2025-09-10 02:43:20.378195', 'step': 14687, 'epoch': 2} {'type': 'loss', 'content': 0.04437196999788284, 'timestamp': '2025-09-10 02:43:20.401621', 'step': 14688, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 176], 'flops': 5220903722048}, 'timestamp': '2025-09-10 02:43:20.431874', 'step': 14688, 'epoch': 2} {'type': 'loss', 'content': 0.09904494136571884, 'timestamp': '2025-09-10 02:43:20.434537', 'step': 14689, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 128], 'flops': 3797092544000}, 'timestamp': '2025-09-10 02:43:20.465006', 'step': 14689, 'epoch': 2} {'type': 'loss', 'content': 0.09778065979480743, 'timestamp': '2025-09-10 02:43:20.467529', 'step': 14690, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 2847885091968}, 'timestamp': '2025-09-10 02:43:20.500543', 'step': 14690, 'epoch': 2} {'type': 'loss', 'content': 0.11328308284282684, 'timestamp': '2025-09-10 02:43:20.502731', 'step': 14691, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 2847885091968}, 'timestamp': '2025-09-10 02:43:20.532614', 'step': 14691, 'epoch': 2} {'type': 'loss', 'content': 0.06530524045228958, 'timestamp': '2025-09-10 02:43:20.557531', 'step': 14692, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 2847885091968}, 'timestamp': '2025-09-10 02:43:20.588495', 'step': 14692, 'epoch': 2} {'type': 'loss', 'content': 0.040191590785980225, 'timestamp': '2025-09-10 02:43:20.590685', 'step': 14693, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 2847885091968}, 'timestamp': '2025-09-10 02:43:20.620882', 'step': 14693, 'epoch': 2} {'type': 'loss', 'content': 0.08012858778238297, 'timestamp': '2025-09-10 02:43:20.623141', 'step': 14694, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 2847885091968}, 'timestamp': '2025-09-10 02:43:20.653940', 'step': 14694, 'epoch': 2} {'type': 'loss', 'content': 0.015578118152916431, 'timestamp': '2025-09-10 02:43:20.657689', 'step': 14695, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 128], 'flops': 3797092544000}, 'timestamp': '2025-09-10 02:43:20.690756', 'step': 14695, 'epoch': 2} {'type': 'loss', 'content': 0.03277591988444328, 'timestamp': '2025-09-10 02:43:20.714365', 'step': 14696, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 2847885091968}, 'timestamp': '2025-09-10 02:43:20.744825', 'step': 14696, 'epoch': 2} {'type': 'loss', 'content': 0.08645704388618469, 'timestamp': '2025-09-10 02:43:20.747189', 'step': 14697, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 2847885091968}, 'timestamp': '2025-09-10 02:43:20.778556', 'step': 14697, 'epoch': 2} {'type': 'loss', 'content': 0.04481436684727669, 'timestamp': '2025-09-10 02:43:20.791442', 'step': 14698, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 3322488817984}, 'timestamp': '2025-09-10 02:43:20.825866', 'step': 14698, 'epoch': 2} {'type': 'loss', 'content': 0.10470803081989288, 'timestamp': '2025-09-10 02:43:20.828077', 'step': 14699, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 144], 'flops': 4271696270016}, 'timestamp': '2025-09-10 02:43:20.864729', 'step': 14699, 'epoch': 2} {'type': 'loss', 'content': 0.13274306058883667, 'timestamp': '2025-09-10 02:43:20.888137', 'step': 14700, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 144], 'flops': 4271696270016}, 'timestamp': '2025-09-10 02:43:20.918901', 'step': 14700, 'epoch': 2} {'type': 'loss', 'content': 0.08113899827003479, 'timestamp': '2025-09-10 02:43:20.922456', 'step': 14701, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 3322488817984}, 'timestamp': '2025-09-10 02:43:20.952957', 'step': 14701, 'epoch': 2} {'type': 'loss', 'content': 0.02658083103597164, 'timestamp': '2025-09-10 02:43:20.956462', 'step': 14702, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 2847885091968}, 'timestamp': '2025-09-10 02:43:20.989681', 'step': 14702, 'epoch': 2} {'type': 'loss', 'content': 0.172275573015213, 'timestamp': '2025-09-10 02:43:20.991856', 'step': 14703, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 3322488817984}, 'timestamp': '2025-09-10 02:43:21.023530', 'step': 14703, 'epoch': 2} {'type': 'loss', 'content': 0.048669058829545975, 'timestamp': '2025-09-10 02:43:21.047005', 'step': 14704, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 3322488817984}, 'timestamp': '2025-09-10 02:43:21.090213', 'step': 14704, 'epoch': 2} {'type': 'loss', 'content': 0.12405917793512344, 'timestamp': '2025-09-10 02:43:21.092647', 'step': 14705, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 3322488817984}, 'timestamp': '2025-09-10 02:43:21.122211', 'step': 14705, 'epoch': 2} {'type': 'loss', 'content': 0.10277149826288223, 'timestamp': '2025-09-10 02:43:21.124647', 'step': 14706, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 2847885091968}, 'timestamp': '2025-09-10 02:43:21.155283', 'step': 14706, 'epoch': 2} {'type': 'loss', 'content': 0.0650743618607521, 'timestamp': '2025-09-10 02:43:21.157293', 'step': 14707, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 3322488817984}, 'timestamp': '2025-09-10 02:43:21.200766', 'step': 14707, 'epoch': 2} {'type': 'loss', 'content': 0.15644006431102753, 'timestamp': '2025-09-10 02:43:21.224304', 'step': 14708, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 160], 'flops': 4746299996032}, 'timestamp': '2025-09-10 02:43:21.255361', 'step': 14708, 'epoch': 2} {'type': 'loss', 'content': 0.07655786722898483, 'timestamp': '2025-09-10 02:43:21.259228', 'step': 14709, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 2847885091968}, 'timestamp': '2025-09-10 02:43:21.292172', 'step': 14709, 'epoch': 2} {'type': 'loss', 'content': 0.07214972376823425, 'timestamp': '2025-09-10 02:43:21.294318', 'step': 14710, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 3322488817984}, 'timestamp': '2025-09-10 02:43:21.324351', 'step': 14710, 'epoch': 2} {'type': 'loss', 'content': 0.06411440670490265, 'timestamp': '2025-09-10 02:43:21.327875', 'step': 14711, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 3322488817984}, 'timestamp': '2025-09-10 02:43:21.360117', 'step': 14711, 'epoch': 2} {'type': 'loss', 'content': 0.10441254079341888, 'timestamp': '2025-09-10 02:43:21.383903', 'step': 14712, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 2847885091968}, 'timestamp': '2025-09-10 02:43:21.414842', 'step': 14712, 'epoch': 2} {'type': 'loss', 'content': 0.14904609322547913, 'timestamp': '2025-09-10 02:43:21.417445', 'step': 14713, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 160], 'flops': 4746299996032}, 'timestamp': '2025-09-10 02:43:21.448051', 'step': 14713, 'epoch': 2} {'type': 'loss', 'content': 0.09982328861951828, 'timestamp': '2025-09-10 02:43:21.451662', 'step': 14714, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 2847885091968}, 'timestamp': '2025-09-10 02:43:21.482393', 'step': 14714, 'epoch': 2} {'type': 'loss', 'content': 0.0730830505490303, 'timestamp': '2025-09-10 02:43:21.484631', 'step': 14715, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 3322488817984}, 'timestamp': '2025-09-10 02:43:21.514347', 'step': 14715, 'epoch': 2} {'type': 'loss', 'content': 0.11994755268096924, 'timestamp': '2025-09-10 02:43:21.537924', 'step': 14716, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 128], 'flops': 3797092544000}, 'timestamp': '2025-09-10 02:43:21.570656', 'step': 14716, 'epoch': 2} {'type': 'loss', 'content': 0.08299731463193893, 'timestamp': '2025-09-10 02:43:21.572986', 'step': 14717, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 3322488817984}, 'timestamp': '2025-09-10 02:43:21.603400', 'step': 14717, 'epoch': 2} {'type': 'loss', 'content': 0.10927833616733551, 'timestamp': '2025-09-10 02:43:21.605751', 'step': 14718, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 3322488817984}, 'timestamp': '2025-09-10 02:43:21.636254', 'step': 14718, 'epoch': 2} {'type': 'loss', 'content': 0.14321747422218323, 'timestamp': '2025-09-10 02:43:21.638856', 'step': 14719, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 2847885091968}, 'timestamp': '2025-09-10 02:43:21.672199', 'step': 14719, 'epoch': 2} {'type': 'loss', 'content': 0.18908828496932983, 'timestamp': '2025-09-10 02:43:21.695771', 'step': 14720, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 128], 'flops': 3797092544000}, 'timestamp': '2025-09-10 02:43:21.726167', 'step': 14720, 'epoch': 2} {'type': 'loss', 'content': 0.16282671689987183, 'timestamp': '2025-09-10 02:43:21.729585', 'step': 14721, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 128], 'flops': 3797092544000}, 'timestamp': '2025-09-10 02:43:21.759680', 'step': 14721, 'epoch': 2} {'type': 'loss', 'content': 0.0922078862786293, 'timestamp': '2025-09-10 02:43:21.762513', 'step': 14722, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 2847885091968}, 'timestamp': '2025-09-10 02:43:21.794226', 'step': 14722, 'epoch': 2} {'type': 'loss', 'content': 0.06632821261882782, 'timestamp': '2025-09-10 02:43:21.796252', 'step': 14723, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 2847885091968}, 'timestamp': '2025-09-10 02:43:21.826029', 'step': 14723, 'epoch': 2} {'type': 'loss', 'content': 0.05793672427535057, 'timestamp': '2025-09-10 02:43:21.849601', 'step': 14724, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 2847885091968}, 'timestamp': '2025-09-10 02:43:21.880856', 'step': 14724, 'epoch': 2} {'type': 'loss', 'content': 0.14644764363765717, 'timestamp': '2025-09-10 02:43:21.883571', 'step': 14725, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 128], 'flops': 3797092544000}, 'timestamp': '2025-09-10 02:43:21.914813', 'step': 14725, 'epoch': 2} {'type': 'loss', 'content': 0.09644299745559692, 'timestamp': '2025-09-10 02:43:21.917884', 'step': 14726, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 2847885091968}, 'timestamp': '2025-09-10 02:43:21.949450', 'step': 14726, 'epoch': 2} {'type': 'loss', 'content': 0.019972121343016624, 'timestamp': '2025-09-10 02:43:21.953390', 'step': 14727, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 2847885091968}, 'timestamp': '2025-09-10 02:43:21.984121', 'step': 14727, 'epoch': 2} {'type': 'loss', 'content': 0.13938596844673157, 'timestamp': '2025-09-10 02:43:22.008581', 'step': 14728, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 2847885091968}, 'timestamp': '2025-09-10 02:43:22.038787', 'step': 14728, 'epoch': 2} {'type': 'loss', 'content': 0.10956380516290665, 'timestamp': '2025-09-10 02:43:22.042318', 'step': 14729, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 3322488817984}, 'timestamp': '2025-09-10 02:43:22.074208', 'step': 14729, 'epoch': 2} {'type': 'loss', 'content': 0.10212760418653488, 'timestamp': '2025-09-10 02:43:22.076298', 'step': 14730, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 3322488817984}, 'timestamp': '2025-09-10 02:43:22.106251', 'step': 14730, 'epoch': 2} {'type': 'loss', 'content': 0.13638538122177124, 'timestamp': '2025-09-10 02:43:22.108903', 'step': 14731, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 144], 'flops': 4271696270016}, 'timestamp': '2025-09-10 02:43:22.139347', 'step': 14731, 'epoch': 2} {'type': 'loss', 'content': 0.14790280163288116, 'timestamp': '2025-09-10 02:43:22.162930', 'step': 14732, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 3322488817984}, 'timestamp': '2025-09-10 02:43:22.206329', 'step': 14732, 'epoch': 2} {'type': 'loss', 'content': 0.1376335620880127, 'timestamp': '2025-09-10 02:43:22.208915', 'step': 14733, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 128], 'flops': 3797092544000}, 'timestamp': '2025-09-10 02:43:22.240548', 'step': 14733, 'epoch': 2} {'type': 'loss', 'content': 0.09738381952047348, 'timestamp': '2025-09-10 02:43:22.242935', 'step': 14734, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 2847885091968}, 'timestamp': '2025-09-10 02:43:22.274874', 'step': 14734, 'epoch': 2} {'type': 'loss', 'content': 0.05657666176557541, 'timestamp': '2025-09-10 02:43:22.277224', 'step': 14735, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 128], 'flops': 3797092544000}, 'timestamp': '2025-09-10 02:43:22.309647', 'step': 14735, 'epoch': 2} {'type': 'loss', 'content': 0.1549900621175766, 'timestamp': '2025-09-10 02:43:22.333626', 'step': 14736, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 2847885091968}, 'timestamp': '2025-09-10 02:43:22.364559', 'step': 14736, 'epoch': 2} {'type': 'loss', 'content': 0.040787339210510254, 'timestamp': '2025-09-10 02:43:22.367310', 'step': 14737, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 144], 'flops': 4271696270016}, 'timestamp': '2025-09-10 02:43:22.403004', 'step': 14737, 'epoch': 2} {'type': 'loss', 'content': 0.13676588237285614, 'timestamp': '2025-09-10 02:43:22.406217', 'step': 14738, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 3322488817984}, 'timestamp': '2025-09-10 02:43:22.440193', 'step': 14738, 'epoch': 2} {'type': 'loss', 'content': 0.056639380753040314, 'timestamp': '2025-09-10 02:43:22.442770', 'step': 14739, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 3322488817984}, 'timestamp': '2025-09-10 02:43:22.476768', 'step': 14739, 'epoch': 2} {'type': 'loss', 'content': 0.07894163578748703, 'timestamp': '2025-09-10 02:43:22.503173', 'step': 14740, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 2847885091968}, 'timestamp': '2025-09-10 02:43:22.578033', 'step': 14740, 'epoch': 2} {'type': 'loss', 'content': 0.2151884138584137, 'timestamp': '2025-09-10 02:43:22.584162', 'step': 14741, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 3322488817984}, 'timestamp': '2025-09-10 02:43:22.620351', 'step': 14741, 'epoch': 2} {'type': 'loss', 'content': 0.05566801503300667, 'timestamp': '2025-09-10 02:43:22.623912', 'step': 14742, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 2847885091968}, 'timestamp': '2025-09-10 02:43:22.657269', 'step': 14742, 'epoch': 2} {'type': 'loss', 'content': 0.08229733258485794, 'timestamp': '2025-09-10 02:43:22.661722', 'step': 14743, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 2847885091968}, 'timestamp': '2025-09-10 02:43:22.700738', 'step': 14743, 'epoch': 2} {'type': 'loss', 'content': 0.0025429611559957266, 'timestamp': '2025-09-10 02:43:22.725173', 'step': 14744, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 128], 'flops': 3797092544000}, 'timestamp': '2025-09-10 02:43:22.759733', 'step': 14744, 'epoch': 2} {'type': 'loss', 'content': 0.04389524087309837, 'timestamp': '2025-09-10 02:43:22.766396', 'step': 14745, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 160], 'flops': 4746299996032}, 'timestamp': '2025-09-10 02:43:22.808157', 'step': 14745, 'epoch': 2} {'type': 'loss', 'content': 0.11057983338832855, 'timestamp': '2025-09-10 02:43:22.813161', 'step': 14746, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 2847885091968}, 'timestamp': '2025-09-10 02:43:22.853351', 'step': 14746, 'epoch': 2} {'type': 'loss', 'content': 0.08491654694080353, 'timestamp': '2025-09-10 02:43:22.860988', 'step': 14747, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 3322488817984}, 'timestamp': '2025-09-10 02:43:22.925365', 'step': 14747, 'epoch': 2} {'type': 'loss', 'content': 0.08729538321495056, 'timestamp': '2025-09-10 02:43:22.958941', 'step': 14748, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 144], 'flops': 4271696270016}, 'timestamp': '2025-09-10 02:43:23.014890', 'step': 14748, 'epoch': 2} {'type': 'loss', 'content': 0.056024886667728424, 'timestamp': '2025-09-10 02:43:23.027898', 'step': 14749, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 3322488817984}, 'timestamp': '2025-09-10 02:43:23.089762', 'step': 14749, 'epoch': 2} {'type': 'loss', 'content': 0.09503307938575745, 'timestamp': '2025-09-10 02:43:23.102786', 'step': 14750, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 2847885091968}, 'timestamp': '2025-09-10 02:43:23.167425', 'step': 14750, 'epoch': 2} {'type': 'loss', 'content': 0.08049322664737701, 'timestamp': '2025-09-10 02:43:23.173545', 'step': 14751, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 3322488817984}, 'timestamp': '2025-09-10 02:43:23.217015', 'step': 14751, 'epoch': 2} {'type': 'loss', 'content': 0.09738199412822723, 'timestamp': '2025-09-10 02:43:23.241739', 'step': 14752, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 3322488817984}, 'timestamp': '2025-09-10 02:43:23.278664', 'step': 14752, 'epoch': 2} {'type': 'loss', 'content': 0.1016940325498581, 'timestamp': '2025-09-10 02:43:23.282696', 'step': 14753, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 3322488817984}, 'timestamp': '2025-09-10 02:43:23.318231', 'step': 14753, 'epoch': 2} {'type': 'loss', 'content': 0.07642752677202225, 'timestamp': '2025-09-10 02:43:23.321927', 'step': 14754, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 3322488817984}, 'timestamp': '2025-09-10 02:43:23.359526', 'step': 14754, 'epoch': 2} {'type': 'loss', 'content': 0.0875941663980484, 'timestamp': '2025-09-10 02:43:23.364561', 'step': 14755, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 3322488817984}, 'timestamp': '2025-09-10 02:43:23.399302', 'step': 14755, 'epoch': 2} {'type': 'loss', 'content': 0.11251349002122879, 'timestamp': '2025-09-10 02:43:23.430162', 'step': 14756, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 2847885091968}, 'timestamp': '2025-09-10 02:43:23.472459', 'step': 14756, 'epoch': 2} {'type': 'loss', 'content': 0.10369029641151428, 'timestamp': '2025-09-10 02:43:23.477839', 'step': 14757, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 3322488817984}, 'timestamp': '2025-09-10 02:43:23.515412', 'step': 14757, 'epoch': 2} {'type': 'loss', 'content': 0.12453782558441162, 'timestamp': '2025-09-10 02:43:23.518292', 'step': 14758, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 3322488817984}, 'timestamp': '2025-09-10 02:43:23.549885', 'step': 14758, 'epoch': 2} {'type': 'loss', 'content': 0.054347191005945206, 'timestamp': '2025-09-10 02:43:23.552231', 'step': 14759, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 2847885091968}, 'timestamp': '2025-09-10 02:43:23.582663', 'step': 14759, 'epoch': 2} {'type': 'loss', 'content': 0.059221051633358, 'timestamp': '2025-09-10 02:43:23.606514', 'step': 14760, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 2847885091968}, 'timestamp': '2025-09-10 02:43:23.637215', 'step': 14760, 'epoch': 2} {'type': 'loss', 'content': 0.10276734828948975, 'timestamp': '2025-09-10 02:43:23.639327', 'step': 14761, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 3322488817984}, 'timestamp': '2025-09-10 02:43:23.669747', 'step': 14761, 'epoch': 2} {'type': 'loss', 'content': 0.10991991311311722, 'timestamp': '2025-09-10 02:43:23.672225', 'step': 14762, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 144], 'flops': 4271696270016}, 'timestamp': '2025-09-10 02:43:23.702390', 'step': 14762, 'epoch': 2} {'type': 'loss', 'content': 0.11315009742975235, 'timestamp': '2025-09-10 02:43:23.704715', 'step': 14763, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 2847885091968}, 'timestamp': '2025-09-10 02:43:23.734562', 'step': 14763, 'epoch': 2} {'type': 'loss', 'content': 0.05309247598052025, 'timestamp': '2025-09-10 02:43:23.758439', 'step': 14764, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 144], 'flops': 4271696270016}, 'timestamp': '2025-09-10 02:43:23.790517', 'step': 14764, 'epoch': 2} {'type': 'loss', 'content': 0.07913877069950104, 'timestamp': '2025-09-10 02:43:23.794906', 'step': 14765, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 3322488817984}, 'timestamp': '2025-09-10 02:43:23.828208', 'step': 14765, 'epoch': 2} {'type': 'loss', 'content': 0.10309168696403503, 'timestamp': '2025-09-10 02:43:23.830593', 'step': 14766, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 128], 'flops': 3797092544000}, 'timestamp': '2025-09-10 02:43:23.861367', 'step': 14766, 'epoch': 2} {'type': 'loss', 'content': 0.06623229384422302, 'timestamp': '2025-09-10 02:43:23.863698', 'step': 14767, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 3322488817984}, 'timestamp': '2025-09-10 02:43:23.894352', 'step': 14767, 'epoch': 2} {'type': 'loss', 'content': 0.11489325761795044, 'timestamp': '2025-09-10 02:43:23.917906', 'step': 14768, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 2847885091968}, 'timestamp': '2025-09-10 02:43:23.948158', 'step': 14768, 'epoch': 2} {'type': 'loss', 'content': 0.12037670612335205, 'timestamp': '2025-09-10 02:43:23.950618', 'step': 14769, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 128], 'flops': 3797092544000}, 'timestamp': '2025-09-10 02:43:23.982472', 'step': 14769, 'epoch': 2} {'type': 'loss', 'content': 0.1094687432050705, 'timestamp': '2025-09-10 02:43:23.985263', 'step': 14770, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 128], 'flops': 3797092544000}, 'timestamp': '2025-09-10 02:43:24.015616', 'step': 14770, 'epoch': 2} {'type': 'loss', 'content': 0.08384706825017929, 'timestamp': '2025-09-10 02:43:24.018492', 'step': 14771, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 2847885091968}, 'timestamp': '2025-09-10 02:43:24.052395', 'step': 14771, 'epoch': 2} {'type': 'loss', 'content': 0.08261770009994507, 'timestamp': '2025-09-10 02:43:24.076123', 'step': 14772, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 160], 'flops': 4746299996032}, 'timestamp': '2025-09-10 02:43:24.106796', 'step': 14772, 'epoch': 2} {'type': 'loss', 'content': 0.1107078269124031, 'timestamp': '2025-09-10 02:43:24.109128', 'step': 14773, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 128], 'flops': 3797092544000}, 'timestamp': '2025-09-10 02:43:24.139048', 'step': 14773, 'epoch': 2} {'type': 'loss', 'content': 0.11672214418649673, 'timestamp': '2025-09-10 02:43:24.141433', 'step': 14774, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 3322488817984}, 'timestamp': '2025-09-10 02:43:24.172367', 'step': 14774, 'epoch': 2} {'type': 'loss', 'content': 0.12618541717529297, 'timestamp': '2025-09-10 02:43:24.175356', 'step': 14775, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 3322488817984}, 'timestamp': '2025-09-10 02:43:24.206535', 'step': 14775, 'epoch': 2} {'type': 'loss', 'content': 0.07383807003498077, 'timestamp': '2025-09-10 02:43:24.229936', 'step': 14776, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 144], 'flops': 4271696270016}, 'timestamp': '2025-09-10 02:43:24.261732', 'step': 14776, 'epoch': 2} {'type': 'loss', 'content': 0.13178254663944244, 'timestamp': '2025-09-10 02:43:24.263949', 'step': 14777, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 144], 'flops': 4271696270016}, 'timestamp': '2025-09-10 02:43:24.294344', 'step': 14777, 'epoch': 2} {'type': 'loss', 'content': 0.13877373933792114, 'timestamp': '2025-09-10 02:43:24.296988', 'step': 14778, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 2847885091968}, 'timestamp': '2025-09-10 02:43:24.327283', 'step': 14778, 'epoch': 2} {'type': 'loss', 'content': 0.08295851200819016, 'timestamp': '2025-09-10 02:43:24.329968', 'step': 14779, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 2847885091968}, 'timestamp': '2025-09-10 02:43:24.360336', 'step': 14779, 'epoch': 2} {'type': 'loss', 'content': 0.11327465623617172, 'timestamp': '2025-09-10 02:43:24.383949', 'step': 14780, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 2847885091968}, 'timestamp': '2025-09-10 02:43:24.415047', 'step': 14780, 'epoch': 2} {'type': 'loss', 'content': 0.2524588704109192, 'timestamp': '2025-09-10 02:43:24.417236', 'step': 14781, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 3322488817984}, 'timestamp': '2025-09-10 02:43:24.447563', 'step': 14781, 'epoch': 2} {'type': 'loss', 'content': 0.0925779864192009, 'timestamp': '2025-09-10 02:43:24.450236', 'step': 14782, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 2847885091968}, 'timestamp': '2025-09-10 02:43:24.480886', 'step': 14782, 'epoch': 2} {'type': 'loss', 'content': 0.0817563608288765, 'timestamp': '2025-09-10 02:43:24.482765', 'step': 14783, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 3322488817984}, 'timestamp': '2025-09-10 02:43:24.512806', 'step': 14783, 'epoch': 2} {'type': 'loss', 'content': 0.026896212249994278, 'timestamp': '2025-09-10 02:43:24.536719', 'step': 14784, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 3322488817984}, 'timestamp': '2025-09-10 02:43:24.567290', 'step': 14784, 'epoch': 2} {'type': 'loss', 'content': 0.0790257528424263, 'timestamp': '2025-09-10 02:43:24.569806', 'step': 14785, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 128], 'flops': 3797092544000}, 'timestamp': '2025-09-10 02:43:24.600670', 'step': 14785, 'epoch': 2} {'type': 'loss', 'content': 0.09427336603403091, 'timestamp': '2025-09-10 02:43:24.602818', 'step': 14786, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 128], 'flops': 3797092544000}, 'timestamp': '2025-09-10 02:43:24.633176', 'step': 14786, 'epoch': 2} {'type': 'loss', 'content': 0.07785215973854065, 'timestamp': '2025-09-10 02:43:24.635378', 'step': 14787, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 2847885091968}, 'timestamp': '2025-09-10 02:43:24.666067', 'step': 14787, 'epoch': 2} {'type': 'loss', 'content': 0.10692547261714935, 'timestamp': '2025-09-10 02:43:24.689482', 'step': 14788, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 2847885091968}, 'timestamp': '2025-09-10 02:43:24.719666', 'step': 14788, 'epoch': 2} {'type': 'loss', 'content': 0.07501757144927979, 'timestamp': '2025-09-10 02:43:24.721879', 'step': 14789, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 3322488817984}, 'timestamp': '2025-09-10 02:43:24.751429', 'step': 14789, 'epoch': 2} {'type': 'loss', 'content': 0.16973350942134857, 'timestamp': '2025-09-10 02:43:24.753660', 'step': 14790, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 3322488817984}, 'timestamp': '2025-09-10 02:43:24.784154', 'step': 14790, 'epoch': 2} {'type': 'loss', 'content': 0.06859229505062103, 'timestamp': '2025-09-10 02:43:24.786400', 'step': 14791, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 128], 'flops': 3797092544000}, 'timestamp': '2025-09-10 02:43:24.816470', 'step': 14791, 'epoch': 2} {'type': 'loss', 'content': 0.08783964812755585, 'timestamp': '2025-09-10 02:43:24.839577', 'step': 14792, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 3322488817984}, 'timestamp': '2025-09-10 02:43:24.869923', 'step': 14792, 'epoch': 2} {'type': 'loss', 'content': 0.10282031446695328, 'timestamp': '2025-09-10 02:43:24.871893', 'step': 14793, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 128], 'flops': 3797092544000}, 'timestamp': '2025-09-10 02:43:24.901719', 'step': 14793, 'epoch': 2} {'type': 'loss', 'content': 0.057108260691165924, 'timestamp': '2025-09-10 02:43:24.903931', 'step': 14794, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 128], 'flops': 3797092544000}, 'timestamp': '2025-09-10 02:43:24.933866', 'step': 14794, 'epoch': 2} {'type': 'loss', 'content': 0.06739924848079681, 'timestamp': '2025-09-10 02:43:24.936779', 'step': 14795, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 128], 'flops': 3797092544000}, 'timestamp': '2025-09-10 02:43:24.967598', 'step': 14795, 'epoch': 2} {'type': 'loss', 'content': 0.0901266559958458, 'timestamp': '2025-09-10 02:43:24.990987', 'step': 14796, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 3322488817984}, 'timestamp': '2025-09-10 02:43:25.022386', 'step': 14796, 'epoch': 2} {'type': 'loss', 'content': 0.1475249081850052, 'timestamp': '2025-09-10 02:43:25.024567', 'step': 14797, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 3322488817984}, 'timestamp': '2025-09-10 02:43:25.055440', 'step': 14797, 'epoch': 2} {'type': 'loss', 'content': 0.04876380413770676, 'timestamp': '2025-09-10 02:43:25.058125', 'step': 14798, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 160], 'flops': 4746299996032}, 'timestamp': '2025-09-10 02:43:25.088500', 'step': 14798, 'epoch': 2} {'type': 'loss', 'content': 0.10551363974809647, 'timestamp': '2025-09-10 02:43:25.091463', 'step': 14799, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 3322488817984}, 'timestamp': '2025-09-10 02:43:25.121360', 'step': 14799, 'epoch': 2} {'type': 'loss', 'content': 0.10823224484920502, 'timestamp': '2025-09-10 02:43:25.144583', 'step': 14800, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 2847885091968}, 'timestamp': '2025-09-10 02:43:25.175207', 'step': 14800, 'epoch': 2} {'type': 'loss', 'content': 0.11908117681741714, 'timestamp': '2025-09-10 02:43:25.177361', 'step': 14801, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 3322488817984}, 'timestamp': '2025-09-10 02:43:25.206933', 'step': 14801, 'epoch': 2} {'type': 'loss', 'content': 0.13559198379516602, 'timestamp': '2025-09-10 02:43:25.209261', 'step': 14802, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 3322488817984}, 'timestamp': '2025-09-10 02:43:25.239363', 'step': 14802, 'epoch': 2} {'type': 'loss', 'content': 0.0791204497218132, 'timestamp': '2025-09-10 02:43:25.241573', 'step': 14803, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 2847885091968}, 'timestamp': '2025-09-10 02:43:25.271373', 'step': 14803, 'epoch': 2} {'type': 'loss', 'content': 0.12541936337947845, 'timestamp': '2025-09-10 02:43:25.294638', 'step': 14804, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 128], 'flops': 3797092544000}, 'timestamp': '2025-09-10 02:43:25.324828', 'step': 14804, 'epoch': 2} {'type': 'loss', 'content': 0.055237025022506714, 'timestamp': '2025-09-10 02:43:25.327175', 'step': 14805, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 128], 'flops': 3797092544000}, 'timestamp': '2025-09-10 02:43:25.357494', 'step': 14805, 'epoch': 2} {'type': 'loss', 'content': 0.14516735076904297, 'timestamp': '2025-09-10 02:43:25.359703', 'step': 14806, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 128], 'flops': 3797092544000}, 'timestamp': '2025-09-10 02:43:25.393249', 'step': 14806, 'epoch': 2} {'type': 'loss', 'content': 0.07023707032203674, 'timestamp': '2025-09-10 02:43:25.395733', 'step': 14807, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 128], 'flops': 3797092544000}, 'timestamp': '2025-09-10 02:43:25.425838', 'step': 14807, 'epoch': 2} {'type': 'loss', 'content': 0.08581805974245071, 'timestamp': '2025-09-10 02:43:25.449508', 'step': 14808, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 2847885091968}, 'timestamp': '2025-09-10 02:43:25.480388', 'step': 14808, 'epoch': 2} {'type': 'loss', 'content': 0.05009593814611435, 'timestamp': '2025-09-10 02:43:25.482653', 'step': 14809, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 3322488817984}, 'timestamp': '2025-09-10 02:43:25.513128', 'step': 14809, 'epoch': 2} {'type': 'loss', 'content': 0.06869087368249893, 'timestamp': '2025-09-10 02:43:25.515264', 'step': 14810, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 128], 'flops': 3797092544000}, 'timestamp': '2025-09-10 02:43:25.547716', 'step': 14810, 'epoch': 2} {'type': 'loss', 'content': 0.09127428382635117, 'timestamp': '2025-09-10 02:43:25.550062', 'step': 14811, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 2847885091968}, 'timestamp': '2025-09-10 02:43:25.580196', 'step': 14811, 'epoch': 2} {'type': 'loss', 'content': 0.13702595233917236, 'timestamp': '2025-09-10 02:43:25.603381', 'step': 14812, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 3322488817984}, 'timestamp': '2025-09-10 02:43:25.635025', 'step': 14812, 'epoch': 2} {'type': 'loss', 'content': 0.09182902425527573, 'timestamp': '2025-09-10 02:43:25.637164', 'step': 14813, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 2847885091968}, 'timestamp': '2025-09-10 02:43:25.666897', 'step': 14813, 'epoch': 2} {'type': 'loss', 'content': 0.08274418860673904, 'timestamp': '2025-09-10 02:43:25.669066', 'step': 14814, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 2847885091968}, 'timestamp': '2025-09-10 02:43:25.698610', 'step': 14814, 'epoch': 2} {'type': 'loss', 'content': 0.10807483643293381, 'timestamp': '2025-09-10 02:43:25.700859', 'step': 14815, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 80], 'flops': 2373281365952}, 'timestamp': '2025-09-10 02:43:25.731203', 'step': 14815, 'epoch': 2} {'type': 'loss', 'content': 0.08320259302854538, 'timestamp': '2025-09-10 02:43:25.754838', 'step': 14816, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 3322488817984}, 'timestamp': '2025-09-10 02:43:25.786679', 'step': 14816, 'epoch': 2} {'type': 'loss', 'content': 0.18427309393882751, 'timestamp': '2025-09-10 02:43:25.794276', 'step': 14817, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 144], 'flops': 4271696270016}, 'timestamp': '2025-09-10 02:43:25.833234', 'step': 14817, 'epoch': 2} {'type': 'loss', 'content': 0.040220022201538086, 'timestamp': '2025-09-10 02:43:25.835676', 'step': 14818, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 2847885091968}, 'timestamp': '2025-09-10 02:43:25.866090', 'step': 14818, 'epoch': 2} {'type': 'loss', 'content': 0.12401244044303894, 'timestamp': '2025-09-10 02:43:25.869194', 'step': 14819, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 128], 'flops': 3797092544000}, 'timestamp': '2025-09-10 02:43:25.899052', 'step': 14819, 'epoch': 2} {'type': 'loss', 'content': 0.07784286141395569, 'timestamp': '2025-09-10 02:43:25.922582', 'step': 14820, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 128], 'flops': 3797092544000}, 'timestamp': '2025-09-10 02:43:25.955439', 'step': 14820, 'epoch': 2} {'type': 'loss', 'content': 0.1709814816713333, 'timestamp': '2025-09-10 02:43:25.957867', 'step': 14821, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 2847885091968}, 'timestamp': '2025-09-10 02:43:25.988421', 'step': 14821, 'epoch': 2} {'type': 'loss', 'content': 0.09859165549278259, 'timestamp': '2025-09-10 02:43:25.990659', 'step': 14822, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 3322488817984}, 'timestamp': '2025-09-10 02:43:26.021135', 'step': 14822, 'epoch': 2} {'type': 'loss', 'content': 0.02750578708946705, 'timestamp': '2025-09-10 02:43:26.023423', 'step': 14823, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 2847885091968}, 'timestamp': '2025-09-10 02:43:26.056846', 'step': 14823, 'epoch': 2} {'type': 'loss', 'content': 0.08123037964105606, 'timestamp': '2025-09-10 02:43:26.080921', 'step': 14824, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 2847885091968}, 'timestamp': '2025-09-10 02:43:26.111133', 'step': 14824, 'epoch': 2} {'type': 'loss', 'content': 0.19584552943706512, 'timestamp': '2025-09-10 02:43:26.113529', 'step': 14825, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 2847885091968}, 'timestamp': '2025-09-10 02:43:26.143482', 'step': 14825, 'epoch': 2} {'type': 'loss', 'content': 0.05696595460176468, 'timestamp': '2025-09-10 02:43:26.145740', 'step': 14826, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 128], 'flops': 3797092544000}, 'timestamp': '2025-09-10 02:43:26.175496', 'step': 14826, 'epoch': 2} {'type': 'loss', 'content': 0.055485088378190994, 'timestamp': '2025-09-10 02:43:26.178793', 'step': 14827, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 2847885091968}, 'timestamp': '2025-09-10 02:43:26.210803', 'step': 14827, 'epoch': 2} {'type': 'loss', 'content': 0.13192640244960785, 'timestamp': '2025-09-10 02:43:26.235092', 'step': 14828, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 3322488817984}, 'timestamp': '2025-09-10 02:43:26.268418', 'step': 14828, 'epoch': 2} {'type': 'loss', 'content': 0.12664861977100372, 'timestamp': '2025-09-10 02:43:26.270588', 'step': 14829, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 3322488817984}, 'timestamp': '2025-09-10 02:43:26.303356', 'step': 14829, 'epoch': 2} {'type': 'loss', 'content': 0.10975755751132965, 'timestamp': '2025-09-10 02:43:26.306186', 'step': 14830, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 176], 'flops': 5220903722048}, 'timestamp': '2025-09-10 02:43:26.337746', 'step': 14830, 'epoch': 2} {'type': 'loss', 'content': 0.06640625, 'timestamp': '2025-09-10 02:43:26.342072', 'step': 14831, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 3322488817984}, 'timestamp': '2025-09-10 02:43:26.373812', 'step': 14831, 'epoch': 2} {'type': 'loss', 'content': 0.09851177781820297, 'timestamp': '2025-09-10 02:43:26.396944', 'step': 14832, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 3322488817984}, 'timestamp': '2025-09-10 02:43:26.427341', 'step': 14832, 'epoch': 2} {'type': 'loss', 'content': 0.11896780878305435, 'timestamp': '2025-09-10 02:43:26.429667', 'step': 14833, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 144], 'flops': 4271696270016}, 'timestamp': '2025-09-10 02:43:26.462555', 'step': 14833, 'epoch': 2} {'type': 'loss', 'content': 0.1504165232181549, 'timestamp': '2025-09-10 02:43:26.465674', 'step': 14834, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 3322488817984}, 'timestamp': '2025-09-10 02:43:26.496190', 'step': 14834, 'epoch': 2} {'type': 'loss', 'content': 0.09768921136856079, 'timestamp': '2025-09-10 02:43:26.498336', 'step': 14835, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 128], 'flops': 3797092544000}, 'timestamp': '2025-09-10 02:43:26.528885', 'step': 14835, 'epoch': 2} {'type': 'loss', 'content': 0.15620927512645721, 'timestamp': '2025-09-10 02:43:26.552480', 'step': 14836, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 128], 'flops': 3797092544000}, 'timestamp': '2025-09-10 02:43:26.583138', 'step': 14836, 'epoch': 2} {'type': 'loss', 'content': 0.10122406482696533, 'timestamp': '2025-09-10 02:43:26.585766', 'step': 14837, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 128], 'flops': 3797092544000}, 'timestamp': '2025-09-10 02:43:26.616944', 'step': 14837, 'epoch': 2} {'type': 'loss', 'content': 0.06563529372215271, 'timestamp': '2025-09-10 02:43:26.619535', 'step': 14838, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 3322488817984}, 'timestamp': '2025-09-10 02:43:26.650584', 'step': 14838, 'epoch': 2} {'type': 'loss', 'content': 0.041132114827632904, 'timestamp': '2025-09-10 02:43:26.653059', 'step': 14839, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 128], 'flops': 3797092544000}, 'timestamp': '2025-09-10 02:43:26.683889', 'step': 14839, 'epoch': 2} {'type': 'loss', 'content': 0.13001017272472382, 'timestamp': '2025-09-10 02:43:26.707868', 'step': 14840, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 3322488817984}, 'timestamp': '2025-09-10 02:43:26.737730', 'step': 14840, 'epoch': 2} {'type': 'loss', 'content': 0.11672374606132507, 'timestamp': '2025-09-10 02:43:26.740574', 'step': 14841, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 2847885091968}, 'timestamp': '2025-09-10 02:43:26.771660', 'step': 14841, 'epoch': 2} {'type': 'loss', 'content': 0.07690789550542831, 'timestamp': '2025-09-10 02:43:26.774068', 'step': 14842, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 3322488817984}, 'timestamp': '2025-09-10 02:43:26.804171', 'step': 14842, 'epoch': 2} {'type': 'loss', 'content': 0.11165250092744827, 'timestamp': '2025-09-10 02:43:26.807927', 'step': 14843, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 3322488817984}, 'timestamp': '2025-09-10 02:43:26.838287', 'step': 14843, 'epoch': 2} {'type': 'loss', 'content': 0.09300092607736588, 'timestamp': '2025-09-10 02:43:26.863589', 'step': 14844, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 3322488817984}, 'timestamp': '2025-09-10 02:43:26.893721', 'step': 14844, 'epoch': 2} {'type': 'loss', 'content': 0.1299503594636917, 'timestamp': '2025-09-10 02:43:26.896575', 'step': 14845, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 128], 'flops': 3797092544000}, 'timestamp': '2025-09-10 02:43:26.927307', 'step': 14845, 'epoch': 2} {'type': 'loss', 'content': 0.04360746592283249, 'timestamp': '2025-09-10 02:43:26.929519', 'step': 14846, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 3322488817984}, 'timestamp': '2025-09-10 02:43:26.960233', 'step': 14846, 'epoch': 2} {'type': 'loss', 'content': 0.03886956721544266, 'timestamp': '2025-09-10 02:43:26.962529', 'step': 14847, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 128], 'flops': 3797092544000}, 'timestamp': '2025-09-10 02:43:26.992992', 'step': 14847, 'epoch': 2} {'type': 'loss', 'content': 0.10383052378892899, 'timestamp': '2025-09-10 02:43:27.016453', 'step': 14848, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 128], 'flops': 3797092544000}, 'timestamp': '2025-09-10 02:43:27.047492', 'step': 14848, 'epoch': 2} {'type': 'loss', 'content': 0.12878838181495667, 'timestamp': '2025-09-10 02:43:27.050269', 'step': 14849, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 128], 'flops': 3797092544000}, 'timestamp': '2025-09-10 02:43:27.080852', 'step': 14849, 'epoch': 2} {'type': 'loss', 'content': 0.09416835755109787, 'timestamp': '2025-09-10 02:43:27.083032', 'step': 14850, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 128], 'flops': 3797092544000}, 'timestamp': '2025-09-10 02:43:27.112819', 'step': 14850, 'epoch': 2} {'type': 'loss', 'content': 0.13806161284446716, 'timestamp': '2025-09-10 02:43:27.114843', 'step': 14851, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 3322488817984}, 'timestamp': '2025-09-10 02:43:27.144966', 'step': 14851, 'epoch': 2} {'type': 'loss', 'content': 0.11667735874652863, 'timestamp': '2025-09-10 02:43:27.173377', 'step': 14852, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 144], 'flops': 4271696270016}, 'timestamp': '2025-09-10 02:43:27.215212', 'step': 14852, 'epoch': 2} {'type': 'loss', 'content': 0.1321597397327423, 'timestamp': '2025-09-10 02:43:27.221140', 'step': 14853, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 128], 'flops': 3797092544000}, 'timestamp': '2025-09-10 02:43:27.252185', 'step': 14853, 'epoch': 2} {'type': 'loss', 'content': 0.16604569554328918, 'timestamp': '2025-09-10 02:43:27.254601', 'step': 14854, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 128], 'flops': 3797092544000}, 'timestamp': '2025-09-10 02:43:27.284311', 'step': 14854, 'epoch': 2} {'type': 'loss', 'content': 0.045089758932590485, 'timestamp': '2025-09-10 02:43:27.286355', 'step': 14855, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 3322488817984}, 'timestamp': '2025-09-10 02:43:27.315985', 'step': 14855, 'epoch': 2} {'type': 'loss', 'content': 0.0813407376408577, 'timestamp': '2025-09-10 02:43:27.339743', 'step': 14856, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 160], 'flops': 4746299996032}, 'timestamp': '2025-09-10 02:43:27.371551', 'step': 14856, 'epoch': 2} {'type': 'loss', 'content': 0.08417587727308273, 'timestamp': '2025-09-10 02:43:27.375093', 'step': 14857, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 2847885091968}, 'timestamp': '2025-09-10 02:43:27.407399', 'step': 14857, 'epoch': 2} {'type': 'loss', 'content': 0.052511170506477356, 'timestamp': '2025-09-10 02:43:27.409723', 'step': 14858, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 3322488817984}, 'timestamp': '2025-09-10 02:43:27.440420', 'step': 14858, 'epoch': 2} {'type': 'loss', 'content': 0.08492345362901688, 'timestamp': '2025-09-10 02:43:27.443246', 'step': 14859, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 3322488817984}, 'timestamp': '2025-09-10 02:43:27.474541', 'step': 14859, 'epoch': 2} {'type': 'loss', 'content': 0.09413957595825195, 'timestamp': '2025-09-10 02:43:27.498022', 'step': 14860, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 144], 'flops': 4271696270016}, 'timestamp': '2025-09-10 02:43:27.528281', 'step': 14860, 'epoch': 2} {'type': 'loss', 'content': 0.1355566829442978, 'timestamp': '2025-09-10 02:43:27.530914', 'step': 14861, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 3322488817984}, 'timestamp': '2025-09-10 02:43:27.561759', 'step': 14861, 'epoch': 2} {'type': 'loss', 'content': 0.1188368946313858, 'timestamp': '2025-09-10 02:43:27.564099', 'step': 14862, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 128], 'flops': 3797092544000}, 'timestamp': '2025-09-10 02:43:27.594907', 'step': 14862, 'epoch': 2} {'type': 'loss', 'content': 0.11204656213521957, 'timestamp': '2025-09-10 02:43:27.597554', 'step': 14863, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 3322488817984}, 'timestamp': '2025-09-10 02:43:27.627776', 'step': 14863, 'epoch': 2} {'type': 'loss', 'content': 0.11181367933750153, 'timestamp': '2025-09-10 02:43:27.651333', 'step': 14864, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 144], 'flops': 4271696270016}, 'timestamp': '2025-09-10 02:43:27.684935', 'step': 14864, 'epoch': 2} {'type': 'loss', 'content': 0.07329090684652328, 'timestamp': '2025-09-10 02:43:27.687103', 'step': 14865, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 2847885091968}, 'timestamp': '2025-09-10 02:43:27.717347', 'step': 14865, 'epoch': 2} {'type': 'loss', 'content': 0.09001517295837402, 'timestamp': '2025-09-10 02:43:27.719621', 'step': 14866, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 3322488817984}, 'timestamp': '2025-09-10 02:43:27.750020', 'step': 14866, 'epoch': 2} {'type': 'loss', 'content': 0.10583867877721786, 'timestamp': '2025-09-10 02:43:27.753257', 'step': 14867, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 3322488817984}, 'timestamp': '2025-09-10 02:43:27.783146', 'step': 14867, 'epoch': 2} {'type': 'loss', 'content': 0.2520940601825714, 'timestamp': '2025-09-10 02:43:27.806735', 'step': 14868, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 2847885091968}, 'timestamp': '2025-09-10 02:43:27.837462', 'step': 14868, 'epoch': 2} {'type': 'loss', 'content': 0.17224529385566711, 'timestamp': '2025-09-10 02:43:27.839662', 'step': 14869, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 3322488817984}, 'timestamp': '2025-09-10 02:43:27.870084', 'step': 14869, 'epoch': 2} {'type': 'loss', 'content': 0.07255259156227112, 'timestamp': '2025-09-10 02:43:27.872600', 'step': 14870, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 3322488817984}, 'timestamp': '2025-09-10 02:43:27.902532', 'step': 14870, 'epoch': 2} {'type': 'loss', 'content': 0.02196533791720867, 'timestamp': '2025-09-10 02:43:27.904596', 'step': 14871, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 2847885091968}, 'timestamp': '2025-09-10 02:43:27.934612', 'step': 14871, 'epoch': 2} {'type': 'loss', 'content': 0.0704745352268219, 'timestamp': '2025-09-10 02:43:27.958261', 'step': 14872, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 80], 'flops': 2373281365952}, 'timestamp': '2025-09-10 02:43:27.988316', 'step': 14872, 'epoch': 2} {'type': 'loss', 'content': 0.1006944328546524, 'timestamp': '2025-09-10 02:43:27.990614', 'step': 14873, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 2847885091968}, 'timestamp': '2025-09-10 02:43:28.021040', 'step': 14873, 'epoch': 2} {'type': 'loss', 'content': 0.041777048259973526, 'timestamp': '2025-09-10 02:43:28.022932', 'step': 14874, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 3322488817984}, 'timestamp': '2025-09-10 02:43:28.052919', 'step': 14874, 'epoch': 2} {'type': 'loss', 'content': 0.03530066832900047, 'timestamp': '2025-09-10 02:43:28.056105', 'step': 14875, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 2847885091968}, 'timestamp': '2025-09-10 02:43:28.086590', 'step': 14875, 'epoch': 2} {'type': 'loss', 'content': 0.04952645301818848, 'timestamp': '2025-09-10 02:43:28.110038', 'step': 14876, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 80], 'flops': 2373281365952}, 'timestamp': '2025-09-10 02:43:28.141034', 'step': 14876, 'epoch': 2} {'type': 'loss', 'content': 0.14588017761707306, 'timestamp': '2025-09-10 02:43:28.143486', 'step': 14877, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 2847885091968}, 'timestamp': '2025-09-10 02:43:28.174208', 'step': 14877, 'epoch': 2} {'type': 'loss', 'content': 0.05084054917097092, 'timestamp': '2025-09-10 02:43:28.176066', 'step': 14878, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 3322488817984}, 'timestamp': '2025-09-10 02:43:28.206367', 'step': 14878, 'epoch': 2} {'type': 'loss', 'content': 0.14298519492149353, 'timestamp': '2025-09-10 02:43:28.208484', 'step': 14879, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 128], 'flops': 3797092544000}, 'timestamp': '2025-09-10 02:43:28.238885', 'step': 14879, 'epoch': 2} {'type': 'loss', 'content': 0.08888470381498337, 'timestamp': '2025-09-10 02:43:28.262162', 'step': 14880, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 144], 'flops': 4271696270016}, 'timestamp': '2025-09-10 02:43:28.292644', 'step': 14880, 'epoch': 2} {'type': 'loss', 'content': 0.029891865327954292, 'timestamp': '2025-09-10 02:43:28.294871', 'step': 14881, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 3322488817984}, 'timestamp': '2025-09-10 02:43:28.324160', 'step': 14881, 'epoch': 2} {'type': 'loss', 'content': 0.12262183427810669, 'timestamp': '2025-09-10 02:43:28.326582', 'step': 14882, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 3322488817984}, 'timestamp': '2025-09-10 02:43:28.357292', 'step': 14882, 'epoch': 2} {'type': 'loss', 'content': 0.14051291346549988, 'timestamp': '2025-09-10 02:43:28.360675', 'step': 14883, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 3322488817984}, 'timestamp': '2025-09-10 02:43:28.392890', 'step': 14883, 'epoch': 2} {'type': 'loss', 'content': 0.05653471499681473, 'timestamp': '2025-09-10 02:43:28.416174', 'step': 14884, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 3322488817984}, 'timestamp': '2025-09-10 02:43:28.446669', 'step': 14884, 'epoch': 2} {'type': 'loss', 'content': 0.14259201288223267, 'timestamp': '2025-09-10 02:43:28.449130', 'step': 14885, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 3322488817984}, 'timestamp': '2025-09-10 02:43:28.479629', 'step': 14885, 'epoch': 2} {'type': 'loss', 'content': 0.05652281641960144, 'timestamp': '2025-09-10 02:43:28.481884', 'step': 14886, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 2847885091968}, 'timestamp': '2025-09-10 02:43:28.511762', 'step': 14886, 'epoch': 2} {'type': 'loss', 'content': 0.06667909771203995, 'timestamp': '2025-09-10 02:43:28.513980', 'step': 14887, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 2847885091968}, 'timestamp': '2025-09-10 02:43:28.543679', 'step': 14887, 'epoch': 2} {'type': 'loss', 'content': 0.050401199609041214, 'timestamp': '2025-09-10 02:43:28.569168', 'step': 14888, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 160], 'flops': 4746299996032}, 'timestamp': '2025-09-10 02:43:28.599357', 'step': 14888, 'epoch': 2} {'type': 'loss', 'content': 0.14367026090621948, 'timestamp': '2025-09-10 02:43:28.601856', 'step': 14889, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 3322488817984}, 'timestamp': '2025-09-10 02:43:28.632795', 'step': 14889, 'epoch': 2} {'type': 'loss', 'content': 0.08268261700868607, 'timestamp': '2025-09-10 02:43:28.634712', 'step': 14890, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 128], 'flops': 3797092544000}, 'timestamp': '2025-09-10 02:43:28.664723', 'step': 14890, 'epoch': 2} {'type': 'loss', 'content': 0.1506783664226532, 'timestamp': '2025-09-10 02:43:28.667346', 'step': 14891, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 144], 'flops': 4271696270016}, 'timestamp': '2025-09-10 02:43:28.698905', 'step': 14891, 'epoch': 2} {'type': 'loss', 'content': 0.1595887988805771, 'timestamp': '2025-09-10 02:43:28.722217', 'step': 14892, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 2847885091968}, 'timestamp': '2025-09-10 02:43:28.752155', 'step': 14892, 'epoch': 2} {'type': 'loss', 'content': 0.0884682908654213, 'timestamp': '2025-09-10 02:43:28.753826', 'step': 14893, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 144], 'flops': 4271696270016}, 'timestamp': '2025-09-10 02:43:28.783619', 'step': 14893, 'epoch': 2} {'type': 'loss', 'content': 0.060217469930648804, 'timestamp': '2025-09-10 02:43:28.786238', 'step': 14894, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 3322488817984}, 'timestamp': '2025-09-10 02:43:28.816630', 'step': 14894, 'epoch': 2} {'type': 'loss', 'content': 0.10643378645181656, 'timestamp': '2025-09-10 02:43:28.818997', 'step': 14895, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 3322488817984}, 'timestamp': '2025-09-10 02:43:28.848638', 'step': 14895, 'epoch': 2} {'type': 'loss', 'content': 0.08545223623514175, 'timestamp': '2025-09-10 02:43:28.872181', 'step': 14896, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 3322488817984}, 'timestamp': '2025-09-10 02:43:28.901984', 'step': 14896, 'epoch': 2} {'type': 'loss', 'content': 0.11049803346395493, 'timestamp': '2025-09-10 02:43:28.904606', 'step': 14897, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 2847885091968}, 'timestamp': '2025-09-10 02:43:28.934638', 'step': 14897, 'epoch': 2} {'type': 'loss', 'content': 0.06649722903966904, 'timestamp': '2025-09-10 02:43:28.936938', 'step': 14898, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 128], 'flops': 3797092544000}, 'timestamp': '2025-09-10 02:43:28.966946', 'step': 14898, 'epoch': 2} {'type': 'loss', 'content': 0.09860596060752869, 'timestamp': '2025-09-10 02:43:28.970480', 'step': 14899, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 128], 'flops': 3797092544000}, 'timestamp': '2025-09-10 02:43:29.000969', 'step': 14899, 'epoch': 2} {'type': 'loss', 'content': 0.13632985949516296, 'timestamp': '2025-09-10 02:43:29.024666', 'step': 14900, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 2847885091968}, 'timestamp': '2025-09-10 02:43:29.054455', 'step': 14900, 'epoch': 2} {'type': 'loss', 'content': 0.08878524601459503, 'timestamp': '2025-09-10 02:43:29.056673', 'step': 14901, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 128], 'flops': 3797092544000}, 'timestamp': '2025-09-10 02:43:29.086615', 'step': 14901, 'epoch': 2} {'type': 'loss', 'content': 0.10077757388353348, 'timestamp': '2025-09-10 02:43:29.089092', 'step': 14902, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 192], 'flops': 5695507448064}, 'timestamp': '2025-09-10 02:43:29.119770', 'step': 14902, 'epoch': 2} {'type': 'loss', 'content': 0.13858497142791748, 'timestamp': '2025-09-10 02:43:29.124298', 'step': 14903, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 2847885091968}, 'timestamp': '2025-09-10 02:43:29.155099', 'step': 14903, 'epoch': 2} {'type': 'loss', 'content': 0.047906968742609024, 'timestamp': '2025-09-10 02:43:29.178651', 'step': 14904, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 3322488817984}, 'timestamp': '2025-09-10 02:43:29.209507', 'step': 14904, 'epoch': 2} {'type': 'loss', 'content': 0.0613480806350708, 'timestamp': '2025-09-10 02:43:29.218330', 'step': 14905, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 3322488817984}, 'timestamp': '2025-09-10 02:43:29.251989', 'step': 14905, 'epoch': 2} {'type': 'loss', 'content': 0.0955003872513771, 'timestamp': '2025-09-10 02:43:29.254206', 'step': 14906, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 3322488817984}, 'timestamp': '2025-09-10 02:43:29.283657', 'step': 14906, 'epoch': 2} {'type': 'loss', 'content': 0.16510576009750366, 'timestamp': '2025-09-10 02:43:29.285865', 'step': 14907, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 3322488817984}, 'timestamp': '2025-09-10 02:43:29.316361', 'step': 14907, 'epoch': 2} {'type': 'loss', 'content': 0.14321956038475037, 'timestamp': '2025-09-10 02:43:29.339788', 'step': 14908, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 160], 'flops': 4746299996032}, 'timestamp': '2025-09-10 02:43:29.371202', 'step': 14908, 'epoch': 2} {'type': 'loss', 'content': 0.09404922276735306, 'timestamp': '2025-09-10 02:43:29.373506', 'step': 14909, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 3322488817984}, 'timestamp': '2025-09-10 02:43:29.403774', 'step': 14909, 'epoch': 2} {'type': 'loss', 'content': 0.12055585533380508, 'timestamp': '2025-09-10 02:43:29.405949', 'step': 14910, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 2847885091968}, 'timestamp': '2025-09-10 02:43:29.436497', 'step': 14910, 'epoch': 2} {'type': 'loss', 'content': 0.09351984411478043, 'timestamp': '2025-09-10 02:43:29.438663', 'step': 14911, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 3322488817984}, 'timestamp': '2025-09-10 02:43:29.469285', 'step': 14911, 'epoch': 2} {'type': 'loss', 'content': 0.046179160475730896, 'timestamp': '2025-09-10 02:43:29.494332', 'step': 14912, 'epoch': 2} {'type': 'flops', 'content': [{'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1582003754624}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1898404492032}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1898404492032}, {'type': 'perplexity', 'in_batch_dim': [8, 144], 'batch_size': 8, 'flops': 2847606704256}, {'type': 'perplexity', 'in_batch_dim': [8, 64], 'batch_size': 8, 'flops': 1265603017216}, {'type': 'perplexity', 'in_batch_dim': [8, 112], 'batch_size': 8, 'flops': 2214805229440}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1898404492032}, {'type': 'perplexity', 'in_batch_dim': [8, 112], 'batch_size': 8, 'flops': 2214805229440}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1898404492032}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1898404492032}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1898404492032}, {'type': 'perplexity', 'in_batch_dim': [8, 112], 'batch_size': 8, 'flops': 2214805229440}, {'type': 'perplexity', 'in_batch_dim': [8, 112], 'batch_size': 8, 'flops': 2214805229440}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1582003754624}, {'type': 'perplexity', 'in_batch_dim': [8, 144], 'batch_size': 8, 'flops': 2847606704256}, {'type': 'perplexity', 'in_batch_dim': [8, 112], 'batch_size': 8, 'flops': 2214805229440}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1582003754624}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1898404492032}, {'type': 'perplexity', 'in_batch_dim': [8, 64], 'batch_size': 8, 'flops': 1265603017216}, {'type': 'perplexity', 'in_batch_dim': [8, 64], 'batch_size': 8, 'flops': 1265603017216}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1898404492032}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1582003754624}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1582003754624}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1582003754624}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1582003754624}, {'type': 'perplexity', 'in_batch_dim': [8, 64], 'batch_size': 8, 'flops': 1265603017216}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1898404492032}, {'type': 'perplexity', 'in_batch_dim': [8, 64], 'batch_size': 8, 'flops': 1265603017216}, {'type': 'perplexity', 'in_batch_dim': [8, 64], 'batch_size': 8, 'flops': 1265603017216}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1582003754624}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1582003754624}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1582003754624}, {'type': 'perplexity', 'in_batch_dim': [8, 112], 'batch_size': 8, 'flops': 2214805229440}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1582003754624}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1582003754624}, {'type': 'perplexity', 'in_batch_dim': [8, 160], 'batch_size': 8, 'flops': 3164007441664}, {'type': 'perplexity', 'in_batch_dim': [8, 64], 'batch_size': 8, 'flops': 1265603017216}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1898404492032}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1898404492032}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1898404492032}, {'type': 'perplexity', 'in_batch_dim': [8, 64], 'batch_size': 8, 'flops': 1265603017216}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1582003754624}, {'type': 'perplexity', 'in_batch_dim': [8, 64], 'batch_size': 8, 'flops': 1265603017216}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1582003754624}, {'type': 'perplexity', 'in_batch_dim': [8, 64], 'batch_size': 8, 'flops': 1265603017216}, {'type': 'perplexity', 'in_batch_dim': [8, 112], 'batch_size': 8, 'flops': 2214805229440}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1582003754624}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1898404492032}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1898404492032}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1898404492032}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1898404492032}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1898404492032}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1898404492032}, {'type': 'perplexity', 'in_batch_dim': [8, 64], 'batch_size': 8, 'flops': 1265603017216}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1898404492032}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1582003754624}, {'type': 'perplexity', 'in_batch_dim': [8, 64], 'batch_size': 8, 'flops': 1265603017216}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1898404492032}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1898404492032}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1898404492032}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1898404492032}, {'type': 'perplexity', 'in_batch_dim': [8, 128], 'batch_size': 8, 'flops': 2531205966848}, {'type': 'perplexity', 'in_batch_dim': [8, 112], 'batch_size': 8, 'flops': 2214805229440}, {'type': 'perplexity', 'in_batch_dim': [8, 64], 'batch_size': 8, 'flops': 1265603017216}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1582003754624}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1898404492032}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1582003754624}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1898404492032}, {'type': 'perplexity', 'in_batch_dim': [8, 64], 'batch_size': 8, 'flops': 1265603017216}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1898404492032}, {'type': 'perplexity', 'in_batch_dim': [8, 112], 'batch_size': 8, 'flops': 2214805229440}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1898404492032}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1582003754624}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1582003754624}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1582003754624}, {'type': 'perplexity', 'in_batch_dim': [8, 64], 'batch_size': 8, 'flops': 1265603017216}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1582003754624}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1898404492032}, {'type': 'perplexity', 'in_batch_dim': [8, 64], 'batch_size': 8, 'flops': 1265603017216}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1582003754624}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1898404492032}, {'type': 'perplexity', 'in_batch_dim': [8, 64], 'batch_size': 8, 'flops': 1265603017216}, {'type': 'perplexity', 'in_batch_dim': [8, 112], 'batch_size': 8, 'flops': 2214805229440}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1582003754624}, {'type': 'perplexity', 'in_batch_dim': [8, 144], 'batch_size': 8, 'flops': 2847606704256}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1582003754624}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1582003754624}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1582003754624}, {'type': 'perplexity', 'in_batch_dim': [8, 64], 'batch_size': 8, 'flops': 1265603017216}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1898404492032}, {'type': 'perplexity', 'in_batch_dim': [8, 112], 'batch_size': 8, 'flops': 2214805229440}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1898404492032}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1582003754624}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1582003754624}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1582003754624}, {'type': 'perplexity', 'in_batch_dim': [8, 112], 'batch_size': 8, 'flops': 2214805229440}, {'type': 'perplexity', 'in_batch_dim': [8, 64], 'batch_size': 8, 'flops': 1265603017216}, {'type': 'perplexity', 'in_batch_dim': [8, 112], 'batch_size': 8, 'flops': 2214805229440}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1582003754624}, {'type': 'perplexity', 'in_batch_dim': [8, 64], 'batch_size': 8, 'flops': 1265603017216}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1582003754624}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1582003754624}, {'type': 'perplexity', 'in_batch_dim': [8, 64], 'batch_size': 8, 'flops': 1265603017216}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1582003754624}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1582003754624}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1582003754624}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1898404492032}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1582003754624}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1582003754624}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1898404492032}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1898404492032}, {'type': 'perplexity', 'in_batch_dim': [8, 112], 'batch_size': 8, 'flops': 2214805229440}, {'type': 'perplexity', 'in_batch_dim': [8, 64], 'batch_size': 8, 'flops': 1265603017216}, {'type': 'perplexity', 'in_batch_dim': [8, 112], 'batch_size': 8, 'flops': 2214805229440}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1898404492032}, {'type': 'perplexity', 'in_batch_dim': [8, 112], 'batch_size': 8, 'flops': 2214805229440}, {'type': 'perplexity', 'in_batch_dim': [8, 128], 'batch_size': 8, 'flops': 2531205966848}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1582003754624}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1898404492032}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1898404492032}, {'type': 'perplexity', 'in_batch_dim': [8, 192], 'batch_size': 8, 'flops': 3796808916480}, {'type': 'perplexity', 'in_batch_dim': [8, 64], 'batch_size': 8, 'flops': 1265603017216}, {'type': 'perplexity', 'in_batch_dim': [8, 144], 'batch_size': 8, 'flops': 2847606704256}, {'type': 'perplexity', 'in_batch_dim': [8, 64], 'batch_size': 8, 'flops': 1265603017216}, {'type': 'perplexity', 'in_batch_dim': [8, 144], 'batch_size': 8, 'flops': 2847606704256}, {'type': 'perplexity', 'in_batch_dim': [8, 64], 'batch_size': 8, 'flops': 1265603017216}, {'type': 'perplexity', 'in_batch_dim': [8, 64], 'batch_size': 8, 'flops': 1265603017216}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1898404492032}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1898404492032}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1582003754624}, {'type': 'perplexity', 'in_batch_dim': [8, 128], 'batch_size': 8, 'flops': 2531205966848}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1898404492032}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1898404492032}, {'type': 'perplexity', 'in_batch_dim': [8, 112], 'batch_size': 8, 'flops': 2214805229440}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1582003754624}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1582003754624}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1898404492032}, {'type': 'perplexity', 'in_batch_dim': [8, 64], 'batch_size': 8, 'flops': 1265603017216}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1898404492032}, {'type': 'perplexity', 'in_batch_dim': [8, 112], 'batch_size': 8, 'flops': 2214805229440}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1898404492032}, {'type': 'perplexity', 'in_batch_dim': [8, 64], 'batch_size': 8, 'flops': 1265603017216}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1582003754624}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1898404492032}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1898404492032}, {'type': 'perplexity', 'in_batch_dim': [8, 64], 'batch_size': 8, 'flops': 1265603017216}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1898404492032}, {'type': 'perplexity', 'in_batch_dim': [8, 64], 'batch_size': 8, 'flops': 1265603017216}, {'type': 'perplexity', 'in_batch_dim': [8, 112], 'batch_size': 8, 'flops': 2214805229440}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1582003754624}, {'type': 'perplexity', 'in_batch_dim': [8, 128], 'batch_size': 8, 'flops': 2531205966848}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1582003754624}, {'type': 'perplexity', 'in_batch_dim': [8, 112], 'batch_size': 8, 'flops': 2214805229440}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1898404492032}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1582003754624}, {'type': 'perplexity', 'in_batch_dim': [8, 112], 'batch_size': 8, 'flops': 2214805229440}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1582003754624}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1898404492032}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1898404492032}, {'type': 'perplexity', 'in_batch_dim': [8, 128], 'batch_size': 8, 'flops': 2531205966848}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1898404492032}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1898404492032}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1582003754624}, {'type': 'perplexity', 'in_batch_dim': [8, 112], 'batch_size': 8, 'flops': 2214805229440}, {'type': 'perplexity', 'in_batch_dim': [8, 128], 'batch_size': 8, 'flops': 2531205966848}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1898404492032}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1582003754624}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1582003754624}, {'type': 'perplexity', 'in_batch_dim': [8, 128], 'batch_size': 8, 'flops': 2531205966848}, {'type': 'perplexity', 'in_batch_dim': [8, 112], 'batch_size': 8, 'flops': 2214805229440}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1582003754624}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1582003754624}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1898404492032}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1582003754624}, {'type': 'perplexity', 'in_batch_dim': [8, 64], 'batch_size': 8, 'flops': 1265603017216}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1582003754624}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1582003754624}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1582003754624}, {'type': 'perplexity', 'in_batch_dim': [8, 112], 'batch_size': 8, 'flops': 2214805229440}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1582003754624}, {'type': 'perplexity', 'in_batch_dim': [8, 64], 'batch_size': 8, 'flops': 1265603017216}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1582003754624}, {'type': 'perplexity', 'in_batch_dim': [8, 128], 'batch_size': 8, 'flops': 2531205966848}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1898404492032}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1898404492032}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1582003754624}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1898404492032}, {'type': 'perplexity', 'in_batch_dim': [8, 64], 'batch_size': 8, 'flops': 1265603017216}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1898404492032}, {'type': 'perplexity', 'in_batch_dim': [8, 112], 'batch_size': 8, 'flops': 2214805229440}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1898404492032}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1582003754624}, {'type': 'perplexity', 'in_batch_dim': [8, 112], 'batch_size': 8, 'flops': 2214805229440}, {'type': 'perplexity', 'in_batch_dim': [8, 112], 'batch_size': 8, 'flops': 2214805229440}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1898404492032}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1898404492032}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1898404492032}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1898404492032}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1582003754624}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1582003754624}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1582003754624}, {'type': 'perplexity', 'in_batch_dim': [8, 112], 'batch_size': 8, 'flops': 2214805229440}, {'type': 'perplexity', 'in_batch_dim': [8, 64], 'batch_size': 8, 'flops': 1265603017216}, {'type': 'perplexity', 'in_batch_dim': [8, 144], 'batch_size': 8, 'flops': 2847606704256}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1898404492032}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1898404492032}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1898404492032}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1898404492032}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1898404492032}, {'type': 'perplexity', 'in_batch_dim': [8, 112], 'batch_size': 8, 'flops': 2214805229440}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1582003754624}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1582003754624}, {'type': 'perplexity', 'in_batch_dim': [8, 64], 'batch_size': 8, 'flops': 1265603017216}, {'type': 'perplexity', 'in_batch_dim': [8, 112], 'batch_size': 8, 'flops': 2214805229440}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1582003754624}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1898404492032}, {'type': 'perplexity', 'in_batch_dim': [8, 64], 'batch_size': 8, 'flops': 1265603017216}, {'type': 'perplexity', 'in_batch_dim': [8, 112], 'batch_size': 8, 'flops': 2214805229440}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1898404492032}, {'type': 'perplexity', 'in_batch_dim': [8, 64], 'batch_size': 8, 'flops': 1265603017216}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1582003754624}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1582003754624}, {'type': 'perplexity', 'in_batch_dim': [8, 64], 'batch_size': 8, 'flops': 1265603017216}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1898404492032}, {'type': 'perplexity', 'in_batch_dim': [8, 64], 'batch_size': 8, 'flops': 1265603017216}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1582003754624}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1898404492032}, {'type': 'perplexity', 'in_batch_dim': [8, 112], 'batch_size': 8, 'flops': 2214805229440}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1582003754624}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1898404492032}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1582003754624}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1898404492032}, {'type': 'perplexity', 'in_batch_dim': [8, 112], 'batch_size': 8, 'flops': 2214805229440}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1898404492032}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1898404492032}, {'type': 'perplexity', 'in_batch_dim': [8, 64], 'batch_size': 8, 'flops': 1265603017216}, {'type': 'perplexity', 'in_batch_dim': [8, 64], 'batch_size': 8, 'flops': 1265603017216}, {'type': 'perplexity', 'in_batch_dim': [8, 128], 'batch_size': 8, 'flops': 2531205966848}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1582003754624}, {'type': 'perplexity', 'in_batch_dim': [8, 64], 'batch_size': 8, 'flops': 1265603017216}, {'type': 'perplexity', 'in_batch_dim': [8, 112], 'batch_size': 8, 'flops': 2214805229440}, {'type': 'perplexity', 'in_batch_dim': [8, 112], 'batch_size': 8, 'flops': 2214805229440}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1898404492032}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1582003754624}, {'type': 'perplexity', 'in_batch_dim': [8, 128], 'batch_size': 8, 'flops': 2531205966848}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1898404492032}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1582003754624}, {'type': 'perplexity', 'in_batch_dim': [8, 112], 'batch_size': 8, 'flops': 2214805229440}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1898404492032}, {'type': 'perplexity', 'in_batch_dim': [8, 64], 'batch_size': 8, 'flops': 1265603017216}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1582003754624}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1898404492032}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1898404492032}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1582003754624}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1582003754624}, {'type': 'perplexity', 'in_batch_dim': [8, 64], 'batch_size': 8, 'flops': 1265603017216}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1898404492032}, {'type': 'perplexity', 'in_batch_dim': [8, 112], 'batch_size': 8, 'flops': 2214805229440}, {'type': 'perplexity', 'in_batch_dim': [8, 64], 'batch_size': 8, 'flops': 1265603017216}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1898404492032}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1582003754624}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1582003754624}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1898404492032}, {'type': 'perplexity', 'in_batch_dim': [8, 112], 'batch_size': 8, 'flops': 2214805229440}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1898404492032}, {'type': 'perplexity', 'in_batch_dim': [8, 112], 'batch_size': 8, 'flops': 2214805229440}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1582003754624}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1582003754624}, {'type': 'perplexity', 'in_batch_dim': [8, 64], 'batch_size': 8, 'flops': 1265603017216}, {'type': 'perplexity', 'in_batch_dim': [8, 112], 'batch_size': 8, 'flops': 2214805229440}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1582003754624}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1898404492032}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1582003754624}, {'type': 'perplexity', 'in_batch_dim': [8, 112], 'batch_size': 8, 'flops': 2214805229440}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1898404492032}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1898404492032}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1582003754624}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1898404492032}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1582003754624}, {'type': 'perplexity', 'in_batch_dim': [8, 112], 'batch_size': 8, 'flops': 2214805229440}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1582003754624}, {'type': 'perplexity', 'in_batch_dim': [8, 64], 'batch_size': 8, 'flops': 1265603017216}, {'type': 'perplexity', 'in_batch_dim': [8, 64], 'batch_size': 8, 'flops': 1265603017216}, {'type': 'perplexity', 'in_batch_dim': [8, 48], 'batch_size': 8, 'flops': 949202279808}, {'type': 'perplexity', 'in_batch_dim': [8, 112], 'batch_size': 8, 'flops': 2214805229440}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1582003754624}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1898404492032}, {'type': 'perplexity', 'in_batch_dim': [8, 128], 'batch_size': 8, 'flops': 2531205966848}, {'type': 'perplexity', 'in_batch_dim': [8, 112], 'batch_size': 8, 'flops': 2214805229440}, {'type': 'perplexity', 'in_batch_dim': [8, 128], 'batch_size': 8, 'flops': 2531205966848}, {'type': 'perplexity', 'in_batch_dim': [8, 112], 'batch_size': 8, 'flops': 2214805229440}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1582003754624}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1898404492032}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1582003754624}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1582003754624}, {'type': 'perplexity', 'in_batch_dim': [8, 112], 'batch_size': 8, 'flops': 2214805229440}, {'type': 'perplexity', 'in_batch_dim': [8, 112], 'batch_size': 8, 'flops': 2214805229440}, {'type': 'perplexity', 'in_batch_dim': [8, 64], 'batch_size': 8, 'flops': 1265603017216}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1898404492032}, {'type': 'perplexity', 'in_batch_dim': [8, 112], 'batch_size': 8, 'flops': 2214805229440}, {'type': 'perplexity', 'in_batch_dim': [8, 64], 'batch_size': 8, 'flops': 1265603017216}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1898404492032}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1582003754624}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1898404492032}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1898404492032}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1898404492032}, {'type': 'perplexity', 'in_batch_dim': [8, 48], 'batch_size': 8, 'flops': 949202279808}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1898404492032}, {'type': 'perplexity', 'in_batch_dim': [8, 112], 'batch_size': 8, 'flops': 2214805229440}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1582003754624}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1898404492032}, {'type': 'perplexity', 'in_batch_dim': [8, 64], 'batch_size': 8, 'flops': 1265603017216}, {'type': 'perplexity', 'in_batch_dim': [8, 112], 'batch_size': 8, 'flops': 2214805229440}, {'type': 'perplexity', 'in_batch_dim': [8, 64], 'batch_size': 8, 'flops': 1265603017216}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1582003754624}, {'type': 'perplexity', 'in_batch_dim': [8, 64], 'batch_size': 8, 'flops': 1265603017216}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1582003754624}, {'type': 'perplexity', 'in_batch_dim': [8, 112], 'batch_size': 8, 'flops': 2214805229440}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1582003754624}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1582003754624}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1582003754624}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1582003754624}, {'type': 'perplexity', 'in_batch_dim': [8, 112], 'batch_size': 8, 'flops': 2214805229440}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1582003754624}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1898404492032}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1898404492032}, {'type': 'perplexity', 'in_batch_dim': [8, 112], 'batch_size': 8, 'flops': 2214805229440}, {'type': 'perplexity', 'in_batch_dim': [8, 144], 'batch_size': 8, 'flops': 2847606704256}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1582003754624}, {'type': 'perplexity', 'in_batch_dim': [8, 64], 'batch_size': 8, 'flops': 1265603017216}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1898404492032}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1898404492032}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1582003754624}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1898404492032}, {'type': 'perplexity', 'in_batch_dim': [8, 128], 'batch_size': 8, 'flops': 2531205966848}, {'type': 'perplexity', 'in_batch_dim': [8, 112], 'batch_size': 8, 'flops': 2214805229440}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1898404492032}, {'type': 'perplexity', 'in_batch_dim': [8, 64], 'batch_size': 8, 'flops': 1265603017216}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1898404492032}, {'type': 'perplexity', 'in_batch_dim': [8, 160], 'batch_size': 8, 'flops': 3164007441664}, {'type': 'perplexity', 'in_batch_dim': [8, 64], 'batch_size': 8, 'flops': 1265603017216}, {'type': 'perplexity', 'in_batch_dim': [8, 112], 'batch_size': 8, 'flops': 2214805229440}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1898404492032}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1898404492032}, {'type': 'perplexity', 'in_batch_dim': [8, 144], 'batch_size': 8, 'flops': 2847606704256}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1582003754624}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1898404492032}, {'type': 'perplexity', 'in_batch_dim': [8, 64], 'batch_size': 8, 'flops': 1265603017216}, {'type': 'perplexity', 'in_batch_dim': [8, 64], 'batch_size': 8, 'flops': 1265603017216}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1898404492032}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1898404492032}, {'type': 'perplexity', 'in_batch_dim': [8, 112], 'batch_size': 8, 'flops': 2214805229440}, {'type': 'perplexity', 'in_batch_dim': [8, 112], 'batch_size': 8, 'flops': 2214805229440}, {'type': 'perplexity', 'in_batch_dim': [8, 128], 'batch_size': 8, 'flops': 2531205966848}, {'type': 'perplexity', 'in_batch_dim': [8, 112], 'batch_size': 8, 'flops': 2214805229440}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1582003754624}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1582003754624}, {'type': 'perplexity', 'in_batch_dim': [8, 112], 'batch_size': 8, 'flops': 2214805229440}, {'type': 'perplexity', 'in_batch_dim': [8, 128], 'batch_size': 8, 'flops': 2531205966848}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1582003754624}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1582003754624}, {'type': 'perplexity', 'in_batch_dim': [8, 64], 'batch_size': 8, 'flops': 1265603017216}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1898404492032}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1582003754624}, {'type': 'perplexity', 'in_batch_dim': [8, 112], 'batch_size': 8, 'flops': 2214805229440}, {'type': 'perplexity', 'in_batch_dim': [8, 64], 'batch_size': 8, 'flops': 1265603017216}, {'type': 'perplexity', 'in_batch_dim': [8, 64], 'batch_size': 8, 'flops': 1265603017216}, {'type': 'perplexity', 'in_batch_dim': [8, 128], 'batch_size': 8, 'flops': 2531205966848}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1898404492032}, {'type': 'perplexity', 'in_batch_dim': [8, 112], 'batch_size': 8, 'flops': 2214805229440}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1582003754624}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1582003754624}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1582003754624}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1582003754624}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1582003754624}, {'type': 'perplexity', 'in_batch_dim': [8, 112], 'batch_size': 8, 'flops': 2214805229440}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1898404492032}, {'type': 'perplexity', 'in_batch_dim': [8, 112], 'batch_size': 8, 'flops': 2214805229440}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1898404492032}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1582003754624}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1582003754624}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1898404492032}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1898404492032}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1582003754624}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1582003754624}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1582003754624}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1582003754624}, {'type': 'perplexity', 'in_batch_dim': [8, 48], 'batch_size': 8, 'flops': 949202279808}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1582003754624}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1898404492032}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1582003754624}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1898404492032}, {'type': 'perplexity', 'in_batch_dim': [8, 112], 'batch_size': 8, 'flops': 2214805229440}, {'type': 'perplexity', 'in_batch_dim': [8, 128], 'batch_size': 8, 'flops': 2531205966848}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1898404492032}, {'type': 'perplexity', 'in_batch_dim': [8, 64], 'batch_size': 8, 'flops': 1265603017216}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1898404492032}, {'type': 'perplexity', 'in_batch_dim': [8, 64], 'batch_size': 8, 'flops': 1265603017216}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1582003754624}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1582003754624}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1582003754624}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1582003754624}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1898404492032}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1582003754624}, {'type': 'perplexity', 'in_batch_dim': [8, 112], 'batch_size': 8, 'flops': 2214805229440}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1898404492032}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1582003754624}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1898404492032}, {'type': 'perplexity', 'in_batch_dim': [8, 64], 'batch_size': 8, 'flops': 1265603017216}, {'type': 'perplexity', 'in_batch_dim': [8, 64], 'batch_size': 8, 'flops': 1265603017216}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1898404492032}, {'type': 'perplexity', 'in_batch_dim': [8, 64], 'batch_size': 8, 'flops': 1265603017216}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1582003754624}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1582003754624}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1582003754624}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1582003754624}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1582003754624}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1582003754624}, {'type': 'perplexity', 'in_batch_dim': [8, 64], 'batch_size': 8, 'flops': 1265603017216}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1582003754624}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1582003754624}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1898404492032}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1898404492032}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1898404492032}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1898404492032}, {'type': 'perplexity', 'in_batch_dim': [8, 128], 'batch_size': 8, 'flops': 2531205966848}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1582003754624}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1898404492032}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1898404492032}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1898404492032}, {'type': 'perplexity', 'in_batch_dim': [8, 64], 'batch_size': 8, 'flops': 1265603017216}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1898404492032}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1898404492032}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1582003754624}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1898404492032}, {'type': 'perplexity', 'in_batch_dim': [8, 112], 'batch_size': 8, 'flops': 2214805229440}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1898404492032}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1582003754624}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1898404492032}, {'type': 'perplexity', 'in_batch_dim': [8, 64], 'batch_size': 8, 'flops': 1265603017216}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1898404492032}, {'type': 'perplexity', 'in_batch_dim': [8, 112], 'batch_size': 8, 'flops': 2214805229440}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1898404492032}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1898404492032}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1582003754624}, {'type': 'perplexity', 'in_batch_dim': [8, 64], 'batch_size': 8, 'flops': 1265603017216}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1582003754624}, {'type': 'perplexity', 'in_batch_dim': [8, 128], 'batch_size': 8, 'flops': 2531205966848}, {'type': 'perplexity', 'in_batch_dim': [8, 64], 'batch_size': 8, 'flops': 1265603017216}, {'type': 'perplexity', 'in_batch_dim': [8, 128], 'batch_size': 8, 'flops': 2531205966848}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1898404492032}, {'type': 'perplexity', 'in_batch_dim': [8, 112], 'batch_size': 8, 'flops': 2214805229440}, {'type': 'perplexity', 'in_batch_dim': [8, 112], 'batch_size': 8, 'flops': 2214805229440}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1898404492032}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1898404492032}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1582003754624}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1582003754624}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1582003754624}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1898404492032}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1582003754624}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1582003754624}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1898404492032}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1582003754624}, {'type': 'perplexity', 'in_batch_dim': [8, 64], 'batch_size': 8, 'flops': 1265603017216}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1898404492032}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1898404492032}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1898404492032}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1898404492032}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1582003754624}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1898404492032}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1582003754624}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1582003754624}, {'type': 'perplexity', 'in_batch_dim': [8, 112], 'batch_size': 8, 'flops': 2214805229440}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1582003754624}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1898404492032}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1582003754624}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1582003754624}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1582003754624}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1898404492032}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1582003754624}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1898404492032}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1582003754624}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1582003754624}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1898404492032}, {'type': 'perplexity', 'in_batch_dim': [8, 112], 'batch_size': 8, 'flops': 2214805229440}, {'type': 'perplexity', 'in_batch_dim': [8, 128], 'batch_size': 8, 'flops': 2531205966848}, {'type': 'perplexity', 'in_batch_dim': [8, 112], 'batch_size': 8, 'flops': 2214805229440}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1582003754624}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1898404492032}, {'type': 'perplexity', 'in_batch_dim': [8, 64], 'batch_size': 8, 'flops': 1265603017216}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1898404492032}, {'type': 'perplexity', 'in_batch_dim': [8, 64], 'batch_size': 8, 'flops': 1265603017216}, {'type': 'perplexity', 'in_batch_dim': [8, 144], 'batch_size': 8, 'flops': 2847606704256}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1582003754624}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1898404492032}, {'type': 'perplexity', 'in_batch_dim': [8, 112], 'batch_size': 8, 'flops': 2214805229440}, {'type': 'perplexity', 'in_batch_dim': [8, 144], 'batch_size': 8, 'flops': 2847606704256}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1582003754624}, {'type': 'perplexity', 'in_batch_dim': [8, 112], 'batch_size': 8, 'flops': 2214805229440}, {'type': 'perplexity', 'in_batch_dim': [8, 64], 'batch_size': 8, 'flops': 1265603017216}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1582003754624}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1898404492032}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1898404492032}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1582003754624}, {'type': 'perplexity', 'in_batch_dim': [8, 112], 'batch_size': 8, 'flops': 2214805229440}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1582003754624}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1582003754624}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1582003754624}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1582003754624}, {'type': 'perplexity', 'in_batch_dim': [8, 112], 'batch_size': 8, 'flops': 2214805229440}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1898404492032}, {'type': 'perplexity', 'in_batch_dim': [8, 128], 'batch_size': 8, 'flops': 2531205966848}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1582003754624}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1582003754624}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1898404492032}, {'type': 'perplexity', 'in_batch_dim': [8, 64], 'batch_size': 8, 'flops': 1265603017216}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1898404492032}, {'type': 'perplexity', 'in_batch_dim': [8, 64], 'batch_size': 8, 'flops': 1265603017216}, {'type': 'perplexity', 'in_batch_dim': [8, 112], 'batch_size': 8, 'flops': 2214805229440}, {'type': 'perplexity', 'in_batch_dim': [8, 112], 'batch_size': 8, 'flops': 2214805229440}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1582003754624}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1898404492032}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1582003754624}, {'type': 'perplexity', 'in_batch_dim': [8, 64], 'batch_size': 8, 'flops': 1265603017216}, {'type': 'perplexity', 'in_batch_dim': [8, 64], 'batch_size': 8, 'flops': 1265603017216}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1898404492032}, {'type': 'perplexity', 'in_batch_dim': [8, 64], 'batch_size': 8, 'flops': 1265603017216}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1898404492032}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1898404492032}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1582003754624}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1898404492032}, {'type': 'perplexity', 'in_batch_dim': [8, 144], 'batch_size': 8, 'flops': 2847606704256}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1898404492032}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1582003754624}, {'type': 'perplexity', 'in_batch_dim': [8, 128], 'batch_size': 8, 'flops': 2531205966848}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1898404492032}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1898404492032}, {'type': 'perplexity', 'in_batch_dim': [8, 112], 'batch_size': 8, 'flops': 2214805229440}, {'type': 'perplexity', 'in_batch_dim': [8, 112], 'batch_size': 8, 'flops': 2214805229440}, {'type': 'perplexity', 'in_batch_dim': [8, 64], 'batch_size': 8, 'flops': 1265603017216}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1582003754624}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1898404492032}, {'type': 'perplexity', 'in_batch_dim': [8, 128], 'batch_size': 8, 'flops': 2531205966848}, {'type': 'perplexity', 'in_batch_dim': [8, 144], 'batch_size': 8, 'flops': 2847606704256}, {'type': 'perplexity', 'in_batch_dim': [8, 112], 'batch_size': 8, 'flops': 2214805229440}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1582003754624}, {'type': 'perplexity', 'in_batch_dim': [8, 64], 'batch_size': 8, 'flops': 1265603017216}, {'type': 'perplexity', 'in_batch_dim': [8, 64], 'batch_size': 8, 'flops': 1265603017216}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1898404492032}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1582003754624}, {'type': 'perplexity', 'in_batch_dim': [8, 64], 'batch_size': 8, 'flops': 1265603017216}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1898404492032}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1582003754624}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1898404492032}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1898404492032}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1582003754624}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1582003754624}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1582003754624}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1582003754624}, {'type': 'perplexity', 'in_batch_dim': [3, 48], 'batch_size': 8, 'flops': 949202279808}], 'timestamp': '2025-09-10 02:43:37.444174', 'step': 14912, 'epoch': 2} {'type': 'pplx', 'content': 8050.159583505723, 'timestamp': '2025-09-10 02:43:37.447310', 'step': 14912, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 2847885091968}, 'timestamp': '2025-09-10 02:43:37.475817', 'step': 14912, 'epoch': 2} {'type': 'loss', 'content': 0.077140212059021, 'timestamp': '2025-09-10 02:43:37.477934', 'step': 14913, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 3322488817984}, 'timestamp': '2025-09-10 02:43:37.512866', 'step': 14913, 'epoch': 2} {'type': 'loss', 'content': 0.0329374261200428, 'timestamp': '2025-09-10 02:43:37.519187', 'step': 14914, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 80], 'flops': 2373281365952}, 'timestamp': '2025-09-10 02:43:37.552400', 'step': 14914, 'epoch': 2} {'type': 'loss', 'content': 0.2745843529701233, 'timestamp': '2025-09-10 02:43:37.554875', 'step': 14915, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 128], 'flops': 3797092544000}, 'timestamp': '2025-09-10 02:43:37.586050', 'step': 14915, 'epoch': 2} {'type': 'loss', 'content': 0.08229327201843262, 'timestamp': '2025-09-10 02:43:37.615849', 'step': 14916, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 3322488817984}, 'timestamp': '2025-09-10 02:43:37.646252', 'step': 14916, 'epoch': 2} {'type': 'loss', 'content': 0.10499311983585358, 'timestamp': '2025-09-10 02:43:37.648162', 'step': 14917, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 3322488817984}, 'timestamp': '2025-09-10 02:43:37.678330', 'step': 14917, 'epoch': 2} {'type': 'loss', 'content': 0.07166054099798203, 'timestamp': '2025-09-10 02:43:37.680166', 'step': 14918, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 2847885091968}, 'timestamp': '2025-09-10 02:43:37.710070', 'step': 14918, 'epoch': 2} {'type': 'loss', 'content': 0.04624311253428459, 'timestamp': '2025-09-10 02:43:37.711988', 'step': 14919, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [1, 208], 'flops': 1542724875376}, 'timestamp': '2025-09-10 02:43:37.742878', 'step': 14919, 'epoch': 2} {'type': 'loss', 'content': 0.24663962423801422, 'timestamp': '2025-09-10 02:43:37.766401', 'step': 14920, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 3322488817984}, 'timestamp': '2025-09-10 02:43:37.797218', 'step': 14920, 'epoch': 3} {'type': 'loss', 'content': 0.05560671165585518, 'timestamp': '2025-09-10 02:43:37.799574', 'step': 14921, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 3322488817984}, 'timestamp': '2025-09-10 02:43:37.829136', 'step': 14921, 'epoch': 3} {'type': 'loss', 'content': 0.076095350086689, 'timestamp': '2025-09-10 02:43:37.831543', 'step': 14922, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 2847885091968}, 'timestamp': '2025-09-10 02:43:37.862053', 'step': 14922, 'epoch': 3} {'type': 'loss', 'content': 0.10388187319040298, 'timestamp': '2025-09-10 02:43:37.864498', 'step': 14923, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 128], 'flops': 3797092544000}, 'timestamp': '2025-09-10 02:43:37.895895', 'step': 14923, 'epoch': 3} {'type': 'loss', 'content': 0.11953864246606827, 'timestamp': '2025-09-10 02:43:37.919489', 'step': 14924, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 128], 'flops': 3797092544000}, 'timestamp': '2025-09-10 02:43:37.951194', 'step': 14924, 'epoch': 3} {'type': 'loss', 'content': 0.15372641384601593, 'timestamp': '2025-09-10 02:43:37.953448', 'step': 14925, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 3322488817984}, 'timestamp': '2025-09-10 02:43:37.984320', 'step': 14925, 'epoch': 3} {'type': 'loss', 'content': 0.054681576788425446, 'timestamp': '2025-09-10 02:43:37.993604', 'step': 14926, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 3322488817984}, 'timestamp': '2025-09-10 02:43:38.033183', 'step': 14926, 'epoch': 3} {'type': 'loss', 'content': 0.10285139828920364, 'timestamp': '2025-09-10 02:43:38.036795', 'step': 14927, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 3322488817984}, 'timestamp': '2025-09-10 02:43:38.066946', 'step': 14927, 'epoch': 3} {'type': 'loss', 'content': 0.04686969891190529, 'timestamp': '2025-09-10 02:43:38.090012', 'step': 14928, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 3322488817984}, 'timestamp': '2025-09-10 02:43:38.119945', 'step': 14928, 'epoch': 3} {'type': 'loss', 'content': 0.047198642045259476, 'timestamp': '2025-09-10 02:43:38.121837', 'step': 14929, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 2847885091968}, 'timestamp': '2025-09-10 02:43:38.151346', 'step': 14929, 'epoch': 3} {'type': 'loss', 'content': 0.07866457104682922, 'timestamp': '2025-09-10 02:43:38.153572', 'step': 14930, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 2847885091968}, 'timestamp': '2025-09-10 02:43:38.184482', 'step': 14930, 'epoch': 3} {'type': 'loss', 'content': 0.052645351737737656, 'timestamp': '2025-09-10 02:43:38.186798', 'step': 14931, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 3322488817984}, 'timestamp': '2025-09-10 02:43:38.219266', 'step': 14931, 'epoch': 3} {'type': 'loss', 'content': 0.1311795860528946, 'timestamp': '2025-09-10 02:43:38.242617', 'step': 14932, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 3322488817984}, 'timestamp': '2025-09-10 02:43:38.272853', 'step': 14932, 'epoch': 3} {'type': 'loss', 'content': 0.06536328792572021, 'timestamp': '2025-09-10 02:43:38.274652', 'step': 14933, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 128], 'flops': 3797092544000}, 'timestamp': '2025-09-10 02:43:38.304821', 'step': 14933, 'epoch': 3} {'type': 'loss', 'content': 0.060149166733026505, 'timestamp': '2025-09-10 02:43:38.306718', 'step': 14934, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 3322488817984}, 'timestamp': '2025-09-10 02:43:38.336738', 'step': 14934, 'epoch': 3} {'type': 'loss', 'content': 0.1179494857788086, 'timestamp': '2025-09-10 02:43:38.339364', 'step': 14935, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 128], 'flops': 3797092544000}, 'timestamp': '2025-09-10 02:43:38.370884', 'step': 14935, 'epoch': 3} {'type': 'loss', 'content': 0.1446755826473236, 'timestamp': '2025-09-10 02:43:38.394780', 'step': 14936, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 2847885091968}, 'timestamp': '2025-09-10 02:43:38.424312', 'step': 14936, 'epoch': 3} {'type': 'loss', 'content': 0.02864670380949974, 'timestamp': '2025-09-10 02:43:38.426444', 'step': 14937, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 2847885091968}, 'timestamp': '2025-09-10 02:43:38.456324', 'step': 14937, 'epoch': 3} {'type': 'loss', 'content': 0.07948797196149826, 'timestamp': '2025-09-10 02:43:38.458771', 'step': 14938, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 144], 'flops': 4271696270016}, 'timestamp': '2025-09-10 02:43:38.489883', 'step': 14938, 'epoch': 3} {'type': 'loss', 'content': 0.1337646096944809, 'timestamp': '2025-09-10 02:43:38.492396', 'step': 14939, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 2847885091968}, 'timestamp': '2025-09-10 02:43:38.521949', 'step': 14939, 'epoch': 3} {'type': 'loss', 'content': 0.07000910490751266, 'timestamp': '2025-09-10 02:43:38.545361', 'step': 14940, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 2847885091968}, 'timestamp': '2025-09-10 02:43:38.575615', 'step': 14940, 'epoch': 3} {'type': 'loss', 'content': 0.03847973421216011, 'timestamp': '2025-09-10 02:43:38.577551', 'step': 14941, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 128], 'flops': 3797092544000}, 'timestamp': '2025-09-10 02:43:38.608402', 'step': 14941, 'epoch': 3} {'type': 'loss', 'content': 0.10934009402990341, 'timestamp': '2025-09-10 02:43:38.610236', 'step': 14942, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 128], 'flops': 3797092544000}, 'timestamp': '2025-09-10 02:43:38.639549', 'step': 14942, 'epoch': 3} {'type': 'loss', 'content': 0.036322515457868576, 'timestamp': '2025-09-10 02:43:38.641988', 'step': 14943, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 2847885091968}, 'timestamp': '2025-09-10 02:43:38.672622', 'step': 14943, 'epoch': 3} {'type': 'loss', 'content': 0.06985463201999664, 'timestamp': '2025-09-10 02:43:38.695956', 'step': 14944, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 3322488817984}, 'timestamp': '2025-09-10 02:43:38.727141', 'step': 14944, 'epoch': 3} {'type': 'loss', 'content': 0.03548024967312813, 'timestamp': '2025-09-10 02:43:38.729087', 'step': 14945, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 160], 'flops': 4746299996032}, 'timestamp': '2025-09-10 02:43:38.759205', 'step': 14945, 'epoch': 3} {'type': 'loss', 'content': 0.12417548894882202, 'timestamp': '2025-09-10 02:43:38.761839', 'step': 14946, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 3322488817984}, 'timestamp': '2025-09-10 02:43:38.791733', 'step': 14946, 'epoch': 3} {'type': 'loss', 'content': 0.12815208733081818, 'timestamp': '2025-09-10 02:43:38.794217', 'step': 14947, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 128], 'flops': 3797092544000}, 'timestamp': '2025-09-10 02:43:38.823822', 'step': 14947, 'epoch': 3} {'type': 'loss', 'content': 0.052599307149648666, 'timestamp': '2025-09-10 02:43:38.847008', 'step': 14948, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 128], 'flops': 3797092544000}, 'timestamp': '2025-09-10 02:43:38.879952', 'step': 14948, 'epoch': 3} {'type': 'loss', 'content': 0.07223481684923172, 'timestamp': '2025-09-10 02:43:38.881975', 'step': 14949, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 2847885091968}, 'timestamp': '2025-09-10 02:43:38.911790', 'step': 14949, 'epoch': 3} {'type': 'loss', 'content': 0.04559047520160675, 'timestamp': '2025-09-10 02:43:38.914147', 'step': 14950, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 3322488817984}, 'timestamp': '2025-09-10 02:43:38.944461', 'step': 14950, 'epoch': 3} {'type': 'loss', 'content': 0.08839567750692368, 'timestamp': '2025-09-10 02:43:38.946724', 'step': 14951, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 128], 'flops': 3797092544000}, 'timestamp': '2025-09-10 02:43:38.977943', 'step': 14951, 'epoch': 3} {'type': 'loss', 'content': 0.03751763328909874, 'timestamp': '2025-09-10 02:43:39.001673', 'step': 14952, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 3322488817984}, 'timestamp': '2025-09-10 02:43:39.032007', 'step': 14952, 'epoch': 3} {'type': 'loss', 'content': 0.09266474097967148, 'timestamp': '2025-09-10 02:43:39.034520', 'step': 14953, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 3322488817984}, 'timestamp': '2025-09-10 02:43:39.064540', 'step': 14953, 'epoch': 3} {'type': 'loss', 'content': 0.09162509441375732, 'timestamp': '2025-09-10 02:43:39.066715', 'step': 14954, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 128], 'flops': 3797092544000}, 'timestamp': '2025-09-10 02:43:39.096966', 'step': 14954, 'epoch': 3} {'type': 'loss', 'content': 0.12266404926776886, 'timestamp': '2025-09-10 02:43:39.099251', 'step': 14955, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 128], 'flops': 3797092544000}, 'timestamp': '2025-09-10 02:43:39.128787', 'step': 14955, 'epoch': 3} {'type': 'loss', 'content': 0.03995779529213905, 'timestamp': '2025-09-10 02:43:39.151685', 'step': 14956, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 3322488817984}, 'timestamp': '2025-09-10 02:43:39.182425', 'step': 14956, 'epoch': 3} {'type': 'loss', 'content': 0.0486188568174839, 'timestamp': '2025-09-10 02:43:39.184485', 'step': 14957, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 2847885091968}, 'timestamp': '2025-09-10 02:43:39.215339', 'step': 14957, 'epoch': 3} {'type': 'loss', 'content': 0.07841341197490692, 'timestamp': '2025-09-10 02:43:39.217285', 'step': 14958, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 2847885091968}, 'timestamp': '2025-09-10 02:43:39.246731', 'step': 14958, 'epoch': 3} {'type': 'loss', 'content': 0.12647868692874908, 'timestamp': '2025-09-10 02:43:39.248855', 'step': 14959, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 3322488817984}, 'timestamp': '2025-09-10 02:43:39.279256', 'step': 14959, 'epoch': 3} {'type': 'loss', 'content': 0.055053725838661194, 'timestamp': '2025-09-10 02:43:39.302357', 'step': 14960, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 128], 'flops': 3797092544000}, 'timestamp': '2025-09-10 02:43:39.332596', 'step': 14960, 'epoch': 3} {'type': 'loss', 'content': 0.05018123611807823, 'timestamp': '2025-09-10 02:43:39.334876', 'step': 14961, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 3322488817984}, 'timestamp': '2025-09-10 02:43:39.365269', 'step': 14961, 'epoch': 3} {'type': 'loss', 'content': 0.10912448167800903, 'timestamp': '2025-09-10 02:43:39.367654', 'step': 14962, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 3322488817984}, 'timestamp': '2025-09-10 02:43:39.397872', 'step': 14962, 'epoch': 3} {'type': 'loss', 'content': 0.09618179500102997, 'timestamp': '2025-09-10 02:43:39.401242', 'step': 14963, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 3322488817984}, 'timestamp': '2025-09-10 02:43:39.432326', 'step': 14963, 'epoch': 3} {'type': 'loss', 'content': 0.08701053261756897, 'timestamp': '2025-09-10 02:43:39.455687', 'step': 14964, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 3322488817984}, 'timestamp': '2025-09-10 02:43:39.485924', 'step': 14964, 'epoch': 3} {'type': 'loss', 'content': 0.11777280271053314, 'timestamp': '2025-09-10 02:43:39.488174', 'step': 14965, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 2847885091968}, 'timestamp': '2025-09-10 02:43:39.519288', 'step': 14965, 'epoch': 3} {'type': 'loss', 'content': 0.041659846901893616, 'timestamp': '2025-09-10 02:43:39.521602', 'step': 14966, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 144], 'flops': 4271696270016}, 'timestamp': '2025-09-10 02:43:39.552218', 'step': 14966, 'epoch': 3} {'type': 'loss', 'content': 0.09349270910024643, 'timestamp': '2025-09-10 02:43:39.554691', 'step': 14967, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 3322488817984}, 'timestamp': '2025-09-10 02:43:39.587010', 'step': 14967, 'epoch': 3} {'type': 'loss', 'content': 0.10071639716625214, 'timestamp': '2025-09-10 02:43:39.610698', 'step': 14968, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 128], 'flops': 3797092544000}, 'timestamp': '2025-09-10 02:43:39.641041', 'step': 14968, 'epoch': 3} {'type': 'loss', 'content': 0.11578138172626495, 'timestamp': '2025-09-10 02:43:39.643215', 'step': 14969, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 3322488817984}, 'timestamp': '2025-09-10 02:43:39.672942', 'step': 14969, 'epoch': 3} {'type': 'loss', 'content': 0.07092441618442535, 'timestamp': '2025-09-10 02:43:39.675913', 'step': 14970, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 2847885091968}, 'timestamp': '2025-09-10 02:43:39.706302', 'step': 14970, 'epoch': 3} {'type': 'loss', 'content': 0.07632816582918167, 'timestamp': '2025-09-10 02:43:39.708224', 'step': 14971, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 3322488817984}, 'timestamp': '2025-09-10 02:43:39.737645', 'step': 14971, 'epoch': 3} {'type': 'loss', 'content': 0.07441894710063934, 'timestamp': '2025-09-10 02:43:39.761242', 'step': 14972, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 3322488817984}, 'timestamp': '2025-09-10 02:43:39.790725', 'step': 14972, 'epoch': 3} {'type': 'loss', 'content': 0.055680278688669205, 'timestamp': '2025-09-10 02:43:39.792745', 'step': 14973, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 144], 'flops': 4271696270016}, 'timestamp': '2025-09-10 02:43:39.823666', 'step': 14973, 'epoch': 3} {'type': 'loss', 'content': 0.0583902932703495, 'timestamp': '2025-09-10 02:43:39.825795', 'step': 14974, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 144], 'flops': 4271696270016}, 'timestamp': '2025-09-10 02:43:39.855209', 'step': 14974, 'epoch': 3} {'type': 'loss', 'content': 0.09334631264209747, 'timestamp': '2025-09-10 02:43:39.857649', 'step': 14975, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 80], 'flops': 2373281365952}, 'timestamp': '2025-09-10 02:43:39.887693', 'step': 14975, 'epoch': 3} {'type': 'loss', 'content': 0.07342589646577835, 'timestamp': '2025-09-10 02:43:39.911138', 'step': 14976, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 128], 'flops': 3797092544000}, 'timestamp': '2025-09-10 02:43:39.940355', 'step': 14976, 'epoch': 3} {'type': 'loss', 'content': 0.09048736840486526, 'timestamp': '2025-09-10 02:43:39.942577', 'step': 14977, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 2847885091968}, 'timestamp': '2025-09-10 02:43:39.973741', 'step': 14977, 'epoch': 3} {'type': 'loss', 'content': 0.08560126274824142, 'timestamp': '2025-09-10 02:43:39.976989', 'step': 14978, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 3322488817984}, 'timestamp': '2025-09-10 02:43:40.008286', 'step': 14978, 'epoch': 3} {'type': 'loss', 'content': 0.10484804213047028, 'timestamp': '2025-09-10 02:43:40.010226', 'step': 14979, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 160], 'flops': 4746299996032}, 'timestamp': '2025-09-10 02:43:40.039650', 'step': 14979, 'epoch': 3} {'type': 'loss', 'content': 0.07040222734212875, 'timestamp': '2025-09-10 02:43:40.063289', 'step': 14980, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 128], 'flops': 3797092544000}, 'timestamp': '2025-09-10 02:43:40.093803', 'step': 14980, 'epoch': 3} {'type': 'loss', 'content': 0.0753619447350502, 'timestamp': '2025-09-10 02:43:40.096369', 'step': 14981, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 128], 'flops': 3797092544000}, 'timestamp': '2025-09-10 02:43:40.125896', 'step': 14981, 'epoch': 3} {'type': 'loss', 'content': 0.06805457919836044, 'timestamp': '2025-09-10 02:43:40.128094', 'step': 14982, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 3322488817984}, 'timestamp': '2025-09-10 02:43:40.157518', 'step': 14982, 'epoch': 3} {'type': 'loss', 'content': 0.18473438918590546, 'timestamp': '2025-09-10 02:43:40.159878', 'step': 14983, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 2847885091968}, 'timestamp': '2025-09-10 02:43:40.189337', 'step': 14983, 'epoch': 3} {'type': 'loss', 'content': 0.05571630969643593, 'timestamp': '2025-09-10 02:43:40.212453', 'step': 14984, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 3322488817984}, 'timestamp': '2025-09-10 02:43:40.241978', 'step': 14984, 'epoch': 3} {'type': 'loss', 'content': 0.02562502957880497, 'timestamp': '2025-09-10 02:43:40.243910', 'step': 14985, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 80], 'flops': 2373281365952}, 'timestamp': '2025-09-10 02:43:40.273094', 'step': 14985, 'epoch': 3} {'type': 'loss', 'content': 0.07252458482980728, 'timestamp': '2025-09-10 02:43:40.275391', 'step': 14986, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 144], 'flops': 4271696270016}, 'timestamp': '2025-09-10 02:43:40.305466', 'step': 14986, 'epoch': 3} {'type': 'loss', 'content': 0.05478253960609436, 'timestamp': '2025-09-10 02:43:40.307895', 'step': 14987, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 2847885091968}, 'timestamp': '2025-09-10 02:43:40.339225', 'step': 14987, 'epoch': 3} {'type': 'loss', 'content': 0.08163613080978394, 'timestamp': '2025-09-10 02:43:40.362260', 'step': 14988, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 128], 'flops': 3797092544000}, 'timestamp': '2025-09-10 02:43:40.392349', 'step': 14988, 'epoch': 3} {'type': 'loss', 'content': 0.10734941065311432, 'timestamp': '2025-09-10 02:43:40.395398', 'step': 14989, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 2847885091968}, 'timestamp': '2025-09-10 02:43:40.426373', 'step': 14989, 'epoch': 3} {'type': 'loss', 'content': 0.08291520178318024, 'timestamp': '2025-09-10 02:43:40.428856', 'step': 14990, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 2847885091968}, 'timestamp': '2025-09-10 02:43:40.459689', 'step': 14990, 'epoch': 3} {'type': 'loss', 'content': 0.07095814496278763, 'timestamp': '2025-09-10 02:43:40.462087', 'step': 14991, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 2847885091968}, 'timestamp': '2025-09-10 02:43:40.491433', 'step': 14991, 'epoch': 3} {'type': 'loss', 'content': 0.07754513621330261, 'timestamp': '2025-09-10 02:43:40.514915', 'step': 14992, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 3322488817984}, 'timestamp': '2025-09-10 02:43:40.544896', 'step': 14992, 'epoch': 3} {'type': 'loss', 'content': 0.08513706177473068, 'timestamp': '2025-09-10 02:43:40.547605', 'step': 14993, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 2847885091968}, 'timestamp': '2025-09-10 02:43:40.577502', 'step': 14993, 'epoch': 3} {'type': 'loss', 'content': 0.05211145803332329, 'timestamp': '2025-09-10 02:43:40.579697', 'step': 14994, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 128], 'flops': 3797092544000}, 'timestamp': '2025-09-10 02:43:40.610922', 'step': 14994, 'epoch': 3} {'type': 'loss', 'content': 0.08216114342212677, 'timestamp': '2025-09-10 02:43:40.613514', 'step': 14995, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 3322488817984}, 'timestamp': '2025-09-10 02:43:40.644342', 'step': 14995, 'epoch': 3} {'type': 'loss', 'content': 0.08281678706407547, 'timestamp': '2025-09-10 02:43:40.667560', 'step': 14996, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 128], 'flops': 3797092544000}, 'timestamp': '2025-09-10 02:43:40.696780', 'step': 14996, 'epoch': 3} {'type': 'loss', 'content': 0.05836052820086479, 'timestamp': '2025-09-10 02:43:40.698954', 'step': 14997, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 2847885091968}, 'timestamp': '2025-09-10 02:43:40.728884', 'step': 14997, 'epoch': 3} {'type': 'loss', 'content': 0.041341349482536316, 'timestamp': '2025-09-10 02:43:40.731374', 'step': 14998, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 2847885091968}, 'timestamp': '2025-09-10 02:43:40.761375', 'step': 14998, 'epoch': 3} {'type': 'loss', 'content': 0.04271923005580902, 'timestamp': '2025-09-10 02:43:40.763947', 'step': 14999, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 2847885091968}, 'timestamp': '2025-09-10 02:43:40.793732', 'step': 14999, 'epoch': 3} {'type': 'loss', 'content': 0.06904813647270203, 'timestamp': '2025-09-10 02:43:40.817397', 'step': 15000, 'epoch': 3} {'type': 'info', 'content': 'Checkpoint saved at step 15000', 'timestamp': '2025-09-10 02:43:47.515970', 'step': 15000, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 80], 'flops': 2373281365952}, 'timestamp': '2025-09-10 02:43:47.556455', 'step': 15000, 'epoch': 3} {'type': 'loss', 'content': 0.09780096262693405, 'timestamp': '2025-09-10 02:43:47.558709', 'step': 15001, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 2847885091968}, 'timestamp': '2025-09-10 02:43:47.589965', 'step': 15001, 'epoch': 3} {'type': 'loss', 'content': 0.08748539537191391, 'timestamp': '2025-09-10 02:43:47.592179', 'step': 15002, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 128], 'flops': 3797092544000}, 'timestamp': '2025-09-10 02:43:47.621811', 'step': 15002, 'epoch': 3} {'type': 'loss', 'content': 0.09005846083164215, 'timestamp': '2025-09-10 02:43:47.624320', 'step': 15003, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 3322488817984}, 'timestamp': '2025-09-10 02:43:47.654149', 'step': 15003, 'epoch': 3} {'type': 'loss', 'content': 0.09248620271682739, 'timestamp': '2025-09-10 02:43:47.677842', 'step': 15004, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 2847885091968}, 'timestamp': '2025-09-10 02:43:47.707460', 'step': 15004, 'epoch': 3} {'type': 'loss', 'content': 0.055103451013565063, 'timestamp': '2025-09-10 02:43:47.711074', 'step': 15005, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 128], 'flops': 3797092544000}, 'timestamp': '2025-09-10 02:43:47.743766', 'step': 15005, 'epoch': 3} {'type': 'loss', 'content': 0.027356382459402084, 'timestamp': '2025-09-10 02:43:47.746206', 'step': 15006, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 3322488817984}, 'timestamp': '2025-09-10 02:43:47.778204', 'step': 15006, 'epoch': 3} {'type': 'loss', 'content': 0.08262190967798233, 'timestamp': '2025-09-10 02:43:47.780476', 'step': 15007, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 128], 'flops': 3797092544000}, 'timestamp': '2025-09-10 02:43:47.810233', 'step': 15007, 'epoch': 3} {'type': 'loss', 'content': 0.05164298042654991, 'timestamp': '2025-09-10 02:43:47.833763', 'step': 15008, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 3322488817984}, 'timestamp': '2025-09-10 02:43:47.865310', 'step': 15008, 'epoch': 3} {'type': 'loss', 'content': 0.0622793547809124, 'timestamp': '2025-09-10 02:43:47.867786', 'step': 15009, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 3322488817984}, 'timestamp': '2025-09-10 02:43:47.898321', 'step': 15009, 'epoch': 3} {'type': 'loss', 'content': 0.10818496346473694, 'timestamp': '2025-09-10 02:43:47.900733', 'step': 15010, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 3322488817984}, 'timestamp': '2025-09-10 02:43:47.930408', 'step': 15010, 'epoch': 3} {'type': 'loss', 'content': 0.06556069850921631, 'timestamp': '2025-09-10 02:43:47.932516', 'step': 15011, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 3322488817984}, 'timestamp': '2025-09-10 02:43:47.963554', 'step': 15011, 'epoch': 3} {'type': 'loss', 'content': 0.07358453422784805, 'timestamp': '2025-09-10 02:43:47.987281', 'step': 15012, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 3322488817984}, 'timestamp': '2025-09-10 02:43:48.016874', 'step': 15012, 'epoch': 3} {'type': 'loss', 'content': 0.09524763375520706, 'timestamp': '2025-09-10 02:43:48.019918', 'step': 15013, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 160], 'flops': 4746299996032}, 'timestamp': '2025-09-10 02:43:48.051180', 'step': 15013, 'epoch': 3} {'type': 'loss', 'content': 0.07579861581325531, 'timestamp': '2025-09-10 02:43:48.054019', 'step': 15014, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 3322488817984}, 'timestamp': '2025-09-10 02:43:48.084398', 'step': 15014, 'epoch': 3} {'type': 'loss', 'content': 0.05026108771562576, 'timestamp': '2025-09-10 02:43:48.086633', 'step': 15015, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 128], 'flops': 3797092544000}, 'timestamp': '2025-09-10 02:43:48.116999', 'step': 15015, 'epoch': 3} {'type': 'loss', 'content': 0.08914641290903091, 'timestamp': '2025-09-10 02:43:48.140539', 'step': 15016, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 2847885091968}, 'timestamp': '2025-09-10 02:43:48.170311', 'step': 15016, 'epoch': 3} {'type': 'loss', 'content': 0.07954313606023788, 'timestamp': '2025-09-10 02:43:48.172646', 'step': 15017, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 2847885091968}, 'timestamp': '2025-09-10 02:43:48.202760', 'step': 15017, 'epoch': 3} {'type': 'loss', 'content': 0.09612134844064713, 'timestamp': '2025-09-10 02:43:48.205255', 'step': 15018, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 128], 'flops': 3797092544000}, 'timestamp': '2025-09-10 02:43:48.235636', 'step': 15018, 'epoch': 3} {'type': 'loss', 'content': 0.0874432772397995, 'timestamp': '2025-09-10 02:43:48.237999', 'step': 15019, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 2847885091968}, 'timestamp': '2025-09-10 02:43:48.267891', 'step': 15019, 'epoch': 3} {'type': 'loss', 'content': 0.07683815807104111, 'timestamp': '2025-09-10 02:43:48.292657', 'step': 15020, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 128], 'flops': 3797092544000}, 'timestamp': '2025-09-10 02:43:48.324154', 'step': 15020, 'epoch': 3} {'type': 'loss', 'content': 0.1976669430732727, 'timestamp': '2025-09-10 02:43:48.326526', 'step': 15021, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 3322488817984}, 'timestamp': '2025-09-10 02:43:48.356511', 'step': 15021, 'epoch': 3} {'type': 'loss', 'content': 0.05142877623438835, 'timestamp': '2025-09-10 02:43:48.358910', 'step': 15022, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 128], 'flops': 3797092544000}, 'timestamp': '2025-09-10 02:43:48.389130', 'step': 15022, 'epoch': 3} {'type': 'loss', 'content': 0.09034930914640427, 'timestamp': '2025-09-10 02:43:48.391586', 'step': 15023, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 2847885091968}, 'timestamp': '2025-09-10 02:43:48.421509', 'step': 15023, 'epoch': 3} {'type': 'loss', 'content': 0.15267106890678406, 'timestamp': '2025-09-10 02:43:48.445349', 'step': 15024, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 2847885091968}, 'timestamp': '2025-09-10 02:43:48.475224', 'step': 15024, 'epoch': 3} {'type': 'loss', 'content': 0.10501930117607117, 'timestamp': '2025-09-10 02:43:48.477689', 'step': 15025, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 2847885091968}, 'timestamp': '2025-09-10 02:43:48.508458', 'step': 15025, 'epoch': 3} {'type': 'loss', 'content': 0.05625450611114502, 'timestamp': '2025-09-10 02:43:48.510631', 'step': 15026, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 128], 'flops': 3797092544000}, 'timestamp': '2025-09-10 02:43:48.544418', 'step': 15026, 'epoch': 3} {'type': 'loss', 'content': 0.08756490796804428, 'timestamp': '2025-09-10 02:43:48.546608', 'step': 15027, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 3322488817984}, 'timestamp': '2025-09-10 02:43:48.578024', 'step': 15027, 'epoch': 3} {'type': 'loss', 'content': 0.11587651818990707, 'timestamp': '2025-09-10 02:43:48.601868', 'step': 15028, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 160], 'flops': 4746299996032}, 'timestamp': '2025-09-10 02:43:48.632086', 'step': 15028, 'epoch': 3} {'type': 'loss', 'content': 0.09239964187145233, 'timestamp': '2025-09-10 02:43:48.634550', 'step': 15029, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 2847885091968}, 'timestamp': '2025-09-10 02:43:48.665037', 'step': 15029, 'epoch': 3} {'type': 'loss', 'content': 0.12070386856794357, 'timestamp': '2025-09-10 02:43:48.667448', 'step': 15030, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 2847885091968}, 'timestamp': '2025-09-10 02:43:48.697605', 'step': 15030, 'epoch': 3} {'type': 'loss', 'content': 0.05187952145934105, 'timestamp': '2025-09-10 02:43:48.699862', 'step': 15031, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 3322488817984}, 'timestamp': '2025-09-10 02:43:48.730024', 'step': 15031, 'epoch': 3} {'type': 'loss', 'content': 0.0635339692234993, 'timestamp': '2025-09-10 02:43:48.753433', 'step': 15032, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 2847885091968}, 'timestamp': '2025-09-10 02:43:48.784328', 'step': 15032, 'epoch': 3} {'type': 'loss', 'content': 0.12058757245540619, 'timestamp': '2025-09-10 02:43:48.786495', 'step': 15033, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 2847885091968}, 'timestamp': '2025-09-10 02:43:48.817633', 'step': 15033, 'epoch': 3} {'type': 'loss', 'content': 0.15320953726768494, 'timestamp': '2025-09-10 02:43:48.820241', 'step': 15034, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 80], 'flops': 2373281365952}, 'timestamp': '2025-09-10 02:43:48.853312', 'step': 15034, 'epoch': 3} {'type': 'loss', 'content': 0.07662533223628998, 'timestamp': '2025-09-10 02:43:48.856688', 'step': 15035, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 3322488817984}, 'timestamp': '2025-09-10 02:43:48.889459', 'step': 15035, 'epoch': 3} {'type': 'loss', 'content': 0.1710149645805359, 'timestamp': '2025-09-10 02:43:48.913856', 'step': 15036, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 3322488817984}, 'timestamp': '2025-09-10 02:43:48.949621', 'step': 15036, 'epoch': 3} {'type': 'loss', 'content': 0.08417588472366333, 'timestamp': '2025-09-10 02:43:48.951653', 'step': 15037, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 3322488817984}, 'timestamp': '2025-09-10 02:43:48.982533', 'step': 15037, 'epoch': 3} {'type': 'loss', 'content': 0.1326172798871994, 'timestamp': '2025-09-10 02:43:48.985620', 'step': 15038, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 144], 'flops': 4271696270016}, 'timestamp': '2025-09-10 02:43:49.017344', 'step': 15038, 'epoch': 3} {'type': 'loss', 'content': 0.06965332478284836, 'timestamp': '2025-09-10 02:43:49.022936', 'step': 15039, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 2847885091968}, 'timestamp': '2025-09-10 02:43:49.058630', 'step': 15039, 'epoch': 3} {'type': 'loss', 'content': 0.1654321402311325, 'timestamp': '2025-09-10 02:43:49.082218', 'step': 15040, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 3322488817984}, 'timestamp': '2025-09-10 02:43:49.112128', 'step': 15040, 'epoch': 3} {'type': 'loss', 'content': 0.11825527250766754, 'timestamp': '2025-09-10 02:43:49.114490', 'step': 15041, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 160], 'flops': 4746299996032}, 'timestamp': '2025-09-10 02:43:49.144585', 'step': 15041, 'epoch': 3} {'type': 'loss', 'content': 0.07084057480096817, 'timestamp': '2025-09-10 02:43:49.147335', 'step': 15042, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 80], 'flops': 2373281365952}, 'timestamp': '2025-09-10 02:43:49.177555', 'step': 15042, 'epoch': 3} {'type': 'loss', 'content': 0.10432758182287216, 'timestamp': '2025-09-10 02:43:49.180072', 'step': 15043, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 2847885091968}, 'timestamp': '2025-09-10 02:43:49.209800', 'step': 15043, 'epoch': 3} {'type': 'loss', 'content': 0.08644171059131622, 'timestamp': '2025-09-10 02:43:49.233069', 'step': 15044, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 128], 'flops': 3797092544000}, 'timestamp': '2025-09-10 02:43:49.264241', 'step': 15044, 'epoch': 3} {'type': 'loss', 'content': 0.007324595004320145, 'timestamp': '2025-09-10 02:43:49.266471', 'step': 15045, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 128], 'flops': 3797092544000}, 'timestamp': '2025-09-10 02:43:49.297845', 'step': 15045, 'epoch': 3} {'type': 'loss', 'content': 0.036263979971408844, 'timestamp': '2025-09-10 02:43:49.300433', 'step': 15046, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 128], 'flops': 3797092544000}, 'timestamp': '2025-09-10 02:43:49.330181', 'step': 15046, 'epoch': 3} {'type': 'loss', 'content': 0.07730944454669952, 'timestamp': '2025-09-10 02:43:49.332585', 'step': 15047, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 3322488817984}, 'timestamp': '2025-09-10 02:43:49.363519', 'step': 15047, 'epoch': 3} {'type': 'loss', 'content': 0.19519397616386414, 'timestamp': '2025-09-10 02:43:49.386950', 'step': 15048, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 2847885091968}, 'timestamp': '2025-09-10 02:43:49.418477', 'step': 15048, 'epoch': 3} {'type': 'loss', 'content': 0.1623145192861557, 'timestamp': '2025-09-10 02:43:49.420977', 'step': 15049, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 3322488817984}, 'timestamp': '2025-09-10 02:43:49.451290', 'step': 15049, 'epoch': 3} {'type': 'loss', 'content': 0.08036595582962036, 'timestamp': '2025-09-10 02:43:49.453394', 'step': 15050, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 128], 'flops': 3797092544000}, 'timestamp': '2025-09-10 02:43:49.484370', 'step': 15050, 'epoch': 3} {'type': 'loss', 'content': 0.07772409170866013, 'timestamp': '2025-09-10 02:43:49.486538', 'step': 15051, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 128], 'flops': 3797092544000}, 'timestamp': '2025-09-10 02:43:49.516646', 'step': 15051, 'epoch': 3} {'type': 'loss', 'content': 0.0584641769528389, 'timestamp': '2025-09-10 02:43:49.540867', 'step': 15052, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 128], 'flops': 3797092544000}, 'timestamp': '2025-09-10 02:43:49.572202', 'step': 15052, 'epoch': 3} {'type': 'loss', 'content': 0.1570737659931183, 'timestamp': '2025-09-10 02:43:49.574477', 'step': 15053, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 3322488817984}, 'timestamp': '2025-09-10 02:43:49.604344', 'step': 15053, 'epoch': 3} {'type': 'loss', 'content': 0.15708816051483154, 'timestamp': '2025-09-10 02:43:49.606697', 'step': 15054, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 3322488817984}, 'timestamp': '2025-09-10 02:43:49.638039', 'step': 15054, 'epoch': 3} {'type': 'loss', 'content': 0.03689098730683327, 'timestamp': '2025-09-10 02:43:49.640345', 'step': 15055, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 2847885091968}, 'timestamp': '2025-09-10 02:43:49.670885', 'step': 15055, 'epoch': 3} {'type': 'loss', 'content': 0.10211730003356934, 'timestamp': '2025-09-10 02:43:49.694587', 'step': 15056, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 128], 'flops': 3797092544000}, 'timestamp': '2025-09-10 02:43:49.725450', 'step': 15056, 'epoch': 3} {'type': 'loss', 'content': 0.04507341608405113, 'timestamp': '2025-09-10 02:43:49.727903', 'step': 15057, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 3322488817984}, 'timestamp': '2025-09-10 02:43:49.760102', 'step': 15057, 'epoch': 3} {'type': 'loss', 'content': 0.08860000222921371, 'timestamp': '2025-09-10 02:43:49.762506', 'step': 15058, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 3322488817984}, 'timestamp': '2025-09-10 02:43:49.792803', 'step': 15058, 'epoch': 3} {'type': 'loss', 'content': 0.10698697715997696, 'timestamp': '2025-09-10 02:43:49.795385', 'step': 15059, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 80], 'flops': 2373281365952}, 'timestamp': '2025-09-10 02:43:49.825406', 'step': 15059, 'epoch': 3} {'type': 'loss', 'content': 0.07999438792467117, 'timestamp': '2025-09-10 02:43:49.848962', 'step': 15060, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 3322488817984}, 'timestamp': '2025-09-10 02:43:49.881303', 'step': 15060, 'epoch': 3} {'type': 'loss', 'content': 0.15078839659690857, 'timestamp': '2025-09-10 02:43:49.883514', 'step': 15061, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 2847885091968}, 'timestamp': '2025-09-10 02:43:49.913545', 'step': 15061, 'epoch': 3} {'type': 'loss', 'content': 0.08933468163013458, 'timestamp': '2025-09-10 02:43:49.915916', 'step': 15062, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 3322488817984}, 'timestamp': '2025-09-10 02:43:49.946155', 'step': 15062, 'epoch': 3} {'type': 'loss', 'content': 0.11462489515542984, 'timestamp': '2025-09-10 02:43:49.948600', 'step': 15063, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 128], 'flops': 3797092544000}, 'timestamp': '2025-09-10 02:43:49.978123', 'step': 15063, 'epoch': 3} {'type': 'loss', 'content': 0.089520163834095, 'timestamp': '2025-09-10 02:43:50.001670', 'step': 15064, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 3322488817984}, 'timestamp': '2025-09-10 02:43:50.031906', 'step': 15064, 'epoch': 3} {'type': 'loss', 'content': 0.11359181255102158, 'timestamp': '2025-09-10 02:43:50.034189', 'step': 15065, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 2847885091968}, 'timestamp': '2025-09-10 02:43:50.065197', 'step': 15065, 'epoch': 3} {'type': 'loss', 'content': 0.17456459999084473, 'timestamp': '2025-09-10 02:43:50.067354', 'step': 15066, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 128], 'flops': 3797092544000}, 'timestamp': '2025-09-10 02:43:50.097675', 'step': 15066, 'epoch': 3} {'type': 'loss', 'content': 0.09240740537643433, 'timestamp': '2025-09-10 02:43:50.100170', 'step': 15067, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 128], 'flops': 3797092544000}, 'timestamp': '2025-09-10 02:43:50.130002', 'step': 15067, 'epoch': 3} {'type': 'loss', 'content': 0.07824190706014633, 'timestamp': '2025-09-10 02:43:50.153345', 'step': 15068, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 160], 'flops': 4746299996032}, 'timestamp': '2025-09-10 02:43:50.183864', 'step': 15068, 'epoch': 3} {'type': 'loss', 'content': 0.022497009485960007, 'timestamp': '2025-09-10 02:43:50.185809', 'step': 15069, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 3322488817984}, 'timestamp': '2025-09-10 02:43:50.215723', 'step': 15069, 'epoch': 3} {'type': 'loss', 'content': 0.10455262660980225, 'timestamp': '2025-09-10 02:43:50.218518', 'step': 15070, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 80], 'flops': 2373281365952}, 'timestamp': '2025-09-10 02:43:50.248924', 'step': 15070, 'epoch': 3} {'type': 'loss', 'content': 0.07758132368326187, 'timestamp': '2025-09-10 02:43:50.251260', 'step': 15071, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 80], 'flops': 2373281365952}, 'timestamp': '2025-09-10 02:43:50.281436', 'step': 15071, 'epoch': 3} {'type': 'loss', 'content': 0.07060818374156952, 'timestamp': '2025-09-10 02:43:50.305027', 'step': 15072, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 2847885091968}, 'timestamp': '2025-09-10 02:43:50.338201', 'step': 15072, 'epoch': 3} {'type': 'loss', 'content': 0.09302842617034912, 'timestamp': '2025-09-10 02:43:50.340623', 'step': 15073, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 3322488817984}, 'timestamp': '2025-09-10 02:43:50.372101', 'step': 15073, 'epoch': 3} {'type': 'loss', 'content': 0.13638421893119812, 'timestamp': '2025-09-10 02:43:50.374452', 'step': 15074, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 176], 'flops': 5220903722048}, 'timestamp': '2025-09-10 02:43:50.405943', 'step': 15074, 'epoch': 3} {'type': 'loss', 'content': 0.23426982760429382, 'timestamp': '2025-09-10 02:43:50.409997', 'step': 15075, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 128], 'flops': 3797092544000}, 'timestamp': '2025-09-10 02:43:50.440196', 'step': 15075, 'epoch': 3} {'type': 'loss', 'content': 0.13753864169120789, 'timestamp': '2025-09-10 02:43:50.464405', 'step': 15076, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 2847885091968}, 'timestamp': '2025-09-10 02:43:50.494696', 'step': 15076, 'epoch': 3} {'type': 'loss', 'content': 0.03498861938714981, 'timestamp': '2025-09-10 02:43:50.497385', 'step': 15077, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 128], 'flops': 3797092544000}, 'timestamp': '2025-09-10 02:43:50.527657', 'step': 15077, 'epoch': 3} {'type': 'loss', 'content': 0.12701360881328583, 'timestamp': '2025-09-10 02:43:50.529848', 'step': 15078, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 128], 'flops': 3797092544000}, 'timestamp': '2025-09-10 02:43:50.560484', 'step': 15078, 'epoch': 3} {'type': 'loss', 'content': 0.10940273106098175, 'timestamp': '2025-09-10 02:43:50.564075', 'step': 15079, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 128], 'flops': 3797092544000}, 'timestamp': '2025-09-10 02:43:50.594384', 'step': 15079, 'epoch': 3} {'type': 'loss', 'content': 0.07153376936912537, 'timestamp': '2025-09-10 02:43:50.618567', 'step': 15080, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 80], 'flops': 2373281365952}, 'timestamp': '2025-09-10 02:43:50.649021', 'step': 15080, 'epoch': 3} {'type': 'loss', 'content': 0.10860991477966309, 'timestamp': '2025-09-10 02:43:50.651605', 'step': 15081, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 3322488817984}, 'timestamp': '2025-09-10 02:43:50.683374', 'step': 15081, 'epoch': 3} {'type': 'loss', 'content': 0.10776188969612122, 'timestamp': '2025-09-10 02:43:50.685819', 'step': 15082, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 2847885091968}, 'timestamp': '2025-09-10 02:43:50.716159', 'step': 15082, 'epoch': 3} {'type': 'loss', 'content': 0.05956365168094635, 'timestamp': '2025-09-10 02:43:50.718931', 'step': 15083, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 3322488817984}, 'timestamp': '2025-09-10 02:43:50.749979', 'step': 15083, 'epoch': 3} {'type': 'loss', 'content': 0.08912016451358795, 'timestamp': '2025-09-10 02:43:50.773146', 'step': 15084, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 3322488817984}, 'timestamp': '2025-09-10 02:43:50.804215', 'step': 15084, 'epoch': 3} {'type': 'loss', 'content': 0.14194327592849731, 'timestamp': '2025-09-10 02:43:50.806446', 'step': 15085, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 80], 'flops': 2373281365952}, 'timestamp': '2025-09-10 02:43:50.836182', 'step': 15085, 'epoch': 3} {'type': 'loss', 'content': 0.0714382603764534, 'timestamp': '2025-09-10 02:43:50.839099', 'step': 15086, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 2847885091968}, 'timestamp': '2025-09-10 02:43:50.869583', 'step': 15086, 'epoch': 3} {'type': 'loss', 'content': 0.07448311895132065, 'timestamp': '2025-09-10 02:43:50.872248', 'step': 15087, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 128], 'flops': 3797092544000}, 'timestamp': '2025-09-10 02:43:50.902430', 'step': 15087, 'epoch': 3} {'type': 'loss', 'content': 0.06601832062005997, 'timestamp': '2025-09-10 02:43:50.925854', 'step': 15088, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 128], 'flops': 3797092544000}, 'timestamp': '2025-09-10 02:43:50.956768', 'step': 15088, 'epoch': 3} {'type': 'loss', 'content': 0.04308705776929855, 'timestamp': '2025-09-10 02:43:50.959244', 'step': 15089, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 2847885091968}, 'timestamp': '2025-09-10 02:43:50.989555', 'step': 15089, 'epoch': 3} {'type': 'loss', 'content': 0.09955548495054245, 'timestamp': '2025-09-10 02:43:50.992332', 'step': 15090, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 3322488817984}, 'timestamp': '2025-09-10 02:43:51.022277', 'step': 15090, 'epoch': 3} {'type': 'loss', 'content': 0.07037415355443954, 'timestamp': '2025-09-10 02:43:51.029096', 'step': 15091, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 3322488817984}, 'timestamp': '2025-09-10 02:43:51.067331', 'step': 15091, 'epoch': 3} {'type': 'loss', 'content': 0.16122423112392426, 'timestamp': '2025-09-10 02:43:51.092043', 'step': 15092, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 3322488817984}, 'timestamp': '2025-09-10 02:43:51.122663', 'step': 15092, 'epoch': 3} {'type': 'loss', 'content': 0.15736424922943115, 'timestamp': '2025-09-10 02:43:51.124900', 'step': 15093, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 3322488817984}, 'timestamp': '2025-09-10 02:43:51.155367', 'step': 15093, 'epoch': 3} {'type': 'loss', 'content': 0.08598147332668304, 'timestamp': '2025-09-10 02:43:51.157815', 'step': 15094, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 128], 'flops': 3797092544000}, 'timestamp': '2025-09-10 02:43:51.188678', 'step': 15094, 'epoch': 3} {'type': 'loss', 'content': 0.10696810483932495, 'timestamp': '2025-09-10 02:43:51.191350', 'step': 15095, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 128], 'flops': 3797092544000}, 'timestamp': '2025-09-10 02:43:51.222139', 'step': 15095, 'epoch': 3} {'type': 'loss', 'content': 0.14293846487998962, 'timestamp': '2025-09-10 02:43:51.245558', 'step': 15096, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 2847885091968}, 'timestamp': '2025-09-10 02:43:51.275451', 'step': 15096, 'epoch': 3} {'type': 'loss', 'content': 0.1136956661939621, 'timestamp': '2025-09-10 02:43:51.280186', 'step': 15097, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 3322488817984}, 'timestamp': '2025-09-10 02:43:51.309875', 'step': 15097, 'epoch': 3} {'type': 'loss', 'content': 0.023541437461972237, 'timestamp': '2025-09-10 02:43:51.312250', 'step': 15098, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 128], 'flops': 3797092544000}, 'timestamp': '2025-09-10 02:43:51.342435', 'step': 15098, 'epoch': 3} {'type': 'loss', 'content': 0.14713120460510254, 'timestamp': '2025-09-10 02:43:51.344784', 'step': 15099, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 3322488817984}, 'timestamp': '2025-09-10 02:43:51.375468', 'step': 15099, 'epoch': 3} {'type': 'loss', 'content': 0.06482399255037308, 'timestamp': '2025-09-10 02:43:51.401648', 'step': 15100, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 160], 'flops': 4746299996032}, 'timestamp': '2025-09-10 02:43:51.438325', 'step': 15100, 'epoch': 3} {'type': 'loss', 'content': 0.12842895090579987, 'timestamp': '2025-09-10 02:43:51.444078', 'step': 15101, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 128], 'flops': 3797092544000}, 'timestamp': '2025-09-10 02:43:51.483535', 'step': 15101, 'epoch': 3} {'type': 'loss', 'content': 0.04946241155266762, 'timestamp': '2025-09-10 02:43:51.488682', 'step': 15102, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 128], 'flops': 3797092544000}, 'timestamp': '2025-09-10 02:43:51.526343', 'step': 15102, 'epoch': 3} {'type': 'loss', 'content': 0.16867604851722717, 'timestamp': '2025-09-10 02:43:51.531901', 'step': 15103, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 128], 'flops': 3797092544000}, 'timestamp': '2025-09-10 02:43:51.564482', 'step': 15103, 'epoch': 3} {'type': 'loss', 'content': 0.07944747060537338, 'timestamp': '2025-09-10 02:43:51.588030', 'step': 15104, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 160], 'flops': 4746299996032}, 'timestamp': '2025-09-10 02:43:51.619427', 'step': 15104, 'epoch': 3} {'type': 'loss', 'content': 0.10471689701080322, 'timestamp': '2025-09-10 02:43:51.622196', 'step': 15105, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 128], 'flops': 3797092544000}, 'timestamp': '2025-09-10 02:43:51.652292', 'step': 15105, 'epoch': 3} {'type': 'loss', 'content': 0.03572792932391167, 'timestamp': '2025-09-10 02:43:51.654471', 'step': 15106, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 2847885091968}, 'timestamp': '2025-09-10 02:43:51.686276', 'step': 15106, 'epoch': 3} {'type': 'loss', 'content': 0.09202046692371368, 'timestamp': '2025-09-10 02:43:51.688305', 'step': 15107, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 2847885091968}, 'timestamp': '2025-09-10 02:43:51.718498', 'step': 15107, 'epoch': 3} {'type': 'loss', 'content': 0.11466216295957565, 'timestamp': '2025-09-10 02:43:51.742882', 'step': 15108, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 176], 'flops': 5220903722048}, 'timestamp': '2025-09-10 02:43:51.775275', 'step': 15108, 'epoch': 3} {'type': 'loss', 'content': 0.13081254065036774, 'timestamp': '2025-09-10 02:43:51.777938', 'step': 15109, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 144], 'flops': 4271696270016}, 'timestamp': '2025-09-10 02:43:51.810026', 'step': 15109, 'epoch': 3} {'type': 'loss', 'content': 0.03391874581575394, 'timestamp': '2025-09-10 02:43:51.812424', 'step': 15110, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 2847885091968}, 'timestamp': '2025-09-10 02:43:51.843978', 'step': 15110, 'epoch': 3} {'type': 'loss', 'content': 0.02456716261804104, 'timestamp': '2025-09-10 02:43:51.847171', 'step': 15111, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 128], 'flops': 3797092544000}, 'timestamp': '2025-09-10 02:43:51.878424', 'step': 15111, 'epoch': 3} {'type': 'loss', 'content': 0.08389002829790115, 'timestamp': '2025-09-10 02:43:51.902404', 'step': 15112, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 2847885091968}, 'timestamp': '2025-09-10 02:43:51.933345', 'step': 15112, 'epoch': 3} {'type': 'loss', 'content': 0.11304688453674316, 'timestamp': '2025-09-10 02:43:51.935720', 'step': 15113, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 3322488817984}, 'timestamp': '2025-09-10 02:43:51.966156', 'step': 15113, 'epoch': 3} {'type': 'loss', 'content': 0.11085076630115509, 'timestamp': '2025-09-10 02:43:51.968533', 'step': 15114, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 144], 'flops': 4271696270016}, 'timestamp': '2025-09-10 02:43:51.998792', 'step': 15114, 'epoch': 3} {'type': 'loss', 'content': 0.10404732078313828, 'timestamp': '2025-09-10 02:43:52.001466', 'step': 15115, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 128], 'flops': 3797092544000}, 'timestamp': '2025-09-10 02:43:52.031665', 'step': 15115, 'epoch': 3} {'type': 'loss', 'content': 0.028641507029533386, 'timestamp': '2025-09-10 02:43:52.055405', 'step': 15116, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 128], 'flops': 3797092544000}, 'timestamp': '2025-09-10 02:43:52.086391', 'step': 15116, 'epoch': 3} {'type': 'loss', 'content': 0.07784883677959442, 'timestamp': '2025-09-10 02:43:52.088802', 'step': 15117, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 2847885091968}, 'timestamp': '2025-09-10 02:43:52.118542', 'step': 15117, 'epoch': 3} {'type': 'loss', 'content': 0.0683361068367958, 'timestamp': '2025-09-10 02:43:52.121107', 'step': 15118, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 128], 'flops': 3797092544000}, 'timestamp': '2025-09-10 02:43:52.152381', 'step': 15118, 'epoch': 3} {'type': 'loss', 'content': 0.06917885690927505, 'timestamp': '2025-09-10 02:43:52.154646', 'step': 15119, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 144], 'flops': 4271696270016}, 'timestamp': '2025-09-10 02:43:52.185251', 'step': 15119, 'epoch': 3} {'type': 'loss', 'content': 0.11312325298786163, 'timestamp': '2025-09-10 02:43:52.209229', 'step': 15120, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 3322488817984}, 'timestamp': '2025-09-10 02:43:52.239405', 'step': 15120, 'epoch': 3} {'type': 'loss', 'content': 0.18803906440734863, 'timestamp': '2025-09-10 02:43:52.242105', 'step': 15121, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 3322488817984}, 'timestamp': '2025-09-10 02:43:52.272581', 'step': 15121, 'epoch': 3} {'type': 'loss', 'content': 0.08350077271461487, 'timestamp': '2025-09-10 02:43:52.275173', 'step': 15122, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 144], 'flops': 4271696270016}, 'timestamp': '2025-09-10 02:43:52.308873', 'step': 15122, 'epoch': 3} {'type': 'loss', 'content': 0.0878412052989006, 'timestamp': '2025-09-10 02:43:52.311491', 'step': 15123, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 128], 'flops': 3797092544000}, 'timestamp': '2025-09-10 02:43:52.341460', 'step': 15123, 'epoch': 3} {'type': 'loss', 'content': 0.07920423150062561, 'timestamp': '2025-09-10 02:43:52.365050', 'step': 15124, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 2847885091968}, 'timestamp': '2025-09-10 02:43:52.396737', 'step': 15124, 'epoch': 3} {'type': 'loss', 'content': 0.11712613701820374, 'timestamp': '2025-09-10 02:43:52.399306', 'step': 15125, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 2847885091968}, 'timestamp': '2025-09-10 02:43:52.430019', 'step': 15125, 'epoch': 3} {'type': 'loss', 'content': 0.06757817417383194, 'timestamp': '2025-09-10 02:43:52.432726', 'step': 15126, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 3322488817984}, 'timestamp': '2025-09-10 02:43:52.463191', 'step': 15126, 'epoch': 3} {'type': 'loss', 'content': 0.10182264447212219, 'timestamp': '2025-09-10 02:43:52.465747', 'step': 15127, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 2847885091968}, 'timestamp': '2025-09-10 02:43:52.499008', 'step': 15127, 'epoch': 3} {'type': 'loss', 'content': 0.11828320473432541, 'timestamp': '2025-09-10 02:43:52.522443', 'step': 15128, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 2847885091968}, 'timestamp': '2025-09-10 02:43:52.553533', 'step': 15128, 'epoch': 3} {'type': 'loss', 'content': 0.07429805397987366, 'timestamp': '2025-09-10 02:43:52.555838', 'step': 15129, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 2847885091968}, 'timestamp': '2025-09-10 02:43:52.587006', 'step': 15129, 'epoch': 3} {'type': 'loss', 'content': 0.2507927417755127, 'timestamp': '2025-09-10 02:43:52.589651', 'step': 15130, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 2847885091968}, 'timestamp': '2025-09-10 02:43:52.619972', 'step': 15130, 'epoch': 3} {'type': 'loss', 'content': 0.07121838629245758, 'timestamp': '2025-09-10 02:43:52.622803', 'step': 15131, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 128], 'flops': 3797092544000}, 'timestamp': '2025-09-10 02:43:52.654644', 'step': 15131, 'epoch': 3} {'type': 'loss', 'content': 0.12760676443576813, 'timestamp': '2025-09-10 02:43:52.678537', 'step': 15132, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 128], 'flops': 3797092544000}, 'timestamp': '2025-09-10 02:43:52.710262', 'step': 15132, 'epoch': 3} {'type': 'loss', 'content': 0.11069970577955246, 'timestamp': '2025-09-10 02:43:52.712695', 'step': 15133, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 128], 'flops': 3797092544000}, 'timestamp': '2025-09-10 02:43:52.743590', 'step': 15133, 'epoch': 3} {'type': 'loss', 'content': 0.16725465655326843, 'timestamp': '2025-09-10 02:43:52.746256', 'step': 15134, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 3322488817984}, 'timestamp': '2025-09-10 02:43:52.777449', 'step': 15134, 'epoch': 3} {'type': 'loss', 'content': 0.078895702958107, 'timestamp': '2025-09-10 02:43:52.779339', 'step': 15135, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 128], 'flops': 3797092544000}, 'timestamp': '2025-09-10 02:43:52.809631', 'step': 15135, 'epoch': 3} {'type': 'loss', 'content': 0.12406308948993683, 'timestamp': '2025-09-10 02:43:52.833778', 'step': 15136, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 128], 'flops': 3797092544000}, 'timestamp': '2025-09-10 02:43:52.865423', 'step': 15136, 'epoch': 3} {'type': 'loss', 'content': 0.1295572817325592, 'timestamp': '2025-09-10 02:43:52.868234', 'step': 15137, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 128], 'flops': 3797092544000}, 'timestamp': '2025-09-10 02:43:52.901190', 'step': 15137, 'epoch': 3} {'type': 'loss', 'content': 0.08039887249469757, 'timestamp': '2025-09-10 02:43:52.903409', 'step': 15138, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 3322488817984}, 'timestamp': '2025-09-10 02:43:52.934892', 'step': 15138, 'epoch': 3} {'type': 'loss', 'content': 0.14854854345321655, 'timestamp': '2025-09-10 02:43:52.937256', 'step': 15139, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 3322488817984}, 'timestamp': '2025-09-10 02:43:52.968133', 'step': 15139, 'epoch': 3} {'type': 'loss', 'content': 0.12593092024326324, 'timestamp': '2025-09-10 02:43:52.991854', 'step': 15140, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 3322488817984}, 'timestamp': '2025-09-10 02:43:53.022323', 'step': 15140, 'epoch': 3} {'type': 'loss', 'content': 0.15462897717952728, 'timestamp': '2025-09-10 02:43:53.025762', 'step': 15141, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 128], 'flops': 3797092544000}, 'timestamp': '2025-09-10 02:43:53.057644', 'step': 15141, 'epoch': 3} {'type': 'loss', 'content': 0.0743834599852562, 'timestamp': '2025-09-10 02:43:53.060410', 'step': 15142, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 3322488817984}, 'timestamp': '2025-09-10 02:43:53.091514', 'step': 15142, 'epoch': 3} {'type': 'loss', 'content': 0.1072259396314621, 'timestamp': '2025-09-10 02:43:53.093997', 'step': 15143, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 128], 'flops': 3797092544000}, 'timestamp': '2025-09-10 02:43:53.124271', 'step': 15143, 'epoch': 3} {'type': 'loss', 'content': 0.051755744963884354, 'timestamp': '2025-09-10 02:43:53.147828', 'step': 15144, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 3322488817984}, 'timestamp': '2025-09-10 02:43:53.178126', 'step': 15144, 'epoch': 3} {'type': 'loss', 'content': 0.08204013109207153, 'timestamp': '2025-09-10 02:43:53.181604', 'step': 15145, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 3322488817984}, 'timestamp': '2025-09-10 02:43:53.212267', 'step': 15145, 'epoch': 3} {'type': 'loss', 'content': 0.10250817984342575, 'timestamp': '2025-09-10 02:43:53.220717', 'step': 15146, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 128], 'flops': 3797092544000}, 'timestamp': '2025-09-10 02:43:53.252472', 'step': 15146, 'epoch': 3} {'type': 'loss', 'content': 0.10710582137107849, 'timestamp': '2025-09-10 02:43:53.254890', 'step': 15147, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 2847885091968}, 'timestamp': '2025-09-10 02:43:53.285087', 'step': 15147, 'epoch': 3} {'type': 'loss', 'content': 0.036381836980581284, 'timestamp': '2025-09-10 02:43:53.310889', 'step': 15148, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 3322488817984}, 'timestamp': '2025-09-10 02:43:53.342211', 'step': 15148, 'epoch': 3} {'type': 'loss', 'content': 0.12681977450847626, 'timestamp': '2025-09-10 02:43:53.344722', 'step': 15149, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 2847885091968}, 'timestamp': '2025-09-10 02:43:53.376821', 'step': 15149, 'epoch': 3} {'type': 'loss', 'content': 0.059267062693834305, 'timestamp': '2025-09-10 02:43:53.378957', 'step': 15150, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 3322488817984}, 'timestamp': '2025-09-10 02:43:53.411027', 'step': 15150, 'epoch': 3} {'type': 'loss', 'content': 0.07696959376335144, 'timestamp': '2025-09-10 02:43:53.413565', 'step': 15151, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 3322488817984}, 'timestamp': '2025-09-10 02:43:53.443561', 'step': 15151, 'epoch': 3} {'type': 'loss', 'content': 0.13221320509910583, 'timestamp': '2025-09-10 02:43:53.467172', 'step': 15152, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 128], 'flops': 3797092544000}, 'timestamp': '2025-09-10 02:43:53.498866', 'step': 15152, 'epoch': 3} {'type': 'loss', 'content': 0.09736701101064682, 'timestamp': '2025-09-10 02:43:53.501910', 'step': 15153, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 2847885091968}, 'timestamp': '2025-09-10 02:43:53.532475', 'step': 15153, 'epoch': 3} {'type': 'loss', 'content': 0.07829417288303375, 'timestamp': '2025-09-10 02:43:53.534770', 'step': 15154, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 80], 'flops': 2373281365952}, 'timestamp': '2025-09-10 02:43:53.564906', 'step': 15154, 'epoch': 3} {'type': 'loss', 'content': 0.13361389935016632, 'timestamp': '2025-09-10 02:43:53.567491', 'step': 15155, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 80], 'flops': 2373281365952}, 'timestamp': '2025-09-10 02:43:53.598762', 'step': 15155, 'epoch': 3} {'type': 'loss', 'content': 0.040989454835653305, 'timestamp': '2025-09-10 02:43:53.622401', 'step': 15156, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 3322488817984}, 'timestamp': '2025-09-10 02:43:53.654033', 'step': 15156, 'epoch': 3} {'type': 'loss', 'content': 0.1279822140932083, 'timestamp': '2025-09-10 02:43:53.656429', 'step': 15157, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 3322488817984}, 'timestamp': '2025-09-10 02:43:53.686605', 'step': 15157, 'epoch': 3} {'type': 'loss', 'content': 0.15342852473258972, 'timestamp': '2025-09-10 02:43:53.688996', 'step': 15158, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 3322488817984}, 'timestamp': '2025-09-10 02:43:53.719326', 'step': 15158, 'epoch': 3} {'type': 'loss', 'content': 0.1264786422252655, 'timestamp': '2025-09-10 02:43:53.721603', 'step': 15159, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 144], 'flops': 4271696270016}, 'timestamp': '2025-09-10 02:43:53.753300', 'step': 15159, 'epoch': 3} {'type': 'loss', 'content': 0.03925444558262825, 'timestamp': '2025-09-10 02:43:53.777108', 'step': 15160, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 3322488817984}, 'timestamp': '2025-09-10 02:43:53.807515', 'step': 15160, 'epoch': 3} {'type': 'loss', 'content': 0.12349440157413483, 'timestamp': '2025-09-10 02:43:53.810132', 'step': 15161, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 2847885091968}, 'timestamp': '2025-09-10 02:43:53.840445', 'step': 15161, 'epoch': 3} {'type': 'loss', 'content': 0.03009761869907379, 'timestamp': '2025-09-10 02:43:53.842536', 'step': 15162, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 3322488817984}, 'timestamp': '2025-09-10 02:43:53.874786', 'step': 15162, 'epoch': 3} {'type': 'loss', 'content': 0.26763615012168884, 'timestamp': '2025-09-10 02:43:53.877315', 'step': 15163, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 128], 'flops': 3797092544000}, 'timestamp': '2025-09-10 02:43:53.908259', 'step': 15163, 'epoch': 3} {'type': 'loss', 'content': 0.10937299579381943, 'timestamp': '2025-09-10 02:43:53.931794', 'step': 15164, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 3322488817984}, 'timestamp': '2025-09-10 02:43:53.962988', 'step': 15164, 'epoch': 3} {'type': 'loss', 'content': 0.13368354737758636, 'timestamp': '2025-09-10 02:43:53.965383', 'step': 15165, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 2847885091968}, 'timestamp': '2025-09-10 02:43:53.996123', 'step': 15165, 'epoch': 3} {'type': 'loss', 'content': 0.14917577803134918, 'timestamp': '2025-09-10 02:43:53.998654', 'step': 15166, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 2847885091968}, 'timestamp': '2025-09-10 02:43:54.029897', 'step': 15166, 'epoch': 3} {'type': 'loss', 'content': 0.05888122320175171, 'timestamp': '2025-09-10 02:43:54.032321', 'step': 15167, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 2847885091968}, 'timestamp': '2025-09-10 02:43:54.064345', 'step': 15167, 'epoch': 3} {'type': 'loss', 'content': 0.08099522441625595, 'timestamp': '2025-09-10 02:43:54.091420', 'step': 15168, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 144], 'flops': 4271696270016}, 'timestamp': '2025-09-10 02:43:54.124289', 'step': 15168, 'epoch': 3} {'type': 'loss', 'content': 0.1049773097038269, 'timestamp': '2025-09-10 02:43:54.127131', 'step': 15169, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 128], 'flops': 3797092544000}, 'timestamp': '2025-09-10 02:43:54.158516', 'step': 15169, 'epoch': 3} {'type': 'loss', 'content': 0.08724308013916016, 'timestamp': '2025-09-10 02:43:54.160916', 'step': 15170, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 3322488817984}, 'timestamp': '2025-09-10 02:43:54.191759', 'step': 15170, 'epoch': 3} {'type': 'loss', 'content': 0.09008558094501495, 'timestamp': '2025-09-10 02:43:54.194717', 'step': 15171, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 2847885091968}, 'timestamp': '2025-09-10 02:43:54.225437', 'step': 15171, 'epoch': 3} {'type': 'loss', 'content': 0.07120317965745926, 'timestamp': '2025-09-10 02:43:54.250347', 'step': 15172, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 2847885091968}, 'timestamp': '2025-09-10 02:43:54.281306', 'step': 15172, 'epoch': 3} {'type': 'loss', 'content': 0.16021442413330078, 'timestamp': '2025-09-10 02:43:54.283621', 'step': 15173, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 3322488817984}, 'timestamp': '2025-09-10 02:43:54.315944', 'step': 15173, 'epoch': 3} {'type': 'loss', 'content': 0.059577975422143936, 'timestamp': '2025-09-10 02:43:54.318322', 'step': 15174, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 2847885091968}, 'timestamp': '2025-09-10 02:43:54.349032', 'step': 15174, 'epoch': 3} {'type': 'loss', 'content': 0.06740034371614456, 'timestamp': '2025-09-10 02:43:54.351409', 'step': 15175, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 3322488817984}, 'timestamp': '2025-09-10 02:43:54.381800', 'step': 15175, 'epoch': 3} {'type': 'loss', 'content': 0.030414607375860214, 'timestamp': '2025-09-10 02:43:54.405617', 'step': 15176, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 128], 'flops': 3797092544000}, 'timestamp': '2025-09-10 02:43:54.436937', 'step': 15176, 'epoch': 3} {'type': 'loss', 'content': 0.11548915505409241, 'timestamp': '2025-09-10 02:43:54.439350', 'step': 15177, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 3322488817984}, 'timestamp': '2025-09-10 02:43:54.470723', 'step': 15177, 'epoch': 3} {'type': 'loss', 'content': 0.10247007757425308, 'timestamp': '2025-09-10 02:43:54.473239', 'step': 15178, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 2847885091968}, 'timestamp': '2025-09-10 02:43:54.503789', 'step': 15178, 'epoch': 3} {'type': 'loss', 'content': 0.09779295325279236, 'timestamp': '2025-09-10 02:43:54.505978', 'step': 15179, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 3322488817984}, 'timestamp': '2025-09-10 02:43:54.536482', 'step': 15179, 'epoch': 3} {'type': 'loss', 'content': 0.12680640816688538, 'timestamp': '2025-09-10 02:43:54.560263', 'step': 15180, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 3322488817984}, 'timestamp': '2025-09-10 02:43:54.590779', 'step': 15180, 'epoch': 3} {'type': 'loss', 'content': 0.10311897844076157, 'timestamp': '2025-09-10 02:43:54.593246', 'step': 15181, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 2847885091968}, 'timestamp': '2025-09-10 02:43:54.626055', 'step': 15181, 'epoch': 3} {'type': 'loss', 'content': 0.051728505641222, 'timestamp': '2025-09-10 02:43:54.628530', 'step': 15182, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 3322488817984}, 'timestamp': '2025-09-10 02:43:54.659442', 'step': 15182, 'epoch': 3} {'type': 'loss', 'content': 0.11119166761636734, 'timestamp': '2025-09-10 02:43:54.661825', 'step': 15183, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 208], 'flops': 6170111174080}, 'timestamp': '2025-09-10 02:43:54.693010', 'step': 15183, 'epoch': 3} {'type': 'loss', 'content': 0.051333341747522354, 'timestamp': '2025-09-10 02:43:54.720793', 'step': 15184, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 3322488817984}, 'timestamp': '2025-09-10 02:43:54.754949', 'step': 15184, 'epoch': 3} {'type': 'loss', 'content': 0.10219317674636841, 'timestamp': '2025-09-10 02:43:54.757542', 'step': 15185, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 2847885091968}, 'timestamp': '2025-09-10 02:43:54.788334', 'step': 15185, 'epoch': 3} {'type': 'loss', 'content': 0.06663627177476883, 'timestamp': '2025-09-10 02:43:54.790788', 'step': 15186, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 3322488817984}, 'timestamp': '2025-09-10 02:43:54.821784', 'step': 15186, 'epoch': 3} {'type': 'loss', 'content': 0.09825373440980911, 'timestamp': '2025-09-10 02:43:54.824128', 'step': 15187, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 128], 'flops': 3797092544000}, 'timestamp': '2025-09-10 02:43:54.854479', 'step': 15187, 'epoch': 3} {'type': 'loss', 'content': 0.0656605064868927, 'timestamp': '2025-09-10 02:43:54.878063', 'step': 15188, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 3322488817984}, 'timestamp': '2025-09-10 02:43:54.908522', 'step': 15188, 'epoch': 3} {'type': 'loss', 'content': 0.08023229241371155, 'timestamp': '2025-09-10 02:43:54.911181', 'step': 15189, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 128], 'flops': 3797092544000}, 'timestamp': '2025-09-10 02:43:54.941279', 'step': 15189, 'epoch': 3} {'type': 'loss', 'content': 0.061145853251218796, 'timestamp': '2025-09-10 02:43:54.943802', 'step': 15190, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 144], 'flops': 4271696270016}, 'timestamp': '2025-09-10 02:43:54.974645', 'step': 15190, 'epoch': 3} {'type': 'loss', 'content': 0.16969674825668335, 'timestamp': '2025-09-10 02:43:54.977442', 'step': 15191, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 2847885091968}, 'timestamp': '2025-09-10 02:43:55.008559', 'step': 15191, 'epoch': 3} {'type': 'loss', 'content': 0.09842492640018463, 'timestamp': '2025-09-10 02:43:55.032308', 'step': 15192, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 128], 'flops': 3797092544000}, 'timestamp': '2025-09-10 02:43:55.064167', 'step': 15192, 'epoch': 3} {'type': 'loss', 'content': 0.09388905018568039, 'timestamp': '2025-09-10 02:43:55.066365', 'step': 15193, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 2847885091968}, 'timestamp': '2025-09-10 02:43:55.097942', 'step': 15193, 'epoch': 3} {'type': 'loss', 'content': 0.053405486047267914, 'timestamp': '2025-09-10 02:43:55.100573', 'step': 15194, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 128], 'flops': 3797092544000}, 'timestamp': '2025-09-10 02:43:55.134224', 'step': 15194, 'epoch': 3} {'type': 'loss', 'content': 0.07100768387317657, 'timestamp': '2025-09-10 02:43:55.137191', 'step': 15195, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 3322488817984}, 'timestamp': '2025-09-10 02:43:55.167490', 'step': 15195, 'epoch': 3} {'type': 'loss', 'content': 0.061920925974845886, 'timestamp': '2025-09-10 02:43:55.191238', 'step': 15196, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 2847885091968}, 'timestamp': '2025-09-10 02:43:55.221245', 'step': 15196, 'epoch': 3} {'type': 'loss', 'content': 0.08483758568763733, 'timestamp': '2025-09-10 02:43:55.223625', 'step': 15197, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 3322488817984}, 'timestamp': '2025-09-10 02:43:55.255444', 'step': 15197, 'epoch': 3} {'type': 'loss', 'content': 0.0886421650648117, 'timestamp': '2025-09-10 02:43:55.257672', 'step': 15198, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 3322488817984}, 'timestamp': '2025-09-10 02:43:55.288123', 'step': 15198, 'epoch': 3} {'type': 'loss', 'content': 0.08062687516212463, 'timestamp': '2025-09-10 02:43:55.290364', 'step': 15199, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 2847885091968}, 'timestamp': '2025-09-10 02:43:55.322428', 'step': 15199, 'epoch': 3} {'type': 'loss', 'content': 0.10935595631599426, 'timestamp': '2025-09-10 02:43:55.345987', 'step': 15200, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 3322488817984}, 'timestamp': '2025-09-10 02:43:55.376928', 'step': 15200, 'epoch': 3} {'type': 'loss', 'content': 0.03774061053991318, 'timestamp': '2025-09-10 02:43:55.379354', 'step': 15201, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 2847885091968}, 'timestamp': '2025-09-10 02:43:55.409167', 'step': 15201, 'epoch': 3} {'type': 'loss', 'content': 0.07303299754858017, 'timestamp': '2025-09-10 02:43:55.411569', 'step': 15202, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 80], 'flops': 2373281365952}, 'timestamp': '2025-09-10 02:43:55.442246', 'step': 15202, 'epoch': 3} {'type': 'loss', 'content': 0.057710468769073486, 'timestamp': '2025-09-10 02:43:55.444577', 'step': 15203, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 80], 'flops': 2373281365952}, 'timestamp': '2025-09-10 02:43:55.475032', 'step': 15203, 'epoch': 3} {'type': 'loss', 'content': 0.03624213859438896, 'timestamp': '2025-09-10 02:43:55.498446', 'step': 15204, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 80], 'flops': 2373281365952}, 'timestamp': '2025-09-10 02:43:55.528376', 'step': 15204, 'epoch': 3} {'type': 'loss', 'content': 0.10278383642435074, 'timestamp': '2025-09-10 02:43:55.530914', 'step': 15205, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 3322488817984}, 'timestamp': '2025-09-10 02:43:55.561967', 'step': 15205, 'epoch': 3} {'type': 'loss', 'content': 0.17898820340633392, 'timestamp': '2025-09-10 02:43:55.564150', 'step': 15206, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 2847885091968}, 'timestamp': '2025-09-10 02:43:55.594551', 'step': 15206, 'epoch': 3} {'type': 'loss', 'content': 0.2149111032485962, 'timestamp': '2025-09-10 02:43:55.596796', 'step': 15207, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 2847885091968}, 'timestamp': '2025-09-10 02:43:55.626927', 'step': 15207, 'epoch': 3} {'type': 'loss', 'content': 0.15444475412368774, 'timestamp': '2025-09-10 02:43:55.650562', 'step': 15208, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 128], 'flops': 3797092544000}, 'timestamp': '2025-09-10 02:43:55.681197', 'step': 15208, 'epoch': 3} {'type': 'loss', 'content': 0.12369242310523987, 'timestamp': '2025-09-10 02:43:55.683396', 'step': 15209, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 80], 'flops': 2373281365952}, 'timestamp': '2025-09-10 02:43:55.713081', 'step': 15209, 'epoch': 3} {'type': 'loss', 'content': 0.08151298761367798, 'timestamp': '2025-09-10 02:43:55.715351', 'step': 15210, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 80], 'flops': 2373281365952}, 'timestamp': '2025-09-10 02:43:55.745355', 'step': 15210, 'epoch': 3} {'type': 'loss', 'content': 0.09334322065114975, 'timestamp': '2025-09-10 02:43:55.748117', 'step': 15211, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 2847885091968}, 'timestamp': '2025-09-10 02:43:55.778295', 'step': 15211, 'epoch': 3} {'type': 'loss', 'content': 0.14389783143997192, 'timestamp': '2025-09-10 02:43:55.801776', 'step': 15212, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 160], 'flops': 4746299996032}, 'timestamp': '2025-09-10 02:43:55.832632', 'step': 15212, 'epoch': 3} {'type': 'loss', 'content': 0.15457381308078766, 'timestamp': '2025-09-10 02:43:55.834870', 'step': 15213, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 3322488817984}, 'timestamp': '2025-09-10 02:43:55.864906', 'step': 15213, 'epoch': 3} {'type': 'loss', 'content': 0.11262179911136627, 'timestamp': '2025-09-10 02:43:55.866937', 'step': 15214, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 2847885091968}, 'timestamp': '2025-09-10 02:43:55.897322', 'step': 15214, 'epoch': 3} {'type': 'loss', 'content': 0.12259145081043243, 'timestamp': '2025-09-10 02:43:55.899657', 'step': 15215, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 3322488817984}, 'timestamp': '2025-09-10 02:43:55.929795', 'step': 15215, 'epoch': 3} {'type': 'loss', 'content': 0.06443782150745392, 'timestamp': '2025-09-10 02:43:55.953629', 'step': 15216, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 144], 'flops': 4271696270016}, 'timestamp': '2025-09-10 02:43:55.984541', 'step': 15216, 'epoch': 3} {'type': 'loss', 'content': 0.07063783705234528, 'timestamp': '2025-09-10 02:43:55.986794', 'step': 15217, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 3322488817984}, 'timestamp': '2025-09-10 02:43:56.018299', 'step': 15217, 'epoch': 3} {'type': 'loss', 'content': 0.08082957565784454, 'timestamp': '2025-09-10 02:43:56.020549', 'step': 15218, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 128], 'flops': 3797092544000}, 'timestamp': '2025-09-10 02:43:56.054968', 'step': 15218, 'epoch': 3} {'type': 'loss', 'content': 0.08161228150129318, 'timestamp': '2025-09-10 02:43:56.057244', 'step': 15219, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 3322488817984}, 'timestamp': '2025-09-10 02:43:56.087259', 'step': 15219, 'epoch': 3} {'type': 'loss', 'content': 0.09545834362506866, 'timestamp': '2025-09-10 02:43:56.112670', 'step': 15220, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 3322488817984}, 'timestamp': '2025-09-10 02:43:56.144363', 'step': 15220, 'epoch': 3} {'type': 'loss', 'content': 0.09845148772001266, 'timestamp': '2025-09-10 02:43:56.147237', 'step': 15221, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 144], 'flops': 4271696270016}, 'timestamp': '2025-09-10 02:43:56.178082', 'step': 15221, 'epoch': 3} {'type': 'loss', 'content': 0.1185009703040123, 'timestamp': '2025-09-10 02:43:56.180515', 'step': 15222, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 2847885091968}, 'timestamp': '2025-09-10 02:43:56.211573', 'step': 15222, 'epoch': 3} {'type': 'loss', 'content': 0.08719398826360703, 'timestamp': '2025-09-10 02:43:56.213902', 'step': 15223, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 3322488817984}, 'timestamp': '2025-09-10 02:43:56.245188', 'step': 15223, 'epoch': 3} {'type': 'loss', 'content': 0.1282518357038498, 'timestamp': '2025-09-10 02:43:56.268608', 'step': 15224, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 432], 'flops': 12814563338304}, 'timestamp': '2025-09-10 02:43:56.305558', 'step': 15224, 'epoch': 3} {'type': 'loss', 'content': 0.06695036590099335, 'timestamp': '2025-09-10 02:43:56.321416', 'step': 15225, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 2847885091968}, 'timestamp': '2025-09-10 02:43:56.352046', 'step': 15225, 'epoch': 3} {'type': 'loss', 'content': 0.13071408867835999, 'timestamp': '2025-09-10 02:43:56.355563', 'step': 15226, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 3322488817984}, 'timestamp': '2025-09-10 02:43:56.385599', 'step': 15226, 'epoch': 3} {'type': 'loss', 'content': 0.05684328079223633, 'timestamp': '2025-09-10 02:43:56.388787', 'step': 15227, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 3322488817984}, 'timestamp': '2025-09-10 02:43:56.421610', 'step': 15227, 'epoch': 3} {'type': 'loss', 'content': 0.09565864503383636, 'timestamp': '2025-09-10 02:43:56.446143', 'step': 15228, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 2847885091968}, 'timestamp': '2025-09-10 02:43:56.480734', 'step': 15228, 'epoch': 3} {'type': 'loss', 'content': 0.09746570885181427, 'timestamp': '2025-09-10 02:43:56.483452', 'step': 15229, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 80], 'flops': 2373281365952}, 'timestamp': '2025-09-10 02:43:56.514684', 'step': 15229, 'epoch': 3} {'type': 'loss', 'content': 0.09472179412841797, 'timestamp': '2025-09-10 02:43:56.517712', 'step': 15230, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 2847885091968}, 'timestamp': '2025-09-10 02:43:56.552086', 'step': 15230, 'epoch': 3} {'type': 'loss', 'content': 0.135823056101799, 'timestamp': '2025-09-10 02:43:56.554980', 'step': 15231, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 128], 'flops': 3797092544000}, 'timestamp': '2025-09-10 02:43:56.587401', 'step': 15231, 'epoch': 3} {'type': 'loss', 'content': 0.058166053146123886, 'timestamp': '2025-09-10 02:43:56.610942', 'step': 15232, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 2847885091968}, 'timestamp': '2025-09-10 02:43:56.641514', 'step': 15232, 'epoch': 3} {'type': 'loss', 'content': 0.03431452438235283, 'timestamp': '2025-09-10 02:43:56.647864', 'step': 15233, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 2847885091968}, 'timestamp': '2025-09-10 02:43:56.691217', 'step': 15233, 'epoch': 3} {'type': 'loss', 'content': 0.0712970420718193, 'timestamp': '2025-09-10 02:43:56.697159', 'step': 15234, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 3322488817984}, 'timestamp': '2025-09-10 02:43:56.728639', 'step': 15234, 'epoch': 3} {'type': 'loss', 'content': 0.05351303145289421, 'timestamp': '2025-09-10 02:43:56.731198', 'step': 15235, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 3322488817984}, 'timestamp': '2025-09-10 02:43:56.761690', 'step': 15235, 'epoch': 3} {'type': 'loss', 'content': 0.05627566576004028, 'timestamp': '2025-09-10 02:43:56.786373', 'step': 15236, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 128], 'flops': 3797092544000}, 'timestamp': '2025-09-10 02:43:56.817276', 'step': 15236, 'epoch': 3} {'type': 'loss', 'content': 0.08025746047496796, 'timestamp': '2025-09-10 02:43:56.819369', 'step': 15237, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 128], 'flops': 3797092544000}, 'timestamp': '2025-09-10 02:43:56.850962', 'step': 15237, 'epoch': 3} {'type': 'loss', 'content': 0.0908195897936821, 'timestamp': '2025-09-10 02:43:56.853214', 'step': 15238, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 2847885091968}, 'timestamp': '2025-09-10 02:43:56.883475', 'step': 15238, 'epoch': 3} {'type': 'loss', 'content': 0.08674154430627823, 'timestamp': '2025-09-10 02:43:56.885777', 'step': 15239, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 2847885091968}, 'timestamp': '2025-09-10 02:43:56.915879', 'step': 15239, 'epoch': 3} {'type': 'loss', 'content': 0.10232886672019958, 'timestamp': '2025-09-10 02:43:56.939208', 'step': 15240, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 128], 'flops': 3797092544000}, 'timestamp': '2025-09-10 02:43:56.970201', 'step': 15240, 'epoch': 3} {'type': 'loss', 'content': 0.16462767124176025, 'timestamp': '2025-09-10 02:43:56.973250', 'step': 15241, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 2847885091968}, 'timestamp': '2025-09-10 02:43:57.005882', 'step': 15241, 'epoch': 3} {'type': 'loss', 'content': 0.0817708894610405, 'timestamp': '2025-09-10 02:43:57.008097', 'step': 15242, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 3322488817984}, 'timestamp': '2025-09-10 02:43:57.038402', 'step': 15242, 'epoch': 3} {'type': 'loss', 'content': 0.04097345098853111, 'timestamp': '2025-09-10 02:43:57.040777', 'step': 15243, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 2847885091968}, 'timestamp': '2025-09-10 02:43:57.071240', 'step': 15243, 'epoch': 3} {'type': 'loss', 'content': 0.05527057498693466, 'timestamp': '2025-09-10 02:43:57.094895', 'step': 15244, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 160], 'flops': 4746299996032}, 'timestamp': '2025-09-10 02:43:57.129429', 'step': 15244, 'epoch': 3} {'type': 'loss', 'content': 0.07255015522241592, 'timestamp': '2025-09-10 02:43:57.132077', 'step': 15245, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 3322488817984}, 'timestamp': '2025-09-10 02:43:57.162573', 'step': 15245, 'epoch': 3} {'type': 'loss', 'content': 0.20216788351535797, 'timestamp': '2025-09-10 02:43:57.164956', 'step': 15246, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 3322488817984}, 'timestamp': '2025-09-10 02:43:57.195306', 'step': 15246, 'epoch': 3} {'type': 'loss', 'content': 0.09626985341310501, 'timestamp': '2025-09-10 02:43:57.197495', 'step': 15247, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 3322488817984}, 'timestamp': '2025-09-10 02:43:57.227479', 'step': 15247, 'epoch': 3} {'type': 'loss', 'content': 0.1392170935869217, 'timestamp': '2025-09-10 02:43:57.251313', 'step': 15248, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 2847885091968}, 'timestamp': '2025-09-10 02:43:57.282245', 'step': 15248, 'epoch': 3} {'type': 'loss', 'content': 0.08213317394256592, 'timestamp': '2025-09-10 02:43:57.284701', 'step': 15249, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 144], 'flops': 4271696270016}, 'timestamp': '2025-09-10 02:43:57.316512', 'step': 15249, 'epoch': 3} {'type': 'loss', 'content': 0.05517127364873886, 'timestamp': '2025-09-10 02:43:57.319065', 'step': 15250, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 144], 'flops': 4271696270016}, 'timestamp': '2025-09-10 02:43:57.350115', 'step': 15250, 'epoch': 3} {'type': 'loss', 'content': 0.1456577330827713, 'timestamp': '2025-09-10 02:43:57.352636', 'step': 15251, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 128], 'flops': 3797092544000}, 'timestamp': '2025-09-10 02:43:57.383310', 'step': 15251, 'epoch': 3} {'type': 'loss', 'content': 0.08744458854198456, 'timestamp': '2025-09-10 02:43:57.406724', 'step': 15252, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 128], 'flops': 3797092544000}, 'timestamp': '2025-09-10 02:43:57.438589', 'step': 15252, 'epoch': 3} {'type': 'loss', 'content': 0.052157800644636154, 'timestamp': '2025-09-10 02:43:57.441085', 'step': 15253, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 3322488817984}, 'timestamp': '2025-09-10 02:43:57.471274', 'step': 15253, 'epoch': 3} {'type': 'loss', 'content': 0.10057578980922699, 'timestamp': '2025-09-10 02:43:57.475081', 'step': 15254, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 128], 'flops': 3797092544000}, 'timestamp': '2025-09-10 02:43:57.506790', 'step': 15254, 'epoch': 3} {'type': 'loss', 'content': 0.04736558720469475, 'timestamp': '2025-09-10 02:43:57.510454', 'step': 15255, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 2847885091968}, 'timestamp': '2025-09-10 02:43:57.543638', 'step': 15255, 'epoch': 3} {'type': 'loss', 'content': 0.07918336987495422, 'timestamp': '2025-09-10 02:43:57.567198', 'step': 15256, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 3322488817984}, 'timestamp': '2025-09-10 02:43:57.597593', 'step': 15256, 'epoch': 3} {'type': 'loss', 'content': 0.11764898151159286, 'timestamp': '2025-09-10 02:43:57.599901', 'step': 15257, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 144], 'flops': 4271696270016}, 'timestamp': '2025-09-10 02:43:57.630471', 'step': 15257, 'epoch': 3} {'type': 'loss', 'content': 0.04878135770559311, 'timestamp': '2025-09-10 02:43:57.632976', 'step': 15258, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 144], 'flops': 4271696270016}, 'timestamp': '2025-09-10 02:43:57.664588', 'step': 15258, 'epoch': 3} {'type': 'loss', 'content': 0.08468623459339142, 'timestamp': '2025-09-10 02:43:57.667014', 'step': 15259, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 3322488817984}, 'timestamp': '2025-09-10 02:43:57.696533', 'step': 15259, 'epoch': 3} {'type': 'loss', 'content': 0.06753671914339066, 'timestamp': '2025-09-10 02:43:57.719908', 'step': 15260, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 2847885091968}, 'timestamp': '2025-09-10 02:43:57.750462', 'step': 15260, 'epoch': 3} {'type': 'loss', 'content': 0.08065115660429001, 'timestamp': '2025-09-10 02:43:57.752544', 'step': 15261, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 2847885091968}, 'timestamp': '2025-09-10 02:43:57.783126', 'step': 15261, 'epoch': 3} {'type': 'loss', 'content': 0.07277917116880417, 'timestamp': '2025-09-10 02:43:57.785714', 'step': 15262, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 3322488817984}, 'timestamp': '2025-09-10 02:43:57.815992', 'step': 15262, 'epoch': 3} {'type': 'loss', 'content': 0.066016785800457, 'timestamp': '2025-09-10 02:43:57.818395', 'step': 15263, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 3322488817984}, 'timestamp': '2025-09-10 02:43:57.848309', 'step': 15263, 'epoch': 3} {'type': 'loss', 'content': 0.06267376244068146, 'timestamp': '2025-09-10 02:43:57.871842', 'step': 15264, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 144], 'flops': 4271696270016}, 'timestamp': '2025-09-10 02:43:57.902186', 'step': 15264, 'epoch': 3} {'type': 'loss', 'content': 0.10649494081735611, 'timestamp': '2025-09-10 02:43:57.904448', 'step': 15265, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 128], 'flops': 3797092544000}, 'timestamp': '2025-09-10 02:43:57.934195', 'step': 15265, 'epoch': 3} {'type': 'loss', 'content': 0.08943203836679459, 'timestamp': '2025-09-10 02:43:57.937088', 'step': 15266, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 128], 'flops': 3797092544000}, 'timestamp': '2025-09-10 02:43:57.971617', 'step': 15266, 'epoch': 3} {'type': 'loss', 'content': 0.12190542370080948, 'timestamp': '2025-09-10 02:43:57.973883', 'step': 15267, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 3322488817984}, 'timestamp': '2025-09-10 02:43:58.008285', 'step': 15267, 'epoch': 3} {'type': 'loss', 'content': 0.04578148201107979, 'timestamp': '2025-09-10 02:43:58.034044', 'step': 15268, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 128], 'flops': 3797092544000}, 'timestamp': '2025-09-10 02:43:58.077680', 'step': 15268, 'epoch': 3} {'type': 'loss', 'content': 0.10247981548309326, 'timestamp': '2025-09-10 02:43:58.082926', 'step': 15269, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 2847885091968}, 'timestamp': '2025-09-10 02:43:58.124695', 'step': 15269, 'epoch': 3} {'type': 'loss', 'content': 0.039510034024715424, 'timestamp': '2025-09-10 02:43:58.128655', 'step': 15270, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 2847885091968}, 'timestamp': '2025-09-10 02:43:58.158463', 'step': 15270, 'epoch': 3} {'type': 'loss', 'content': 0.11024392396211624, 'timestamp': '2025-09-10 02:43:58.160541', 'step': 15271, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 128], 'flops': 3797092544000}, 'timestamp': '2025-09-10 02:43:58.190585', 'step': 15271, 'epoch': 3} {'type': 'loss', 'content': 0.07038296014070511, 'timestamp': '2025-09-10 02:43:58.214455', 'step': 15272, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 144], 'flops': 4271696270016}, 'timestamp': '2025-09-10 02:43:58.245106', 'step': 15272, 'epoch': 3} {'type': 'loss', 'content': 0.03264591097831726, 'timestamp': '2025-09-10 02:43:58.247865', 'step': 15273, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 3322488817984}, 'timestamp': '2025-09-10 02:43:58.278814', 'step': 15273, 'epoch': 3} {'type': 'loss', 'content': 0.043137192726135254, 'timestamp': '2025-09-10 02:43:58.281645', 'step': 15274, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 2847885091968}, 'timestamp': '2025-09-10 02:43:58.313837', 'step': 15274, 'epoch': 3} {'type': 'loss', 'content': 0.0403248630464077, 'timestamp': '2025-09-10 02:43:58.316429', 'step': 15275, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 128], 'flops': 3797092544000}, 'timestamp': '2025-09-10 02:43:58.347436', 'step': 15275, 'epoch': 3} {'type': 'loss', 'content': 0.19063103199005127, 'timestamp': '2025-09-10 02:43:58.371093', 'step': 15276, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 144], 'flops': 4271696270016}, 'timestamp': '2025-09-10 02:43:58.402654', 'step': 15276, 'epoch': 3} {'type': 'loss', 'content': 0.12987571954727173, 'timestamp': '2025-09-10 02:43:58.405252', 'step': 15277, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 128], 'flops': 3797092544000}, 'timestamp': '2025-09-10 02:43:58.437469', 'step': 15277, 'epoch': 3} {'type': 'loss', 'content': 0.030074451118707657, 'timestamp': '2025-09-10 02:43:58.439907', 'step': 15278, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 3322488817984}, 'timestamp': '2025-09-10 02:43:58.472207', 'step': 15278, 'epoch': 3} {'type': 'loss', 'content': 0.04929691180586815, 'timestamp': '2025-09-10 02:43:58.474439', 'step': 15279, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 2847885091968}, 'timestamp': '2025-09-10 02:43:58.504567', 'step': 15279, 'epoch': 3} {'type': 'loss', 'content': 0.15303105115890503, 'timestamp': '2025-09-10 02:43:58.528031', 'step': 15280, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 3322488817984}, 'timestamp': '2025-09-10 02:43:58.558034', 'step': 15280, 'epoch': 3} {'type': 'loss', 'content': 0.036871787160634995, 'timestamp': '2025-09-10 02:43:58.560417', 'step': 15281, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 2847885091968}, 'timestamp': '2025-09-10 02:43:58.590470', 'step': 15281, 'epoch': 3} {'type': 'loss', 'content': 0.0521872416138649, 'timestamp': '2025-09-10 02:43:58.593000', 'step': 15282, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 128], 'flops': 3797092544000}, 'timestamp': '2025-09-10 02:43:58.623747', 'step': 15282, 'epoch': 3} {'type': 'loss', 'content': 0.03131117299199104, 'timestamp': '2025-09-10 02:43:58.626184', 'step': 15283, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 3322488817984}, 'timestamp': '2025-09-10 02:43:58.656974', 'step': 15283, 'epoch': 3} {'type': 'loss', 'content': 0.04039624333381653, 'timestamp': '2025-09-10 02:43:58.680886', 'step': 15284, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 160], 'flops': 4746299996032}, 'timestamp': '2025-09-10 02:43:58.710690', 'step': 15284, 'epoch': 3} {'type': 'loss', 'content': 0.13230733573436737, 'timestamp': '2025-09-10 02:43:58.713288', 'step': 15285, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 2847885091968}, 'timestamp': '2025-09-10 02:43:58.745033', 'step': 15285, 'epoch': 3} {'type': 'loss', 'content': 0.07240403443574905, 'timestamp': '2025-09-10 02:43:58.747243', 'step': 15286, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 144], 'flops': 4271696270016}, 'timestamp': '2025-09-10 02:43:58.778726', 'step': 15286, 'epoch': 3} {'type': 'loss', 'content': 0.20755213499069214, 'timestamp': '2025-09-10 02:43:58.780949', 'step': 15287, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 2847885091968}, 'timestamp': '2025-09-10 02:43:58.811249', 'step': 15287, 'epoch': 3} {'type': 'loss', 'content': 0.06862182915210724, 'timestamp': '2025-09-10 02:43:58.834566', 'step': 15288, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 3322488817984}, 'timestamp': '2025-09-10 02:43:58.865942', 'step': 15288, 'epoch': 3} {'type': 'loss', 'content': 0.15737280249595642, 'timestamp': '2025-09-10 02:43:58.868173', 'step': 15289, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 2847885091968}, 'timestamp': '2025-09-10 02:43:58.897676', 'step': 15289, 'epoch': 3} {'type': 'loss', 'content': 0.029704775661230087, 'timestamp': '2025-09-10 02:43:58.900072', 'step': 15290, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 3322488817984}, 'timestamp': '2025-09-10 02:43:58.929864', 'step': 15290, 'epoch': 3} {'type': 'loss', 'content': 0.059494491666555405, 'timestamp': '2025-09-10 02:43:58.932141', 'step': 15291, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 3322488817984}, 'timestamp': '2025-09-10 02:43:58.962849', 'step': 15291, 'epoch': 3} {'type': 'loss', 'content': 0.09189114719629288, 'timestamp': '2025-09-10 02:43:58.986491', 'step': 15292, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 176], 'flops': 5220903722048}, 'timestamp': '2025-09-10 02:43:59.017391', 'step': 15292, 'epoch': 3} {'type': 'loss', 'content': 0.061652664095163345, 'timestamp': '2025-09-10 02:43:59.019754', 'step': 15293, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 2847885091968}, 'timestamp': '2025-09-10 02:43:59.050086', 'step': 15293, 'epoch': 3} {'type': 'loss', 'content': 0.0903824046254158, 'timestamp': '2025-09-10 02:43:59.053368', 'step': 15294, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 2847885091968}, 'timestamp': '2025-09-10 02:43:59.086372', 'step': 15294, 'epoch': 3} {'type': 'loss', 'content': 0.10912039130926132, 'timestamp': '2025-09-10 02:43:59.088756', 'step': 15295, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 2847885091968}, 'timestamp': '2025-09-10 02:43:59.119070', 'step': 15295, 'epoch': 3} {'type': 'loss', 'content': 0.054065145552158356, 'timestamp': '2025-09-10 02:43:59.143176', 'step': 15296, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 3322488817984}, 'timestamp': '2025-09-10 02:43:59.174409', 'step': 15296, 'epoch': 3} {'type': 'loss', 'content': 0.10996637493371964, 'timestamp': '2025-09-10 02:43:59.176619', 'step': 15297, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 3322488817984}, 'timestamp': '2025-09-10 02:43:59.207030', 'step': 15297, 'epoch': 3} {'type': 'loss', 'content': 0.04537300020456314, 'timestamp': '2025-09-10 02:43:59.209589', 'step': 15298, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 3322488817984}, 'timestamp': '2025-09-10 02:43:59.241033', 'step': 15298, 'epoch': 3} {'type': 'loss', 'content': 0.12111175060272217, 'timestamp': '2025-09-10 02:43:59.243316', 'step': 15299, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 128], 'flops': 3797092544000}, 'timestamp': '2025-09-10 02:43:59.273487', 'step': 15299, 'epoch': 3} {'type': 'loss', 'content': 0.15754160284996033, 'timestamp': '2025-09-10 02:43:59.297114', 'step': 15300, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 2847885091968}, 'timestamp': '2025-09-10 02:43:59.327760', 'step': 15300, 'epoch': 3} {'type': 'loss', 'content': 0.13702969253063202, 'timestamp': '2025-09-10 02:43:59.329836', 'step': 15301, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 3322488817984}, 'timestamp': '2025-09-10 02:43:59.360933', 'step': 15301, 'epoch': 3} {'type': 'loss', 'content': 0.17579089105129242, 'timestamp': '2025-09-10 02:43:59.363081', 'step': 15302, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 128], 'flops': 3797092544000}, 'timestamp': '2025-09-10 02:43:59.394417', 'step': 15302, 'epoch': 3} {'type': 'loss', 'content': 0.07018199563026428, 'timestamp': '2025-09-10 02:43:59.397088', 'step': 15303, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 2847885091968}, 'timestamp': '2025-09-10 02:43:59.426953', 'step': 15303, 'epoch': 3} {'type': 'loss', 'content': 0.0820147693157196, 'timestamp': '2025-09-10 02:43:59.450815', 'step': 15304, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 128], 'flops': 3797092544000}, 'timestamp': '2025-09-10 02:43:59.481022', 'step': 15304, 'epoch': 3} {'type': 'loss', 'content': 0.07740364223718643, 'timestamp': '2025-09-10 02:43:59.483515', 'step': 15305, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 3322488817984}, 'timestamp': '2025-09-10 02:43:59.515526', 'step': 15305, 'epoch': 3} {'type': 'loss', 'content': 0.022980576381087303, 'timestamp': '2025-09-10 02:43:59.518155', 'step': 15306, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 2847885091968}, 'timestamp': '2025-09-10 02:43:59.548361', 'step': 15306, 'epoch': 3} {'type': 'loss', 'content': 0.03637567535042763, 'timestamp': '2025-09-10 02:43:59.550818', 'step': 15307, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 160], 'flops': 4746299996032}, 'timestamp': '2025-09-10 02:43:59.582029', 'step': 15307, 'epoch': 3} {'type': 'loss', 'content': 0.12320759147405624, 'timestamp': '2025-09-10 02:43:59.606121', 'step': 15308, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 2847885091968}, 'timestamp': '2025-09-10 02:43:59.637653', 'step': 15308, 'epoch': 3} {'type': 'loss', 'content': 0.11438538134098053, 'timestamp': '2025-09-10 02:43:59.642725', 'step': 15309, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 80], 'flops': 2373281365952}, 'timestamp': '2025-09-10 02:43:59.673476', 'step': 15309, 'epoch': 3} {'type': 'loss', 'content': 0.07095249742269516, 'timestamp': '2025-09-10 02:43:59.676104', 'step': 15310, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 2847885091968}, 'timestamp': '2025-09-10 02:43:59.707183', 'step': 15310, 'epoch': 3} {'type': 'loss', 'content': 0.09424728900194168, 'timestamp': '2025-09-10 02:43:59.709348', 'step': 15311, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 144], 'flops': 4271696270016}, 'timestamp': '2025-09-10 02:43:59.740357', 'step': 15311, 'epoch': 3} {'type': 'loss', 'content': 0.09310517460107803, 'timestamp': '2025-09-10 02:43:59.765293', 'step': 15312, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 3322488817984}, 'timestamp': '2025-09-10 02:43:59.796150', 'step': 15312, 'epoch': 3} {'type': 'loss', 'content': 0.0884881466627121, 'timestamp': '2025-09-10 02:43:59.798988', 'step': 15313, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 2847885091968}, 'timestamp': '2025-09-10 02:43:59.832028', 'step': 15313, 'epoch': 3} {'type': 'loss', 'content': 0.04223920404911041, 'timestamp': '2025-09-10 02:43:59.834389', 'step': 15314, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 144], 'flops': 4271696270016}, 'timestamp': '2025-09-10 02:43:59.864556', 'step': 15314, 'epoch': 3} {'type': 'loss', 'content': 0.17368923127651215, 'timestamp': '2025-09-10 02:43:59.866924', 'step': 15315, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 3322488817984}, 'timestamp': '2025-09-10 02:43:59.896564', 'step': 15315, 'epoch': 3} {'type': 'loss', 'content': 0.10189291089773178, 'timestamp': '2025-09-10 02:43:59.919916', 'step': 15316, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 128], 'flops': 3797092544000}, 'timestamp': '2025-09-10 02:43:59.950564', 'step': 15316, 'epoch': 3} {'type': 'loss', 'content': 0.10194319486618042, 'timestamp': '2025-09-10 02:43:59.953984', 'step': 15317, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 2847885091968}, 'timestamp': '2025-09-10 02:43:59.991257', 'step': 15317, 'epoch': 3} {'type': 'loss', 'content': 0.16688765585422516, 'timestamp': '2025-09-10 02:43:59.993537', 'step': 15318, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 3322488817984}, 'timestamp': '2025-09-10 02:44:00.023704', 'step': 15318, 'epoch': 3} {'type': 'loss', 'content': 0.054886024445295334, 'timestamp': '2025-09-10 02:44:00.026325', 'step': 15319, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 128], 'flops': 3797092544000}, 'timestamp': '2025-09-10 02:44:00.056511', 'step': 15319, 'epoch': 3} {'type': 'loss', 'content': 0.13511374592781067, 'timestamp': '2025-09-10 02:44:00.080016', 'step': 15320, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 80], 'flops': 2373281365952}, 'timestamp': '2025-09-10 02:44:00.110972', 'step': 15320, 'epoch': 3} {'type': 'loss', 'content': 0.07040218263864517, 'timestamp': '2025-09-10 02:44:00.113265', 'step': 15321, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 2847885091968}, 'timestamp': '2025-09-10 02:44:00.143084', 'step': 15321, 'epoch': 3} {'type': 'loss', 'content': 0.09072915464639664, 'timestamp': '2025-09-10 02:44:00.147328', 'step': 15322, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 144], 'flops': 4271696270016}, 'timestamp': '2025-09-10 02:44:00.182203', 'step': 15322, 'epoch': 3} {'type': 'loss', 'content': 0.10820797830820084, 'timestamp': '2025-09-10 02:44:00.186094', 'step': 15323, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 3322488817984}, 'timestamp': '2025-09-10 02:44:00.221206', 'step': 15323, 'epoch': 3} {'type': 'loss', 'content': 0.02109646238386631, 'timestamp': '2025-09-10 02:44:00.246624', 'step': 15324, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 3322488817984}, 'timestamp': '2025-09-10 02:44:00.283706', 'step': 15324, 'epoch': 3} {'type': 'loss', 'content': 0.1319597363471985, 'timestamp': '2025-09-10 02:44:00.287757', 'step': 15325, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 144], 'flops': 4271696270016}, 'timestamp': '2025-09-10 02:44:00.325367', 'step': 15325, 'epoch': 3} {'type': 'loss', 'content': 0.07226812094449997, 'timestamp': '2025-09-10 02:44:00.329379', 'step': 15326, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 3322488817984}, 'timestamp': '2025-09-10 02:44:00.367878', 'step': 15326, 'epoch': 3} {'type': 'loss', 'content': 0.05047844722867012, 'timestamp': '2025-09-10 02:44:00.370600', 'step': 15327, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 80], 'flops': 2373281365952}, 'timestamp': '2025-09-10 02:44:00.402783', 'step': 15327, 'epoch': 3} {'type': 'loss', 'content': 0.07640063017606735, 'timestamp': '2025-09-10 02:44:00.426500', 'step': 15328, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 80], 'flops': 2373281365952}, 'timestamp': '2025-09-10 02:44:00.456519', 'step': 15328, 'epoch': 3} {'type': 'loss', 'content': 0.08164911717176437, 'timestamp': '2025-09-10 02:44:00.459019', 'step': 15329, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 3322488817984}, 'timestamp': '2025-09-10 02:44:00.489529', 'step': 15329, 'epoch': 3} {'type': 'loss', 'content': 0.0734570324420929, 'timestamp': '2025-09-10 02:44:00.492150', 'step': 15330, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 3322488817984}, 'timestamp': '2025-09-10 02:44:00.523135', 'step': 15330, 'epoch': 3} {'type': 'loss', 'content': 0.09983954578638077, 'timestamp': '2025-09-10 02:44:00.525379', 'step': 15331, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 144], 'flops': 4271696270016}, 'timestamp': '2025-09-10 02:44:00.555506', 'step': 15331, 'epoch': 3} {'type': 'loss', 'content': 0.10623779147863388, 'timestamp': '2025-09-10 02:44:00.578945', 'step': 15332, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 3322488817984}, 'timestamp': '2025-09-10 02:44:00.609137', 'step': 15332, 'epoch': 3} {'type': 'loss', 'content': 0.0629357397556305, 'timestamp': '2025-09-10 02:44:00.611377', 'step': 15333, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 3322488817984}, 'timestamp': '2025-09-10 02:44:00.641789', 'step': 15333, 'epoch': 3} {'type': 'loss', 'content': 0.06276507675647736, 'timestamp': '2025-09-10 02:44:00.644204', 'step': 15334, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 128], 'flops': 3797092544000}, 'timestamp': '2025-09-10 02:44:00.676191', 'step': 15334, 'epoch': 3} {'type': 'loss', 'content': 0.05245470255613327, 'timestamp': '2025-09-10 02:44:00.678543', 'step': 15335, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 2847885091968}, 'timestamp': '2025-09-10 02:44:00.709873', 'step': 15335, 'epoch': 3} {'type': 'loss', 'content': 0.021844616159796715, 'timestamp': '2025-09-10 02:44:00.733170', 'step': 15336, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 128], 'flops': 3797092544000}, 'timestamp': '2025-09-10 02:44:00.764444', 'step': 15336, 'epoch': 3} {'type': 'loss', 'content': 0.02701464109122753, 'timestamp': '2025-09-10 02:44:00.766808', 'step': 15337, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 128], 'flops': 3797092544000}, 'timestamp': '2025-09-10 02:44:00.798192', 'step': 15337, 'epoch': 3} {'type': 'loss', 'content': 0.040917545557022095, 'timestamp': '2025-09-10 02:44:00.801505', 'step': 15338, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 144], 'flops': 4271696270016}, 'timestamp': '2025-09-10 02:44:00.833826', 'step': 15338, 'epoch': 3} {'type': 'loss', 'content': 0.0497111976146698, 'timestamp': '2025-09-10 02:44:00.836276', 'step': 15339, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 128], 'flops': 3797092544000}, 'timestamp': '2025-09-10 02:44:00.866435', 'step': 15339, 'epoch': 3} {'type': 'loss', 'content': 0.07752933353185654, 'timestamp': '2025-09-10 02:44:00.890191', 'step': 15340, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 2847885091968}, 'timestamp': '2025-09-10 02:44:00.920815', 'step': 15340, 'epoch': 3} {'type': 'loss', 'content': 0.07320351898670197, 'timestamp': '2025-09-10 02:44:00.923281', 'step': 15341, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 3322488817984}, 'timestamp': '2025-09-10 02:44:00.955261', 'step': 15341, 'epoch': 3} {'type': 'loss', 'content': 0.178026020526886, 'timestamp': '2025-09-10 02:44:00.957359', 'step': 15342, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 2847885091968}, 'timestamp': '2025-09-10 02:44:00.987278', 'step': 15342, 'epoch': 3} {'type': 'loss', 'content': 0.06909077614545822, 'timestamp': '2025-09-10 02:44:00.989721', 'step': 15343, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 2847885091968}, 'timestamp': '2025-09-10 02:44:01.020288', 'step': 15343, 'epoch': 3} {'type': 'loss', 'content': 0.09731805324554443, 'timestamp': '2025-09-10 02:44:01.043714', 'step': 15344, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 3322488817984}, 'timestamp': '2025-09-10 02:44:01.074505', 'step': 15344, 'epoch': 3} {'type': 'loss', 'content': 0.03442362695932388, 'timestamp': '2025-09-10 02:44:01.076685', 'step': 15345, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 2847885091968}, 'timestamp': '2025-09-10 02:44:01.108489', 'step': 15345, 'epoch': 3} {'type': 'loss', 'content': 0.11155646294355392, 'timestamp': '2025-09-10 02:44:01.110733', 'step': 15346, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 2847885091968}, 'timestamp': '2025-09-10 02:44:01.140793', 'step': 15346, 'epoch': 3} {'type': 'loss', 'content': 0.10693497955799103, 'timestamp': '2025-09-10 02:44:01.143432', 'step': 15347, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 144], 'flops': 4271696270016}, 'timestamp': '2025-09-10 02:44:01.175146', 'step': 15347, 'epoch': 3} {'type': 'loss', 'content': 0.09487589448690414, 'timestamp': '2025-09-10 02:44:01.198793', 'step': 15348, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 3322488817984}, 'timestamp': '2025-09-10 02:44:01.229439', 'step': 15348, 'epoch': 3} {'type': 'loss', 'content': 0.05169932171702385, 'timestamp': '2025-09-10 02:44:01.231861', 'step': 15349, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 128], 'flops': 3797092544000}, 'timestamp': '2025-09-10 02:44:01.263308', 'step': 15349, 'epoch': 3} {'type': 'loss', 'content': 0.002024176064878702, 'timestamp': '2025-09-10 02:44:01.265533', 'step': 15350, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 160], 'flops': 4746299996032}, 'timestamp': '2025-09-10 02:44:01.296459', 'step': 15350, 'epoch': 3} {'type': 'loss', 'content': 0.08598486334085464, 'timestamp': '2025-09-10 02:44:01.299305', 'step': 15351, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 128], 'flops': 3797092544000}, 'timestamp': '2025-09-10 02:44:01.329960', 'step': 15351, 'epoch': 3} {'type': 'loss', 'content': 0.10179959237575531, 'timestamp': '2025-09-10 02:44:01.353708', 'step': 15352, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 3322488817984}, 'timestamp': '2025-09-10 02:44:01.384135', 'step': 15352, 'epoch': 3} {'type': 'loss', 'content': 0.0518411286175251, 'timestamp': '2025-09-10 02:44:01.386499', 'step': 15353, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 3322488817984}, 'timestamp': '2025-09-10 02:44:01.416533', 'step': 15353, 'epoch': 3} {'type': 'loss', 'content': 0.04214548319578171, 'timestamp': '2025-09-10 02:44:01.418803', 'step': 15354, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 128], 'flops': 3797092544000}, 'timestamp': '2025-09-10 02:44:01.449306', 'step': 15354, 'epoch': 3} {'type': 'loss', 'content': 0.09158047288656235, 'timestamp': '2025-09-10 02:44:01.451572', 'step': 15355, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 2847885091968}, 'timestamp': '2025-09-10 02:44:01.481927', 'step': 15355, 'epoch': 3} {'type': 'loss', 'content': 0.032341379672288895, 'timestamp': '2025-09-10 02:44:01.505650', 'step': 15356, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 2847885091968}, 'timestamp': '2025-09-10 02:44:01.535624', 'step': 15356, 'epoch': 3} {'type': 'loss', 'content': 0.1181529238820076, 'timestamp': '2025-09-10 02:44:01.538254', 'step': 15357, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 2847885091968}, 'timestamp': '2025-09-10 02:44:01.568389', 'step': 15357, 'epoch': 3} {'type': 'loss', 'content': 0.12580524384975433, 'timestamp': '2025-09-10 02:44:01.570527', 'step': 15358, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 144], 'flops': 4271696270016}, 'timestamp': '2025-09-10 02:44:01.600130', 'step': 15358, 'epoch': 3} {'type': 'loss', 'content': 0.03520633280277252, 'timestamp': '2025-09-10 02:44:01.602593', 'step': 15359, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 3322488817984}, 'timestamp': '2025-09-10 02:44:01.633865', 'step': 15359, 'epoch': 3} {'type': 'loss', 'content': 0.10804148763418198, 'timestamp': '2025-09-10 02:44:01.657437', 'step': 15360, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 3322488817984}, 'timestamp': '2025-09-10 02:44:01.688379', 'step': 15360, 'epoch': 3} {'type': 'loss', 'content': 0.06657402217388153, 'timestamp': '2025-09-10 02:44:01.690655', 'step': 15361, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 128], 'flops': 3797092544000}, 'timestamp': '2025-09-10 02:44:01.721374', 'step': 15361, 'epoch': 3} {'type': 'loss', 'content': 0.028007905930280685, 'timestamp': '2025-09-10 02:44:01.723726', 'step': 15362, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 128], 'flops': 3797092544000}, 'timestamp': '2025-09-10 02:44:01.753877', 'step': 15362, 'epoch': 3} {'type': 'loss', 'content': 0.08025029301643372, 'timestamp': '2025-09-10 02:44:01.756350', 'step': 15363, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 128], 'flops': 3797092544000}, 'timestamp': '2025-09-10 02:44:01.787038', 'step': 15363, 'epoch': 3} {'type': 'loss', 'content': 0.09114810079336166, 'timestamp': '2025-09-10 02:44:01.811974', 'step': 15364, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 128], 'flops': 3797092544000}, 'timestamp': '2025-09-10 02:44:01.842776', 'step': 15364, 'epoch': 3} {'type': 'loss', 'content': 0.1291469931602478, 'timestamp': '2025-09-10 02:44:01.845274', 'step': 15365, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 128], 'flops': 3797092544000}, 'timestamp': '2025-09-10 02:44:01.875410', 'step': 15365, 'epoch': 3} {'type': 'loss', 'content': 0.14455117285251617, 'timestamp': '2025-09-10 02:44:01.877660', 'step': 15366, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 80], 'flops': 2373281365952}, 'timestamp': '2025-09-10 02:44:01.907457', 'step': 15366, 'epoch': 3} {'type': 'loss', 'content': 0.11773516237735748, 'timestamp': '2025-09-10 02:44:01.909671', 'step': 15367, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 160], 'flops': 4746299996032}, 'timestamp': '2025-09-10 02:44:01.940440', 'step': 15367, 'epoch': 3} {'type': 'loss', 'content': 0.08523380011320114, 'timestamp': '2025-09-10 02:44:01.964009', 'step': 15368, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 3322488817984}, 'timestamp': '2025-09-10 02:44:01.994918', 'step': 15368, 'epoch': 3} {'type': 'loss', 'content': 0.10048151761293411, 'timestamp': '2025-09-10 02:44:01.997295', 'step': 15369, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 3322488817984}, 'timestamp': '2025-09-10 02:44:02.028320', 'step': 15369, 'epoch': 3} {'type': 'loss', 'content': 0.09491100907325745, 'timestamp': '2025-09-10 02:44:02.030740', 'step': 15370, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 3322488817984}, 'timestamp': '2025-09-10 02:44:02.061483', 'step': 15370, 'epoch': 3} {'type': 'loss', 'content': 0.06504666060209274, 'timestamp': '2025-09-10 02:44:02.064781', 'step': 15371, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 144], 'flops': 4271696270016}, 'timestamp': '2025-09-10 02:44:02.097793', 'step': 15371, 'epoch': 3} {'type': 'loss', 'content': 0.05357494577765465, 'timestamp': '2025-09-10 02:44:02.121348', 'step': 15372, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 2847885091968}, 'timestamp': '2025-09-10 02:44:02.153444', 'step': 15372, 'epoch': 3} {'type': 'loss', 'content': 0.09785206615924835, 'timestamp': '2025-09-10 02:44:02.156012', 'step': 15373, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 160], 'flops': 4746299996032}, 'timestamp': '2025-09-10 02:44:02.188357', 'step': 15373, 'epoch': 3} {'type': 'loss', 'content': 0.13923610746860504, 'timestamp': '2025-09-10 02:44:02.191305', 'step': 15374, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 144], 'flops': 4271696270016}, 'timestamp': '2025-09-10 02:44:02.223060', 'step': 15374, 'epoch': 3} {'type': 'loss', 'content': 0.10917802155017853, 'timestamp': '2025-09-10 02:44:02.225544', 'step': 15375, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 2847885091968}, 'timestamp': '2025-09-10 02:44:02.256069', 'step': 15375, 'epoch': 3} {'type': 'loss', 'content': 0.059855252504348755, 'timestamp': '2025-09-10 02:44:02.280140', 'step': 15376, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 160], 'flops': 4746299996032}, 'timestamp': '2025-09-10 02:44:02.311828', 'step': 15376, 'epoch': 3} {'type': 'loss', 'content': 0.10601267218589783, 'timestamp': '2025-09-10 02:44:02.314156', 'step': 15377, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 2847885091968}, 'timestamp': '2025-09-10 02:44:02.346801', 'step': 15377, 'epoch': 3} {'type': 'loss', 'content': 0.05108925327658653, 'timestamp': '2025-09-10 02:44:02.349920', 'step': 15378, 'epoch': 3} {'type': 'flops', 'content': [{'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1582003754624}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1898404492032}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1898404492032}, {'type': 'perplexity', 'in_batch_dim': [8, 144], 'batch_size': 8, 'flops': 2847606704256}, {'type': 'perplexity', 'in_batch_dim': [8, 64], 'batch_size': 8, 'flops': 1265603017216}, {'type': 'perplexity', 'in_batch_dim': [8, 112], 'batch_size': 8, 'flops': 2214805229440}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1898404492032}, {'type': 'perplexity', 'in_batch_dim': [8, 112], 'batch_size': 8, 'flops': 2214805229440}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1898404492032}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1898404492032}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1898404492032}, {'type': 'perplexity', 'in_batch_dim': [8, 112], 'batch_size': 8, 'flops': 2214805229440}, {'type': 'perplexity', 'in_batch_dim': [8, 112], 'batch_size': 8, 'flops': 2214805229440}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1582003754624}, {'type': 'perplexity', 'in_batch_dim': [8, 144], 'batch_size': 8, 'flops': 2847606704256}, {'type': 'perplexity', 'in_batch_dim': [8, 112], 'batch_size': 8, 'flops': 2214805229440}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1582003754624}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1898404492032}, {'type': 'perplexity', 'in_batch_dim': [8, 64], 'batch_size': 8, 'flops': 1265603017216}, {'type': 'perplexity', 'in_batch_dim': [8, 64], 'batch_size': 8, 'flops': 1265603017216}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1898404492032}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1582003754624}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1582003754624}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1582003754624}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1582003754624}, {'type': 'perplexity', 'in_batch_dim': [8, 64], 'batch_size': 8, 'flops': 1265603017216}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1898404492032}, {'type': 'perplexity', 'in_batch_dim': [8, 64], 'batch_size': 8, 'flops': 1265603017216}, {'type': 'perplexity', 'in_batch_dim': [8, 64], 'batch_size': 8, 'flops': 1265603017216}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1582003754624}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1582003754624}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1582003754624}, {'type': 'perplexity', 'in_batch_dim': [8, 112], 'batch_size': 8, 'flops': 2214805229440}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1582003754624}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1582003754624}, {'type': 'perplexity', 'in_batch_dim': [8, 160], 'batch_size': 8, 'flops': 3164007441664}, {'type': 'perplexity', 'in_batch_dim': [8, 64], 'batch_size': 8, 'flops': 1265603017216}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1898404492032}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1898404492032}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1898404492032}, {'type': 'perplexity', 'in_batch_dim': [8, 64], 'batch_size': 8, 'flops': 1265603017216}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1582003754624}, {'type': 'perplexity', 'in_batch_dim': [8, 64], 'batch_size': 8, 'flops': 1265603017216}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1582003754624}, {'type': 'perplexity', 'in_batch_dim': [8, 64], 'batch_size': 8, 'flops': 1265603017216}, {'type': 'perplexity', 'in_batch_dim': [8, 112], 'batch_size': 8, 'flops': 2214805229440}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1582003754624}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1898404492032}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1898404492032}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1898404492032}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1898404492032}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1898404492032}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1898404492032}, {'type': 'perplexity', 'in_batch_dim': [8, 64], 'batch_size': 8, 'flops': 1265603017216}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1898404492032}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1582003754624}, {'type': 'perplexity', 'in_batch_dim': [8, 64], 'batch_size': 8, 'flops': 1265603017216}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1898404492032}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1898404492032}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1898404492032}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1898404492032}, {'type': 'perplexity', 'in_batch_dim': [8, 128], 'batch_size': 8, 'flops': 2531205966848}, {'type': 'perplexity', 'in_batch_dim': [8, 112], 'batch_size': 8, 'flops': 2214805229440}, {'type': 'perplexity', 'in_batch_dim': [8, 64], 'batch_size': 8, 'flops': 1265603017216}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1582003754624}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1898404492032}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1582003754624}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1898404492032}, {'type': 'perplexity', 'in_batch_dim': [8, 64], 'batch_size': 8, 'flops': 1265603017216}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1898404492032}, {'type': 'perplexity', 'in_batch_dim': [8, 112], 'batch_size': 8, 'flops': 2214805229440}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1898404492032}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1582003754624}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1582003754624}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1582003754624}, {'type': 'perplexity', 'in_batch_dim': [8, 64], 'batch_size': 8, 'flops': 1265603017216}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1582003754624}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1898404492032}, {'type': 'perplexity', 'in_batch_dim': [8, 64], 'batch_size': 8, 'flops': 1265603017216}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1582003754624}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1898404492032}, {'type': 'perplexity', 'in_batch_dim': [8, 64], 'batch_size': 8, 'flops': 1265603017216}, {'type': 'perplexity', 'in_batch_dim': [8, 112], 'batch_size': 8, 'flops': 2214805229440}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1582003754624}, {'type': 'perplexity', 'in_batch_dim': [8, 144], 'batch_size': 8, 'flops': 2847606704256}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1582003754624}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1582003754624}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1582003754624}, {'type': 'perplexity', 'in_batch_dim': [8, 64], 'batch_size': 8, 'flops': 1265603017216}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1898404492032}, {'type': 'perplexity', 'in_batch_dim': [8, 112], 'batch_size': 8, 'flops': 2214805229440}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1898404492032}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1582003754624}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1582003754624}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1582003754624}, {'type': 'perplexity', 'in_batch_dim': [8, 112], 'batch_size': 8, 'flops': 2214805229440}, {'type': 'perplexity', 'in_batch_dim': [8, 64], 'batch_size': 8, 'flops': 1265603017216}, {'type': 'perplexity', 'in_batch_dim': [8, 112], 'batch_size': 8, 'flops': 2214805229440}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1582003754624}, {'type': 'perplexity', 'in_batch_dim': [8, 64], 'batch_size': 8, 'flops': 1265603017216}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1582003754624}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1582003754624}, {'type': 'perplexity', 'in_batch_dim': [8, 64], 'batch_size': 8, 'flops': 1265603017216}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1582003754624}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1582003754624}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1582003754624}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1898404492032}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1582003754624}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1582003754624}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1898404492032}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1898404492032}, {'type': 'perplexity', 'in_batch_dim': [8, 112], 'batch_size': 8, 'flops': 2214805229440}, {'type': 'perplexity', 'in_batch_dim': [8, 64], 'batch_size': 8, 'flops': 1265603017216}, {'type': 'perplexity', 'in_batch_dim': [8, 112], 'batch_size': 8, 'flops': 2214805229440}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1898404492032}, {'type': 'perplexity', 'in_batch_dim': [8, 112], 'batch_size': 8, 'flops': 2214805229440}, {'type': 'perplexity', 'in_batch_dim': [8, 128], 'batch_size': 8, 'flops': 2531205966848}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1582003754624}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1898404492032}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1898404492032}, {'type': 'perplexity', 'in_batch_dim': [8, 192], 'batch_size': 8, 'flops': 3796808916480}, {'type': 'perplexity', 'in_batch_dim': [8, 64], 'batch_size': 8, 'flops': 1265603017216}, {'type': 'perplexity', 'in_batch_dim': [8, 144], 'batch_size': 8, 'flops': 2847606704256}, {'type': 'perplexity', 'in_batch_dim': [8, 64], 'batch_size': 8, 'flops': 1265603017216}, {'type': 'perplexity', 'in_batch_dim': [8, 144], 'batch_size': 8, 'flops': 2847606704256}, {'type': 'perplexity', 'in_batch_dim': [8, 64], 'batch_size': 8, 'flops': 1265603017216}, {'type': 'perplexity', 'in_batch_dim': [8, 64], 'batch_size': 8, 'flops': 1265603017216}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1898404492032}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1898404492032}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1582003754624}, {'type': 'perplexity', 'in_batch_dim': [8, 128], 'batch_size': 8, 'flops': 2531205966848}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1898404492032}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1898404492032}, {'type': 'perplexity', 'in_batch_dim': [8, 112], 'batch_size': 8, 'flops': 2214805229440}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1582003754624}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1582003754624}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1898404492032}, {'type': 'perplexity', 'in_batch_dim': [8, 64], 'batch_size': 8, 'flops': 1265603017216}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1898404492032}, {'type': 'perplexity', 'in_batch_dim': [8, 112], 'batch_size': 8, 'flops': 2214805229440}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1898404492032}, {'type': 'perplexity', 'in_batch_dim': [8, 64], 'batch_size': 8, 'flops': 1265603017216}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1582003754624}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1898404492032}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1898404492032}, {'type': 'perplexity', 'in_batch_dim': [8, 64], 'batch_size': 8, 'flops': 1265603017216}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1898404492032}, {'type': 'perplexity', 'in_batch_dim': [8, 64], 'batch_size': 8, 'flops': 1265603017216}, {'type': 'perplexity', 'in_batch_dim': [8, 112], 'batch_size': 8, 'flops': 2214805229440}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1582003754624}, {'type': 'perplexity', 'in_batch_dim': [8, 128], 'batch_size': 8, 'flops': 2531205966848}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1582003754624}, {'type': 'perplexity', 'in_batch_dim': [8, 112], 'batch_size': 8, 'flops': 2214805229440}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1898404492032}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1582003754624}, {'type': 'perplexity', 'in_batch_dim': [8, 112], 'batch_size': 8, 'flops': 2214805229440}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1582003754624}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1898404492032}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1898404492032}, {'type': 'perplexity', 'in_batch_dim': [8, 128], 'batch_size': 8, 'flops': 2531205966848}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1898404492032}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1898404492032}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1582003754624}, {'type': 'perplexity', 'in_batch_dim': [8, 112], 'batch_size': 8, 'flops': 2214805229440}, {'type': 'perplexity', 'in_batch_dim': [8, 128], 'batch_size': 8, 'flops': 2531205966848}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1898404492032}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1582003754624}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1582003754624}, {'type': 'perplexity', 'in_batch_dim': [8, 128], 'batch_size': 8, 'flops': 2531205966848}, {'type': 'perplexity', 'in_batch_dim': [8, 112], 'batch_size': 8, 'flops': 2214805229440}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1582003754624}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1582003754624}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1898404492032}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1582003754624}, {'type': 'perplexity', 'in_batch_dim': [8, 64], 'batch_size': 8, 'flops': 1265603017216}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1582003754624}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1582003754624}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1582003754624}, {'type': 'perplexity', 'in_batch_dim': [8, 112], 'batch_size': 8, 'flops': 2214805229440}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1582003754624}, {'type': 'perplexity', 'in_batch_dim': [8, 64], 'batch_size': 8, 'flops': 1265603017216}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1582003754624}, {'type': 'perplexity', 'in_batch_dim': [8, 128], 'batch_size': 8, 'flops': 2531205966848}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1898404492032}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1898404492032}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1582003754624}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1898404492032}, {'type': 'perplexity', 'in_batch_dim': [8, 64], 'batch_size': 8, 'flops': 1265603017216}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1898404492032}, {'type': 'perplexity', 'in_batch_dim': [8, 112], 'batch_size': 8, 'flops': 2214805229440}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1898404492032}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1582003754624}, {'type': 'perplexity', 'in_batch_dim': [8, 112], 'batch_size': 8, 'flops': 2214805229440}, {'type': 'perplexity', 'in_batch_dim': [8, 112], 'batch_size': 8, 'flops': 2214805229440}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1898404492032}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1898404492032}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1898404492032}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1898404492032}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1582003754624}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1582003754624}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1582003754624}, {'type': 'perplexity', 'in_batch_dim': [8, 112], 'batch_size': 8, 'flops': 2214805229440}, {'type': 'perplexity', 'in_batch_dim': [8, 64], 'batch_size': 8, 'flops': 1265603017216}, {'type': 'perplexity', 'in_batch_dim': [8, 144], 'batch_size': 8, 'flops': 2847606704256}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1898404492032}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1898404492032}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1898404492032}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1898404492032}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1898404492032}, {'type': 'perplexity', 'in_batch_dim': [8, 112], 'batch_size': 8, 'flops': 2214805229440}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1582003754624}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1582003754624}, {'type': 'perplexity', 'in_batch_dim': [8, 64], 'batch_size': 8, 'flops': 1265603017216}, {'type': 'perplexity', 'in_batch_dim': [8, 112], 'batch_size': 8, 'flops': 2214805229440}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1582003754624}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1898404492032}, {'type': 'perplexity', 'in_batch_dim': [8, 64], 'batch_size': 8, 'flops': 1265603017216}, {'type': 'perplexity', 'in_batch_dim': [8, 112], 'batch_size': 8, 'flops': 2214805229440}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1898404492032}, {'type': 'perplexity', 'in_batch_dim': [8, 64], 'batch_size': 8, 'flops': 1265603017216}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1582003754624}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1582003754624}, {'type': 'perplexity', 'in_batch_dim': [8, 64], 'batch_size': 8, 'flops': 1265603017216}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1898404492032}, {'type': 'perplexity', 'in_batch_dim': [8, 64], 'batch_size': 8, 'flops': 1265603017216}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1582003754624}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1898404492032}, {'type': 'perplexity', 'in_batch_dim': [8, 112], 'batch_size': 8, 'flops': 2214805229440}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1582003754624}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1898404492032}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1582003754624}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1898404492032}, {'type': 'perplexity', 'in_batch_dim': [8, 112], 'batch_size': 8, 'flops': 2214805229440}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1898404492032}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1898404492032}, {'type': 'perplexity', 'in_batch_dim': [8, 64], 'batch_size': 8, 'flops': 1265603017216}, {'type': 'perplexity', 'in_batch_dim': [8, 64], 'batch_size': 8, 'flops': 1265603017216}, {'type': 'perplexity', 'in_batch_dim': [8, 128], 'batch_size': 8, 'flops': 2531205966848}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1582003754624}, {'type': 'perplexity', 'in_batch_dim': [8, 64], 'batch_size': 8, 'flops': 1265603017216}, {'type': 'perplexity', 'in_batch_dim': [8, 112], 'batch_size': 8, 'flops': 2214805229440}, {'type': 'perplexity', 'in_batch_dim': [8, 112], 'batch_size': 8, 'flops': 2214805229440}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1898404492032}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1582003754624}, {'type': 'perplexity', 'in_batch_dim': [8, 128], 'batch_size': 8, 'flops': 2531205966848}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1898404492032}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1582003754624}, {'type': 'perplexity', 'in_batch_dim': [8, 112], 'batch_size': 8, 'flops': 2214805229440}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1898404492032}, {'type': 'perplexity', 'in_batch_dim': [8, 64], 'batch_size': 8, 'flops': 1265603017216}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1582003754624}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1898404492032}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1898404492032}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1582003754624}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1582003754624}, {'type': 'perplexity', 'in_batch_dim': [8, 64], 'batch_size': 8, 'flops': 1265603017216}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1898404492032}, {'type': 'perplexity', 'in_batch_dim': [8, 112], 'batch_size': 8, 'flops': 2214805229440}, {'type': 'perplexity', 'in_batch_dim': [8, 64], 'batch_size': 8, 'flops': 1265603017216}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1898404492032}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1582003754624}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1582003754624}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1898404492032}, {'type': 'perplexity', 'in_batch_dim': [8, 112], 'batch_size': 8, 'flops': 2214805229440}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1898404492032}, {'type': 'perplexity', 'in_batch_dim': [8, 112], 'batch_size': 8, 'flops': 2214805229440}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1582003754624}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1582003754624}, {'type': 'perplexity', 'in_batch_dim': [8, 64], 'batch_size': 8, 'flops': 1265603017216}, {'type': 'perplexity', 'in_batch_dim': [8, 112], 'batch_size': 8, 'flops': 2214805229440}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1582003754624}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1898404492032}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1582003754624}, {'type': 'perplexity', 'in_batch_dim': [8, 112], 'batch_size': 8, 'flops': 2214805229440}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1898404492032}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1898404492032}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1582003754624}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1898404492032}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1582003754624}, {'type': 'perplexity', 'in_batch_dim': [8, 112], 'batch_size': 8, 'flops': 2214805229440}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1582003754624}, {'type': 'perplexity', 'in_batch_dim': [8, 64], 'batch_size': 8, 'flops': 1265603017216}, {'type': 'perplexity', 'in_batch_dim': [8, 64], 'batch_size': 8, 'flops': 1265603017216}, {'type': 'perplexity', 'in_batch_dim': [8, 48], 'batch_size': 8, 'flops': 949202279808}, {'type': 'perplexity', 'in_batch_dim': [8, 112], 'batch_size': 8, 'flops': 2214805229440}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1582003754624}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1898404492032}, {'type': 'perplexity', 'in_batch_dim': [8, 128], 'batch_size': 8, 'flops': 2531205966848}, {'type': 'perplexity', 'in_batch_dim': [8, 112], 'batch_size': 8, 'flops': 2214805229440}, {'type': 'perplexity', 'in_batch_dim': [8, 128], 'batch_size': 8, 'flops': 2531205966848}, {'type': 'perplexity', 'in_batch_dim': [8, 112], 'batch_size': 8, 'flops': 2214805229440}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1582003754624}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1898404492032}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1582003754624}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1582003754624}, {'type': 'perplexity', 'in_batch_dim': [8, 112], 'batch_size': 8, 'flops': 2214805229440}, {'type': 'perplexity', 'in_batch_dim': [8, 112], 'batch_size': 8, 'flops': 2214805229440}, {'type': 'perplexity', 'in_batch_dim': [8, 64], 'batch_size': 8, 'flops': 1265603017216}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1898404492032}, {'type': 'perplexity', 'in_batch_dim': [8, 112], 'batch_size': 8, 'flops': 2214805229440}, {'type': 'perplexity', 'in_batch_dim': [8, 64], 'batch_size': 8, 'flops': 1265603017216}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1898404492032}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1582003754624}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1898404492032}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1898404492032}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1898404492032}, {'type': 'perplexity', 'in_batch_dim': [8, 48], 'batch_size': 8, 'flops': 949202279808}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1898404492032}, {'type': 'perplexity', 'in_batch_dim': [8, 112], 'batch_size': 8, 'flops': 2214805229440}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1582003754624}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1898404492032}, {'type': 'perplexity', 'in_batch_dim': [8, 64], 'batch_size': 8, 'flops': 1265603017216}, {'type': 'perplexity', 'in_batch_dim': [8, 112], 'batch_size': 8, 'flops': 2214805229440}, {'type': 'perplexity', 'in_batch_dim': [8, 64], 'batch_size': 8, 'flops': 1265603017216}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1582003754624}, {'type': 'perplexity', 'in_batch_dim': [8, 64], 'batch_size': 8, 'flops': 1265603017216}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1582003754624}, {'type': 'perplexity', 'in_batch_dim': [8, 112], 'batch_size': 8, 'flops': 2214805229440}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1582003754624}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1582003754624}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1582003754624}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1582003754624}, {'type': 'perplexity', 'in_batch_dim': [8, 112], 'batch_size': 8, 'flops': 2214805229440}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1582003754624}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1898404492032}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1898404492032}, {'type': 'perplexity', 'in_batch_dim': [8, 112], 'batch_size': 8, 'flops': 2214805229440}, {'type': 'perplexity', 'in_batch_dim': [8, 144], 'batch_size': 8, 'flops': 2847606704256}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1582003754624}, {'type': 'perplexity', 'in_batch_dim': [8, 64], 'batch_size': 8, 'flops': 1265603017216}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1898404492032}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1898404492032}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1582003754624}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1898404492032}, {'type': 'perplexity', 'in_batch_dim': [8, 128], 'batch_size': 8, 'flops': 2531205966848}, {'type': 'perplexity', 'in_batch_dim': [8, 112], 'batch_size': 8, 'flops': 2214805229440}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1898404492032}, {'type': 'perplexity', 'in_batch_dim': [8, 64], 'batch_size': 8, 'flops': 1265603017216}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1898404492032}, {'type': 'perplexity', 'in_batch_dim': [8, 160], 'batch_size': 8, 'flops': 3164007441664}, {'type': 'perplexity', 'in_batch_dim': [8, 64], 'batch_size': 8, 'flops': 1265603017216}, {'type': 'perplexity', 'in_batch_dim': [8, 112], 'batch_size': 8, 'flops': 2214805229440}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1898404492032}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1898404492032}, {'type': 'perplexity', 'in_batch_dim': [8, 144], 'batch_size': 8, 'flops': 2847606704256}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1582003754624}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1898404492032}, {'type': 'perplexity', 'in_batch_dim': [8, 64], 'batch_size': 8, 'flops': 1265603017216}, {'type': 'perplexity', 'in_batch_dim': [8, 64], 'batch_size': 8, 'flops': 1265603017216}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1898404492032}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1898404492032}, {'type': 'perplexity', 'in_batch_dim': [8, 112], 'batch_size': 8, 'flops': 2214805229440}, {'type': 'perplexity', 'in_batch_dim': [8, 112], 'batch_size': 8, 'flops': 2214805229440}, {'type': 'perplexity', 'in_batch_dim': [8, 128], 'batch_size': 8, 'flops': 2531205966848}, {'type': 'perplexity', 'in_batch_dim': [8, 112], 'batch_size': 8, 'flops': 2214805229440}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1582003754624}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1582003754624}, {'type': 'perplexity', 'in_batch_dim': [8, 112], 'batch_size': 8, 'flops': 2214805229440}, {'type': 'perplexity', 'in_batch_dim': [8, 128], 'batch_size': 8, 'flops': 2531205966848}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1582003754624}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1582003754624}, {'type': 'perplexity', 'in_batch_dim': [8, 64], 'batch_size': 8, 'flops': 1265603017216}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1898404492032}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1582003754624}, {'type': 'perplexity', 'in_batch_dim': [8, 112], 'batch_size': 8, 'flops': 2214805229440}, {'type': 'perplexity', 'in_batch_dim': [8, 64], 'batch_size': 8, 'flops': 1265603017216}, {'type': 'perplexity', 'in_batch_dim': [8, 64], 'batch_size': 8, 'flops': 1265603017216}, {'type': 'perplexity', 'in_batch_dim': [8, 128], 'batch_size': 8, 'flops': 2531205966848}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1898404492032}, {'type': 'perplexity', 'in_batch_dim': [8, 112], 'batch_size': 8, 'flops': 2214805229440}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1582003754624}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1582003754624}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1582003754624}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1582003754624}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1582003754624}, {'type': 'perplexity', 'in_batch_dim': [8, 112], 'batch_size': 8, 'flops': 2214805229440}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1898404492032}, {'type': 'perplexity', 'in_batch_dim': [8, 112], 'batch_size': 8, 'flops': 2214805229440}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1898404492032}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1582003754624}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1582003754624}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1898404492032}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1898404492032}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1582003754624}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1582003754624}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1582003754624}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1582003754624}, {'type': 'perplexity', 'in_batch_dim': [8, 48], 'batch_size': 8, 'flops': 949202279808}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1582003754624}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1898404492032}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1582003754624}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1898404492032}, {'type': 'perplexity', 'in_batch_dim': [8, 112], 'batch_size': 8, 'flops': 2214805229440}, {'type': 'perplexity', 'in_batch_dim': [8, 128], 'batch_size': 8, 'flops': 2531205966848}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1898404492032}, {'type': 'perplexity', 'in_batch_dim': [8, 64], 'batch_size': 8, 'flops': 1265603017216}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1898404492032}, {'type': 'perplexity', 'in_batch_dim': [8, 64], 'batch_size': 8, 'flops': 1265603017216}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1582003754624}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1582003754624}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1582003754624}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1582003754624}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1898404492032}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1582003754624}, {'type': 'perplexity', 'in_batch_dim': [8, 112], 'batch_size': 8, 'flops': 2214805229440}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1898404492032}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1582003754624}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1898404492032}, {'type': 'perplexity', 'in_batch_dim': [8, 64], 'batch_size': 8, 'flops': 1265603017216}, {'type': 'perplexity', 'in_batch_dim': [8, 64], 'batch_size': 8, 'flops': 1265603017216}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1898404492032}, {'type': 'perplexity', 'in_batch_dim': [8, 64], 'batch_size': 8, 'flops': 1265603017216}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1582003754624}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1582003754624}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1582003754624}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1582003754624}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1582003754624}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1582003754624}, {'type': 'perplexity', 'in_batch_dim': [8, 64], 'batch_size': 8, 'flops': 1265603017216}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1582003754624}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1582003754624}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1898404492032}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1898404492032}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1898404492032}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1898404492032}, {'type': 'perplexity', 'in_batch_dim': [8, 128], 'batch_size': 8, 'flops': 2531205966848}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1582003754624}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1898404492032}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1898404492032}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1898404492032}, {'type': 'perplexity', 'in_batch_dim': [8, 64], 'batch_size': 8, 'flops': 1265603017216}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1898404492032}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1898404492032}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1582003754624}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1898404492032}, {'type': 'perplexity', 'in_batch_dim': [8, 112], 'batch_size': 8, 'flops': 2214805229440}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1898404492032}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1582003754624}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1898404492032}, {'type': 'perplexity', 'in_batch_dim': [8, 64], 'batch_size': 8, 'flops': 1265603017216}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1898404492032}, {'type': 'perplexity', 'in_batch_dim': [8, 112], 'batch_size': 8, 'flops': 2214805229440}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1898404492032}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1898404492032}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1582003754624}, {'type': 'perplexity', 'in_batch_dim': [8, 64], 'batch_size': 8, 'flops': 1265603017216}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1582003754624}, {'type': 'perplexity', 'in_batch_dim': [8, 128], 'batch_size': 8, 'flops': 2531205966848}, {'type': 'perplexity', 'in_batch_dim': [8, 64], 'batch_size': 8, 'flops': 1265603017216}, {'type': 'perplexity', 'in_batch_dim': [8, 128], 'batch_size': 8, 'flops': 2531205966848}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1898404492032}, {'type': 'perplexity', 'in_batch_dim': [8, 112], 'batch_size': 8, 'flops': 2214805229440}, {'type': 'perplexity', 'in_batch_dim': [8, 112], 'batch_size': 8, 'flops': 2214805229440}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1898404492032}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1898404492032}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1582003754624}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1582003754624}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1582003754624}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1898404492032}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1582003754624}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1582003754624}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1898404492032}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1582003754624}, {'type': 'perplexity', 'in_batch_dim': [8, 64], 'batch_size': 8, 'flops': 1265603017216}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1898404492032}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1898404492032}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1898404492032}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1898404492032}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1582003754624}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1898404492032}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1582003754624}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1582003754624}, {'type': 'perplexity', 'in_batch_dim': [8, 112], 'batch_size': 8, 'flops': 2214805229440}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1582003754624}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1898404492032}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1582003754624}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1582003754624}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1582003754624}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1898404492032}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1582003754624}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1898404492032}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1582003754624}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1582003754624}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1898404492032}, {'type': 'perplexity', 'in_batch_dim': [8, 112], 'batch_size': 8, 'flops': 2214805229440}, {'type': 'perplexity', 'in_batch_dim': [8, 128], 'batch_size': 8, 'flops': 2531205966848}, {'type': 'perplexity', 'in_batch_dim': [8, 112], 'batch_size': 8, 'flops': 2214805229440}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1582003754624}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1898404492032}, {'type': 'perplexity', 'in_batch_dim': [8, 64], 'batch_size': 8, 'flops': 1265603017216}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1898404492032}, {'type': 'perplexity', 'in_batch_dim': [8, 64], 'batch_size': 8, 'flops': 1265603017216}, {'type': 'perplexity', 'in_batch_dim': [8, 144], 'batch_size': 8, 'flops': 2847606704256}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1582003754624}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1898404492032}, {'type': 'perplexity', 'in_batch_dim': [8, 112], 'batch_size': 8, 'flops': 2214805229440}, {'type': 'perplexity', 'in_batch_dim': [8, 144], 'batch_size': 8, 'flops': 2847606704256}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1582003754624}, {'type': 'perplexity', 'in_batch_dim': [8, 112], 'batch_size': 8, 'flops': 2214805229440}, {'type': 'perplexity', 'in_batch_dim': [8, 64], 'batch_size': 8, 'flops': 1265603017216}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1582003754624}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1898404492032}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1898404492032}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1582003754624}, {'type': 'perplexity', 'in_batch_dim': [8, 112], 'batch_size': 8, 'flops': 2214805229440}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1582003754624}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1582003754624}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1582003754624}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1582003754624}, {'type': 'perplexity', 'in_batch_dim': [8, 112], 'batch_size': 8, 'flops': 2214805229440}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1898404492032}, {'type': 'perplexity', 'in_batch_dim': [8, 128], 'batch_size': 8, 'flops': 2531205966848}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1582003754624}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1582003754624}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1898404492032}, {'type': 'perplexity', 'in_batch_dim': [8, 64], 'batch_size': 8, 'flops': 1265603017216}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1898404492032}, {'type': 'perplexity', 'in_batch_dim': [8, 64], 'batch_size': 8, 'flops': 1265603017216}, {'type': 'perplexity', 'in_batch_dim': [8, 112], 'batch_size': 8, 'flops': 2214805229440}, {'type': 'perplexity', 'in_batch_dim': [8, 112], 'batch_size': 8, 'flops': 2214805229440}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1582003754624}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1898404492032}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1582003754624}, {'type': 'perplexity', 'in_batch_dim': [8, 64], 'batch_size': 8, 'flops': 1265603017216}, {'type': 'perplexity', 'in_batch_dim': [8, 64], 'batch_size': 8, 'flops': 1265603017216}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1898404492032}, {'type': 'perplexity', 'in_batch_dim': [8, 64], 'batch_size': 8, 'flops': 1265603017216}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1898404492032}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1898404492032}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1582003754624}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1898404492032}, {'type': 'perplexity', 'in_batch_dim': [8, 144], 'batch_size': 8, 'flops': 2847606704256}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1898404492032}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1582003754624}, {'type': 'perplexity', 'in_batch_dim': [8, 128], 'batch_size': 8, 'flops': 2531205966848}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1898404492032}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1898404492032}, {'type': 'perplexity', 'in_batch_dim': [8, 112], 'batch_size': 8, 'flops': 2214805229440}, {'type': 'perplexity', 'in_batch_dim': [8, 112], 'batch_size': 8, 'flops': 2214805229440}, {'type': 'perplexity', 'in_batch_dim': [8, 64], 'batch_size': 8, 'flops': 1265603017216}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1582003754624}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1898404492032}, {'type': 'perplexity', 'in_batch_dim': [8, 128], 'batch_size': 8, 'flops': 2531205966848}, {'type': 'perplexity', 'in_batch_dim': [8, 144], 'batch_size': 8, 'flops': 2847606704256}, {'type': 'perplexity', 'in_batch_dim': [8, 112], 'batch_size': 8, 'flops': 2214805229440}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1582003754624}, {'type': 'perplexity', 'in_batch_dim': [8, 64], 'batch_size': 8, 'flops': 1265603017216}, {'type': 'perplexity', 'in_batch_dim': [8, 64], 'batch_size': 8, 'flops': 1265603017216}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1898404492032}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1582003754624}, {'type': 'perplexity', 'in_batch_dim': [8, 64], 'batch_size': 8, 'flops': 1265603017216}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1898404492032}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1582003754624}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1898404492032}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1898404492032}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1582003754624}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1582003754624}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1582003754624}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1582003754624}, {'type': 'perplexity', 'in_batch_dim': [3, 48], 'batch_size': 8, 'flops': 949202279808}], 'timestamp': '2025-09-10 02:44:10.117649', 'step': 15378, 'epoch': 3} {'type': 'pplx', 'content': 9429.449285084964, 'timestamp': '2025-09-10 02:44:10.120586', 'step': 15378, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 128], 'flops': 3797092544000}, 'timestamp': '2025-09-10 02:44:10.150680', 'step': 15378, 'epoch': 3} {'type': 'loss', 'content': 0.06433073431253433, 'timestamp': '2025-09-10 02:44:10.153133', 'step': 15379, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 128], 'flops': 3797092544000}, 'timestamp': '2025-09-10 02:44:10.184184', 'step': 15379, 'epoch': 3} {'type': 'loss', 'content': 0.17209188640117645, 'timestamp': '2025-09-10 02:44:10.207479', 'step': 15380, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 128], 'flops': 3797092544000}, 'timestamp': '2025-09-10 02:44:10.237535', 'step': 15380, 'epoch': 3} {'type': 'loss', 'content': 0.1223648339509964, 'timestamp': '2025-09-10 02:44:10.240224', 'step': 15381, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 3322488817984}, 'timestamp': '2025-09-10 02:44:10.270495', 'step': 15381, 'epoch': 3} {'type': 'loss', 'content': 0.10215871036052704, 'timestamp': '2025-09-10 02:44:10.272745', 'step': 15382, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 2847885091968}, 'timestamp': '2025-09-10 02:44:10.302921', 'step': 15382, 'epoch': 3} {'type': 'loss', 'content': 0.009933163411915302, 'timestamp': '2025-09-10 02:44:10.305319', 'step': 15383, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 3322488817984}, 'timestamp': '2025-09-10 02:44:10.335533', 'step': 15383, 'epoch': 3} {'type': 'loss', 'content': 0.11126624047756195, 'timestamp': '2025-09-10 02:44:10.359307', 'step': 15384, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 144], 'flops': 4271696270016}, 'timestamp': '2025-09-10 02:44:10.390712', 'step': 15384, 'epoch': 3} {'type': 'loss', 'content': 0.06862842291593552, 'timestamp': '2025-09-10 02:44:10.392983', 'step': 15385, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 3322488817984}, 'timestamp': '2025-09-10 02:44:10.424203', 'step': 15385, 'epoch': 3} {'type': 'loss', 'content': 0.0712541714310646, 'timestamp': '2025-09-10 02:44:10.426401', 'step': 15386, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 3322488817984}, 'timestamp': '2025-09-10 02:44:10.456385', 'step': 15386, 'epoch': 3} {'type': 'loss', 'content': 0.040231626480817795, 'timestamp': '2025-09-10 02:44:10.458450', 'step': 15387, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 3322488817984}, 'timestamp': '2025-09-10 02:44:10.487810', 'step': 15387, 'epoch': 3} {'type': 'loss', 'content': 0.10297262668609619, 'timestamp': '2025-09-10 02:44:10.511590', 'step': 15388, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 3322488817984}, 'timestamp': '2025-09-10 02:44:10.541617', 'step': 15388, 'epoch': 3} {'type': 'loss', 'content': 0.1186210960149765, 'timestamp': '2025-09-10 02:44:10.543696', 'step': 15389, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 3322488817984}, 'timestamp': '2025-09-10 02:44:10.573755', 'step': 15389, 'epoch': 3} {'type': 'loss', 'content': 0.04699251800775528, 'timestamp': '2025-09-10 02:44:10.575873', 'step': 15390, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 2847885091968}, 'timestamp': '2025-09-10 02:44:10.605289', 'step': 15390, 'epoch': 3} {'type': 'loss', 'content': 0.17415565252304077, 'timestamp': '2025-09-10 02:44:10.607727', 'step': 15391, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 3322488817984}, 'timestamp': '2025-09-10 02:44:10.637943', 'step': 15391, 'epoch': 3} {'type': 'loss', 'content': 0.09255378693342209, 'timestamp': '2025-09-10 02:44:10.661374', 'step': 15392, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 128], 'flops': 3797092544000}, 'timestamp': '2025-09-10 02:44:10.692189', 'step': 15392, 'epoch': 3} {'type': 'loss', 'content': 0.044121429324150085, 'timestamp': '2025-09-10 02:44:10.694385', 'step': 15393, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 2847885091968}, 'timestamp': '2025-09-10 02:44:10.723562', 'step': 15393, 'epoch': 3} {'type': 'loss', 'content': 0.0574001781642437, 'timestamp': '2025-09-10 02:44:10.726010', 'step': 15394, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 128], 'flops': 3797092544000}, 'timestamp': '2025-09-10 02:44:10.758282', 'step': 15394, 'epoch': 3} {'type': 'loss', 'content': 0.09184063225984573, 'timestamp': '2025-09-10 02:44:10.760483', 'step': 15395, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 3322488817984}, 'timestamp': '2025-09-10 02:44:10.790262', 'step': 15395, 'epoch': 3} {'type': 'loss', 'content': 0.03938274085521698, 'timestamp': '2025-09-10 02:44:10.814492', 'step': 15396, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 3322488817984}, 'timestamp': '2025-09-10 02:44:10.845140', 'step': 15396, 'epoch': 3} {'type': 'loss', 'content': 0.10831264406442642, 'timestamp': '2025-09-10 02:44:10.847470', 'step': 15397, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 2847885091968}, 'timestamp': '2025-09-10 02:44:10.877130', 'step': 15397, 'epoch': 3} {'type': 'loss', 'content': 0.09282172471284866, 'timestamp': '2025-09-10 02:44:10.879410', 'step': 15398, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 160], 'flops': 4746299996032}, 'timestamp': '2025-09-10 02:44:10.911031', 'step': 15398, 'epoch': 3} {'type': 'loss', 'content': 0.08812468498945236, 'timestamp': '2025-09-10 02:44:10.913772', 'step': 15399, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 2847885091968}, 'timestamp': '2025-09-10 02:44:10.943864', 'step': 15399, 'epoch': 3} {'type': 'loss', 'content': 0.1821945607662201, 'timestamp': '2025-09-10 02:44:10.967428', 'step': 15400, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 176], 'flops': 5220903722048}, 'timestamp': '2025-09-10 02:44:10.998339', 'step': 15400, 'epoch': 3} {'type': 'loss', 'content': 0.11256678402423859, 'timestamp': '2025-09-10 02:44:11.000596', 'step': 15401, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 128], 'flops': 3797092544000}, 'timestamp': '2025-09-10 02:44:11.032210', 'step': 15401, 'epoch': 3} {'type': 'loss', 'content': 0.06309744715690613, 'timestamp': '2025-09-10 02:44:11.034375', 'step': 15402, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 80], 'flops': 2373281365952}, 'timestamp': '2025-09-10 02:44:11.064186', 'step': 15402, 'epoch': 3} {'type': 'loss', 'content': 0.056945495307445526, 'timestamp': '2025-09-10 02:44:11.067360', 'step': 15403, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 2847885091968}, 'timestamp': '2025-09-10 02:44:11.098727', 'step': 15403, 'epoch': 3} {'type': 'loss', 'content': 0.06618979573249817, 'timestamp': '2025-09-10 02:44:11.130771', 'step': 15404, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 3322488817984}, 'timestamp': '2025-09-10 02:44:11.163343', 'step': 15404, 'epoch': 3} {'type': 'loss', 'content': 0.061950795352458954, 'timestamp': '2025-09-10 02:44:11.165458', 'step': 15405, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 128], 'flops': 3797092544000}, 'timestamp': '2025-09-10 02:44:11.195054', 'step': 15405, 'epoch': 3} {'type': 'loss', 'content': 0.029747499153017998, 'timestamp': '2025-09-10 02:44:11.197471', 'step': 15406, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 144], 'flops': 4271696270016}, 'timestamp': '2025-09-10 02:44:11.227481', 'step': 15406, 'epoch': 3} {'type': 'loss', 'content': 0.026761138811707497, 'timestamp': '2025-09-10 02:44:11.229873', 'step': 15407, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 128], 'flops': 3797092544000}, 'timestamp': '2025-09-10 02:44:11.261270', 'step': 15407, 'epoch': 3} {'type': 'loss', 'content': 0.11509496718645096, 'timestamp': '2025-09-10 02:44:11.285823', 'step': 15408, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 2847885091968}, 'timestamp': '2025-09-10 02:44:11.315964', 'step': 15408, 'epoch': 3} {'type': 'loss', 'content': 0.10511823743581772, 'timestamp': '2025-09-10 02:44:11.318107', 'step': 15409, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 128], 'flops': 3797092544000}, 'timestamp': '2025-09-10 02:44:11.348632', 'step': 15409, 'epoch': 3} {'type': 'loss', 'content': 0.09252028912305832, 'timestamp': '2025-09-10 02:44:11.353513', 'step': 15410, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 3322488817984}, 'timestamp': '2025-09-10 02:44:11.384410', 'step': 15410, 'epoch': 3} {'type': 'loss', 'content': 0.10809244215488434, 'timestamp': '2025-09-10 02:44:11.386446', 'step': 15411, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 128], 'flops': 3797092544000}, 'timestamp': '2025-09-10 02:44:11.416147', 'step': 15411, 'epoch': 3} {'type': 'loss', 'content': 0.048092007637023926, 'timestamp': '2025-09-10 02:44:11.439842', 'step': 15412, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 128], 'flops': 3797092544000}, 'timestamp': '2025-09-10 02:44:11.470086', 'step': 15412, 'epoch': 3} {'type': 'loss', 'content': 0.05461566150188446, 'timestamp': '2025-09-10 02:44:11.472421', 'step': 15413, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 2847885091968}, 'timestamp': '2025-09-10 02:44:11.502042', 'step': 15413, 'epoch': 3} {'type': 'loss', 'content': 0.09759360551834106, 'timestamp': '2025-09-10 02:44:11.504004', 'step': 15414, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 128], 'flops': 3797092544000}, 'timestamp': '2025-09-10 02:44:11.534654', 'step': 15414, 'epoch': 3} {'type': 'loss', 'content': 0.08412383496761322, 'timestamp': '2025-09-10 02:44:11.536948', 'step': 15415, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 80], 'flops': 2373281365952}, 'timestamp': '2025-09-10 02:44:11.567052', 'step': 15415, 'epoch': 3} {'type': 'loss', 'content': 0.15383760631084442, 'timestamp': '2025-09-10 02:44:11.590687', 'step': 15416, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 160], 'flops': 4746299996032}, 'timestamp': '2025-09-10 02:44:11.620663', 'step': 15416, 'epoch': 3} {'type': 'loss', 'content': 0.07623209804296494, 'timestamp': '2025-09-10 02:44:11.622654', 'step': 15417, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 2847885091968}, 'timestamp': '2025-09-10 02:44:11.652732', 'step': 15417, 'epoch': 3} {'type': 'loss', 'content': 0.04876994341611862, 'timestamp': '2025-09-10 02:44:11.655496', 'step': 15418, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 128], 'flops': 3797092544000}, 'timestamp': '2025-09-10 02:44:11.690031', 'step': 15418, 'epoch': 3} {'type': 'loss', 'content': 0.0410076379776001, 'timestamp': '2025-09-10 02:44:11.692357', 'step': 15419, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 3322488817984}, 'timestamp': '2025-09-10 02:44:11.721884', 'step': 15419, 'epoch': 3} {'type': 'loss', 'content': 0.02511371672153473, 'timestamp': '2025-09-10 02:44:11.752449', 'step': 15420, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 144], 'flops': 4271696270016}, 'timestamp': '2025-09-10 02:44:11.790882', 'step': 15420, 'epoch': 3} {'type': 'loss', 'content': 0.049484509974718094, 'timestamp': '2025-09-10 02:44:11.793980', 'step': 15421, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 2847885091968}, 'timestamp': '2025-09-10 02:44:11.825191', 'step': 15421, 'epoch': 3} {'type': 'loss', 'content': 0.10497492551803589, 'timestamp': '2025-09-10 02:44:11.827857', 'step': 15422, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 3322488817984}, 'timestamp': '2025-09-10 02:44:11.861026', 'step': 15422, 'epoch': 3} {'type': 'loss', 'content': 0.05645187199115753, 'timestamp': '2025-09-10 02:44:11.863215', 'step': 15423, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 128], 'flops': 3797092544000}, 'timestamp': '2025-09-10 02:44:11.893809', 'step': 15423, 'epoch': 3} {'type': 'loss', 'content': 0.058524809777736664, 'timestamp': '2025-09-10 02:44:11.917336', 'step': 15424, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 144], 'flops': 4271696270016}, 'timestamp': '2025-09-10 02:44:11.948946', 'step': 15424, 'epoch': 3} {'type': 'loss', 'content': 0.04066294804215431, 'timestamp': '2025-09-10 02:44:11.951276', 'step': 15425, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 128], 'flops': 3797092544000}, 'timestamp': '2025-09-10 02:44:11.983325', 'step': 15425, 'epoch': 3} {'type': 'loss', 'content': 0.051961515098810196, 'timestamp': '2025-09-10 02:44:11.985458', 'step': 15426, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 2847885091968}, 'timestamp': '2025-09-10 02:44:12.015115', 'step': 15426, 'epoch': 3} {'type': 'loss', 'content': 0.08286873996257782, 'timestamp': '2025-09-10 02:44:12.017327', 'step': 15427, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 2847885091968}, 'timestamp': '2025-09-10 02:44:12.048670', 'step': 15427, 'epoch': 3} {'type': 'loss', 'content': 0.08871965110301971, 'timestamp': '2025-09-10 02:44:12.072456', 'step': 15428, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 3322488817984}, 'timestamp': '2025-09-10 02:44:12.106508', 'step': 15428, 'epoch': 3} {'type': 'loss', 'content': 0.03259950503706932, 'timestamp': '2025-09-10 02:44:12.110317', 'step': 15429, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 2847885091968}, 'timestamp': '2025-09-10 02:44:12.140371', 'step': 15429, 'epoch': 3} {'type': 'loss', 'content': 0.04237677529454231, 'timestamp': '2025-09-10 02:44:12.145736', 'step': 15430, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 80], 'flops': 2373281365952}, 'timestamp': '2025-09-10 02:44:12.184409', 'step': 15430, 'epoch': 3} {'type': 'loss', 'content': 0.02005581185221672, 'timestamp': '2025-09-10 02:44:12.186732', 'step': 15431, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 2847885091968}, 'timestamp': '2025-09-10 02:44:12.216529', 'step': 15431, 'epoch': 3} {'type': 'loss', 'content': 0.06753740459680557, 'timestamp': '2025-09-10 02:44:12.240008', 'step': 15432, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 2847885091968}, 'timestamp': '2025-09-10 02:44:12.269844', 'step': 15432, 'epoch': 3} {'type': 'loss', 'content': 0.10188225656747818, 'timestamp': '2025-09-10 02:44:12.273463', 'step': 15433, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 128], 'flops': 3797092544000}, 'timestamp': '2025-09-10 02:44:12.303489', 'step': 15433, 'epoch': 3} {'type': 'loss', 'content': 0.09514352679252625, 'timestamp': '2025-09-10 02:44:12.306256', 'step': 15434, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 128], 'flops': 3797092544000}, 'timestamp': '2025-09-10 02:44:12.336458', 'step': 15434, 'epoch': 3} {'type': 'loss', 'content': 0.04083341732621193, 'timestamp': '2025-09-10 02:44:12.338891', 'step': 15435, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 128], 'flops': 3797092544000}, 'timestamp': '2025-09-10 02:44:12.368610', 'step': 15435, 'epoch': 3} {'type': 'loss', 'content': 0.06682201474905014, 'timestamp': '2025-09-10 02:44:12.392676', 'step': 15436, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 2847885091968}, 'timestamp': '2025-09-10 02:44:12.422840', 'step': 15436, 'epoch': 3} {'type': 'loss', 'content': 0.0837424024939537, 'timestamp': '2025-09-10 02:44:12.427091', 'step': 15437, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 2847885091968}, 'timestamp': '2025-09-10 02:44:12.457527', 'step': 15437, 'epoch': 3} {'type': 'loss', 'content': 0.06360291689634323, 'timestamp': '2025-09-10 02:44:12.460127', 'step': 15438, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 3322488817984}, 'timestamp': '2025-09-10 02:44:12.490148', 'step': 15438, 'epoch': 3} {'type': 'loss', 'content': 0.15892012417316437, 'timestamp': '2025-09-10 02:44:12.492284', 'step': 15439, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 144], 'flops': 4271696270016}, 'timestamp': '2025-09-10 02:44:12.522289', 'step': 15439, 'epoch': 3} {'type': 'loss', 'content': 0.06561966240406036, 'timestamp': '2025-09-10 02:44:12.546123', 'step': 15440, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 80], 'flops': 2373281365952}, 'timestamp': '2025-09-10 02:44:12.576538', 'step': 15440, 'epoch': 3} {'type': 'loss', 'content': 0.06439846754074097, 'timestamp': '2025-09-10 02:44:12.578782', 'step': 15441, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 3322488817984}, 'timestamp': '2025-09-10 02:44:12.608279', 'step': 15441, 'epoch': 3} {'type': 'loss', 'content': 0.15360906720161438, 'timestamp': '2025-09-10 02:44:12.610438', 'step': 15442, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 3322488817984}, 'timestamp': '2025-09-10 02:44:12.640448', 'step': 15442, 'epoch': 3} {'type': 'loss', 'content': 0.0915633961558342, 'timestamp': '2025-09-10 02:44:12.643400', 'step': 15443, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 2847885091968}, 'timestamp': '2025-09-10 02:44:12.673898', 'step': 15443, 'epoch': 3} {'type': 'loss', 'content': 0.016855936497449875, 'timestamp': '2025-09-10 02:44:12.697332', 'step': 15444, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 2847885091968}, 'timestamp': '2025-09-10 02:44:12.727155', 'step': 15444, 'epoch': 3} {'type': 'loss', 'content': 0.05445782095193863, 'timestamp': '2025-09-10 02:44:12.730994', 'step': 15445, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 144], 'flops': 4271696270016}, 'timestamp': '2025-09-10 02:44:12.761149', 'step': 15445, 'epoch': 3} {'type': 'loss', 'content': 0.11108708381652832, 'timestamp': '2025-09-10 02:44:12.763638', 'step': 15446, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 2847885091968}, 'timestamp': '2025-09-10 02:44:12.794183', 'step': 15446, 'epoch': 3} {'type': 'loss', 'content': 0.10009298473596573, 'timestamp': '2025-09-10 02:44:12.796629', 'step': 15447, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 176], 'flops': 5220903722048}, 'timestamp': '2025-09-10 02:44:12.826618', 'step': 15447, 'epoch': 3} {'type': 'loss', 'content': 0.06673261523246765, 'timestamp': '2025-09-10 02:44:12.851552', 'step': 15448, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 128], 'flops': 3797092544000}, 'timestamp': '2025-09-10 02:44:12.882127', 'step': 15448, 'epoch': 3} {'type': 'loss', 'content': 0.07986962050199509, 'timestamp': '2025-09-10 02:44:12.884015', 'step': 15449, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 3322488817984}, 'timestamp': '2025-09-10 02:44:12.913856', 'step': 15449, 'epoch': 3} {'type': 'loss', 'content': 0.1038908138871193, 'timestamp': '2025-09-10 02:44:12.917084', 'step': 15450, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 128], 'flops': 3797092544000}, 'timestamp': '2025-09-10 02:44:12.947568', 'step': 15450, 'epoch': 3} {'type': 'loss', 'content': 0.06628400087356567, 'timestamp': '2025-09-10 02:44:12.949825', 'step': 15451, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 2847885091968}, 'timestamp': '2025-09-10 02:44:12.980575', 'step': 15451, 'epoch': 3} {'type': 'loss', 'content': 0.046376653015613556, 'timestamp': '2025-09-10 02:44:13.004418', 'step': 15452, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 192], 'flops': 5695507448064}, 'timestamp': '2025-09-10 02:44:13.034145', 'step': 15452, 'epoch': 3} {'type': 'loss', 'content': 0.08497821539640427, 'timestamp': '2025-09-10 02:44:13.036468', 'step': 15453, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 176], 'flops': 5220903722048}, 'timestamp': '2025-09-10 02:44:13.066296', 'step': 15453, 'epoch': 3} {'type': 'loss', 'content': 0.05899186432361603, 'timestamp': '2025-09-10 02:44:13.070790', 'step': 15454, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 3322488817984}, 'timestamp': '2025-09-10 02:44:13.101179', 'step': 15454, 'epoch': 3} {'type': 'loss', 'content': 0.07747779041528702, 'timestamp': '2025-09-10 02:44:13.103660', 'step': 15455, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 144], 'flops': 4271696270016}, 'timestamp': '2025-09-10 02:44:13.133352', 'step': 15455, 'epoch': 3} {'type': 'loss', 'content': 0.13993069529533386, 'timestamp': '2025-09-10 02:44:13.159325', 'step': 15456, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 3322488817984}, 'timestamp': '2025-09-10 02:44:13.189779', 'step': 15456, 'epoch': 3} {'type': 'loss', 'content': 0.032240528613328934, 'timestamp': '2025-09-10 02:44:13.192185', 'step': 15457, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 3322488817984}, 'timestamp': '2025-09-10 02:44:13.222578', 'step': 15457, 'epoch': 3} {'type': 'loss', 'content': 0.04077230393886566, 'timestamp': '2025-09-10 02:44:13.224984', 'step': 15458, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 3322488817984}, 'timestamp': '2025-09-10 02:44:13.255032', 'step': 15458, 'epoch': 3} {'type': 'loss', 'content': 0.11699520796537399, 'timestamp': '2025-09-10 02:44:13.257431', 'step': 15459, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 2847885091968}, 'timestamp': '2025-09-10 02:44:13.287000', 'step': 15459, 'epoch': 3} {'type': 'loss', 'content': 0.05243518576025963, 'timestamp': '2025-09-10 02:44:13.310204', 'step': 15460, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 3322488817984}, 'timestamp': '2025-09-10 02:44:13.340364', 'step': 15460, 'epoch': 3} {'type': 'loss', 'content': 0.07379471510648727, 'timestamp': '2025-09-10 02:44:13.342620', 'step': 15461, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 3322488817984}, 'timestamp': '2025-09-10 02:44:13.372713', 'step': 15461, 'epoch': 3} {'type': 'loss', 'content': 0.07578574866056442, 'timestamp': '2025-09-10 02:44:13.375438', 'step': 15462, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 3322488817984}, 'timestamp': '2025-09-10 02:44:13.404919', 'step': 15462, 'epoch': 3} {'type': 'loss', 'content': 0.03656671941280365, 'timestamp': '2025-09-10 02:44:13.407097', 'step': 15463, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 128], 'flops': 3797092544000}, 'timestamp': '2025-09-10 02:44:13.439145', 'step': 15463, 'epoch': 3} {'type': 'loss', 'content': 0.0242006815969944, 'timestamp': '2025-09-10 02:44:13.464066', 'step': 15464, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 2847885091968}, 'timestamp': '2025-09-10 02:44:13.494686', 'step': 15464, 'epoch': 3} {'type': 'loss', 'content': 0.06918953359127045, 'timestamp': '2025-09-10 02:44:13.496973', 'step': 15465, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 3322488817984}, 'timestamp': '2025-09-10 02:44:13.527395', 'step': 15465, 'epoch': 3} {'type': 'loss', 'content': 0.06684638559818268, 'timestamp': '2025-09-10 02:44:13.529621', 'step': 15466, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 2847885091968}, 'timestamp': '2025-09-10 02:44:13.559811', 'step': 15466, 'epoch': 3} {'type': 'loss', 'content': 0.05189155042171478, 'timestamp': '2025-09-10 02:44:13.562094', 'step': 15467, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 144], 'flops': 4271696270016}, 'timestamp': '2025-09-10 02:44:13.592096', 'step': 15467, 'epoch': 3} {'type': 'loss', 'content': 0.09805396944284439, 'timestamp': '2025-09-10 02:44:13.615578', 'step': 15468, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 2847885091968}, 'timestamp': '2025-09-10 02:44:13.660739', 'step': 15468, 'epoch': 3} {'type': 'loss', 'content': 0.09179751574993134, 'timestamp': '2025-09-10 02:44:13.678803', 'step': 15469, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 3322488817984}, 'timestamp': '2025-09-10 02:44:13.733777', 'step': 15469, 'epoch': 3} {'type': 'loss', 'content': 0.06293539702892303, 'timestamp': '2025-09-10 02:44:13.753283', 'step': 15470, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 3322488817984}, 'timestamp': '2025-09-10 02:44:13.830021', 'step': 15470, 'epoch': 3} {'type': 'loss', 'content': 0.047908321022987366, 'timestamp': '2025-09-10 02:44:13.833578', 'step': 15471, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 3322488817984}, 'timestamp': '2025-09-10 02:44:13.880512', 'step': 15471, 'epoch': 3} {'type': 'loss', 'content': 0.0898314043879509, 'timestamp': '2025-09-10 02:44:13.904313', 'step': 15472, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 2847885091968}, 'timestamp': '2025-09-10 02:44:13.937956', 'step': 15472, 'epoch': 3} {'type': 'loss', 'content': 0.06139142066240311, 'timestamp': '2025-09-10 02:44:13.940574', 'step': 15473, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 2847885091968}, 'timestamp': '2025-09-10 02:44:13.973840', 'step': 15473, 'epoch': 3} {'type': 'loss', 'content': 0.04383023455739021, 'timestamp': '2025-09-10 02:44:13.977233', 'step': 15474, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 3322488817984}, 'timestamp': '2025-09-10 02:44:14.007152', 'step': 15474, 'epoch': 3} {'type': 'loss', 'content': 0.0850151851773262, 'timestamp': '2025-09-10 02:44:14.009345', 'step': 15475, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 3322488817984}, 'timestamp': '2025-09-10 02:44:14.039893', 'step': 15475, 'epoch': 3} {'type': 'loss', 'content': 0.06388118863105774, 'timestamp': '2025-09-10 02:44:14.063864', 'step': 15476, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 3322488817984}, 'timestamp': '2025-09-10 02:44:14.094140', 'step': 15476, 'epoch': 3} {'type': 'loss', 'content': 0.048074871301651, 'timestamp': '2025-09-10 02:44:14.097447', 'step': 15477, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 128], 'flops': 3797092544000}, 'timestamp': '2025-09-10 02:44:14.131310', 'step': 15477, 'epoch': 3} {'type': 'loss', 'content': 0.03470989316701889, 'timestamp': '2025-09-10 02:44:14.133855', 'step': 15478, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 3322488817984}, 'timestamp': '2025-09-10 02:44:14.164229', 'step': 15478, 'epoch': 3} {'type': 'loss', 'content': 0.07043658941984177, 'timestamp': '2025-09-10 02:44:14.168503', 'step': 15479, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 128], 'flops': 3797092544000}, 'timestamp': '2025-09-10 02:44:14.203815', 'step': 15479, 'epoch': 3} {'type': 'loss', 'content': 0.05271608382463455, 'timestamp': '2025-09-10 02:44:14.227664', 'step': 15480, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 3322488817984}, 'timestamp': '2025-09-10 02:44:14.259857', 'step': 15480, 'epoch': 3} {'type': 'loss', 'content': 0.08474637567996979, 'timestamp': '2025-09-10 02:44:14.263660', 'step': 15481, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 2847885091968}, 'timestamp': '2025-09-10 02:44:14.294295', 'step': 15481, 'epoch': 3} {'type': 'loss', 'content': 0.05909936502575874, 'timestamp': '2025-09-10 02:44:14.296870', 'step': 15482, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 144], 'flops': 4271696270016}, 'timestamp': '2025-09-10 02:44:14.328133', 'step': 15482, 'epoch': 3} {'type': 'loss', 'content': 0.08549761027097702, 'timestamp': '2025-09-10 02:44:14.331268', 'step': 15483, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 2847885091968}, 'timestamp': '2025-09-10 02:44:14.363752', 'step': 15483, 'epoch': 3} {'type': 'loss', 'content': 0.16090618073940277, 'timestamp': '2025-09-10 02:44:14.387620', 'step': 15484, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 192], 'flops': 5695507448064}, 'timestamp': '2025-09-10 02:44:14.419421', 'step': 15484, 'epoch': 3} {'type': 'loss', 'content': 0.09442714601755142, 'timestamp': '2025-09-10 02:44:14.424438', 'step': 15485, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 3322488817984}, 'timestamp': '2025-09-10 02:44:14.454727', 'step': 15485, 'epoch': 3} {'type': 'loss', 'content': 0.07348322123289108, 'timestamp': '2025-09-10 02:44:14.458714', 'step': 15486, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 2847885091968}, 'timestamp': '2025-09-10 02:44:14.494940', 'step': 15486, 'epoch': 3} {'type': 'loss', 'content': 0.06306059658527374, 'timestamp': '2025-09-10 02:44:14.497253', 'step': 15487, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 128], 'flops': 3797092544000}, 'timestamp': '2025-09-10 02:44:14.526958', 'step': 15487, 'epoch': 3} {'type': 'loss', 'content': 0.05301542207598686, 'timestamp': '2025-09-10 02:44:14.550327', 'step': 15488, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 3322488817984}, 'timestamp': '2025-09-10 02:44:14.583248', 'step': 15488, 'epoch': 3} {'type': 'loss', 'content': 0.059099871665239334, 'timestamp': '2025-09-10 02:44:14.585869', 'step': 15489, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 3322488817984}, 'timestamp': '2025-09-10 02:44:14.616290', 'step': 15489, 'epoch': 3} {'type': 'loss', 'content': 0.10640601068735123, 'timestamp': '2025-09-10 02:44:14.618711', 'step': 15490, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 2847885091968}, 'timestamp': '2025-09-10 02:44:14.649184', 'step': 15490, 'epoch': 3} {'type': 'loss', 'content': 0.06432685256004333, 'timestamp': '2025-09-10 02:44:14.652563', 'step': 15491, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 3322488817984}, 'timestamp': '2025-09-10 02:44:14.683478', 'step': 15491, 'epoch': 3} {'type': 'loss', 'content': 0.01865418814122677, 'timestamp': '2025-09-10 02:44:14.706907', 'step': 15492, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 3322488817984}, 'timestamp': '2025-09-10 02:44:14.737008', 'step': 15492, 'epoch': 3} {'type': 'loss', 'content': 0.05481729656457901, 'timestamp': '2025-09-10 02:44:14.739637', 'step': 15493, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 144], 'flops': 4271696270016}, 'timestamp': '2025-09-10 02:44:14.771515', 'step': 15493, 'epoch': 3} {'type': 'loss', 'content': 0.19526682794094086, 'timestamp': '2025-09-10 02:44:14.774213', 'step': 15494, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 3322488817984}, 'timestamp': '2025-09-10 02:44:14.804543', 'step': 15494, 'epoch': 3} {'type': 'loss', 'content': 0.02630075067281723, 'timestamp': '2025-09-10 02:44:14.808135', 'step': 15495, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 128], 'flops': 3797092544000}, 'timestamp': '2025-09-10 02:44:14.839491', 'step': 15495, 'epoch': 3} {'type': 'loss', 'content': 0.05499229580163956, 'timestamp': '2025-09-10 02:44:14.863464', 'step': 15496, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 2847885091968}, 'timestamp': '2025-09-10 02:44:14.893665', 'step': 15496, 'epoch': 3} {'type': 'loss', 'content': 0.017596276476979256, 'timestamp': '2025-09-10 02:44:14.895656', 'step': 15497, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 128], 'flops': 3797092544000}, 'timestamp': '2025-09-10 02:44:14.925070', 'step': 15497, 'epoch': 3} {'type': 'loss', 'content': 0.13272333145141602, 'timestamp': '2025-09-10 02:44:14.927595', 'step': 15498, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 3322488817984}, 'timestamp': '2025-09-10 02:44:14.958864', 'step': 15498, 'epoch': 3} {'type': 'loss', 'content': 0.1223762184381485, 'timestamp': '2025-09-10 02:44:14.961304', 'step': 15499, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 144], 'flops': 4271696270016}, 'timestamp': '2025-09-10 02:44:14.993895', 'step': 15499, 'epoch': 3} {'type': 'loss', 'content': 0.13198281824588776, 'timestamp': '2025-09-10 02:44:15.017973', 'step': 15500, 'epoch': 3} {'type': 'info', 'content': 'Checkpoint saved at step 15500', 'timestamp': '2025-09-10 02:44:21.396478', 'step': 15500, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 128], 'flops': 3797092544000}, 'timestamp': '2025-09-10 02:44:21.439142', 'step': 15500, 'epoch': 3} {'type': 'loss', 'content': 0.04398711770772934, 'timestamp': '2025-09-10 02:44:21.441413', 'step': 15501, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 128], 'flops': 3797092544000}, 'timestamp': '2025-09-10 02:44:21.473946', 'step': 15501, 'epoch': 3} {'type': 'loss', 'content': 0.04913400858640671, 'timestamp': '2025-09-10 02:44:21.476311', 'step': 15502, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 3322488817984}, 'timestamp': '2025-09-10 02:44:21.506383', 'step': 15502, 'epoch': 3} {'type': 'loss', 'content': 0.06057986617088318, 'timestamp': '2025-09-10 02:44:21.508706', 'step': 15503, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 3322488817984}, 'timestamp': '2025-09-10 02:44:21.539182', 'step': 15503, 'epoch': 3} {'type': 'loss', 'content': 0.12169987708330154, 'timestamp': '2025-09-10 02:44:21.562507', 'step': 15504, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 144], 'flops': 4271696270016}, 'timestamp': '2025-09-10 02:44:21.592933', 'step': 15504, 'epoch': 3} {'type': 'loss', 'content': 0.08512505143880844, 'timestamp': '2025-09-10 02:44:21.595402', 'step': 15505, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 224], 'flops': 6644714900096}, 'timestamp': '2025-09-10 02:44:21.625721', 'step': 15505, 'epoch': 3} {'type': 'loss', 'content': 0.08310510218143463, 'timestamp': '2025-09-10 02:44:21.632965', 'step': 15506, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 128], 'flops': 3797092544000}, 'timestamp': '2025-09-10 02:44:21.665027', 'step': 15506, 'epoch': 3} {'type': 'loss', 'content': 0.09096826612949371, 'timestamp': '2025-09-10 02:44:21.667454', 'step': 15507, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 2847885091968}, 'timestamp': '2025-09-10 02:44:21.697844', 'step': 15507, 'epoch': 3} {'type': 'loss', 'content': 0.10467486828565598, 'timestamp': '2025-09-10 02:44:21.721469', 'step': 15508, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 2847885091968}, 'timestamp': '2025-09-10 02:44:21.751026', 'step': 15508, 'epoch': 3} {'type': 'loss', 'content': 0.16095012426376343, 'timestamp': '2025-09-10 02:44:21.753605', 'step': 15509, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 3322488817984}, 'timestamp': '2025-09-10 02:44:21.783435', 'step': 15509, 'epoch': 3} {'type': 'loss', 'content': 0.06791664659976959, 'timestamp': '2025-09-10 02:44:21.785911', 'step': 15510, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 160], 'flops': 4746299996032}, 'timestamp': '2025-09-10 02:44:21.815940', 'step': 15510, 'epoch': 3} {'type': 'loss', 'content': 0.06165305897593498, 'timestamp': '2025-09-10 02:44:21.818561', 'step': 15511, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 224], 'flops': 6644714900096}, 'timestamp': '2025-09-10 02:44:21.850983', 'step': 15511, 'epoch': 3} {'type': 'loss', 'content': 0.09641923010349274, 'timestamp': '2025-09-10 02:44:21.879114', 'step': 15512, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 2847885091968}, 'timestamp': '2025-09-10 02:44:21.908218', 'step': 15512, 'epoch': 3} {'type': 'loss', 'content': 0.06205367296934128, 'timestamp': '2025-09-10 02:44:21.910520', 'step': 15513, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 128], 'flops': 3797092544000}, 'timestamp': '2025-09-10 02:44:21.940175', 'step': 15513, 'epoch': 3} {'type': 'loss', 'content': 0.08993130177259445, 'timestamp': '2025-09-10 02:44:21.942383', 'step': 15514, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 3322488817984}, 'timestamp': '2025-09-10 02:44:21.971849', 'step': 15514, 'epoch': 3} {'type': 'loss', 'content': 0.09623027592897415, 'timestamp': '2025-09-10 02:44:21.974328', 'step': 15515, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 3322488817984}, 'timestamp': '2025-09-10 02:44:22.004458', 'step': 15515, 'epoch': 3} {'type': 'loss', 'content': 0.12039447575807571, 'timestamp': '2025-09-10 02:44:22.027870', 'step': 15516, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 144], 'flops': 4271696270016}, 'timestamp': '2025-09-10 02:44:22.058387', 'step': 15516, 'epoch': 3} {'type': 'loss', 'content': 0.04362731799483299, 'timestamp': '2025-09-10 02:44:22.060580', 'step': 15517, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 2847885091968}, 'timestamp': '2025-09-10 02:44:22.090303', 'step': 15517, 'epoch': 3} {'type': 'loss', 'content': 0.08274059742689133, 'timestamp': '2025-09-10 02:44:22.092541', 'step': 15518, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 128], 'flops': 3797092544000}, 'timestamp': '2025-09-10 02:44:22.122799', 'step': 15518, 'epoch': 3} {'type': 'loss', 'content': 0.05650503188371658, 'timestamp': '2025-09-10 02:44:22.125336', 'step': 15519, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 160], 'flops': 4746299996032}, 'timestamp': '2025-09-10 02:44:22.156251', 'step': 15519, 'epoch': 3} {'type': 'loss', 'content': 0.0689496174454689, 'timestamp': '2025-09-10 02:44:22.180095', 'step': 15520, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 3322488817984}, 'timestamp': '2025-09-10 02:44:22.209955', 'step': 15520, 'epoch': 3} {'type': 'loss', 'content': 0.03426791727542877, 'timestamp': '2025-09-10 02:44:22.212648', 'step': 15521, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 144], 'flops': 4271696270016}, 'timestamp': '2025-09-10 02:44:22.243408', 'step': 15521, 'epoch': 3} {'type': 'loss', 'content': 0.16790667176246643, 'timestamp': '2025-09-10 02:44:22.245735', 'step': 15522, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 2847885091968}, 'timestamp': '2025-09-10 02:44:22.275871', 'step': 15522, 'epoch': 3} {'type': 'loss', 'content': 0.0883137509226799, 'timestamp': '2025-09-10 02:44:22.278398', 'step': 15523, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 3322488817984}, 'timestamp': '2025-09-10 02:44:22.308406', 'step': 15523, 'epoch': 3} {'type': 'loss', 'content': 0.08013810962438583, 'timestamp': '2025-09-10 02:44:22.332338', 'step': 15524, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 144], 'flops': 4271696270016}, 'timestamp': '2025-09-10 02:44:22.367249', 'step': 15524, 'epoch': 3} {'type': 'loss', 'content': 0.06561383605003357, 'timestamp': '2025-09-10 02:44:22.374615', 'step': 15525, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 3322488817984}, 'timestamp': '2025-09-10 02:44:22.410766', 'step': 15525, 'epoch': 3} {'type': 'loss', 'content': 0.034973617643117905, 'timestamp': '2025-09-10 02:44:22.415495', 'step': 15526, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 3322488817984}, 'timestamp': '2025-09-10 02:44:22.455232', 'step': 15526, 'epoch': 3} {'type': 'loss', 'content': 0.020959656685590744, 'timestamp': '2025-09-10 02:44:22.458790', 'step': 15527, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 2847885091968}, 'timestamp': '2025-09-10 02:44:22.489201', 'step': 15527, 'epoch': 3} {'type': 'loss', 'content': 0.06815784424543381, 'timestamp': '2025-09-10 02:44:22.512738', 'step': 15528, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 2847885091968}, 'timestamp': '2025-09-10 02:44:22.543084', 'step': 15528, 'epoch': 3} {'type': 'loss', 'content': 0.035585660487413406, 'timestamp': '2025-09-10 02:44:22.545285', 'step': 15529, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 128], 'flops': 3797092544000}, 'timestamp': '2025-09-10 02:44:22.574965', 'step': 15529, 'epoch': 3} {'type': 'loss', 'content': 0.09400624781847, 'timestamp': '2025-09-10 02:44:22.577197', 'step': 15530, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 128], 'flops': 3797092544000}, 'timestamp': '2025-09-10 02:44:22.607307', 'step': 15530, 'epoch': 3} {'type': 'loss', 'content': 0.11989638209342957, 'timestamp': '2025-09-10 02:44:22.609623', 'step': 15531, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 128], 'flops': 3797092544000}, 'timestamp': '2025-09-10 02:44:22.639605', 'step': 15531, 'epoch': 3} {'type': 'loss', 'content': 0.07847081124782562, 'timestamp': '2025-09-10 02:44:22.663182', 'step': 15532, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 3322488817984}, 'timestamp': '2025-09-10 02:44:22.693212', 'step': 15532, 'epoch': 3} {'type': 'loss', 'content': 0.09208976477384567, 'timestamp': '2025-09-10 02:44:22.695811', 'step': 15533, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 3322488817984}, 'timestamp': '2025-09-10 02:44:22.725506', 'step': 15533, 'epoch': 3} {'type': 'loss', 'content': 0.054375723004341125, 'timestamp': '2025-09-10 02:44:22.727854', 'step': 15534, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 128], 'flops': 3797092544000}, 'timestamp': '2025-09-10 02:44:22.757777', 'step': 15534, 'epoch': 3} {'type': 'loss', 'content': 0.15714426338672638, 'timestamp': '2025-09-10 02:44:22.759914', 'step': 15535, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 3322488817984}, 'timestamp': '2025-09-10 02:44:22.791139', 'step': 15535, 'epoch': 3} {'type': 'loss', 'content': 0.10011017322540283, 'timestamp': '2025-09-10 02:44:22.814393', 'step': 15536, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 128], 'flops': 3797092544000}, 'timestamp': '2025-09-10 02:44:22.844938', 'step': 15536, 'epoch': 3} {'type': 'loss', 'content': 0.08412434160709381, 'timestamp': '2025-09-10 02:44:22.847441', 'step': 15537, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 2847885091968}, 'timestamp': '2025-09-10 02:44:22.878398', 'step': 15537, 'epoch': 3} {'type': 'loss', 'content': 0.07670220732688904, 'timestamp': '2025-09-10 02:44:22.880925', 'step': 15538, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 128], 'flops': 3797092544000}, 'timestamp': '2025-09-10 02:44:22.911458', 'step': 15538, 'epoch': 3} {'type': 'loss', 'content': 0.14059853553771973, 'timestamp': '2025-09-10 02:44:22.913714', 'step': 15539, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 80], 'flops': 2373281365952}, 'timestamp': '2025-09-10 02:44:22.943828', 'step': 15539, 'epoch': 3} {'type': 'loss', 'content': 0.11140810698270798, 'timestamp': '2025-09-10 02:44:22.967331', 'step': 15540, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 128], 'flops': 3797092544000}, 'timestamp': '2025-09-10 02:44:22.997264', 'step': 15540, 'epoch': 3} {'type': 'loss', 'content': 0.08357295393943787, 'timestamp': '2025-09-10 02:44:22.999585', 'step': 15541, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 144], 'flops': 4271696270016}, 'timestamp': '2025-09-10 02:44:23.029921', 'step': 15541, 'epoch': 3} {'type': 'loss', 'content': 0.10212172567844391, 'timestamp': '2025-09-10 02:44:23.032403', 'step': 15542, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 3322488817984}, 'timestamp': '2025-09-10 02:44:23.062586', 'step': 15542, 'epoch': 3} {'type': 'loss', 'content': 0.08511791378259659, 'timestamp': '2025-09-10 02:44:23.066442', 'step': 15543, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 2847885091968}, 'timestamp': '2025-09-10 02:44:23.096195', 'step': 15543, 'epoch': 3} {'type': 'loss', 'content': 0.06638798117637634, 'timestamp': '2025-09-10 02:44:23.119866', 'step': 15544, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 3322488817984}, 'timestamp': '2025-09-10 02:44:23.151124', 'step': 15544, 'epoch': 3} {'type': 'loss', 'content': 0.03497668728232384, 'timestamp': '2025-09-10 02:44:23.153340', 'step': 15545, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 2847885091968}, 'timestamp': '2025-09-10 02:44:23.183349', 'step': 15545, 'epoch': 3} {'type': 'loss', 'content': 0.11239258199930191, 'timestamp': '2025-09-10 02:44:23.185697', 'step': 15546, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 3322488817984}, 'timestamp': '2025-09-10 02:44:23.215520', 'step': 15546, 'epoch': 3} {'type': 'loss', 'content': 0.1886288821697235, 'timestamp': '2025-09-10 02:44:23.218312', 'step': 15547, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 80], 'flops': 2373281365952}, 'timestamp': '2025-09-10 02:44:23.247887', 'step': 15547, 'epoch': 3} {'type': 'loss', 'content': 0.08558864891529083, 'timestamp': '2025-09-10 02:44:23.271411', 'step': 15548, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 128], 'flops': 3797092544000}, 'timestamp': '2025-09-10 02:44:23.302130', 'step': 15548, 'epoch': 3} {'type': 'loss', 'content': 0.13458722829818726, 'timestamp': '2025-09-10 02:44:23.304585', 'step': 15549, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 128], 'flops': 3797092544000}, 'timestamp': '2025-09-10 02:44:23.334457', 'step': 15549, 'epoch': 3} {'type': 'loss', 'content': 0.022392332553863525, 'timestamp': '2025-09-10 02:44:23.336735', 'step': 15550, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 3322488817984}, 'timestamp': '2025-09-10 02:44:23.366616', 'step': 15550, 'epoch': 3} {'type': 'loss', 'content': 0.06935039162635803, 'timestamp': '2025-09-10 02:44:23.370458', 'step': 15551, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 3322488817984}, 'timestamp': '2025-09-10 02:44:23.401280', 'step': 15551, 'epoch': 3} {'type': 'loss', 'content': 0.06209033355116844, 'timestamp': '2025-09-10 02:44:23.424855', 'step': 15552, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 128], 'flops': 3797092544000}, 'timestamp': '2025-09-10 02:44:23.454623', 'step': 15552, 'epoch': 3} {'type': 'loss', 'content': 0.1001925989985466, 'timestamp': '2025-09-10 02:44:23.456981', 'step': 15553, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 128], 'flops': 3797092544000}, 'timestamp': '2025-09-10 02:44:23.487595', 'step': 15553, 'epoch': 3} {'type': 'loss', 'content': 0.09969601780176163, 'timestamp': '2025-09-10 02:44:23.490193', 'step': 15554, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 144], 'flops': 4271696270016}, 'timestamp': '2025-09-10 02:44:23.520122', 'step': 15554, 'epoch': 3} {'type': 'loss', 'content': 0.04414115846157074, 'timestamp': '2025-09-10 02:44:23.522629', 'step': 15555, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 2847885091968}, 'timestamp': '2025-09-10 02:44:23.552560', 'step': 15555, 'epoch': 3} {'type': 'loss', 'content': 0.07680189609527588, 'timestamp': '2025-09-10 02:44:23.576574', 'step': 15556, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 128], 'flops': 3797092544000}, 'timestamp': '2025-09-10 02:44:23.607227', 'step': 15556, 'epoch': 3} {'type': 'loss', 'content': 0.08102818578481674, 'timestamp': '2025-09-10 02:44:23.609427', 'step': 15557, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 2847885091968}, 'timestamp': '2025-09-10 02:44:23.639811', 'step': 15557, 'epoch': 3} {'type': 'loss', 'content': 0.11476262658834457, 'timestamp': '2025-09-10 02:44:23.642316', 'step': 15558, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 3322488817984}, 'timestamp': '2025-09-10 02:44:23.673249', 'step': 15558, 'epoch': 3} {'type': 'loss', 'content': 0.12824851274490356, 'timestamp': '2025-09-10 02:44:23.676352', 'step': 15559, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 144], 'flops': 4271696270016}, 'timestamp': '2025-09-10 02:44:23.706864', 'step': 15559, 'epoch': 3} {'type': 'loss', 'content': 0.026832913979887962, 'timestamp': '2025-09-10 02:44:23.730491', 'step': 15560, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 3322488817984}, 'timestamp': '2025-09-10 02:44:23.759861', 'step': 15560, 'epoch': 3} {'type': 'loss', 'content': 0.06483535468578339, 'timestamp': '2025-09-10 02:44:23.762093', 'step': 15561, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 3322488817984}, 'timestamp': '2025-09-10 02:44:23.791879', 'step': 15561, 'epoch': 3} {'type': 'loss', 'content': 0.015517794527113438, 'timestamp': '2025-09-10 02:44:23.794192', 'step': 15562, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 3322488817984}, 'timestamp': '2025-09-10 02:44:23.823262', 'step': 15562, 'epoch': 3} {'type': 'loss', 'content': 0.09273406118154526, 'timestamp': '2025-09-10 02:44:23.825483', 'step': 15563, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 3322488817984}, 'timestamp': '2025-09-10 02:44:23.855251', 'step': 15563, 'epoch': 3} {'type': 'loss', 'content': 0.12932752072811127, 'timestamp': '2025-09-10 02:44:23.878838', 'step': 15564, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 128], 'flops': 3797092544000}, 'timestamp': '2025-09-10 02:44:23.908596', 'step': 15564, 'epoch': 3} {'type': 'loss', 'content': 0.08839923143386841, 'timestamp': '2025-09-10 02:44:23.910642', 'step': 15565, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 3322488817984}, 'timestamp': '2025-09-10 02:44:23.940583', 'step': 15565, 'epoch': 3} {'type': 'loss', 'content': 0.052223216742277145, 'timestamp': '2025-09-10 02:44:23.942853', 'step': 15566, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 128], 'flops': 3797092544000}, 'timestamp': '2025-09-10 02:44:23.972807', 'step': 15566, 'epoch': 3} {'type': 'loss', 'content': 0.1455945372581482, 'timestamp': '2025-09-10 02:44:23.975497', 'step': 15567, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 128], 'flops': 3797092544000}, 'timestamp': '2025-09-10 02:44:24.006791', 'step': 15567, 'epoch': 3} {'type': 'loss', 'content': 0.12688249349594116, 'timestamp': '2025-09-10 02:44:24.030484', 'step': 15568, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 3322488817984}, 'timestamp': '2025-09-10 02:44:24.061278', 'step': 15568, 'epoch': 3} {'type': 'loss', 'content': 0.11238396167755127, 'timestamp': '2025-09-10 02:44:24.063600', 'step': 15569, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 128], 'flops': 3797092544000}, 'timestamp': '2025-09-10 02:44:24.093799', 'step': 15569, 'epoch': 3} {'type': 'loss', 'content': 0.08915668725967407, 'timestamp': '2025-09-10 02:44:24.096115', 'step': 15570, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 160], 'flops': 4746299996032}, 'timestamp': '2025-09-10 02:44:24.126536', 'step': 15570, 'epoch': 3} {'type': 'loss', 'content': 0.11628922820091248, 'timestamp': '2025-09-10 02:44:24.129181', 'step': 15571, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 2847885091968}, 'timestamp': '2025-09-10 02:44:24.158850', 'step': 15571, 'epoch': 3} {'type': 'loss', 'content': 0.039260074496269226, 'timestamp': '2025-09-10 02:44:24.182393', 'step': 15572, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 144], 'flops': 4271696270016}, 'timestamp': '2025-09-10 02:44:24.211887', 'step': 15572, 'epoch': 3} {'type': 'loss', 'content': 0.10372466593980789, 'timestamp': '2025-09-10 02:44:24.214377', 'step': 15573, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 3322488817984}, 'timestamp': '2025-09-10 02:44:24.244656', 'step': 15573, 'epoch': 3} {'type': 'loss', 'content': 0.0661199614405632, 'timestamp': '2025-09-10 02:44:24.246912', 'step': 15574, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 144], 'flops': 4271696270016}, 'timestamp': '2025-09-10 02:44:24.283359', 'step': 15574, 'epoch': 3} {'type': 'loss', 'content': 0.15814080834388733, 'timestamp': '2025-09-10 02:44:24.285768', 'step': 15575, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 3322488817984}, 'timestamp': '2025-09-10 02:44:24.316004', 'step': 15575, 'epoch': 3} {'type': 'loss', 'content': 0.04672980681061745, 'timestamp': '2025-09-10 02:44:24.339535', 'step': 15576, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 2847885091968}, 'timestamp': '2025-09-10 02:44:24.369665', 'step': 15576, 'epoch': 3} {'type': 'loss', 'content': 0.055645573884248734, 'timestamp': '2025-09-10 02:44:24.371911', 'step': 15577, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 3322488817984}, 'timestamp': '2025-09-10 02:44:24.401571', 'step': 15577, 'epoch': 3} {'type': 'loss', 'content': 0.05941179394721985, 'timestamp': '2025-09-10 02:44:24.403771', 'step': 15578, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 2847885091968}, 'timestamp': '2025-09-10 02:44:24.433891', 'step': 15578, 'epoch': 3} {'type': 'loss', 'content': 0.06751769036054611, 'timestamp': '2025-09-10 02:44:24.436185', 'step': 15579, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 3322488817984}, 'timestamp': '2025-09-10 02:44:24.466885', 'step': 15579, 'epoch': 3} {'type': 'loss', 'content': 0.08418767154216766, 'timestamp': '2025-09-10 02:44:24.490304', 'step': 15580, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 3322488817984}, 'timestamp': '2025-09-10 02:44:24.519920', 'step': 15580, 'epoch': 3} {'type': 'loss', 'content': 0.16528485715389252, 'timestamp': '2025-09-10 02:44:24.521861', 'step': 15581, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 3322488817984}, 'timestamp': '2025-09-10 02:44:24.551644', 'step': 15581, 'epoch': 3} {'type': 'loss', 'content': 0.11858615279197693, 'timestamp': '2025-09-10 02:44:24.553958', 'step': 15582, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 3322488817984}, 'timestamp': '2025-09-10 02:44:24.583426', 'step': 15582, 'epoch': 3} {'type': 'loss', 'content': 0.15773043036460876, 'timestamp': '2025-09-10 02:44:24.586540', 'step': 15583, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 3322488817984}, 'timestamp': '2025-09-10 02:44:24.616498', 'step': 15583, 'epoch': 3} {'type': 'loss', 'content': 0.05795424431562424, 'timestamp': '2025-09-10 02:44:24.639991', 'step': 15584, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 3322488817984}, 'timestamp': '2025-09-10 02:44:24.669926', 'step': 15584, 'epoch': 3} {'type': 'loss', 'content': 0.0710451677441597, 'timestamp': '2025-09-10 02:44:24.672157', 'step': 15585, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 128], 'flops': 3797092544000}, 'timestamp': '2025-09-10 02:44:24.705294', 'step': 15585, 'epoch': 3} {'type': 'loss', 'content': 0.07007604837417603, 'timestamp': '2025-09-10 02:44:24.707482', 'step': 15586, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 144], 'flops': 4271696270016}, 'timestamp': '2025-09-10 02:44:24.736994', 'step': 15586, 'epoch': 3} {'type': 'loss', 'content': 0.13739514350891113, 'timestamp': '2025-09-10 02:44:24.739344', 'step': 15587, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 2847885091968}, 'timestamp': '2025-09-10 02:44:24.769900', 'step': 15587, 'epoch': 3} {'type': 'loss', 'content': 0.09989160299301147, 'timestamp': '2025-09-10 02:44:24.793311', 'step': 15588, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 128], 'flops': 3797092544000}, 'timestamp': '2025-09-10 02:44:24.825541', 'step': 15588, 'epoch': 3} {'type': 'loss', 'content': 0.16519390046596527, 'timestamp': '2025-09-10 02:44:24.827881', 'step': 15589, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 176], 'flops': 5220903722048}, 'timestamp': '2025-09-10 02:44:24.857664', 'step': 15589, 'epoch': 3} {'type': 'loss', 'content': 0.06565017253160477, 'timestamp': '2025-09-10 02:44:24.861984', 'step': 15590, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 3322488817984}, 'timestamp': '2025-09-10 02:44:24.892423', 'step': 15590, 'epoch': 3} {'type': 'loss', 'content': 0.09449232369661331, 'timestamp': '2025-09-10 02:44:24.894590', 'step': 15591, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 128], 'flops': 3797092544000}, 'timestamp': '2025-09-10 02:44:24.924503', 'step': 15591, 'epoch': 3} {'type': 'loss', 'content': 0.05137063190340996, 'timestamp': '2025-09-10 02:44:24.947754', 'step': 15592, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 2847885091968}, 'timestamp': '2025-09-10 02:44:24.977416', 'step': 15592, 'epoch': 3} {'type': 'loss', 'content': 0.07110463827848434, 'timestamp': '2025-09-10 02:44:24.979356', 'step': 15593, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 3322488817984}, 'timestamp': '2025-09-10 02:44:25.009323', 'step': 15593, 'epoch': 3} {'type': 'loss', 'content': 0.07435328513383865, 'timestamp': '2025-09-10 02:44:25.011467', 'step': 15594, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 3322488817984}, 'timestamp': '2025-09-10 02:44:25.041647', 'step': 15594, 'epoch': 3} {'type': 'loss', 'content': 0.09458824247121811, 'timestamp': '2025-09-10 02:44:25.043921', 'step': 15595, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 80], 'flops': 2373281365952}, 'timestamp': '2025-09-10 02:44:25.073759', 'step': 15595, 'epoch': 3} {'type': 'loss', 'content': 0.018099535256624222, 'timestamp': '2025-09-10 02:44:25.097414', 'step': 15596, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 3322488817984}, 'timestamp': '2025-09-10 02:44:25.128150', 'step': 15596, 'epoch': 3} {'type': 'loss', 'content': 0.10503850877285004, 'timestamp': '2025-09-10 02:44:25.131172', 'step': 15597, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 3322488817984}, 'timestamp': '2025-09-10 02:44:25.163365', 'step': 15597, 'epoch': 3} {'type': 'loss', 'content': 0.07890109717845917, 'timestamp': '2025-09-10 02:44:25.165720', 'step': 15598, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 128], 'flops': 3797092544000}, 'timestamp': '2025-09-10 02:44:25.196394', 'step': 15598, 'epoch': 3} {'type': 'loss', 'content': 0.10692852735519409, 'timestamp': '2025-09-10 02:44:25.198678', 'step': 15599, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 144], 'flops': 4271696270016}, 'timestamp': '2025-09-10 02:44:25.229548', 'step': 15599, 'epoch': 3} {'type': 'loss', 'content': 0.09353894740343094, 'timestamp': '2025-09-10 02:44:25.253724', 'step': 15600, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 2847885091968}, 'timestamp': '2025-09-10 02:44:25.283285', 'step': 15600, 'epoch': 3} {'type': 'loss', 'content': 0.06327972561120987, 'timestamp': '2025-09-10 02:44:25.285446', 'step': 15601, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 128], 'flops': 3797092544000}, 'timestamp': '2025-09-10 02:44:25.315881', 'step': 15601, 'epoch': 3} {'type': 'loss', 'content': 0.10534653067588806, 'timestamp': '2025-09-10 02:44:25.317863', 'step': 15602, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 2847885091968}, 'timestamp': '2025-09-10 02:44:25.347184', 'step': 15602, 'epoch': 3} {'type': 'loss', 'content': 0.08993873745203018, 'timestamp': '2025-09-10 02:44:25.350646', 'step': 15603, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 2847885091968}, 'timestamp': '2025-09-10 02:44:25.382449', 'step': 15603, 'epoch': 3} {'type': 'loss', 'content': 0.04970729351043701, 'timestamp': '2025-09-10 02:44:25.405977', 'step': 15604, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 2847885091968}, 'timestamp': '2025-09-10 02:44:25.436051', 'step': 15604, 'epoch': 3} {'type': 'loss', 'content': 0.09328275918960571, 'timestamp': '2025-09-10 02:44:25.438325', 'step': 15605, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 3322488817984}, 'timestamp': '2025-09-10 02:44:25.467921', 'step': 15605, 'epoch': 3} {'type': 'loss', 'content': 0.10344645380973816, 'timestamp': '2025-09-10 02:44:25.470057', 'step': 15606, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 2847885091968}, 'timestamp': '2025-09-10 02:44:25.499947', 'step': 15606, 'epoch': 3} {'type': 'loss', 'content': 0.030387144535779953, 'timestamp': '2025-09-10 02:44:25.502254', 'step': 15607, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 128], 'flops': 3797092544000}, 'timestamp': '2025-09-10 02:44:25.531490', 'step': 15607, 'epoch': 3} {'type': 'loss', 'content': 0.07031653821468353, 'timestamp': '2025-09-10 02:44:25.555055', 'step': 15608, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 2847885091968}, 'timestamp': '2025-09-10 02:44:25.585202', 'step': 15608, 'epoch': 3} {'type': 'loss', 'content': 0.09442047774791718, 'timestamp': '2025-09-10 02:44:25.587754', 'step': 15609, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 144], 'flops': 4271696270016}, 'timestamp': '2025-09-10 02:44:25.617653', 'step': 15609, 'epoch': 3} {'type': 'loss', 'content': 0.09342700242996216, 'timestamp': '2025-09-10 02:44:25.620127', 'step': 15610, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 3322488817984}, 'timestamp': '2025-09-10 02:44:25.650047', 'step': 15610, 'epoch': 3} {'type': 'loss', 'content': 0.08835935592651367, 'timestamp': '2025-09-10 02:44:25.652164', 'step': 15611, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 2847885091968}, 'timestamp': '2025-09-10 02:44:25.681546', 'step': 15611, 'epoch': 3} {'type': 'loss', 'content': 0.09678055346012115, 'timestamp': '2025-09-10 02:44:25.705262', 'step': 15612, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 3322488817984}, 'timestamp': '2025-09-10 02:44:25.735669', 'step': 15612, 'epoch': 3} {'type': 'loss', 'content': 0.08563542366027832, 'timestamp': '2025-09-10 02:44:25.738162', 'step': 15613, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 3322488817984}, 'timestamp': '2025-09-10 02:44:25.768220', 'step': 15613, 'epoch': 3} {'type': 'loss', 'content': 0.1731574535369873, 'timestamp': '2025-09-10 02:44:25.770502', 'step': 15614, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 128], 'flops': 3797092544000}, 'timestamp': '2025-09-10 02:44:25.800306', 'step': 15614, 'epoch': 3} {'type': 'loss', 'content': 0.1353757679462433, 'timestamp': '2025-09-10 02:44:25.803737', 'step': 15615, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 128], 'flops': 3797092544000}, 'timestamp': '2025-09-10 02:44:25.833495', 'step': 15615, 'epoch': 3} {'type': 'loss', 'content': 0.08114489167928696, 'timestamp': '2025-09-10 02:44:25.857078', 'step': 15616, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 3322488817984}, 'timestamp': '2025-09-10 02:44:25.887676', 'step': 15616, 'epoch': 3} {'type': 'loss', 'content': 0.12095742672681808, 'timestamp': '2025-09-10 02:44:25.889851', 'step': 15617, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 144], 'flops': 4271696270016}, 'timestamp': '2025-09-10 02:44:25.919822', 'step': 15617, 'epoch': 3} {'type': 'loss', 'content': 0.06661155819892883, 'timestamp': '2025-09-10 02:44:25.922359', 'step': 15618, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 3322488817984}, 'timestamp': '2025-09-10 02:44:25.952976', 'step': 15618, 'epoch': 3} {'type': 'loss', 'content': 0.0736367478966713, 'timestamp': '2025-09-10 02:44:25.955197', 'step': 15619, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 3322488817984}, 'timestamp': '2025-09-10 02:44:25.985514', 'step': 15619, 'epoch': 3} {'type': 'loss', 'content': 0.047782398760318756, 'timestamp': '2025-09-10 02:44:26.008908', 'step': 15620, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 2847885091968}, 'timestamp': '2025-09-10 02:44:26.038724', 'step': 15620, 'epoch': 3} {'type': 'loss', 'content': 0.06828153878450394, 'timestamp': '2025-09-10 02:44:26.040773', 'step': 15621, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 128], 'flops': 3797092544000}, 'timestamp': '2025-09-10 02:44:26.071123', 'step': 15621, 'epoch': 3} {'type': 'loss', 'content': 0.027956970036029816, 'timestamp': '2025-09-10 02:44:26.073560', 'step': 15622, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 3322488817984}, 'timestamp': '2025-09-10 02:44:26.103539', 'step': 15622, 'epoch': 3} {'type': 'loss', 'content': 0.06545563787221909, 'timestamp': '2025-09-10 02:44:26.106638', 'step': 15623, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 128], 'flops': 3797092544000}, 'timestamp': '2025-09-10 02:44:26.136846', 'step': 15623, 'epoch': 3} {'type': 'loss', 'content': 0.19509585201740265, 'timestamp': '2025-09-10 02:44:26.160581', 'step': 15624, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 128], 'flops': 3797092544000}, 'timestamp': '2025-09-10 02:44:26.190630', 'step': 15624, 'epoch': 3} {'type': 'loss', 'content': 0.08659584075212479, 'timestamp': '2025-09-10 02:44:26.193098', 'step': 15625, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 3322488817984}, 'timestamp': '2025-09-10 02:44:26.222689', 'step': 15625, 'epoch': 3} {'type': 'loss', 'content': 0.05928003415465355, 'timestamp': '2025-09-10 02:44:26.224997', 'step': 15626, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 144], 'flops': 4271696270016}, 'timestamp': '2025-09-10 02:44:26.257024', 'step': 15626, 'epoch': 3} {'type': 'loss', 'content': 0.14674457907676697, 'timestamp': '2025-09-10 02:44:26.259347', 'step': 15627, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 3322488817984}, 'timestamp': '2025-09-10 02:44:26.289084', 'step': 15627, 'epoch': 3} {'type': 'loss', 'content': 0.04480673745274544, 'timestamp': '2025-09-10 02:44:26.312591', 'step': 15628, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 128], 'flops': 3797092544000}, 'timestamp': '2025-09-10 02:44:26.342582', 'step': 15628, 'epoch': 3} {'type': 'loss', 'content': 0.025556523352861404, 'timestamp': '2025-09-10 02:44:26.345071', 'step': 15629, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 3322488817984}, 'timestamp': '2025-09-10 02:44:26.375533', 'step': 15629, 'epoch': 3} {'type': 'loss', 'content': 0.08208227157592773, 'timestamp': '2025-09-10 02:44:26.377882', 'step': 15630, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 3322488817984}, 'timestamp': '2025-09-10 02:44:26.407873', 'step': 15630, 'epoch': 3} {'type': 'loss', 'content': 0.0874045342206955, 'timestamp': '2025-09-10 02:44:26.413120', 'step': 15631, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 2847885091968}, 'timestamp': '2025-09-10 02:44:26.444134', 'step': 15631, 'epoch': 3} {'type': 'loss', 'content': 0.06825393438339233, 'timestamp': '2025-09-10 02:44:26.467581', 'step': 15632, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 144], 'flops': 4271696270016}, 'timestamp': '2025-09-10 02:44:26.497744', 'step': 15632, 'epoch': 3} {'type': 'loss', 'content': 0.06651852279901505, 'timestamp': '2025-09-10 02:44:26.500409', 'step': 15633, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 3322488817984}, 'timestamp': '2025-09-10 02:44:26.530593', 'step': 15633, 'epoch': 3} {'type': 'loss', 'content': 0.0956447571516037, 'timestamp': '2025-09-10 02:44:26.532762', 'step': 15634, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 3322488817984}, 'timestamp': '2025-09-10 02:44:26.562641', 'step': 15634, 'epoch': 3} {'type': 'loss', 'content': 0.05765480920672417, 'timestamp': '2025-09-10 02:44:26.564834', 'step': 15635, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 128], 'flops': 3797092544000}, 'timestamp': '2025-09-10 02:44:26.594689', 'step': 15635, 'epoch': 3} {'type': 'loss', 'content': 0.061058368533849716, 'timestamp': '2025-09-10 02:44:26.617889', 'step': 15636, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 80], 'flops': 2373281365952}, 'timestamp': '2025-09-10 02:44:26.649581', 'step': 15636, 'epoch': 3} {'type': 'loss', 'content': 0.02167082577943802, 'timestamp': '2025-09-10 02:44:26.652868', 'step': 15637, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 3322488817984}, 'timestamp': '2025-09-10 02:44:26.684047', 'step': 15637, 'epoch': 3} {'type': 'loss', 'content': 0.08646606653928757, 'timestamp': '2025-09-10 02:44:26.687393', 'step': 15638, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 2847885091968}, 'timestamp': '2025-09-10 02:44:26.719239', 'step': 15638, 'epoch': 3} {'type': 'loss', 'content': 0.1082804724574089, 'timestamp': '2025-09-10 02:44:26.721607', 'step': 15639, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 2847885091968}, 'timestamp': '2025-09-10 02:44:26.752951', 'step': 15639, 'epoch': 3} {'type': 'loss', 'content': 0.024199755862355232, 'timestamp': '2025-09-10 02:44:26.776504', 'step': 15640, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 3322488817984}, 'timestamp': '2025-09-10 02:44:26.806944', 'step': 15640, 'epoch': 3} {'type': 'loss', 'content': 0.04998330771923065, 'timestamp': '2025-09-10 02:44:26.808999', 'step': 15641, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 2847885091968}, 'timestamp': '2025-09-10 02:44:26.839140', 'step': 15641, 'epoch': 3} {'type': 'loss', 'content': 0.08882257342338562, 'timestamp': '2025-09-10 02:44:26.841419', 'step': 15642, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 2847885091968}, 'timestamp': '2025-09-10 02:44:26.871126', 'step': 15642, 'epoch': 3} {'type': 'loss', 'content': 0.1054050400853157, 'timestamp': '2025-09-10 02:44:26.882943', 'step': 15643, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 80], 'flops': 2373281365952}, 'timestamp': '2025-09-10 02:44:26.912733', 'step': 15643, 'epoch': 3} {'type': 'loss', 'content': 0.014272044412791729, 'timestamp': '2025-09-10 02:44:26.938235', 'step': 15644, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 3322488817984}, 'timestamp': '2025-09-10 02:44:26.968515', 'step': 15644, 'epoch': 3} {'type': 'loss', 'content': 0.08637533336877823, 'timestamp': '2025-09-10 02:44:26.970733', 'step': 15645, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 3322488817984}, 'timestamp': '2025-09-10 02:44:27.000938', 'step': 15645, 'epoch': 3} {'type': 'loss', 'content': 0.08442625403404236, 'timestamp': '2025-09-10 02:44:27.002894', 'step': 15646, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 3322488817984}, 'timestamp': '2025-09-10 02:44:27.033027', 'step': 15646, 'epoch': 3} {'type': 'loss', 'content': 0.08081243187189102, 'timestamp': '2025-09-10 02:44:27.035280', 'step': 15647, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 3322488817984}, 'timestamp': '2025-09-10 02:44:27.065708', 'step': 15647, 'epoch': 3} {'type': 'loss', 'content': 0.04120125621557236, 'timestamp': '2025-09-10 02:44:27.089452', 'step': 15648, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 2847885091968}, 'timestamp': '2025-09-10 02:44:27.120051', 'step': 15648, 'epoch': 3} {'type': 'loss', 'content': 0.09923315048217773, 'timestamp': '2025-09-10 02:44:27.122259', 'step': 15649, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 128], 'flops': 3797092544000}, 'timestamp': '2025-09-10 02:44:27.152191', 'step': 15649, 'epoch': 3} {'type': 'loss', 'content': 0.13438089191913605, 'timestamp': '2025-09-10 02:44:27.154392', 'step': 15650, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 2847885091968}, 'timestamp': '2025-09-10 02:44:27.183888', 'step': 15650, 'epoch': 3} {'type': 'loss', 'content': 0.05987502634525299, 'timestamp': '2025-09-10 02:44:27.186192', 'step': 15651, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 128], 'flops': 3797092544000}, 'timestamp': '2025-09-10 02:44:27.216877', 'step': 15651, 'epoch': 3} {'type': 'loss', 'content': 0.03215353190898895, 'timestamp': '2025-09-10 02:44:27.241408', 'step': 15652, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 2847885091968}, 'timestamp': '2025-09-10 02:44:27.271464', 'step': 15652, 'epoch': 3} {'type': 'loss', 'content': 0.1099180206656456, 'timestamp': '2025-09-10 02:44:27.273730', 'step': 15653, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 128], 'flops': 3797092544000}, 'timestamp': '2025-09-10 02:44:27.303576', 'step': 15653, 'epoch': 3} {'type': 'loss', 'content': 0.11231868714094162, 'timestamp': '2025-09-10 02:44:27.306147', 'step': 15654, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 2847885091968}, 'timestamp': '2025-09-10 02:44:27.335664', 'step': 15654, 'epoch': 3} {'type': 'loss', 'content': 0.14955949783325195, 'timestamp': '2025-09-10 02:44:27.337723', 'step': 15655, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 80], 'flops': 2373281365952}, 'timestamp': '2025-09-10 02:44:27.367407', 'step': 15655, 'epoch': 3} {'type': 'loss', 'content': 0.0325891338288784, 'timestamp': '2025-09-10 02:44:27.390894', 'step': 15656, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 80], 'flops': 2373281365952}, 'timestamp': '2025-09-10 02:44:27.420413', 'step': 15656, 'epoch': 3} {'type': 'loss', 'content': 0.08908512443304062, 'timestamp': '2025-09-10 02:44:27.426412', 'step': 15657, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 2847885091968}, 'timestamp': '2025-09-10 02:44:27.458708', 'step': 15657, 'epoch': 3} {'type': 'loss', 'content': 0.09582442790269852, 'timestamp': '2025-09-10 02:44:27.460892', 'step': 15658, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 128], 'flops': 3797092544000}, 'timestamp': '2025-09-10 02:44:27.490428', 'step': 15658, 'epoch': 3} {'type': 'loss', 'content': 0.12489698827266693, 'timestamp': '2025-09-10 02:44:27.492604', 'step': 15659, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 3322488817984}, 'timestamp': '2025-09-10 02:44:27.523465', 'step': 15659, 'epoch': 3} {'type': 'loss', 'content': 0.037587959319353104, 'timestamp': '2025-09-10 02:44:27.546890', 'step': 15660, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 2847885091968}, 'timestamp': '2025-09-10 02:44:27.577127', 'step': 15660, 'epoch': 3} {'type': 'loss', 'content': 0.09893029928207397, 'timestamp': '2025-09-10 02:44:27.579086', 'step': 15661, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 2847885091968}, 'timestamp': '2025-09-10 02:44:27.608651', 'step': 15661, 'epoch': 3} {'type': 'loss', 'content': 0.04000771790742874, 'timestamp': '2025-09-10 02:44:27.610688', 'step': 15662, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 3322488817984}, 'timestamp': '2025-09-10 02:44:27.640289', 'step': 15662, 'epoch': 3} {'type': 'loss', 'content': 0.18032091856002808, 'timestamp': '2025-09-10 02:44:27.642508', 'step': 15663, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 128], 'flops': 3797092544000}, 'timestamp': '2025-09-10 02:44:27.671915', 'step': 15663, 'epoch': 3} {'type': 'loss', 'content': 0.09877223521471024, 'timestamp': '2025-09-10 02:44:27.695110', 'step': 15664, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 3322488817984}, 'timestamp': '2025-09-10 02:44:27.724965', 'step': 15664, 'epoch': 3} {'type': 'loss', 'content': 0.04130254685878754, 'timestamp': '2025-09-10 02:44:27.727272', 'step': 15665, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 2847885091968}, 'timestamp': '2025-09-10 02:44:27.757283', 'step': 15665, 'epoch': 3} {'type': 'loss', 'content': 0.04926422983407974, 'timestamp': '2025-09-10 02:44:27.759521', 'step': 15666, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 144], 'flops': 4271696270016}, 'timestamp': '2025-09-10 02:44:27.789524', 'step': 15666, 'epoch': 3} {'type': 'loss', 'content': 0.13848672807216644, 'timestamp': '2025-09-10 02:44:27.792155', 'step': 15667, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 3322488817984}, 'timestamp': '2025-09-10 02:44:27.822311', 'step': 15667, 'epoch': 3} {'type': 'loss', 'content': 0.09530704468488693, 'timestamp': '2025-09-10 02:44:27.846166', 'step': 15668, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 160], 'flops': 4746299996032}, 'timestamp': '2025-09-10 02:44:27.876035', 'step': 15668, 'epoch': 3} {'type': 'loss', 'content': 0.08223547041416168, 'timestamp': '2025-09-10 02:44:27.877962', 'step': 15669, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 2847885091968}, 'timestamp': '2025-09-10 02:44:27.907480', 'step': 15669, 'epoch': 3} {'type': 'loss', 'content': 0.12810151278972626, 'timestamp': '2025-09-10 02:44:27.911842', 'step': 15670, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 3322488817984}, 'timestamp': '2025-09-10 02:44:27.942998', 'step': 15670, 'epoch': 3} {'type': 'loss', 'content': 0.11031458526849747, 'timestamp': '2025-09-10 02:44:27.945188', 'step': 15671, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 3322488817984}, 'timestamp': '2025-09-10 02:44:27.974872', 'step': 15671, 'epoch': 3} {'type': 'loss', 'content': 0.12547393143177032, 'timestamp': '2025-09-10 02:44:27.998366', 'step': 15672, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 2847885091968}, 'timestamp': '2025-09-10 02:44:28.028418', 'step': 15672, 'epoch': 3} {'type': 'loss', 'content': 0.1218908354640007, 'timestamp': '2025-09-10 02:44:28.031498', 'step': 15673, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 2847885091968}, 'timestamp': '2025-09-10 02:44:28.061418', 'step': 15673, 'epoch': 3} {'type': 'loss', 'content': 0.13841591775417328, 'timestamp': '2025-09-10 02:44:28.063695', 'step': 15674, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 128], 'flops': 3797092544000}, 'timestamp': '2025-09-10 02:44:28.093278', 'step': 15674, 'epoch': 3} {'type': 'loss', 'content': 0.051371943205595016, 'timestamp': '2025-09-10 02:44:28.095621', 'step': 15675, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 128], 'flops': 3797092544000}, 'timestamp': '2025-09-10 02:44:28.126341', 'step': 15675, 'epoch': 3} {'type': 'loss', 'content': 0.07396083325147629, 'timestamp': '2025-09-10 02:44:28.149849', 'step': 15676, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 128], 'flops': 3797092544000}, 'timestamp': '2025-09-10 02:44:28.180535', 'step': 15676, 'epoch': 3} {'type': 'loss', 'content': 0.049506377428770065, 'timestamp': '2025-09-10 02:44:28.184105', 'step': 15677, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 3322488817984}, 'timestamp': '2025-09-10 02:44:28.217971', 'step': 15677, 'epoch': 3} {'type': 'loss', 'content': 0.1924760937690735, 'timestamp': '2025-09-10 02:44:28.220228', 'step': 15678, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 2847885091968}, 'timestamp': '2025-09-10 02:44:28.249813', 'step': 15678, 'epoch': 3} {'type': 'loss', 'content': 0.037514619529247284, 'timestamp': '2025-09-10 02:44:28.252178', 'step': 15679, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 3322488817984}, 'timestamp': '2025-09-10 02:44:28.282327', 'step': 15679, 'epoch': 3} {'type': 'loss', 'content': 0.037463586777448654, 'timestamp': '2025-09-10 02:44:28.305989', 'step': 15680, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 3322488817984}, 'timestamp': '2025-09-10 02:44:28.336099', 'step': 15680, 'epoch': 3} {'type': 'loss', 'content': 0.06882739067077637, 'timestamp': '2025-09-10 02:44:28.338569', 'step': 15681, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 3322488817984}, 'timestamp': '2025-09-10 02:44:28.368843', 'step': 15681, 'epoch': 3} {'type': 'loss', 'content': 0.06169717758893967, 'timestamp': '2025-09-10 02:44:28.371411', 'step': 15682, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 2847885091968}, 'timestamp': '2025-09-10 02:44:28.401056', 'step': 15682, 'epoch': 3} {'type': 'loss', 'content': 0.04494480416178703, 'timestamp': '2025-09-10 02:44:28.403470', 'step': 15683, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 3322488817984}, 'timestamp': '2025-09-10 02:44:28.435226', 'step': 15683, 'epoch': 3} {'type': 'loss', 'content': 0.0377691350877285, 'timestamp': '2025-09-10 02:44:28.458579', 'step': 15684, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 128], 'flops': 3797092544000}, 'timestamp': '2025-09-10 02:44:28.487770', 'step': 15684, 'epoch': 3} {'type': 'loss', 'content': 0.12624891102313995, 'timestamp': '2025-09-10 02:44:28.490190', 'step': 15685, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 3322488817984}, 'timestamp': '2025-09-10 02:44:28.519948', 'step': 15685, 'epoch': 3} {'type': 'loss', 'content': 0.12019313871860504, 'timestamp': '2025-09-10 02:44:28.522268', 'step': 15686, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 80], 'flops': 2373281365952}, 'timestamp': '2025-09-10 02:44:28.551803', 'step': 15686, 'epoch': 3} {'type': 'loss', 'content': 0.0248293224722147, 'timestamp': '2025-09-10 02:44:28.553860', 'step': 15687, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 3322488817984}, 'timestamp': '2025-09-10 02:44:28.583645', 'step': 15687, 'epoch': 3} {'type': 'loss', 'content': 0.1461271196603775, 'timestamp': '2025-09-10 02:44:28.607215', 'step': 15688, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 2847885091968}, 'timestamp': '2025-09-10 02:44:28.637641', 'step': 15688, 'epoch': 3} {'type': 'loss', 'content': 0.07967514544725418, 'timestamp': '2025-09-10 02:44:28.639899', 'step': 15689, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 3322488817984}, 'timestamp': '2025-09-10 02:44:28.669300', 'step': 15689, 'epoch': 3} {'type': 'loss', 'content': 0.053174927830696106, 'timestamp': '2025-09-10 02:44:28.671906', 'step': 15690, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 128], 'flops': 3797092544000}, 'timestamp': '2025-09-10 02:44:28.702527', 'step': 15690, 'epoch': 3} {'type': 'loss', 'content': 0.07736928761005402, 'timestamp': '2025-09-10 02:44:28.704749', 'step': 15691, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 80], 'flops': 2373281365952}, 'timestamp': '2025-09-10 02:44:28.734900', 'step': 15691, 'epoch': 3} {'type': 'loss', 'content': 0.012287165969610214, 'timestamp': '2025-09-10 02:44:28.758916', 'step': 15692, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 3322488817984}, 'timestamp': '2025-09-10 02:44:28.789006', 'step': 15692, 'epoch': 3} {'type': 'loss', 'content': 0.0813361257314682, 'timestamp': '2025-09-10 02:44:28.791252', 'step': 15693, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 128], 'flops': 3797092544000}, 'timestamp': '2025-09-10 02:44:28.820720', 'step': 15693, 'epoch': 3} {'type': 'loss', 'content': 0.08014366030693054, 'timestamp': '2025-09-10 02:44:28.822712', 'step': 15694, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 144], 'flops': 4271696270016}, 'timestamp': '2025-09-10 02:44:28.852427', 'step': 15694, 'epoch': 3} {'type': 'loss', 'content': 0.05875624343752861, 'timestamp': '2025-09-10 02:44:28.854746', 'step': 15695, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 2847885091968}, 'timestamp': '2025-09-10 02:44:28.884386', 'step': 15695, 'epoch': 3} {'type': 'loss', 'content': 0.061782535165548325, 'timestamp': '2025-09-10 02:44:28.908380', 'step': 15696, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 2847885091968}, 'timestamp': '2025-09-10 02:44:28.938304', 'step': 15696, 'epoch': 3} {'type': 'loss', 'content': 0.14148075878620148, 'timestamp': '2025-09-10 02:44:28.942120', 'step': 15697, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 80], 'flops': 2373281365952}, 'timestamp': '2025-09-10 02:44:28.972420', 'step': 15697, 'epoch': 3} {'type': 'loss', 'content': 0.011763696558773518, 'timestamp': '2025-09-10 02:44:28.974574', 'step': 15698, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 2847885091968}, 'timestamp': '2025-09-10 02:44:29.004284', 'step': 15698, 'epoch': 3} {'type': 'loss', 'content': 0.0918666422367096, 'timestamp': '2025-09-10 02:44:29.006617', 'step': 15699, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 3322488817984}, 'timestamp': '2025-09-10 02:44:29.036875', 'step': 15699, 'epoch': 3} {'type': 'loss', 'content': 0.09669627994298935, 'timestamp': '2025-09-10 02:44:29.060248', 'step': 15700, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 128], 'flops': 3797092544000}, 'timestamp': '2025-09-10 02:44:29.090227', 'step': 15700, 'epoch': 3} {'type': 'loss', 'content': 0.03846022114157677, 'timestamp': '2025-09-10 02:44:29.092467', 'step': 15701, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 144], 'flops': 4271696270016}, 'timestamp': '2025-09-10 02:44:29.122903', 'step': 15701, 'epoch': 3} {'type': 'loss', 'content': 0.07296881079673767, 'timestamp': '2025-09-10 02:44:29.125387', 'step': 15702, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 3322488817984}, 'timestamp': '2025-09-10 02:44:29.155457', 'step': 15702, 'epoch': 3} {'type': 'loss', 'content': 0.0808391273021698, 'timestamp': '2025-09-10 02:44:29.157685', 'step': 15703, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 160], 'flops': 4746299996032}, 'timestamp': '2025-09-10 02:44:29.188191', 'step': 15703, 'epoch': 3} {'type': 'loss', 'content': 0.07541278004646301, 'timestamp': '2025-09-10 02:44:29.211731', 'step': 15704, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 2847885091968}, 'timestamp': '2025-09-10 02:44:29.242274', 'step': 15704, 'epoch': 3} {'type': 'loss', 'content': 0.07781243324279785, 'timestamp': '2025-09-10 02:44:29.246223', 'step': 15705, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 2847885091968}, 'timestamp': '2025-09-10 02:44:29.276409', 'step': 15705, 'epoch': 3} {'type': 'loss', 'content': 0.1444828063249588, 'timestamp': '2025-09-10 02:44:29.278589', 'step': 15706, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 128], 'flops': 3797092544000}, 'timestamp': '2025-09-10 02:44:29.308855', 'step': 15706, 'epoch': 3} {'type': 'loss', 'content': 0.08068141341209412, 'timestamp': '2025-09-10 02:44:29.311364', 'step': 15707, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 128], 'flops': 3797092544000}, 'timestamp': '2025-09-10 02:44:29.341274', 'step': 15707, 'epoch': 3} {'type': 'loss', 'content': 0.03288872912526131, 'timestamp': '2025-09-10 02:44:29.364766', 'step': 15708, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 2847885091968}, 'timestamp': '2025-09-10 02:44:29.395132', 'step': 15708, 'epoch': 3} {'type': 'loss', 'content': 0.03602437674999237, 'timestamp': '2025-09-10 02:44:29.397417', 'step': 15709, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 144], 'flops': 4271696270016}, 'timestamp': '2025-09-10 02:44:29.427173', 'step': 15709, 'epoch': 3} {'type': 'loss', 'content': 0.11389988660812378, 'timestamp': '2025-09-10 02:44:29.429904', 'step': 15710, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 3322488817984}, 'timestamp': '2025-09-10 02:44:29.464032', 'step': 15710, 'epoch': 3} {'type': 'loss', 'content': 0.1134631410241127, 'timestamp': '2025-09-10 02:44:29.466425', 'step': 15711, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 2847885091968}, 'timestamp': '2025-09-10 02:44:29.495906', 'step': 15711, 'epoch': 3} {'type': 'loss', 'content': 0.06902019679546356, 'timestamp': '2025-09-10 02:44:29.519460', 'step': 15712, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 2847885091968}, 'timestamp': '2025-09-10 02:44:29.549129', 'step': 15712, 'epoch': 3} {'type': 'loss', 'content': 0.04970073699951172, 'timestamp': '2025-09-10 02:44:29.551331', 'step': 15713, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 3322488817984}, 'timestamp': '2025-09-10 02:44:29.580820', 'step': 15713, 'epoch': 3} {'type': 'loss', 'content': 0.07561797648668289, 'timestamp': '2025-09-10 02:44:29.582821', 'step': 15714, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 128], 'flops': 3797092544000}, 'timestamp': '2025-09-10 02:44:29.612275', 'step': 15714, 'epoch': 3} {'type': 'loss', 'content': 0.07705112546682358, 'timestamp': '2025-09-10 02:44:29.614332', 'step': 15715, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 2847885091968}, 'timestamp': '2025-09-10 02:44:29.644061', 'step': 15715, 'epoch': 3} {'type': 'loss', 'content': 0.06000013276934624, 'timestamp': '2025-09-10 02:44:29.668617', 'step': 15716, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 3322488817984}, 'timestamp': '2025-09-10 02:44:29.699419', 'step': 15716, 'epoch': 3} {'type': 'loss', 'content': 0.0391639769077301, 'timestamp': '2025-09-10 02:44:29.701741', 'step': 15717, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 3322488817984}, 'timestamp': '2025-09-10 02:44:29.732221', 'step': 15717, 'epoch': 3} {'type': 'loss', 'content': 0.032550424337387085, 'timestamp': '2025-09-10 02:44:29.734548', 'step': 15718, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 128], 'flops': 3797092544000}, 'timestamp': '2025-09-10 02:44:29.764346', 'step': 15718, 'epoch': 3} {'type': 'loss', 'content': 0.06168397516012192, 'timestamp': '2025-09-10 02:44:29.766740', 'step': 15719, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 2847885091968}, 'timestamp': '2025-09-10 02:44:29.797122', 'step': 15719, 'epoch': 3} {'type': 'loss', 'content': 0.05616714805364609, 'timestamp': '2025-09-10 02:44:29.820379', 'step': 15720, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 2847885091968}, 'timestamp': '2025-09-10 02:44:29.850912', 'step': 15720, 'epoch': 3} {'type': 'loss', 'content': 0.10476697236299515, 'timestamp': '2025-09-10 02:44:29.854725', 'step': 15721, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 128], 'flops': 3797092544000}, 'timestamp': '2025-09-10 02:44:29.885424', 'step': 15721, 'epoch': 3} {'type': 'loss', 'content': 0.04971297085285187, 'timestamp': '2025-09-10 02:44:29.887610', 'step': 15722, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 3322488817984}, 'timestamp': '2025-09-10 02:44:29.917847', 'step': 15722, 'epoch': 3} {'type': 'loss', 'content': 0.13792000710964203, 'timestamp': '2025-09-10 02:44:29.919978', 'step': 15723, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 128], 'flops': 3797092544000}, 'timestamp': '2025-09-10 02:44:29.950105', 'step': 15723, 'epoch': 3} {'type': 'loss', 'content': 0.0433625653386116, 'timestamp': '2025-09-10 02:44:29.973692', 'step': 15724, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 2847885091968}, 'timestamp': '2025-09-10 02:44:30.004042', 'step': 15724, 'epoch': 3} {'type': 'loss', 'content': 0.017552142962813377, 'timestamp': '2025-09-10 02:44:30.006256', 'step': 15725, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 2847885091968}, 'timestamp': '2025-09-10 02:44:30.035694', 'step': 15725, 'epoch': 3} {'type': 'loss', 'content': 0.10557495057582855, 'timestamp': '2025-09-10 02:44:30.038182', 'step': 15726, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 144], 'flops': 4271696270016}, 'timestamp': '2025-09-10 02:44:30.068682', 'step': 15726, 'epoch': 3} {'type': 'loss', 'content': 0.07035589218139648, 'timestamp': '2025-09-10 02:44:30.070996', 'step': 15727, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 144], 'flops': 4271696270016}, 'timestamp': '2025-09-10 02:44:30.100856', 'step': 15727, 'epoch': 3} {'type': 'loss', 'content': 0.10140322893857956, 'timestamp': '2025-09-10 02:44:30.124391', 'step': 15728, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 2847885091968}, 'timestamp': '2025-09-10 02:44:30.155654', 'step': 15728, 'epoch': 3} {'type': 'loss', 'content': 0.09266569465398788, 'timestamp': '2025-09-10 02:44:30.159309', 'step': 15729, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 2847885091968}, 'timestamp': '2025-09-10 02:44:30.191886', 'step': 15729, 'epoch': 3} {'type': 'loss', 'content': 0.07772856950759888, 'timestamp': '2025-09-10 02:44:30.194678', 'step': 15730, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 2847885091968}, 'timestamp': '2025-09-10 02:44:30.225137', 'step': 15730, 'epoch': 3} {'type': 'loss', 'content': 0.05956588312983513, 'timestamp': '2025-09-10 02:44:30.227452', 'step': 15731, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 128], 'flops': 3797092544000}, 'timestamp': '2025-09-10 02:44:30.257186', 'step': 15731, 'epoch': 3} {'type': 'loss', 'content': 0.12996689975261688, 'timestamp': '2025-09-10 02:44:30.281014', 'step': 15732, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 3322488817984}, 'timestamp': '2025-09-10 02:44:30.310877', 'step': 15732, 'epoch': 3} {'type': 'loss', 'content': 0.03025769628584385, 'timestamp': '2025-09-10 02:44:30.313468', 'step': 15733, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 2847885091968}, 'timestamp': '2025-09-10 02:44:30.343788', 'step': 15733, 'epoch': 3} {'type': 'loss', 'content': 0.04469416290521622, 'timestamp': '2025-09-10 02:44:30.346032', 'step': 15734, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 176], 'flops': 5220903722048}, 'timestamp': '2025-09-10 02:44:30.376206', 'step': 15734, 'epoch': 3} {'type': 'loss', 'content': 0.09001784771680832, 'timestamp': '2025-09-10 02:44:30.380500', 'step': 15735, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 2847885091968}, 'timestamp': '2025-09-10 02:44:30.410190', 'step': 15735, 'epoch': 3} {'type': 'loss', 'content': 0.036294322460889816, 'timestamp': '2025-09-10 02:44:30.436190', 'step': 15736, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 128], 'flops': 3797092544000}, 'timestamp': '2025-09-10 02:44:30.466390', 'step': 15736, 'epoch': 3} {'type': 'loss', 'content': 0.12287615984678268, 'timestamp': '2025-09-10 02:44:30.468560', 'step': 15737, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 3322488817984}, 'timestamp': '2025-09-10 02:44:30.499031', 'step': 15737, 'epoch': 3} {'type': 'loss', 'content': 0.09386125206947327, 'timestamp': '2025-09-10 02:44:30.501294', 'step': 15738, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 128], 'flops': 3797092544000}, 'timestamp': '2025-09-10 02:44:30.531214', 'step': 15738, 'epoch': 3} {'type': 'loss', 'content': 0.09766030311584473, 'timestamp': '2025-09-10 02:44:30.533882', 'step': 15739, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 2847885091968}, 'timestamp': '2025-09-10 02:44:30.563716', 'step': 15739, 'epoch': 3} {'type': 'loss', 'content': 0.08971596509218216, 'timestamp': '2025-09-10 02:44:30.588641', 'step': 15740, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 144], 'flops': 4271696270016}, 'timestamp': '2025-09-10 02:44:30.618940', 'step': 15740, 'epoch': 3} {'type': 'loss', 'content': 0.06943701207637787, 'timestamp': '2025-09-10 02:44:30.621341', 'step': 15741, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 2847885091968}, 'timestamp': '2025-09-10 02:44:30.652193', 'step': 15741, 'epoch': 3} {'type': 'loss', 'content': 0.03551223501563072, 'timestamp': '2025-09-10 02:44:30.654403', 'step': 15742, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 3322488817984}, 'timestamp': '2025-09-10 02:44:30.684416', 'step': 15742, 'epoch': 3} {'type': 'loss', 'content': 0.052099280059337616, 'timestamp': '2025-09-10 02:44:30.686427', 'step': 15743, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 3322488817984}, 'timestamp': '2025-09-10 02:44:30.715958', 'step': 15743, 'epoch': 3} {'type': 'loss', 'content': 0.0502253994345665, 'timestamp': '2025-09-10 02:44:30.739477', 'step': 15744, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 144], 'flops': 4271696270016}, 'timestamp': '2025-09-10 02:44:30.770013', 'step': 15744, 'epoch': 3} {'type': 'loss', 'content': 0.10230562090873718, 'timestamp': '2025-09-10 02:44:30.772576', 'step': 15745, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 2847885091968}, 'timestamp': '2025-09-10 02:44:30.803649', 'step': 15745, 'epoch': 3} {'type': 'loss', 'content': 0.04880800098180771, 'timestamp': '2025-09-10 02:44:30.805996', 'step': 15746, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 3322488817984}, 'timestamp': '2025-09-10 02:44:30.836321', 'step': 15746, 'epoch': 3} {'type': 'loss', 'content': 0.10407255589962006, 'timestamp': '2025-09-10 02:44:30.838756', 'step': 15747, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 2847885091968}, 'timestamp': '2025-09-10 02:44:30.868615', 'step': 15747, 'epoch': 3} {'type': 'loss', 'content': 0.09577629715204239, 'timestamp': '2025-09-10 02:44:30.892178', 'step': 15748, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 144], 'flops': 4271696270016}, 'timestamp': '2025-09-10 02:44:30.922822', 'step': 15748, 'epoch': 3} {'type': 'loss', 'content': 0.06000206992030144, 'timestamp': '2025-09-10 02:44:30.925114', 'step': 15749, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 128], 'flops': 3797092544000}, 'timestamp': '2025-09-10 02:44:30.954824', 'step': 15749, 'epoch': 3} {'type': 'loss', 'content': 0.07476600259542465, 'timestamp': '2025-09-10 02:44:30.957369', 'step': 15750, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 3322488817984}, 'timestamp': '2025-09-10 02:44:30.987155', 'step': 15750, 'epoch': 3} {'type': 'loss', 'content': 0.05463066324591637, 'timestamp': '2025-09-10 02:44:30.989479', 'step': 15751, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 3322488817984}, 'timestamp': '2025-09-10 02:44:31.019409', 'step': 15751, 'epoch': 3} {'type': 'loss', 'content': 0.10991695523262024, 'timestamp': '2025-09-10 02:44:31.042601', 'step': 15752, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 3322488817984}, 'timestamp': '2025-09-10 02:44:31.073036', 'step': 15752, 'epoch': 3} {'type': 'loss', 'content': 0.040793873369693756, 'timestamp': '2025-09-10 02:44:31.075436', 'step': 15753, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 128], 'flops': 3797092544000}, 'timestamp': '2025-09-10 02:44:31.105813', 'step': 15753, 'epoch': 3} {'type': 'loss', 'content': 0.05163269117474556, 'timestamp': '2025-09-10 02:44:31.108064', 'step': 15754, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 144], 'flops': 4271696270016}, 'timestamp': '2025-09-10 02:44:31.140675', 'step': 15754, 'epoch': 3} {'type': 'loss', 'content': 0.056716933846473694, 'timestamp': '2025-09-10 02:44:31.143178', 'step': 15755, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 80], 'flops': 2373281365952}, 'timestamp': '2025-09-10 02:44:31.173135', 'step': 15755, 'epoch': 3} {'type': 'loss', 'content': 0.03423529490828514, 'timestamp': '2025-09-10 02:44:31.196694', 'step': 15756, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 3322488817984}, 'timestamp': '2025-09-10 02:44:31.227223', 'step': 15756, 'epoch': 3} {'type': 'loss', 'content': 0.10104339569807053, 'timestamp': '2025-09-10 02:44:31.229668', 'step': 15757, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 144], 'flops': 4271696270016}, 'timestamp': '2025-09-10 02:44:31.261184', 'step': 15757, 'epoch': 3} {'type': 'loss', 'content': 0.08326490968465805, 'timestamp': '2025-09-10 02:44:31.263470', 'step': 15758, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 128], 'flops': 3797092544000}, 'timestamp': '2025-09-10 02:44:31.294869', 'step': 15758, 'epoch': 3} {'type': 'loss', 'content': 0.16792722046375275, 'timestamp': '2025-09-10 02:44:31.300360', 'step': 15759, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 3322488817984}, 'timestamp': '2025-09-10 02:44:31.341584', 'step': 15759, 'epoch': 3} {'type': 'loss', 'content': 0.10754904896020889, 'timestamp': '2025-09-10 02:44:31.368386', 'step': 15760, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 128], 'flops': 3797092544000}, 'timestamp': '2025-09-10 02:44:31.399372', 'step': 15760, 'epoch': 3} {'type': 'loss', 'content': 0.17223718762397766, 'timestamp': '2025-09-10 02:44:31.401574', 'step': 15761, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 3322488817984}, 'timestamp': '2025-09-10 02:44:31.431524', 'step': 15761, 'epoch': 3} {'type': 'loss', 'content': 0.021473299711942673, 'timestamp': '2025-09-10 02:44:31.433905', 'step': 15762, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 3322488817984}, 'timestamp': '2025-09-10 02:44:31.463723', 'step': 15762, 'epoch': 3} {'type': 'loss', 'content': 0.14640997350215912, 'timestamp': '2025-09-10 02:44:31.466644', 'step': 15763, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 3322488817984}, 'timestamp': '2025-09-10 02:44:31.498395', 'step': 15763, 'epoch': 3} {'type': 'loss', 'content': 0.0919019803404808, 'timestamp': '2025-09-10 02:44:31.521891', 'step': 15764, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 128], 'flops': 3797092544000}, 'timestamp': '2025-09-10 02:44:31.552057', 'step': 15764, 'epoch': 3} {'type': 'loss', 'content': 0.08539919555187225, 'timestamp': '2025-09-10 02:44:31.554397', 'step': 15765, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 128], 'flops': 3797092544000}, 'timestamp': '2025-09-10 02:44:31.583899', 'step': 15765, 'epoch': 3} {'type': 'loss', 'content': 0.11141849309206009, 'timestamp': '2025-09-10 02:44:31.586385', 'step': 15766, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 3322488817984}, 'timestamp': '2025-09-10 02:44:31.617423', 'step': 15766, 'epoch': 3} {'type': 'loss', 'content': 0.04604440554976463, 'timestamp': '2025-09-10 02:44:31.621880', 'step': 15767, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 160], 'flops': 4746299996032}, 'timestamp': '2025-09-10 02:44:31.651927', 'step': 15767, 'epoch': 3} {'type': 'loss', 'content': 0.09448947757482529, 'timestamp': '2025-09-10 02:44:31.675883', 'step': 15768, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 3322488817984}, 'timestamp': '2025-09-10 02:44:31.705857', 'step': 15768, 'epoch': 3} {'type': 'loss', 'content': 0.09826941043138504, 'timestamp': '2025-09-10 02:44:31.708266', 'step': 15769, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 128], 'flops': 3797092544000}, 'timestamp': '2025-09-10 02:44:31.738506', 'step': 15769, 'epoch': 3} {'type': 'loss', 'content': 0.07287929207086563, 'timestamp': '2025-09-10 02:44:31.740974', 'step': 15770, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 2847885091968}, 'timestamp': '2025-09-10 02:44:31.771625', 'step': 15770, 'epoch': 3} {'type': 'loss', 'content': 0.08742638677358627, 'timestamp': '2025-09-10 02:44:31.773911', 'step': 15771, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 3322488817984}, 'timestamp': '2025-09-10 02:44:31.804941', 'step': 15771, 'epoch': 3} {'type': 'loss', 'content': 0.05927983671426773, 'timestamp': '2025-09-10 02:44:31.828479', 'step': 15772, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 3322488817984}, 'timestamp': '2025-09-10 02:44:31.858737', 'step': 15772, 'epoch': 3} {'type': 'loss', 'content': 0.00603075185790658, 'timestamp': '2025-09-10 02:44:31.860819', 'step': 15773, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 144], 'flops': 4271696270016}, 'timestamp': '2025-09-10 02:44:31.890688', 'step': 15773, 'epoch': 3} {'type': 'loss', 'content': 0.10820014774799347, 'timestamp': '2025-09-10 02:44:31.896273', 'step': 15774, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 2847885091968}, 'timestamp': '2025-09-10 02:44:31.934530', 'step': 15774, 'epoch': 3} {'type': 'loss', 'content': 0.05217002332210541, 'timestamp': '2025-09-10 02:44:31.937672', 'step': 15775, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 3322488817984}, 'timestamp': '2025-09-10 02:44:31.970132', 'step': 15775, 'epoch': 3} {'type': 'loss', 'content': 0.09339118003845215, 'timestamp': '2025-09-10 02:44:31.994147', 'step': 15776, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 3322488817984}, 'timestamp': '2025-09-10 02:44:32.024491', 'step': 15776, 'epoch': 3} {'type': 'loss', 'content': 0.07479223608970642, 'timestamp': '2025-09-10 02:44:32.026975', 'step': 15777, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 128], 'flops': 3797092544000}, 'timestamp': '2025-09-10 02:44:32.057225', 'step': 15777, 'epoch': 3} {'type': 'loss', 'content': 0.21325968205928802, 'timestamp': '2025-09-10 02:44:32.059941', 'step': 15778, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 2847885091968}, 'timestamp': '2025-09-10 02:44:32.089753', 'step': 15778, 'epoch': 3} {'type': 'loss', 'content': 0.023911016061902046, 'timestamp': '2025-09-10 02:44:32.092224', 'step': 15779, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 80], 'flops': 2373281365952}, 'timestamp': '2025-09-10 02:44:32.121783', 'step': 15779, 'epoch': 3} {'type': 'loss', 'content': 0.04784640669822693, 'timestamp': '2025-09-10 02:44:32.145422', 'step': 15780, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 144], 'flops': 4271696270016}, 'timestamp': '2025-09-10 02:44:32.176153', 'step': 15780, 'epoch': 3} {'type': 'loss', 'content': 0.05040920153260231, 'timestamp': '2025-09-10 02:44:32.178765', 'step': 15781, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 2847885091968}, 'timestamp': '2025-09-10 02:44:32.210025', 'step': 15781, 'epoch': 3} {'type': 'loss', 'content': 0.06500338017940521, 'timestamp': '2025-09-10 02:44:32.212181', 'step': 15782, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 144], 'flops': 4271696270016}, 'timestamp': '2025-09-10 02:44:32.242959', 'step': 15782, 'epoch': 3} {'type': 'loss', 'content': 0.178781658411026, 'timestamp': '2025-09-10 02:44:32.245388', 'step': 15783, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 80], 'flops': 2373281365952}, 'timestamp': '2025-09-10 02:44:32.276237', 'step': 15783, 'epoch': 3} {'type': 'loss', 'content': 0.061322689056396484, 'timestamp': '2025-09-10 02:44:32.300112', 'step': 15784, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 2847885091968}, 'timestamp': '2025-09-10 02:44:32.331052', 'step': 15784, 'epoch': 3} {'type': 'loss', 'content': 0.09470828622579575, 'timestamp': '2025-09-10 02:44:32.333361', 'step': 15785, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 2847885091968}, 'timestamp': '2025-09-10 02:44:32.363662', 'step': 15785, 'epoch': 3} {'type': 'loss', 'content': 0.0911722481250763, 'timestamp': '2025-09-10 02:44:32.365771', 'step': 15786, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 128], 'flops': 3797092544000}, 'timestamp': '2025-09-10 02:44:32.395414', 'step': 15786, 'epoch': 3} {'type': 'loss', 'content': 0.04903918504714966, 'timestamp': '2025-09-10 02:44:32.397710', 'step': 15787, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 2847885091968}, 'timestamp': '2025-09-10 02:44:32.427238', 'step': 15787, 'epoch': 3} {'type': 'loss', 'content': 0.1545201539993286, 'timestamp': '2025-09-10 02:44:32.450691', 'step': 15788, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 128], 'flops': 3797092544000}, 'timestamp': '2025-09-10 02:44:32.481658', 'step': 15788, 'epoch': 3} {'type': 'loss', 'content': 0.09814061224460602, 'timestamp': '2025-09-10 02:44:32.483809', 'step': 15789, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 2847885091968}, 'timestamp': '2025-09-10 02:44:32.514710', 'step': 15789, 'epoch': 3} {'type': 'loss', 'content': 0.09822586923837662, 'timestamp': '2025-09-10 02:44:32.517334', 'step': 15790, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 2847885091968}, 'timestamp': '2025-09-10 02:44:32.548980', 'step': 15790, 'epoch': 3} {'type': 'loss', 'content': 0.0443895198404789, 'timestamp': '2025-09-10 02:44:32.551408', 'step': 15791, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 144], 'flops': 4271696270016}, 'timestamp': '2025-09-10 02:44:32.581716', 'step': 15791, 'epoch': 3} {'type': 'loss', 'content': 0.09895104169845581, 'timestamp': '2025-09-10 02:44:32.605160', 'step': 15792, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 144], 'flops': 4271696270016}, 'timestamp': '2025-09-10 02:44:32.635753', 'step': 15792, 'epoch': 3} {'type': 'loss', 'content': 0.07350479811429977, 'timestamp': '2025-09-10 02:44:32.638021', 'step': 15793, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 3322488817984}, 'timestamp': '2025-09-10 02:44:32.667703', 'step': 15793, 'epoch': 3} {'type': 'loss', 'content': 0.11036493629217148, 'timestamp': '2025-09-10 02:44:32.669783', 'step': 15794, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 2847885091968}, 'timestamp': '2025-09-10 02:44:32.699810', 'step': 15794, 'epoch': 3} {'type': 'loss', 'content': 0.07277064025402069, 'timestamp': '2025-09-10 02:44:32.702087', 'step': 15795, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 3322488817984}, 'timestamp': '2025-09-10 02:44:32.731840', 'step': 15795, 'epoch': 3} {'type': 'loss', 'content': 0.0934804379940033, 'timestamp': '2025-09-10 02:44:32.755426', 'step': 15796, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 128], 'flops': 3797092544000}, 'timestamp': '2025-09-10 02:44:32.785436', 'step': 15796, 'epoch': 3} {'type': 'loss', 'content': 0.11234481632709503, 'timestamp': '2025-09-10 02:44:32.787533', 'step': 15797, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 2847885091968}, 'timestamp': '2025-09-10 02:44:32.820185', 'step': 15797, 'epoch': 3} {'type': 'loss', 'content': 0.08698688447475433, 'timestamp': '2025-09-10 02:44:32.822595', 'step': 15798, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 3322488817984}, 'timestamp': '2025-09-10 02:44:32.852222', 'step': 15798, 'epoch': 3} {'type': 'loss', 'content': 0.0779755711555481, 'timestamp': '2025-09-10 02:44:32.854438', 'step': 15799, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 3322488817984}, 'timestamp': '2025-09-10 02:44:32.884363', 'step': 15799, 'epoch': 3} {'type': 'loss', 'content': 0.04013512283563614, 'timestamp': '2025-09-10 02:44:32.907863', 'step': 15800, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 128], 'flops': 3797092544000}, 'timestamp': '2025-09-10 02:44:32.938233', 'step': 15800, 'epoch': 3} {'type': 'loss', 'content': 0.03955402970314026, 'timestamp': '2025-09-10 02:44:32.940378', 'step': 15801, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 128], 'flops': 3797092544000}, 'timestamp': '2025-09-10 02:44:32.970962', 'step': 15801, 'epoch': 3} {'type': 'loss', 'content': 0.09177002310752869, 'timestamp': '2025-09-10 02:44:32.973247', 'step': 15802, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 128], 'flops': 3797092544000}, 'timestamp': '2025-09-10 02:44:33.002965', 'step': 15802, 'epoch': 3} {'type': 'loss', 'content': 0.07979875802993774, 'timestamp': '2025-09-10 02:44:33.005183', 'step': 15803, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 128], 'flops': 3797092544000}, 'timestamp': '2025-09-10 02:44:33.035297', 'step': 15803, 'epoch': 3} {'type': 'loss', 'content': 0.12236397713422775, 'timestamp': '2025-09-10 02:44:33.058628', 'step': 15804, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 3322488817984}, 'timestamp': '2025-09-10 02:44:33.088521', 'step': 15804, 'epoch': 3} {'type': 'loss', 'content': 0.05815071612596512, 'timestamp': '2025-09-10 02:44:33.090597', 'step': 15805, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 2847885091968}, 'timestamp': '2025-09-10 02:44:33.121979', 'step': 15805, 'epoch': 3} {'type': 'loss', 'content': 0.21996042132377625, 'timestamp': '2025-09-10 02:44:33.124855', 'step': 15806, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 160], 'flops': 4746299996032}, 'timestamp': '2025-09-10 02:44:33.154522', 'step': 15806, 'epoch': 3} {'type': 'loss', 'content': 0.11228860169649124, 'timestamp': '2025-09-10 02:44:33.157259', 'step': 15807, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 3322488817984}, 'timestamp': '2025-09-10 02:44:33.186582', 'step': 15807, 'epoch': 3} {'type': 'loss', 'content': 0.1335507482290268, 'timestamp': '2025-09-10 02:44:33.210374', 'step': 15808, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 3322488817984}, 'timestamp': '2025-09-10 02:44:33.240683', 'step': 15808, 'epoch': 3} {'type': 'loss', 'content': 0.15480269491672516, 'timestamp': '2025-09-10 02:44:33.243029', 'step': 15809, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 144], 'flops': 4271696270016}, 'timestamp': '2025-09-10 02:44:33.272632', 'step': 15809, 'epoch': 3} {'type': 'loss', 'content': 0.014124457724392414, 'timestamp': '2025-09-10 02:44:33.275191', 'step': 15810, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 3322488817984}, 'timestamp': '2025-09-10 02:44:33.305079', 'step': 15810, 'epoch': 3} {'type': 'loss', 'content': 0.08972086757421494, 'timestamp': '2025-09-10 02:44:33.307569', 'step': 15811, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 2847885091968}, 'timestamp': '2025-09-10 02:44:33.337489', 'step': 15811, 'epoch': 3} {'type': 'loss', 'content': 0.0790138691663742, 'timestamp': '2025-09-10 02:44:33.361689', 'step': 15812, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 128], 'flops': 3797092544000}, 'timestamp': '2025-09-10 02:44:33.391538', 'step': 15812, 'epoch': 3} {'type': 'loss', 'content': 0.10518517345190048, 'timestamp': '2025-09-10 02:44:33.393992', 'step': 15813, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 128], 'flops': 3797092544000}, 'timestamp': '2025-09-10 02:44:33.426702', 'step': 15813, 'epoch': 3} {'type': 'loss', 'content': 0.1099901869893074, 'timestamp': '2025-09-10 02:44:33.428716', 'step': 15814, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 2847885091968}, 'timestamp': '2025-09-10 02:44:33.458896', 'step': 15814, 'epoch': 3} {'type': 'loss', 'content': 0.0586380697786808, 'timestamp': '2025-09-10 02:44:33.460871', 'step': 15815, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 2847885091968}, 'timestamp': '2025-09-10 02:44:33.490888', 'step': 15815, 'epoch': 3} {'type': 'loss', 'content': 0.06955454498529434, 'timestamp': '2025-09-10 02:44:33.514824', 'step': 15816, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 80], 'flops': 2373281365952}, 'timestamp': '2025-09-10 02:44:33.544694', 'step': 15816, 'epoch': 3} {'type': 'loss', 'content': 0.07880289852619171, 'timestamp': '2025-09-10 02:44:33.546824', 'step': 15817, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 128], 'flops': 3797092544000}, 'timestamp': '2025-09-10 02:44:33.576191', 'step': 15817, 'epoch': 3} {'type': 'loss', 'content': 0.0704898089170456, 'timestamp': '2025-09-10 02:44:33.578291', 'step': 15818, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 128], 'flops': 3797092544000}, 'timestamp': '2025-09-10 02:44:33.608023', 'step': 15818, 'epoch': 3} {'type': 'loss', 'content': 0.12628111243247986, 'timestamp': '2025-09-10 02:44:33.610272', 'step': 15819, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 2847885091968}, 'timestamp': '2025-09-10 02:44:33.640134', 'step': 15819, 'epoch': 3} {'type': 'loss', 'content': 0.13853730261325836, 'timestamp': '2025-09-10 02:44:33.663509', 'step': 15820, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 128], 'flops': 3797092544000}, 'timestamp': '2025-09-10 02:44:33.693635', 'step': 15820, 'epoch': 3} {'type': 'loss', 'content': 0.06194284185767174, 'timestamp': '2025-09-10 02:44:33.695790', 'step': 15821, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 3322488817984}, 'timestamp': '2025-09-10 02:44:33.726451', 'step': 15821, 'epoch': 3} {'type': 'loss', 'content': 0.0546112097799778, 'timestamp': '2025-09-10 02:44:33.728734', 'step': 15822, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 144], 'flops': 4271696270016}, 'timestamp': '2025-09-10 02:44:33.758590', 'step': 15822, 'epoch': 3} {'type': 'loss', 'content': 0.06782218813896179, 'timestamp': '2025-09-10 02:44:33.760918', 'step': 15823, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 3322488817984}, 'timestamp': '2025-09-10 02:44:33.790374', 'step': 15823, 'epoch': 3} {'type': 'loss', 'content': 0.06904394924640656, 'timestamp': '2025-09-10 02:44:33.814012', 'step': 15824, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 160], 'flops': 4746299996032}, 'timestamp': '2025-09-10 02:44:33.844290', 'step': 15824, 'epoch': 3} {'type': 'loss', 'content': 0.10085201263427734, 'timestamp': '2025-09-10 02:44:33.846363', 'step': 15825, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 144], 'flops': 4271696270016}, 'timestamp': '2025-09-10 02:44:33.876745', 'step': 15825, 'epoch': 3} {'type': 'loss', 'content': 0.09268379956483841, 'timestamp': '2025-09-10 02:44:33.879422', 'step': 15826, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 2847885091968}, 'timestamp': '2025-09-10 02:44:33.910037', 'step': 15826, 'epoch': 3} {'type': 'loss', 'content': 0.07826811075210571, 'timestamp': '2025-09-10 02:44:33.912431', 'step': 15827, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 128], 'flops': 3797092544000}, 'timestamp': '2025-09-10 02:44:33.942153', 'step': 15827, 'epoch': 3} {'type': 'loss', 'content': 0.06517677754163742, 'timestamp': '2025-09-10 02:44:33.965939', 'step': 15828, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 160], 'flops': 4746299996032}, 'timestamp': '2025-09-10 02:44:33.996306', 'step': 15828, 'epoch': 3} {'type': 'loss', 'content': 0.09721533209085464, 'timestamp': '2025-09-10 02:44:33.998706', 'step': 15829, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 2847885091968}, 'timestamp': '2025-09-10 02:44:34.029859', 'step': 15829, 'epoch': 3} {'type': 'loss', 'content': 0.10834377259016037, 'timestamp': '2025-09-10 02:44:34.031801', 'step': 15830, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 3322488817984}, 'timestamp': '2025-09-10 02:44:34.061466', 'step': 15830, 'epoch': 3} {'type': 'loss', 'content': 0.14558780193328857, 'timestamp': '2025-09-10 02:44:34.063727', 'step': 15831, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 128], 'flops': 3797092544000}, 'timestamp': '2025-09-10 02:44:34.093240', 'step': 15831, 'epoch': 3} {'type': 'loss', 'content': 0.06588569283485413, 'timestamp': '2025-09-10 02:44:34.116662', 'step': 15832, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 2847885091968}, 'timestamp': '2025-09-10 02:44:34.148490', 'step': 15832, 'epoch': 3} {'type': 'loss', 'content': 0.09850500524044037, 'timestamp': '2025-09-10 02:44:34.150559', 'step': 15833, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 3322488817984}, 'timestamp': '2025-09-10 02:44:34.182044', 'step': 15833, 'epoch': 3} {'type': 'loss', 'content': 0.03627947345376015, 'timestamp': '2025-09-10 02:44:34.184251', 'step': 15834, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 224], 'flops': 6644714900096}, 'timestamp': '2025-09-10 02:44:34.214479', 'step': 15834, 'epoch': 3} {'type': 'loss', 'content': 0.19869282841682434, 'timestamp': '2025-09-10 02:44:34.221875', 'step': 15835, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 128], 'flops': 3797092544000}, 'timestamp': '2025-09-10 02:44:34.252894', 'step': 15835, 'epoch': 3} {'type': 'loss', 'content': 0.07860444486141205, 'timestamp': '2025-09-10 02:44:34.276488', 'step': 15836, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 3322488817984}, 'timestamp': '2025-09-10 02:44:34.306967', 'step': 15836, 'epoch': 3} {'type': 'loss', 'content': 0.10343832522630692, 'timestamp': '2025-09-10 02:44:34.310286', 'step': 15837, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 2847885091968}, 'timestamp': '2025-09-10 02:44:34.340505', 'step': 15837, 'epoch': 3} {'type': 'loss', 'content': 0.09551718086004257, 'timestamp': '2025-09-10 02:44:34.343232', 'step': 15838, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 2847885091968}, 'timestamp': '2025-09-10 02:44:34.373385', 'step': 15838, 'epoch': 3} {'type': 'loss', 'content': 0.10689829289913177, 'timestamp': '2025-09-10 02:44:34.375625', 'step': 15839, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 2847885091968}, 'timestamp': '2025-09-10 02:44:34.406187', 'step': 15839, 'epoch': 3} {'type': 'loss', 'content': 0.05673760920763016, 'timestamp': '2025-09-10 02:44:34.430406', 'step': 15840, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 2847885091968}, 'timestamp': '2025-09-10 02:44:34.462817', 'step': 15840, 'epoch': 3} {'type': 'loss', 'content': 0.1191299557685852, 'timestamp': '2025-09-10 02:44:34.465641', 'step': 15841, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 2847885091968}, 'timestamp': '2025-09-10 02:44:34.497303', 'step': 15841, 'epoch': 3} {'type': 'loss', 'content': 0.06133991479873657, 'timestamp': '2025-09-10 02:44:34.500784', 'step': 15842, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 128], 'flops': 3797092544000}, 'timestamp': '2025-09-10 02:44:34.532555', 'step': 15842, 'epoch': 3} {'type': 'loss', 'content': 0.09920082986354828, 'timestamp': '2025-09-10 02:44:34.535312', 'step': 15843, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 128], 'flops': 3797092544000}, 'timestamp': '2025-09-10 02:44:34.568148', 'step': 15843, 'epoch': 3} {'type': 'loss', 'content': 0.13649104535579681, 'timestamp': '2025-09-10 02:44:34.592904', 'step': 15844, 'epoch': 3} {'type': 'flops', 'content': [{'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1582003754624}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1898404492032}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1898404492032}, {'type': 'perplexity', 'in_batch_dim': [8, 144], 'batch_size': 8, 'flops': 2847606704256}, {'type': 'perplexity', 'in_batch_dim': [8, 64], 'batch_size': 8, 'flops': 1265603017216}, {'type': 'perplexity', 'in_batch_dim': [8, 112], 'batch_size': 8, 'flops': 2214805229440}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1898404492032}, {'type': 'perplexity', 'in_batch_dim': [8, 112], 'batch_size': 8, 'flops': 2214805229440}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1898404492032}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1898404492032}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1898404492032}, {'type': 'perplexity', 'in_batch_dim': [8, 112], 'batch_size': 8, 'flops': 2214805229440}, {'type': 'perplexity', 'in_batch_dim': [8, 112], 'batch_size': 8, 'flops': 2214805229440}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1582003754624}, {'type': 'perplexity', 'in_batch_dim': [8, 144], 'batch_size': 8, 'flops': 2847606704256}, {'type': 'perplexity', 'in_batch_dim': [8, 112], 'batch_size': 8, 'flops': 2214805229440}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1582003754624}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1898404492032}, {'type': 'perplexity', 'in_batch_dim': [8, 64], 'batch_size': 8, 'flops': 1265603017216}, {'type': 'perplexity', 'in_batch_dim': [8, 64], 'batch_size': 8, 'flops': 1265603017216}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1898404492032}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1582003754624}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1582003754624}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1582003754624}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1582003754624}, {'type': 'perplexity', 'in_batch_dim': [8, 64], 'batch_size': 8, 'flops': 1265603017216}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1898404492032}, {'type': 'perplexity', 'in_batch_dim': [8, 64], 'batch_size': 8, 'flops': 1265603017216}, {'type': 'perplexity', 'in_batch_dim': [8, 64], 'batch_size': 8, 'flops': 1265603017216}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1582003754624}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1582003754624}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1582003754624}, {'type': 'perplexity', 'in_batch_dim': [8, 112], 'batch_size': 8, 'flops': 2214805229440}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1582003754624}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1582003754624}, {'type': 'perplexity', 'in_batch_dim': [8, 160], 'batch_size': 8, 'flops': 3164007441664}, {'type': 'perplexity', 'in_batch_dim': [8, 64], 'batch_size': 8, 'flops': 1265603017216}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1898404492032}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1898404492032}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1898404492032}, {'type': 'perplexity', 'in_batch_dim': [8, 64], 'batch_size': 8, 'flops': 1265603017216}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1582003754624}, {'type': 'perplexity', 'in_batch_dim': [8, 64], 'batch_size': 8, 'flops': 1265603017216}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1582003754624}, {'type': 'perplexity', 'in_batch_dim': [8, 64], 'batch_size': 8, 'flops': 1265603017216}, {'type': 'perplexity', 'in_batch_dim': [8, 112], 'batch_size': 8, 'flops': 2214805229440}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1582003754624}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1898404492032}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1898404492032}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1898404492032}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1898404492032}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1898404492032}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1898404492032}, {'type': 'perplexity', 'in_batch_dim': [8, 64], 'batch_size': 8, 'flops': 1265603017216}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1898404492032}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1582003754624}, {'type': 'perplexity', 'in_batch_dim': [8, 64], 'batch_size': 8, 'flops': 1265603017216}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1898404492032}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1898404492032}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1898404492032}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1898404492032}, {'type': 'perplexity', 'in_batch_dim': [8, 128], 'batch_size': 8, 'flops': 2531205966848}, {'type': 'perplexity', 'in_batch_dim': [8, 112], 'batch_size': 8, 'flops': 2214805229440}, {'type': 'perplexity', 'in_batch_dim': [8, 64], 'batch_size': 8, 'flops': 1265603017216}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1582003754624}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1898404492032}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1582003754624}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1898404492032}, {'type': 'perplexity', 'in_batch_dim': [8, 64], 'batch_size': 8, 'flops': 1265603017216}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1898404492032}, {'type': 'perplexity', 'in_batch_dim': [8, 112], 'batch_size': 8, 'flops': 2214805229440}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1898404492032}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1582003754624}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1582003754624}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1582003754624}, {'type': 'perplexity', 'in_batch_dim': [8, 64], 'batch_size': 8, 'flops': 1265603017216}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1582003754624}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1898404492032}, {'type': 'perplexity', 'in_batch_dim': [8, 64], 'batch_size': 8, 'flops': 1265603017216}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1582003754624}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1898404492032}, {'type': 'perplexity', 'in_batch_dim': [8, 64], 'batch_size': 8, 'flops': 1265603017216}, {'type': 'perplexity', 'in_batch_dim': [8, 112], 'batch_size': 8, 'flops': 2214805229440}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1582003754624}, {'type': 'perplexity', 'in_batch_dim': [8, 144], 'batch_size': 8, 'flops': 2847606704256}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1582003754624}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1582003754624}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1582003754624}, {'type': 'perplexity', 'in_batch_dim': [8, 64], 'batch_size': 8, 'flops': 1265603017216}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1898404492032}, {'type': 'perplexity', 'in_batch_dim': [8, 112], 'batch_size': 8, 'flops': 2214805229440}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1898404492032}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1582003754624}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1582003754624}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1582003754624}, {'type': 'perplexity', 'in_batch_dim': [8, 112], 'batch_size': 8, 'flops': 2214805229440}, {'type': 'perplexity', 'in_batch_dim': [8, 64], 'batch_size': 8, 'flops': 1265603017216}, {'type': 'perplexity', 'in_batch_dim': [8, 112], 'batch_size': 8, 'flops': 2214805229440}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1582003754624}, {'type': 'perplexity', 'in_batch_dim': [8, 64], 'batch_size': 8, 'flops': 1265603017216}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1582003754624}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1582003754624}, {'type': 'perplexity', 'in_batch_dim': [8, 64], 'batch_size': 8, 'flops': 1265603017216}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1582003754624}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1582003754624}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1582003754624}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1898404492032}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1582003754624}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1582003754624}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1898404492032}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1898404492032}, {'type': 'perplexity', 'in_batch_dim': [8, 112], 'batch_size': 8, 'flops': 2214805229440}, {'type': 'perplexity', 'in_batch_dim': [8, 64], 'batch_size': 8, 'flops': 1265603017216}, {'type': 'perplexity', 'in_batch_dim': [8, 112], 'batch_size': 8, 'flops': 2214805229440}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1898404492032}, {'type': 'perplexity', 'in_batch_dim': [8, 112], 'batch_size': 8, 'flops': 2214805229440}, {'type': 'perplexity', 'in_batch_dim': [8, 128], 'batch_size': 8, 'flops': 2531205966848}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1582003754624}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1898404492032}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1898404492032}, {'type': 'perplexity', 'in_batch_dim': [8, 192], 'batch_size': 8, 'flops': 3796808916480}, {'type': 'perplexity', 'in_batch_dim': [8, 64], 'batch_size': 8, 'flops': 1265603017216}, {'type': 'perplexity', 'in_batch_dim': [8, 144], 'batch_size': 8, 'flops': 2847606704256}, {'type': 'perplexity', 'in_batch_dim': [8, 64], 'batch_size': 8, 'flops': 1265603017216}, {'type': 'perplexity', 'in_batch_dim': [8, 144], 'batch_size': 8, 'flops': 2847606704256}, {'type': 'perplexity', 'in_batch_dim': [8, 64], 'batch_size': 8, 'flops': 1265603017216}, {'type': 'perplexity', 'in_batch_dim': [8, 64], 'batch_size': 8, 'flops': 1265603017216}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1898404492032}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1898404492032}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1582003754624}, {'type': 'perplexity', 'in_batch_dim': [8, 128], 'batch_size': 8, 'flops': 2531205966848}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1898404492032}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1898404492032}, {'type': 'perplexity', 'in_batch_dim': [8, 112], 'batch_size': 8, 'flops': 2214805229440}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1582003754624}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1582003754624}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1898404492032}, {'type': 'perplexity', 'in_batch_dim': [8, 64], 'batch_size': 8, 'flops': 1265603017216}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1898404492032}, {'type': 'perplexity', 'in_batch_dim': [8, 112], 'batch_size': 8, 'flops': 2214805229440}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1898404492032}, {'type': 'perplexity', 'in_batch_dim': [8, 64], 'batch_size': 8, 'flops': 1265603017216}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1582003754624}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1898404492032}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1898404492032}, {'type': 'perplexity', 'in_batch_dim': [8, 64], 'batch_size': 8, 'flops': 1265603017216}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1898404492032}, {'type': 'perplexity', 'in_batch_dim': [8, 64], 'batch_size': 8, 'flops': 1265603017216}, {'type': 'perplexity', 'in_batch_dim': [8, 112], 'batch_size': 8, 'flops': 2214805229440}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1582003754624}, {'type': 'perplexity', 'in_batch_dim': [8, 128], 'batch_size': 8, 'flops': 2531205966848}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1582003754624}, {'type': 'perplexity', 'in_batch_dim': [8, 112], 'batch_size': 8, 'flops': 2214805229440}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1898404492032}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1582003754624}, {'type': 'perplexity', 'in_batch_dim': [8, 112], 'batch_size': 8, 'flops': 2214805229440}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1582003754624}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1898404492032}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1898404492032}, {'type': 'perplexity', 'in_batch_dim': [8, 128], 'batch_size': 8, 'flops': 2531205966848}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1898404492032}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1898404492032}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1582003754624}, {'type': 'perplexity', 'in_batch_dim': [8, 112], 'batch_size': 8, 'flops': 2214805229440}, {'type': 'perplexity', 'in_batch_dim': [8, 128], 'batch_size': 8, 'flops': 2531205966848}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1898404492032}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1582003754624}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1582003754624}, {'type': 'perplexity', 'in_batch_dim': [8, 128], 'batch_size': 8, 'flops': 2531205966848}, {'type': 'perplexity', 'in_batch_dim': [8, 112], 'batch_size': 8, 'flops': 2214805229440}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1582003754624}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1582003754624}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1898404492032}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1582003754624}, {'type': 'perplexity', 'in_batch_dim': [8, 64], 'batch_size': 8, 'flops': 1265603017216}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1582003754624}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1582003754624}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1582003754624}, {'type': 'perplexity', 'in_batch_dim': [8, 112], 'batch_size': 8, 'flops': 2214805229440}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1582003754624}, {'type': 'perplexity', 'in_batch_dim': [8, 64], 'batch_size': 8, 'flops': 1265603017216}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1582003754624}, {'type': 'perplexity', 'in_batch_dim': [8, 128], 'batch_size': 8, 'flops': 2531205966848}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1898404492032}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1898404492032}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1582003754624}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1898404492032}, {'type': 'perplexity', 'in_batch_dim': [8, 64], 'batch_size': 8, 'flops': 1265603017216}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1898404492032}, {'type': 'perplexity', 'in_batch_dim': [8, 112], 'batch_size': 8, 'flops': 2214805229440}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1898404492032}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1582003754624}, {'type': 'perplexity', 'in_batch_dim': [8, 112], 'batch_size': 8, 'flops': 2214805229440}, {'type': 'perplexity', 'in_batch_dim': [8, 112], 'batch_size': 8, 'flops': 2214805229440}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1898404492032}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1898404492032}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1898404492032}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1898404492032}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1582003754624}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1582003754624}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1582003754624}, {'type': 'perplexity', 'in_batch_dim': [8, 112], 'batch_size': 8, 'flops': 2214805229440}, {'type': 'perplexity', 'in_batch_dim': [8, 64], 'batch_size': 8, 'flops': 1265603017216}, {'type': 'perplexity', 'in_batch_dim': [8, 144], 'batch_size': 8, 'flops': 2847606704256}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1898404492032}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1898404492032}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1898404492032}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1898404492032}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1898404492032}, {'type': 'perplexity', 'in_batch_dim': [8, 112], 'batch_size': 8, 'flops': 2214805229440}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1582003754624}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1582003754624}, {'type': 'perplexity', 'in_batch_dim': [8, 64], 'batch_size': 8, 'flops': 1265603017216}, {'type': 'perplexity', 'in_batch_dim': [8, 112], 'batch_size': 8, 'flops': 2214805229440}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1582003754624}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1898404492032}, {'type': 'perplexity', 'in_batch_dim': [8, 64], 'batch_size': 8, 'flops': 1265603017216}, {'type': 'perplexity', 'in_batch_dim': [8, 112], 'batch_size': 8, 'flops': 2214805229440}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1898404492032}, {'type': 'perplexity', 'in_batch_dim': [8, 64], 'batch_size': 8, 'flops': 1265603017216}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1582003754624}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1582003754624}, {'type': 'perplexity', 'in_batch_dim': [8, 64], 'batch_size': 8, 'flops': 1265603017216}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1898404492032}, {'type': 'perplexity', 'in_batch_dim': [8, 64], 'batch_size': 8, 'flops': 1265603017216}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1582003754624}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1898404492032}, {'type': 'perplexity', 'in_batch_dim': [8, 112], 'batch_size': 8, 'flops': 2214805229440}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1582003754624}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1898404492032}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1582003754624}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1898404492032}, {'type': 'perplexity', 'in_batch_dim': [8, 112], 'batch_size': 8, 'flops': 2214805229440}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1898404492032}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1898404492032}, {'type': 'perplexity', 'in_batch_dim': [8, 64], 'batch_size': 8, 'flops': 1265603017216}, {'type': 'perplexity', 'in_batch_dim': [8, 64], 'batch_size': 8, 'flops': 1265603017216}, {'type': 'perplexity', 'in_batch_dim': [8, 128], 'batch_size': 8, 'flops': 2531205966848}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1582003754624}, {'type': 'perplexity', 'in_batch_dim': [8, 64], 'batch_size': 8, 'flops': 1265603017216}, {'type': 'perplexity', 'in_batch_dim': [8, 112], 'batch_size': 8, 'flops': 2214805229440}, {'type': 'perplexity', 'in_batch_dim': [8, 112], 'batch_size': 8, 'flops': 2214805229440}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1898404492032}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1582003754624}, {'type': 'perplexity', 'in_batch_dim': [8, 128], 'batch_size': 8, 'flops': 2531205966848}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1898404492032}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1582003754624}, {'type': 'perplexity', 'in_batch_dim': [8, 112], 'batch_size': 8, 'flops': 2214805229440}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1898404492032}, {'type': 'perplexity', 'in_batch_dim': [8, 64], 'batch_size': 8, 'flops': 1265603017216}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1582003754624}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1898404492032}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1898404492032}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1582003754624}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1582003754624}, {'type': 'perplexity', 'in_batch_dim': [8, 64], 'batch_size': 8, 'flops': 1265603017216}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1898404492032}, {'type': 'perplexity', 'in_batch_dim': [8, 112], 'batch_size': 8, 'flops': 2214805229440}, {'type': 'perplexity', 'in_batch_dim': [8, 64], 'batch_size': 8, 'flops': 1265603017216}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1898404492032}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1582003754624}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1582003754624}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1898404492032}, {'type': 'perplexity', 'in_batch_dim': [8, 112], 'batch_size': 8, 'flops': 2214805229440}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1898404492032}, {'type': 'perplexity', 'in_batch_dim': [8, 112], 'batch_size': 8, 'flops': 2214805229440}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1582003754624}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1582003754624}, {'type': 'perplexity', 'in_batch_dim': [8, 64], 'batch_size': 8, 'flops': 1265603017216}, {'type': 'perplexity', 'in_batch_dim': [8, 112], 'batch_size': 8, 'flops': 2214805229440}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1582003754624}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1898404492032}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1582003754624}, {'type': 'perplexity', 'in_batch_dim': [8, 112], 'batch_size': 8, 'flops': 2214805229440}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1898404492032}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1898404492032}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1582003754624}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1898404492032}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1582003754624}, {'type': 'perplexity', 'in_batch_dim': [8, 112], 'batch_size': 8, 'flops': 2214805229440}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1582003754624}, {'type': 'perplexity', 'in_batch_dim': [8, 64], 'batch_size': 8, 'flops': 1265603017216}, {'type': 'perplexity', 'in_batch_dim': [8, 64], 'batch_size': 8, 'flops': 1265603017216}, {'type': 'perplexity', 'in_batch_dim': [8, 48], 'batch_size': 8, 'flops': 949202279808}, {'type': 'perplexity', 'in_batch_dim': [8, 112], 'batch_size': 8, 'flops': 2214805229440}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1582003754624}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1898404492032}, {'type': 'perplexity', 'in_batch_dim': [8, 128], 'batch_size': 8, 'flops': 2531205966848}, {'type': 'perplexity', 'in_batch_dim': [8, 112], 'batch_size': 8, 'flops': 2214805229440}, {'type': 'perplexity', 'in_batch_dim': [8, 128], 'batch_size': 8, 'flops': 2531205966848}, {'type': 'perplexity', 'in_batch_dim': [8, 112], 'batch_size': 8, 'flops': 2214805229440}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1582003754624}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1898404492032}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1582003754624}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1582003754624}, {'type': 'perplexity', 'in_batch_dim': [8, 112], 'batch_size': 8, 'flops': 2214805229440}, {'type': 'perplexity', 'in_batch_dim': [8, 112], 'batch_size': 8, 'flops': 2214805229440}, {'type': 'perplexity', 'in_batch_dim': [8, 64], 'batch_size': 8, 'flops': 1265603017216}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1898404492032}, {'type': 'perplexity', 'in_batch_dim': [8, 112], 'batch_size': 8, 'flops': 2214805229440}, {'type': 'perplexity', 'in_batch_dim': [8, 64], 'batch_size': 8, 'flops': 1265603017216}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1898404492032}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1582003754624}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1898404492032}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1898404492032}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1898404492032}, {'type': 'perplexity', 'in_batch_dim': [8, 48], 'batch_size': 8, 'flops': 949202279808}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1898404492032}, {'type': 'perplexity', 'in_batch_dim': [8, 112], 'batch_size': 8, 'flops': 2214805229440}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1582003754624}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1898404492032}, {'type': 'perplexity', 'in_batch_dim': [8, 64], 'batch_size': 8, 'flops': 1265603017216}, {'type': 'perplexity', 'in_batch_dim': [8, 112], 'batch_size': 8, 'flops': 2214805229440}, {'type': 'perplexity', 'in_batch_dim': [8, 64], 'batch_size': 8, 'flops': 1265603017216}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1582003754624}, {'type': 'perplexity', 'in_batch_dim': [8, 64], 'batch_size': 8, 'flops': 1265603017216}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1582003754624}, {'type': 'perplexity', 'in_batch_dim': [8, 112], 'batch_size': 8, 'flops': 2214805229440}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1582003754624}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1582003754624}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1582003754624}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1582003754624}, {'type': 'perplexity', 'in_batch_dim': [8, 112], 'batch_size': 8, 'flops': 2214805229440}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1582003754624}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1898404492032}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1898404492032}, {'type': 'perplexity', 'in_batch_dim': [8, 112], 'batch_size': 8, 'flops': 2214805229440}, {'type': 'perplexity', 'in_batch_dim': [8, 144], 'batch_size': 8, 'flops': 2847606704256}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1582003754624}, {'type': 'perplexity', 'in_batch_dim': [8, 64], 'batch_size': 8, 'flops': 1265603017216}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1898404492032}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1898404492032}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1582003754624}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1898404492032}, {'type': 'perplexity', 'in_batch_dim': [8, 128], 'batch_size': 8, 'flops': 2531205966848}, {'type': 'perplexity', 'in_batch_dim': [8, 112], 'batch_size': 8, 'flops': 2214805229440}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1898404492032}, {'type': 'perplexity', 'in_batch_dim': [8, 64], 'batch_size': 8, 'flops': 1265603017216}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1898404492032}, {'type': 'perplexity', 'in_batch_dim': [8, 160], 'batch_size': 8, 'flops': 3164007441664}, {'type': 'perplexity', 'in_batch_dim': [8, 64], 'batch_size': 8, 'flops': 1265603017216}, {'type': 'perplexity', 'in_batch_dim': [8, 112], 'batch_size': 8, 'flops': 2214805229440}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1898404492032}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1898404492032}, {'type': 'perplexity', 'in_batch_dim': [8, 144], 'batch_size': 8, 'flops': 2847606704256}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1582003754624}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1898404492032}, {'type': 'perplexity', 'in_batch_dim': [8, 64], 'batch_size': 8, 'flops': 1265603017216}, {'type': 'perplexity', 'in_batch_dim': [8, 64], 'batch_size': 8, 'flops': 1265603017216}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1898404492032}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1898404492032}, {'type': 'perplexity', 'in_batch_dim': [8, 112], 'batch_size': 8, 'flops': 2214805229440}, {'type': 'perplexity', 'in_batch_dim': [8, 112], 'batch_size': 8, 'flops': 2214805229440}, {'type': 'perplexity', 'in_batch_dim': [8, 128], 'batch_size': 8, 'flops': 2531205966848}, {'type': 'perplexity', 'in_batch_dim': [8, 112], 'batch_size': 8, 'flops': 2214805229440}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1582003754624}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1582003754624}, {'type': 'perplexity', 'in_batch_dim': [8, 112], 'batch_size': 8, 'flops': 2214805229440}, {'type': 'perplexity', 'in_batch_dim': [8, 128], 'batch_size': 8, 'flops': 2531205966848}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1582003754624}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1582003754624}, {'type': 'perplexity', 'in_batch_dim': [8, 64], 'batch_size': 8, 'flops': 1265603017216}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1898404492032}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1582003754624}, {'type': 'perplexity', 'in_batch_dim': [8, 112], 'batch_size': 8, 'flops': 2214805229440}, {'type': 'perplexity', 'in_batch_dim': [8, 64], 'batch_size': 8, 'flops': 1265603017216}, {'type': 'perplexity', 'in_batch_dim': [8, 64], 'batch_size': 8, 'flops': 1265603017216}, {'type': 'perplexity', 'in_batch_dim': [8, 128], 'batch_size': 8, 'flops': 2531205966848}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1898404492032}, {'type': 'perplexity', 'in_batch_dim': [8, 112], 'batch_size': 8, 'flops': 2214805229440}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1582003754624}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1582003754624}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1582003754624}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1582003754624}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1582003754624}, {'type': 'perplexity', 'in_batch_dim': [8, 112], 'batch_size': 8, 'flops': 2214805229440}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1898404492032}, {'type': 'perplexity', 'in_batch_dim': [8, 112], 'batch_size': 8, 'flops': 2214805229440}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1898404492032}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1582003754624}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1582003754624}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1898404492032}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1898404492032}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1582003754624}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1582003754624}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1582003754624}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1582003754624}, {'type': 'perplexity', 'in_batch_dim': [8, 48], 'batch_size': 8, 'flops': 949202279808}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1582003754624}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1898404492032}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1582003754624}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1898404492032}, {'type': 'perplexity', 'in_batch_dim': [8, 112], 'batch_size': 8, 'flops': 2214805229440}, {'type': 'perplexity', 'in_batch_dim': [8, 128], 'batch_size': 8, 'flops': 2531205966848}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1898404492032}, {'type': 'perplexity', 'in_batch_dim': [8, 64], 'batch_size': 8, 'flops': 1265603017216}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1898404492032}, {'type': 'perplexity', 'in_batch_dim': [8, 64], 'batch_size': 8, 'flops': 1265603017216}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1582003754624}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1582003754624}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1582003754624}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1582003754624}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1898404492032}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1582003754624}, {'type': 'perplexity', 'in_batch_dim': [8, 112], 'batch_size': 8, 'flops': 2214805229440}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1898404492032}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1582003754624}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1898404492032}, {'type': 'perplexity', 'in_batch_dim': [8, 64], 'batch_size': 8, 'flops': 1265603017216}, {'type': 'perplexity', 'in_batch_dim': [8, 64], 'batch_size': 8, 'flops': 1265603017216}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1898404492032}, {'type': 'perplexity', 'in_batch_dim': [8, 64], 'batch_size': 8, 'flops': 1265603017216}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1582003754624}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1582003754624}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1582003754624}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1582003754624}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1582003754624}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1582003754624}, {'type': 'perplexity', 'in_batch_dim': [8, 64], 'batch_size': 8, 'flops': 1265603017216}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1582003754624}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1582003754624}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1898404492032}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1898404492032}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1898404492032}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1898404492032}, {'type': 'perplexity', 'in_batch_dim': [8, 128], 'batch_size': 8, 'flops': 2531205966848}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1582003754624}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1898404492032}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1898404492032}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1898404492032}, {'type': 'perplexity', 'in_batch_dim': [8, 64], 'batch_size': 8, 'flops': 1265603017216}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1898404492032}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1898404492032}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1582003754624}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1898404492032}, {'type': 'perplexity', 'in_batch_dim': [8, 112], 'batch_size': 8, 'flops': 2214805229440}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1898404492032}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1582003754624}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1898404492032}, {'type': 'perplexity', 'in_batch_dim': [8, 64], 'batch_size': 8, 'flops': 1265603017216}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1898404492032}, {'type': 'perplexity', 'in_batch_dim': [8, 112], 'batch_size': 8, 'flops': 2214805229440}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1898404492032}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1898404492032}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1582003754624}, {'type': 'perplexity', 'in_batch_dim': [8, 64], 'batch_size': 8, 'flops': 1265603017216}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1582003754624}, {'type': 'perplexity', 'in_batch_dim': [8, 128], 'batch_size': 8, 'flops': 2531205966848}, {'type': 'perplexity', 'in_batch_dim': [8, 64], 'batch_size': 8, 'flops': 1265603017216}, {'type': 'perplexity', 'in_batch_dim': [8, 128], 'batch_size': 8, 'flops': 2531205966848}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1898404492032}, {'type': 'perplexity', 'in_batch_dim': [8, 112], 'batch_size': 8, 'flops': 2214805229440}, {'type': 'perplexity', 'in_batch_dim': [8, 112], 'batch_size': 8, 'flops': 2214805229440}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1898404492032}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1898404492032}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1582003754624}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1582003754624}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1582003754624}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1898404492032}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1582003754624}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1582003754624}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1898404492032}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1582003754624}, {'type': 'perplexity', 'in_batch_dim': [8, 64], 'batch_size': 8, 'flops': 1265603017216}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1898404492032}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1898404492032}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1898404492032}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1898404492032}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1582003754624}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1898404492032}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1582003754624}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1582003754624}, {'type': 'perplexity', 'in_batch_dim': [8, 112], 'batch_size': 8, 'flops': 2214805229440}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1582003754624}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1898404492032}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1582003754624}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1582003754624}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1582003754624}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1898404492032}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1582003754624}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1898404492032}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1582003754624}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1582003754624}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1898404492032}, {'type': 'perplexity', 'in_batch_dim': [8, 112], 'batch_size': 8, 'flops': 2214805229440}, {'type': 'perplexity', 'in_batch_dim': [8, 128], 'batch_size': 8, 'flops': 2531205966848}, {'type': 'perplexity', 'in_batch_dim': [8, 112], 'batch_size': 8, 'flops': 2214805229440}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1582003754624}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1898404492032}, {'type': 'perplexity', 'in_batch_dim': [8, 64], 'batch_size': 8, 'flops': 1265603017216}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1898404492032}, {'type': 'perplexity', 'in_batch_dim': [8, 64], 'batch_size': 8, 'flops': 1265603017216}, {'type': 'perplexity', 'in_batch_dim': [8, 144], 'batch_size': 8, 'flops': 2847606704256}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1582003754624}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1898404492032}, {'type': 'perplexity', 'in_batch_dim': [8, 112], 'batch_size': 8, 'flops': 2214805229440}, {'type': 'perplexity', 'in_batch_dim': [8, 144], 'batch_size': 8, 'flops': 2847606704256}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1582003754624}, {'type': 'perplexity', 'in_batch_dim': [8, 112], 'batch_size': 8, 'flops': 2214805229440}, {'type': 'perplexity', 'in_batch_dim': [8, 64], 'batch_size': 8, 'flops': 1265603017216}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1582003754624}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1898404492032}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1898404492032}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1582003754624}, {'type': 'perplexity', 'in_batch_dim': [8, 112], 'batch_size': 8, 'flops': 2214805229440}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1582003754624}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1582003754624}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1582003754624}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1582003754624}, {'type': 'perplexity', 'in_batch_dim': [8, 112], 'batch_size': 8, 'flops': 2214805229440}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1898404492032}, {'type': 'perplexity', 'in_batch_dim': [8, 128], 'batch_size': 8, 'flops': 2531205966848}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1582003754624}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1582003754624}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1898404492032}, {'type': 'perplexity', 'in_batch_dim': [8, 64], 'batch_size': 8, 'flops': 1265603017216}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1898404492032}, {'type': 'perplexity', 'in_batch_dim': [8, 64], 'batch_size': 8, 'flops': 1265603017216}, {'type': 'perplexity', 'in_batch_dim': [8, 112], 'batch_size': 8, 'flops': 2214805229440}, {'type': 'perplexity', 'in_batch_dim': [8, 112], 'batch_size': 8, 'flops': 2214805229440}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1582003754624}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1898404492032}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1582003754624}, {'type': 'perplexity', 'in_batch_dim': [8, 64], 'batch_size': 8, 'flops': 1265603017216}, {'type': 'perplexity', 'in_batch_dim': [8, 64], 'batch_size': 8, 'flops': 1265603017216}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1898404492032}, {'type': 'perplexity', 'in_batch_dim': [8, 64], 'batch_size': 8, 'flops': 1265603017216}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1898404492032}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1898404492032}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1582003754624}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1898404492032}, {'type': 'perplexity', 'in_batch_dim': [8, 144], 'batch_size': 8, 'flops': 2847606704256}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1898404492032}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1582003754624}, {'type': 'perplexity', 'in_batch_dim': [8, 128], 'batch_size': 8, 'flops': 2531205966848}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1898404492032}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1898404492032}, {'type': 'perplexity', 'in_batch_dim': [8, 112], 'batch_size': 8, 'flops': 2214805229440}, {'type': 'perplexity', 'in_batch_dim': [8, 112], 'batch_size': 8, 'flops': 2214805229440}, {'type': 'perplexity', 'in_batch_dim': [8, 64], 'batch_size': 8, 'flops': 1265603017216}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1582003754624}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1898404492032}, {'type': 'perplexity', 'in_batch_dim': [8, 128], 'batch_size': 8, 'flops': 2531205966848}, {'type': 'perplexity', 'in_batch_dim': [8, 144], 'batch_size': 8, 'flops': 2847606704256}, {'type': 'perplexity', 'in_batch_dim': [8, 112], 'batch_size': 8, 'flops': 2214805229440}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1582003754624}, {'type': 'perplexity', 'in_batch_dim': [8, 64], 'batch_size': 8, 'flops': 1265603017216}, {'type': 'perplexity', 'in_batch_dim': [8, 64], 'batch_size': 8, 'flops': 1265603017216}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1898404492032}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1582003754624}, {'type': 'perplexity', 'in_batch_dim': [8, 64], 'batch_size': 8, 'flops': 1265603017216}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1898404492032}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1582003754624}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1898404492032}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1898404492032}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1582003754624}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1582003754624}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1582003754624}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1582003754624}, {'type': 'perplexity', 'in_batch_dim': [3, 48], 'batch_size': 8, 'flops': 949202279808}], 'timestamp': '2025-09-10 02:44:42.347717', 'step': 15844, 'epoch': 3} {'type': 'pplx', 'content': 11351.788258881745, 'timestamp': '2025-09-10 02:44:42.350711', 'step': 15844, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 144], 'flops': 4271696270016}, 'timestamp': '2025-09-10 02:44:42.381411', 'step': 15844, 'epoch': 3} {'type': 'loss', 'content': 0.07918776571750641, 'timestamp': '2025-09-10 02:44:42.383448', 'step': 15845, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 3322488817984}, 'timestamp': '2025-09-10 02:44:42.413704', 'step': 15845, 'epoch': 3} {'type': 'loss', 'content': 0.15797697007656097, 'timestamp': '2025-09-10 02:44:42.416079', 'step': 15846, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 2847885091968}, 'timestamp': '2025-09-10 02:44:42.446782', 'step': 15846, 'epoch': 3} {'type': 'loss', 'content': 0.1591077297925949, 'timestamp': '2025-09-10 02:44:42.449202', 'step': 15847, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 128], 'flops': 3797092544000}, 'timestamp': '2025-09-10 02:44:42.480765', 'step': 15847, 'epoch': 3} {'type': 'loss', 'content': 0.08295845985412598, 'timestamp': '2025-09-10 02:44:42.504287', 'step': 15848, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 144], 'flops': 4271696270016}, 'timestamp': '2025-09-10 02:44:42.534502', 'step': 15848, 'epoch': 3} {'type': 'loss', 'content': 0.09166882187128067, 'timestamp': '2025-09-10 02:44:42.537091', 'step': 15849, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 128], 'flops': 3797092544000}, 'timestamp': '2025-09-10 02:44:42.568392', 'step': 15849, 'epoch': 3} {'type': 'loss', 'content': 0.053832001984119415, 'timestamp': '2025-09-10 02:44:42.573866', 'step': 15850, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 3322488817984}, 'timestamp': '2025-09-10 02:44:42.609269', 'step': 15850, 'epoch': 3} {'type': 'loss', 'content': 0.07859333604574203, 'timestamp': '2025-09-10 02:44:42.611682', 'step': 15851, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 128], 'flops': 3797092544000}, 'timestamp': '2025-09-10 02:44:42.643670', 'step': 15851, 'epoch': 3} {'type': 'loss', 'content': 0.09256929159164429, 'timestamp': '2025-09-10 02:44:42.667265', 'step': 15852, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 3322488817984}, 'timestamp': '2025-09-10 02:44:42.697458', 'step': 15852, 'epoch': 3} {'type': 'loss', 'content': 0.0449281707406044, 'timestamp': '2025-09-10 02:44:42.699861', 'step': 15853, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 2847885091968}, 'timestamp': '2025-09-10 02:44:42.730404', 'step': 15853, 'epoch': 3} {'type': 'loss', 'content': 0.02706644870340824, 'timestamp': '2025-09-10 02:44:42.732324', 'step': 15854, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 2847885091968}, 'timestamp': '2025-09-10 02:44:42.762423', 'step': 15854, 'epoch': 3} {'type': 'loss', 'content': 0.08820223808288574, 'timestamp': '2025-09-10 02:44:42.764692', 'step': 15855, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 2847885091968}, 'timestamp': '2025-09-10 02:44:42.795353', 'step': 15855, 'epoch': 3} {'type': 'loss', 'content': 0.07357484102249146, 'timestamp': '2025-09-10 02:44:42.818876', 'step': 15856, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 2847885091968}, 'timestamp': '2025-09-10 02:44:42.849755', 'step': 15856, 'epoch': 3} {'type': 'loss', 'content': 0.07950722426176071, 'timestamp': '2025-09-10 02:44:42.851860', 'step': 15857, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 128], 'flops': 3797092544000}, 'timestamp': '2025-09-10 02:44:42.881177', 'step': 15857, 'epoch': 3} {'type': 'loss', 'content': 0.06882493197917938, 'timestamp': '2025-09-10 02:44:42.883355', 'step': 15858, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 176], 'flops': 5220903722048}, 'timestamp': '2025-09-10 02:44:42.913745', 'step': 15858, 'epoch': 3} {'type': 'loss', 'content': 0.07597384601831436, 'timestamp': '2025-09-10 02:44:42.917736', 'step': 15859, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 3322488817984}, 'timestamp': '2025-09-10 02:44:42.959449', 'step': 15859, 'epoch': 3} {'type': 'loss', 'content': 0.09579683095216751, 'timestamp': '2025-09-10 02:44:42.983266', 'step': 15860, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 3322488817984}, 'timestamp': '2025-09-10 02:44:43.018693', 'step': 15860, 'epoch': 3} {'type': 'loss', 'content': 0.07332563400268555, 'timestamp': '2025-09-10 02:44:43.021777', 'step': 15861, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 3322488817984}, 'timestamp': '2025-09-10 02:44:43.063573', 'step': 15861, 'epoch': 3} {'type': 'loss', 'content': 0.03752768412232399, 'timestamp': '2025-09-10 02:44:43.067149', 'step': 15862, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 2847885091968}, 'timestamp': '2025-09-10 02:44:43.105820', 'step': 15862, 'epoch': 3} {'type': 'loss', 'content': 0.12195394188165665, 'timestamp': '2025-09-10 02:44:43.109271', 'step': 15863, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 3322488817984}, 'timestamp': '2025-09-10 02:44:43.143550', 'step': 15863, 'epoch': 3} {'type': 'loss', 'content': 0.029658999294042587, 'timestamp': '2025-09-10 02:44:43.169420', 'step': 15864, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 2847885091968}, 'timestamp': '2025-09-10 02:44:43.205351', 'step': 15864, 'epoch': 3} {'type': 'loss', 'content': 0.06266018748283386, 'timestamp': '2025-09-10 02:44:43.212173', 'step': 15865, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 2847885091968}, 'timestamp': '2025-09-10 02:44:43.246449', 'step': 15865, 'epoch': 3} {'type': 'loss', 'content': 0.033694423735141754, 'timestamp': '2025-09-10 02:44:43.250285', 'step': 15866, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 3322488817984}, 'timestamp': '2025-09-10 02:44:43.289454', 'step': 15866, 'epoch': 3} {'type': 'loss', 'content': 0.10189797729253769, 'timestamp': '2025-09-10 02:44:43.291185', 'step': 15867, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 2847885091968}, 'timestamp': '2025-09-10 02:44:43.324226', 'step': 15867, 'epoch': 3} {'type': 'loss', 'content': 0.1123923733830452, 'timestamp': '2025-09-10 02:44:43.348469', 'step': 15868, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 80], 'flops': 2373281365952}, 'timestamp': '2025-09-10 02:44:43.388746', 'step': 15868, 'epoch': 3} {'type': 'loss', 'content': 0.09815047681331635, 'timestamp': '2025-09-10 02:44:43.393103', 'step': 15869, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 3322488817984}, 'timestamp': '2025-09-10 02:44:43.431600', 'step': 15869, 'epoch': 3} {'type': 'loss', 'content': 0.09959018230438232, 'timestamp': '2025-09-10 02:44:43.436679', 'step': 15870, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 128], 'flops': 3797092544000}, 'timestamp': '2025-09-10 02:44:43.480037', 'step': 15870, 'epoch': 3} {'type': 'loss', 'content': 0.11620192229747772, 'timestamp': '2025-09-10 02:44:43.489516', 'step': 15871, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 128], 'flops': 3797092544000}, 'timestamp': '2025-09-10 02:44:43.532839', 'step': 15871, 'epoch': 3} {'type': 'loss', 'content': 0.11962588876485825, 'timestamp': '2025-09-10 02:44:43.811634', 'step': 15872, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 3322488817984}, 'timestamp': '2025-09-10 02:44:43.872214', 'step': 15872, 'epoch': 3} {'type': 'loss', 'content': 0.10686429589986801, 'timestamp': '2025-09-10 02:44:43.877490', 'step': 15873, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 80], 'flops': 2373281365952}, 'timestamp': '2025-09-10 02:44:43.933018', 'step': 15873, 'epoch': 3} {'type': 'loss', 'content': 0.0165251512080431, 'timestamp': '2025-09-10 02:44:43.938796', 'step': 15874, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 3322488817984}, 'timestamp': '2025-09-10 02:44:43.975223', 'step': 15874, 'epoch': 3} {'type': 'loss', 'content': 0.11306439340114594, 'timestamp': '2025-09-10 02:44:43.989967', 'step': 15875, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 3322488817984}, 'timestamp': '2025-09-10 02:44:44.042155', 'step': 15875, 'epoch': 3} {'type': 'loss', 'content': 0.04563962668180466, 'timestamp': '2025-09-10 02:44:44.075697', 'step': 15876, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 3322488817984}, 'timestamp': '2025-09-10 02:44:44.118242', 'step': 15876, 'epoch': 3} {'type': 'loss', 'content': 0.07005741447210312, 'timestamp': '2025-09-10 02:44:44.131160', 'step': 15877, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 128], 'flops': 3797092544000}, 'timestamp': '2025-09-10 02:44:44.175775', 'step': 15877, 'epoch': 3} {'type': 'loss', 'content': 0.09310680627822876, 'timestamp': '2025-09-10 02:44:44.188676', 'step': 15878, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 3322488817984}, 'timestamp': '2025-09-10 02:44:44.237522', 'step': 15878, 'epoch': 3} {'type': 'loss', 'content': 0.15830568969249725, 'timestamp': '2025-09-10 02:44:44.240888', 'step': 15879, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 3322488817984}, 'timestamp': '2025-09-10 02:44:44.276599', 'step': 15879, 'epoch': 3} {'type': 'loss', 'content': 0.05478906258940697, 'timestamp': '2025-09-10 02:44:44.303203', 'step': 15880, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 3322488817984}, 'timestamp': '2025-09-10 02:44:44.333640', 'step': 15880, 'epoch': 3} {'type': 'loss', 'content': 0.10172352939844131, 'timestamp': '2025-09-10 02:44:44.345760', 'step': 15881, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 2847885091968}, 'timestamp': '2025-09-10 02:44:44.388820', 'step': 15881, 'epoch': 3} {'type': 'loss', 'content': 0.06062987819314003, 'timestamp': '2025-09-10 02:44:44.396609', 'step': 15882, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 3322488817984}, 'timestamp': '2025-09-10 02:44:44.442508', 'step': 15882, 'epoch': 3} {'type': 'loss', 'content': 0.12288765609264374, 'timestamp': '2025-09-10 02:44:44.448104', 'step': 15883, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 3322488817984}, 'timestamp': '2025-09-10 02:44:44.502488', 'step': 15883, 'epoch': 3} {'type': 'loss', 'content': 0.10249795764684677, 'timestamp': '2025-09-10 02:44:44.530706', 'step': 15884, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 80], 'flops': 2373281365952}, 'timestamp': '2025-09-10 02:44:44.571838', 'step': 15884, 'epoch': 3} {'type': 'loss', 'content': 0.0694768875837326, 'timestamp': '2025-09-10 02:44:44.577343', 'step': 15885, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 3322488817984}, 'timestamp': '2025-09-10 02:44:44.611213', 'step': 15885, 'epoch': 3} {'type': 'loss', 'content': 0.02536875009536743, 'timestamp': '2025-09-10 02:44:44.613165', 'step': 15886, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 160], 'flops': 4746299996032}, 'timestamp': '2025-09-10 02:44:44.649455', 'step': 15886, 'epoch': 3} {'type': 'loss', 'content': 0.05485519766807556, 'timestamp': '2025-09-10 02:44:44.670250', 'step': 15887, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 128], 'flops': 3797092544000}, 'timestamp': '2025-09-10 02:44:44.714884', 'step': 15887, 'epoch': 3} {'type': 'loss', 'content': 0.10035558044910431, 'timestamp': '2025-09-10 02:44:44.746667', 'step': 15888, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 3322488817984}, 'timestamp': '2025-09-10 02:44:44.790811', 'step': 15888, 'epoch': 3} {'type': 'loss', 'content': 0.049462150782346725, 'timestamp': '2025-09-10 02:44:44.795346', 'step': 15889, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 128], 'flops': 3797092544000}, 'timestamp': '2025-09-10 02:44:44.833176', 'step': 15889, 'epoch': 3} {'type': 'loss', 'content': 0.10605746507644653, 'timestamp': '2025-09-10 02:44:44.839848', 'step': 15890, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 3322488817984}, 'timestamp': '2025-09-10 02:44:44.881261', 'step': 15890, 'epoch': 3} {'type': 'loss', 'content': 0.08878453075885773, 'timestamp': '2025-09-10 02:44:44.903557', 'step': 15891, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 128], 'flops': 3797092544000}, 'timestamp': '2025-09-10 02:44:44.942573', 'step': 15891, 'epoch': 3} {'type': 'loss', 'content': 0.14471504092216492, 'timestamp': '2025-09-10 02:44:44.970099', 'step': 15892, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 2847885091968}, 'timestamp': '2025-09-10 02:44:45.019024', 'step': 15892, 'epoch': 3} {'type': 'loss', 'content': 0.1005384773015976, 'timestamp': '2025-09-10 02:44:45.027509', 'step': 15893, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 2847885091968}, 'timestamp': '2025-09-10 02:44:45.067300', 'step': 15893, 'epoch': 3} {'type': 'loss', 'content': 0.03942720964550972, 'timestamp': '2025-09-10 02:44:45.079358', 'step': 15894, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 128], 'flops': 3797092544000}, 'timestamp': '2025-09-10 02:44:45.126056', 'step': 15894, 'epoch': 3} {'type': 'loss', 'content': 0.09513942897319794, 'timestamp': '2025-09-10 02:44:45.133345', 'step': 15895, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 144], 'flops': 4271696270016}, 'timestamp': '2025-09-10 02:44:45.186526', 'step': 15895, 'epoch': 3} {'type': 'loss', 'content': 0.07677830010652542, 'timestamp': '2025-09-10 02:44:45.213109', 'step': 15896, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 128], 'flops': 3797092544000}, 'timestamp': '2025-09-10 02:44:45.261247', 'step': 15896, 'epoch': 3} {'type': 'loss', 'content': 0.06291437149047852, 'timestamp': '2025-09-10 02:44:45.266028', 'step': 15897, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 2847885091968}, 'timestamp': '2025-09-10 02:44:45.317934', 'step': 15897, 'epoch': 3} {'type': 'loss', 'content': 0.11367172002792358, 'timestamp': '2025-09-10 02:44:45.336494', 'step': 15898, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 3322488817984}, 'timestamp': '2025-09-10 02:44:45.388535', 'step': 15898, 'epoch': 3} {'type': 'loss', 'content': 0.08201824873685837, 'timestamp': '2025-09-10 02:44:45.392800', 'step': 15899, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 2847885091968}, 'timestamp': '2025-09-10 02:44:45.443040', 'step': 15899, 'epoch': 3} {'type': 'loss', 'content': 0.0577283650636673, 'timestamp': '2025-09-10 02:44:45.467682', 'step': 15900, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 2847885091968}, 'timestamp': '2025-09-10 02:44:45.513414', 'step': 15900, 'epoch': 3} {'type': 'loss', 'content': 0.1347997635602951, 'timestamp': '2025-09-10 02:44:45.528217', 'step': 15901, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 3322488817984}, 'timestamp': '2025-09-10 02:44:45.573944', 'step': 15901, 'epoch': 3} {'type': 'loss', 'content': 0.07083244621753693, 'timestamp': '2025-09-10 02:44:45.577848', 'step': 15902, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 2847885091968}, 'timestamp': '2025-09-10 02:44:45.623130', 'step': 15902, 'epoch': 3} {'type': 'loss', 'content': 0.022810889407992363, 'timestamp': '2025-09-10 02:44:45.629823', 'step': 15903, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 2847885091968}, 'timestamp': '2025-09-10 02:44:45.670202', 'step': 15903, 'epoch': 3} {'type': 'loss', 'content': 0.06352915614843369, 'timestamp': '2025-09-10 02:44:45.696569', 'step': 15904, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 128], 'flops': 3797092544000}, 'timestamp': '2025-09-10 02:44:45.745747', 'step': 15904, 'epoch': 3} {'type': 'loss', 'content': 0.10409282147884369, 'timestamp': '2025-09-10 02:44:45.751778', 'step': 15905, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 3322488817984}, 'timestamp': '2025-09-10 02:44:45.806487', 'step': 15905, 'epoch': 3} {'type': 'loss', 'content': 0.06066100299358368, 'timestamp': '2025-09-10 02:44:45.812705', 'step': 15906, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 128], 'flops': 3797092544000}, 'timestamp': '2025-09-10 02:44:45.855509', 'step': 15906, 'epoch': 3} {'type': 'loss', 'content': 0.08148515969514847, 'timestamp': '2025-09-10 02:44:45.868030', 'step': 15907, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 2847885091968}, 'timestamp': '2025-09-10 02:44:45.907523', 'step': 15907, 'epoch': 3} {'type': 'loss', 'content': 0.07434725016355515, 'timestamp': '2025-09-10 02:44:45.934287', 'step': 15908, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 3322488817984}, 'timestamp': '2025-09-10 02:44:45.980993', 'step': 15908, 'epoch': 3} {'type': 'loss', 'content': 0.10135161131620407, 'timestamp': '2025-09-10 02:44:46.001174', 'step': 15909, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 3322488817984}, 'timestamp': '2025-09-10 02:44:46.050019', 'step': 15909, 'epoch': 3} {'type': 'loss', 'content': 0.0768091082572937, 'timestamp': '2025-09-10 02:44:46.065164', 'step': 15910, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 3322488817984}, 'timestamp': '2025-09-10 02:44:46.121734', 'step': 15910, 'epoch': 3} {'type': 'loss', 'content': 0.03830845654010773, 'timestamp': '2025-09-10 02:44:46.130026', 'step': 15911, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 3322488817984}, 'timestamp': '2025-09-10 02:44:46.211583', 'step': 15911, 'epoch': 3} {'type': 'loss', 'content': 0.10291620343923569, 'timestamp': '2025-09-10 02:44:46.245206', 'step': 15912, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 128], 'flops': 3797092544000}, 'timestamp': '2025-09-10 02:44:46.296502', 'step': 15912, 'epoch': 3} {'type': 'loss', 'content': 0.06728433817625046, 'timestamp': '2025-09-10 02:44:46.301315', 'step': 15913, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 144], 'flops': 4271696270016}, 'timestamp': '2025-09-10 02:44:46.350367', 'step': 15913, 'epoch': 3} {'type': 'loss', 'content': 0.07855326682329178, 'timestamp': '2025-09-10 02:44:46.354277', 'step': 15914, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 3322488817984}, 'timestamp': '2025-09-10 02:44:46.399995', 'step': 15914, 'epoch': 3} {'type': 'loss', 'content': 0.05062378942966461, 'timestamp': '2025-09-10 02:44:46.407364', 'step': 15915, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 144], 'flops': 4271696270016}, 'timestamp': '2025-09-10 02:44:46.451556', 'step': 15915, 'epoch': 3} {'type': 'loss', 'content': 0.0892680436372757, 'timestamp': '2025-09-10 02:44:46.481599', 'step': 15916, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 128], 'flops': 3797092544000}, 'timestamp': '2025-09-10 02:44:46.526194', 'step': 15916, 'epoch': 3} {'type': 'loss', 'content': 0.06282541900873184, 'timestamp': '2025-09-10 02:44:46.532849', 'step': 15917, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 128], 'flops': 3797092544000}, 'timestamp': '2025-09-10 02:44:46.583029', 'step': 15917, 'epoch': 3} {'type': 'loss', 'content': 0.11286428570747375, 'timestamp': '2025-09-10 02:44:46.606039', 'step': 15918, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 3322488817984}, 'timestamp': '2025-09-10 02:44:46.665888', 'step': 15918, 'epoch': 3} {'type': 'loss', 'content': 0.08887328207492828, 'timestamp': '2025-09-10 02:44:46.679690', 'step': 15919, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 2847885091968}, 'timestamp': '2025-09-10 02:44:46.727603', 'step': 15919, 'epoch': 3} {'type': 'loss', 'content': 0.05777931585907936, 'timestamp': '2025-09-10 02:44:46.756773', 'step': 15920, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 128], 'flops': 3797092544000}, 'timestamp': '2025-09-10 02:44:46.805867', 'step': 15920, 'epoch': 3} {'type': 'loss', 'content': 0.07204059511423111, 'timestamp': '2025-09-10 02:44:46.812099', 'step': 15921, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 128], 'flops': 3797092544000}, 'timestamp': '2025-09-10 02:44:46.873276', 'step': 15921, 'epoch': 3} {'type': 'loss', 'content': 0.08592796325683594, 'timestamp': '2025-09-10 02:44:46.881760', 'step': 15922, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 2847885091968}, 'timestamp': '2025-09-10 02:44:46.927039', 'step': 15922, 'epoch': 3} {'type': 'loss', 'content': 0.13718314468860626, 'timestamp': '2025-09-10 02:44:46.933464', 'step': 15923, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 128], 'flops': 3797092544000}, 'timestamp': '2025-09-10 02:44:46.971945', 'step': 15923, 'epoch': 3} {'type': 'loss', 'content': 0.053618043661117554, 'timestamp': '2025-09-10 02:44:46.997343', 'step': 15924, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 2847885091968}, 'timestamp': '2025-09-10 02:44:47.028403', 'step': 15924, 'epoch': 3} {'type': 'loss', 'content': 0.03720042109489441, 'timestamp': '2025-09-10 02:44:47.031254', 'step': 15925, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 2847885091968}, 'timestamp': '2025-09-10 02:44:47.063620', 'step': 15925, 'epoch': 3} {'type': 'loss', 'content': 0.06411908566951752, 'timestamp': '2025-09-10 02:44:47.066231', 'step': 15926, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 128], 'flops': 3797092544000}, 'timestamp': '2025-09-10 02:44:47.097563', 'step': 15926, 'epoch': 3} {'type': 'loss', 'content': 0.07282817363739014, 'timestamp': '2025-09-10 02:44:47.100021', 'step': 15927, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 2847885091968}, 'timestamp': '2025-09-10 02:44:47.129993', 'step': 15927, 'epoch': 3} {'type': 'loss', 'content': 0.05376073345541954, 'timestamp': '2025-09-10 02:44:47.154336', 'step': 15928, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 3322488817984}, 'timestamp': '2025-09-10 02:44:47.185930', 'step': 15928, 'epoch': 3} {'type': 'loss', 'content': 0.07762987166643143, 'timestamp': '2025-09-10 02:44:47.188767', 'step': 15929, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 128], 'flops': 3797092544000}, 'timestamp': '2025-09-10 02:44:47.220877', 'step': 15929, 'epoch': 3} {'type': 'loss', 'content': 0.0530029833316803, 'timestamp': '2025-09-10 02:44:47.223095', 'step': 15930, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 144], 'flops': 4271696270016}, 'timestamp': '2025-09-10 02:44:47.253336', 'step': 15930, 'epoch': 3} {'type': 'loss', 'content': 0.17202454805374146, 'timestamp': '2025-09-10 02:44:47.256648', 'step': 15931, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 3322488817984}, 'timestamp': '2025-09-10 02:44:47.287212', 'step': 15931, 'epoch': 3} {'type': 'loss', 'content': 0.049212124198675156, 'timestamp': '2025-09-10 02:44:47.310681', 'step': 15932, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 80], 'flops': 2373281365952}, 'timestamp': '2025-09-10 02:44:47.342270', 'step': 15932, 'epoch': 3} {'type': 'loss', 'content': 0.08314991742372513, 'timestamp': '2025-09-10 02:44:47.344918', 'step': 15933, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 2847885091968}, 'timestamp': '2025-09-10 02:44:47.377655', 'step': 15933, 'epoch': 3} {'type': 'loss', 'content': 0.07384290546178818, 'timestamp': '2025-09-10 02:44:47.381616', 'step': 15934, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 2847885091968}, 'timestamp': '2025-09-10 02:44:47.417565', 'step': 15934, 'epoch': 3} {'type': 'loss', 'content': 0.06828988343477249, 'timestamp': '2025-09-10 02:44:47.419898', 'step': 15935, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 128], 'flops': 3797092544000}, 'timestamp': '2025-09-10 02:44:47.450728', 'step': 15935, 'epoch': 3} {'type': 'loss', 'content': 0.10281511396169662, 'timestamp': '2025-09-10 02:44:47.474751', 'step': 15936, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 128], 'flops': 3797092544000}, 'timestamp': '2025-09-10 02:44:47.509183', 'step': 15936, 'epoch': 3} {'type': 'loss', 'content': 0.021713605150580406, 'timestamp': '2025-09-10 02:44:47.511601', 'step': 15937, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 3322488817984}, 'timestamp': '2025-09-10 02:44:47.544586', 'step': 15937, 'epoch': 3} {'type': 'loss', 'content': 0.04735404625535011, 'timestamp': '2025-09-10 02:44:47.546789', 'step': 15938, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 2847885091968}, 'timestamp': '2025-09-10 02:44:47.576763', 'step': 15938, 'epoch': 3} {'type': 'loss', 'content': 0.05189945548772812, 'timestamp': '2025-09-10 02:44:47.579629', 'step': 15939, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 128], 'flops': 3797092544000}, 'timestamp': '2025-09-10 02:44:47.611868', 'step': 15939, 'epoch': 3} {'type': 'loss', 'content': 0.12565214931964874, 'timestamp': '2025-09-10 02:44:47.637589', 'step': 15940, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 128], 'flops': 3797092544000}, 'timestamp': '2025-09-10 02:44:47.669470', 'step': 15940, 'epoch': 3} {'type': 'loss', 'content': 0.07517530024051666, 'timestamp': '2025-09-10 02:44:47.671601', 'step': 15941, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 3322488817984}, 'timestamp': '2025-09-10 02:44:47.708369', 'step': 15941, 'epoch': 3} {'type': 'loss', 'content': 0.11216124147176743, 'timestamp': '2025-09-10 02:44:47.710389', 'step': 15942, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 128], 'flops': 3797092544000}, 'timestamp': '2025-09-10 02:44:47.740873', 'step': 15942, 'epoch': 3} {'type': 'loss', 'content': 0.08212769776582718, 'timestamp': '2025-09-10 02:44:47.745405', 'step': 15943, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 3322488817984}, 'timestamp': '2025-09-10 02:44:47.776056', 'step': 15943, 'epoch': 3} {'type': 'loss', 'content': 0.04957626014947891, 'timestamp': '2025-09-10 02:44:47.800027', 'step': 15944, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 2847885091968}, 'timestamp': '2025-09-10 02:44:47.834312', 'step': 15944, 'epoch': 3} {'type': 'loss', 'content': 0.08171539008617401, 'timestamp': '2025-09-10 02:44:47.837086', 'step': 15945, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 2847885091968}, 'timestamp': '2025-09-10 02:44:47.868129', 'step': 15945, 'epoch': 3} {'type': 'loss', 'content': 0.06255777925252914, 'timestamp': '2025-09-10 02:44:47.870760', 'step': 15946, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 3322488817984}, 'timestamp': '2025-09-10 02:44:47.901680', 'step': 15946, 'epoch': 3} {'type': 'loss', 'content': 0.0550859235227108, 'timestamp': '2025-09-10 02:44:47.904604', 'step': 15947, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 3322488817984}, 'timestamp': '2025-09-10 02:44:47.936890', 'step': 15947, 'epoch': 3} {'type': 'loss', 'content': 0.11280348896980286, 'timestamp': '2025-09-10 02:44:47.960510', 'step': 15948, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 3322488817984}, 'timestamp': '2025-09-10 02:44:47.991131', 'step': 15948, 'epoch': 3} {'type': 'loss', 'content': 0.11258242279291153, 'timestamp': '2025-09-10 02:44:47.993141', 'step': 15949, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 160], 'flops': 4746299996032}, 'timestamp': '2025-09-10 02:44:48.022402', 'step': 15949, 'epoch': 3} {'type': 'loss', 'content': 0.043088555335998535, 'timestamp': '2025-09-10 02:44:48.025144', 'step': 15950, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 2847885091968}, 'timestamp': '2025-09-10 02:44:48.055219', 'step': 15950, 'epoch': 3} {'type': 'loss', 'content': 0.14870916306972504, 'timestamp': '2025-09-10 02:44:48.058013', 'step': 15951, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 3322488817984}, 'timestamp': '2025-09-10 02:44:48.091987', 'step': 15951, 'epoch': 3} {'type': 'loss', 'content': 0.09368077665567398, 'timestamp': '2025-09-10 02:44:48.115590', 'step': 15952, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 128], 'flops': 3797092544000}, 'timestamp': '2025-09-10 02:44:48.146610', 'step': 15952, 'epoch': 3} {'type': 'loss', 'content': 0.18182171881198883, 'timestamp': '2025-09-10 02:44:48.148911', 'step': 15953, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 3322488817984}, 'timestamp': '2025-09-10 02:44:48.180310', 'step': 15953, 'epoch': 3} {'type': 'loss', 'content': 0.06909579038619995, 'timestamp': '2025-09-10 02:44:48.182500', 'step': 15954, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 128], 'flops': 3797092544000}, 'timestamp': '2025-09-10 02:44:48.213707', 'step': 15954, 'epoch': 3} {'type': 'loss', 'content': 0.10996627807617188, 'timestamp': '2025-09-10 02:44:48.217579', 'step': 15955, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 2847885091968}, 'timestamp': '2025-09-10 02:44:48.248454', 'step': 15955, 'epoch': 3} {'type': 'loss', 'content': 0.058747246861457825, 'timestamp': '2025-09-10 02:44:48.273278', 'step': 15956, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 2847885091968}, 'timestamp': '2025-09-10 02:44:48.304979', 'step': 15956, 'epoch': 3} {'type': 'loss', 'content': 0.05337167903780937, 'timestamp': '2025-09-10 02:44:48.307134', 'step': 15957, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 128], 'flops': 3797092544000}, 'timestamp': '2025-09-10 02:44:48.337482', 'step': 15957, 'epoch': 3} {'type': 'loss', 'content': 0.050875600427389145, 'timestamp': '2025-09-10 02:44:48.340009', 'step': 15958, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 3322488817984}, 'timestamp': '2025-09-10 02:44:48.376287', 'step': 15958, 'epoch': 3} {'type': 'loss', 'content': 0.0882965549826622, 'timestamp': '2025-09-10 02:44:48.378660', 'step': 15959, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 176], 'flops': 5220903722048}, 'timestamp': '2025-09-10 02:44:48.408824', 'step': 15959, 'epoch': 3} {'type': 'loss', 'content': 0.030996525660157204, 'timestamp': '2025-09-10 02:44:48.433657', 'step': 15960, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 3322488817984}, 'timestamp': '2025-09-10 02:44:48.470189', 'step': 15960, 'epoch': 3} {'type': 'loss', 'content': 0.12491586804389954, 'timestamp': '2025-09-10 02:44:48.472363', 'step': 15961, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 3322488817984}, 'timestamp': '2025-09-10 02:44:48.505524', 'step': 15961, 'epoch': 3} {'type': 'loss', 'content': 0.10069581121206284, 'timestamp': '2025-09-10 02:44:48.507495', 'step': 15962, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 144], 'flops': 4271696270016}, 'timestamp': '2025-09-10 02:44:48.537048', 'step': 15962, 'epoch': 3} {'type': 'loss', 'content': 0.05144790560007095, 'timestamp': '2025-09-10 02:44:48.539395', 'step': 15963, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 3322488817984}, 'timestamp': '2025-09-10 02:44:48.571300', 'step': 15963, 'epoch': 3} {'type': 'loss', 'content': 0.11534518748521805, 'timestamp': '2025-09-10 02:44:48.595319', 'step': 15964, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 128], 'flops': 3797092544000}, 'timestamp': '2025-09-10 02:44:48.632172', 'step': 15964, 'epoch': 3} {'type': 'loss', 'content': 0.11000964790582657, 'timestamp': '2025-09-10 02:44:48.634762', 'step': 15965, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 2847885091968}, 'timestamp': '2025-09-10 02:44:48.667008', 'step': 15965, 'epoch': 3} {'type': 'loss', 'content': 0.026230575516819954, 'timestamp': '2025-09-10 02:44:48.669096', 'step': 15966, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 2847885091968}, 'timestamp': '2025-09-10 02:44:48.700882', 'step': 15966, 'epoch': 3} {'type': 'loss', 'content': 0.13521410524845123, 'timestamp': '2025-09-10 02:44:48.702858', 'step': 15967, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 128], 'flops': 3797092544000}, 'timestamp': '2025-09-10 02:44:48.732550', 'step': 15967, 'epoch': 3} {'type': 'loss', 'content': 0.05432504415512085, 'timestamp': '2025-09-10 02:44:48.756423', 'step': 15968, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 2847885091968}, 'timestamp': '2025-09-10 02:44:48.786346', 'step': 15968, 'epoch': 3} {'type': 'loss', 'content': 0.03731248900294304, 'timestamp': '2025-09-10 02:44:48.788530', 'step': 15969, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 128], 'flops': 3797092544000}, 'timestamp': '2025-09-10 02:44:48.818999', 'step': 15969, 'epoch': 3} {'type': 'loss', 'content': 0.06121710687875748, 'timestamp': '2025-09-10 02:44:48.822997', 'step': 15970, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 2847885091968}, 'timestamp': '2025-09-10 02:44:48.856122', 'step': 15970, 'epoch': 3} {'type': 'loss', 'content': 0.080679751932621, 'timestamp': '2025-09-10 02:44:48.860094', 'step': 15971, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 128], 'flops': 3797092544000}, 'timestamp': '2025-09-10 02:44:48.890343', 'step': 15971, 'epoch': 3} {'type': 'loss', 'content': 0.056443747133016586, 'timestamp': '2025-09-10 02:44:48.914652', 'step': 15972, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 2847885091968}, 'timestamp': '2025-09-10 02:44:48.947424', 'step': 15972, 'epoch': 3} {'type': 'loss', 'content': 0.07407032698392868, 'timestamp': '2025-09-10 02:44:48.949914', 'step': 15973, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 128], 'flops': 3797092544000}, 'timestamp': '2025-09-10 02:44:48.980004', 'step': 15973, 'epoch': 3} {'type': 'loss', 'content': 0.06660908460617065, 'timestamp': '2025-09-10 02:44:48.982484', 'step': 15974, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 3322488817984}, 'timestamp': '2025-09-10 02:44:49.014216', 'step': 15974, 'epoch': 3} {'type': 'loss', 'content': 0.08446967601776123, 'timestamp': '2025-09-10 02:44:49.018187', 'step': 15975, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 176], 'flops': 5220903722048}, 'timestamp': '2025-09-10 02:44:49.052692', 'step': 15975, 'epoch': 3} {'type': 'loss', 'content': 0.14137238264083862, 'timestamp': '2025-09-10 02:44:49.077566', 'step': 15976, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 3322488817984}, 'timestamp': '2025-09-10 02:44:49.107798', 'step': 15976, 'epoch': 3} {'type': 'loss', 'content': 0.18265439569950104, 'timestamp': '2025-09-10 02:44:49.109641', 'step': 15977, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 3322488817984}, 'timestamp': '2025-09-10 02:44:49.140506', 'step': 15977, 'epoch': 3} {'type': 'loss', 'content': 0.048488959670066833, 'timestamp': '2025-09-10 02:44:49.142880', 'step': 15978, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 2847885091968}, 'timestamp': '2025-09-10 02:44:49.173638', 'step': 15978, 'epoch': 3} {'type': 'loss', 'content': 0.06815177947282791, 'timestamp': '2025-09-10 02:44:49.176803', 'step': 15979, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 128], 'flops': 3797092544000}, 'timestamp': '2025-09-10 02:44:49.208689', 'step': 15979, 'epoch': 3} {'type': 'loss', 'content': 0.08709671348333359, 'timestamp': '2025-09-10 02:44:49.232018', 'step': 15980, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 2847885091968}, 'timestamp': '2025-09-10 02:44:49.262571', 'step': 15980, 'epoch': 3} {'type': 'loss', 'content': 0.20716680586338043, 'timestamp': '2025-09-10 02:44:49.264888', 'step': 15981, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 2847885091968}, 'timestamp': '2025-09-10 02:44:49.295026', 'step': 15981, 'epoch': 3} {'type': 'loss', 'content': 0.0534152053296566, 'timestamp': '2025-09-10 02:44:49.298260', 'step': 15982, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 3322488817984}, 'timestamp': '2025-09-10 02:44:49.331227', 'step': 15982, 'epoch': 3} {'type': 'loss', 'content': 0.04859185591340065, 'timestamp': '2025-09-10 02:44:49.334474', 'step': 15983, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 2847885091968}, 'timestamp': '2025-09-10 02:44:49.376091', 'step': 15983, 'epoch': 3} {'type': 'loss', 'content': 0.0728750079870224, 'timestamp': '2025-09-10 02:44:49.402452', 'step': 15984, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 3322488817984}, 'timestamp': '2025-09-10 02:44:49.436905', 'step': 15984, 'epoch': 3} {'type': 'loss', 'content': 0.0671815276145935, 'timestamp': '2025-09-10 02:44:49.439855', 'step': 15985, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 3322488817984}, 'timestamp': '2025-09-10 02:44:49.472332', 'step': 15985, 'epoch': 3} {'type': 'loss', 'content': 0.0956348404288292, 'timestamp': '2025-09-10 02:44:49.476278', 'step': 15986, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 144], 'flops': 4271696270016}, 'timestamp': '2025-09-10 02:44:49.512004', 'step': 15986, 'epoch': 3} {'type': 'loss', 'content': 0.07052727788686752, 'timestamp': '2025-09-10 02:44:49.515204', 'step': 15987, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 2847885091968}, 'timestamp': '2025-09-10 02:44:49.547059', 'step': 15987, 'epoch': 3} {'type': 'loss', 'content': 0.13422313332557678, 'timestamp': '2025-09-10 02:44:49.570753', 'step': 15988, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 2847885091968}, 'timestamp': '2025-09-10 02:44:49.603088', 'step': 15988, 'epoch': 3} {'type': 'loss', 'content': 0.09188194572925568, 'timestamp': '2025-09-10 02:44:49.604934', 'step': 15989, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 2847885091968}, 'timestamp': '2025-09-10 02:44:49.645446', 'step': 15989, 'epoch': 3} {'type': 'loss', 'content': 0.07927402853965759, 'timestamp': '2025-09-10 02:44:49.647701', 'step': 15990, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 3322488817984}, 'timestamp': '2025-09-10 02:44:49.678139', 'step': 15990, 'epoch': 3} {'type': 'loss', 'content': 0.08679654449224472, 'timestamp': '2025-09-10 02:44:49.680363', 'step': 15991, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 2847885091968}, 'timestamp': '2025-09-10 02:44:49.710414', 'step': 15991, 'epoch': 3} {'type': 'loss', 'content': 0.0573074072599411, 'timestamp': '2025-09-10 02:44:49.733797', 'step': 15992, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 3322488817984}, 'timestamp': '2025-09-10 02:44:49.763880', 'step': 15992, 'epoch': 3} {'type': 'loss', 'content': 0.11316026747226715, 'timestamp': '2025-09-10 02:44:49.766135', 'step': 15993, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 3322488817984}, 'timestamp': '2025-09-10 02:44:49.800809', 'step': 15993, 'epoch': 3} {'type': 'loss', 'content': 0.08761676400899887, 'timestamp': '2025-09-10 02:44:49.803350', 'step': 15994, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 128], 'flops': 3797092544000}, 'timestamp': '2025-09-10 02:44:49.834837', 'step': 15994, 'epoch': 3} {'type': 'loss', 'content': 0.08846376091241837, 'timestamp': '2025-09-10 02:44:49.837353', 'step': 15995, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 3322488817984}, 'timestamp': '2025-09-10 02:44:49.868753', 'step': 15995, 'epoch': 3} {'type': 'loss', 'content': 0.07382813841104507, 'timestamp': '2025-09-10 02:44:49.892550', 'step': 15996, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 3322488817984}, 'timestamp': '2025-09-10 02:44:49.927288', 'step': 15996, 'epoch': 3} {'type': 'loss', 'content': 0.11996567994356155, 'timestamp': '2025-09-10 02:44:49.929093', 'step': 15997, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 3322488817984}, 'timestamp': '2025-09-10 02:44:49.958818', 'step': 15997, 'epoch': 3} {'type': 'loss', 'content': 0.05007101222872734, 'timestamp': '2025-09-10 02:44:49.961574', 'step': 15998, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 128], 'flops': 3797092544000}, 'timestamp': '2025-09-10 02:44:49.992966', 'step': 15998, 'epoch': 3} {'type': 'loss', 'content': 0.019077518954873085, 'timestamp': '2025-09-10 02:44:49.996193', 'step': 15999, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 128], 'flops': 3797092544000}, 'timestamp': '2025-09-10 02:44:50.030796', 'step': 15999, 'epoch': 3} {'type': 'loss', 'content': 0.1687820851802826, 'timestamp': '2025-09-10 02:44:50.057131', 'step': 16000, 'epoch': 3} {'type': 'info', 'content': 'Checkpoint saved at step 16000', 'timestamp': '2025-09-10 02:44:55.291626', 'step': 16000, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 144], 'flops': 4271696270016}, 'timestamp': '2025-09-10 02:44:55.364334', 'step': 16000, 'epoch': 3} {'type': 'loss', 'content': 0.046267829835414886, 'timestamp': '2025-09-10 02:44:55.374388', 'step': 16001, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 128], 'flops': 3797092544000}, 'timestamp': '2025-09-10 02:44:55.428008', 'step': 16001, 'epoch': 3} {'type': 'loss', 'content': 0.017336629331111908, 'timestamp': '2025-09-10 02:44:55.437538', 'step': 16002, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 2847885091968}, 'timestamp': '2025-09-10 02:44:55.491865', 'step': 16002, 'epoch': 3} {'type': 'loss', 'content': 0.12415385246276855, 'timestamp': '2025-09-10 02:44:55.503991', 'step': 16003, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 128], 'flops': 3797092544000}, 'timestamp': '2025-09-10 02:44:55.559490', 'step': 16003, 'epoch': 3} {'type': 'loss', 'content': 0.02704630419611931, 'timestamp': '2025-09-10 02:44:55.587196', 'step': 16004, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 3322488817984}, 'timestamp': '2025-09-10 02:44:55.641147', 'step': 16004, 'epoch': 3} {'type': 'loss', 'content': 0.08339156955480576, 'timestamp': '2025-09-10 02:44:55.644633', 'step': 16005, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 144], 'flops': 4271696270016}, 'timestamp': '2025-09-10 02:44:55.700845', 'step': 16005, 'epoch': 3} {'type': 'loss', 'content': 0.059944357722997665, 'timestamp': '2025-09-10 02:44:55.707777', 'step': 16006, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 128], 'flops': 3797092544000}, 'timestamp': '2025-09-10 02:44:55.757759', 'step': 16006, 'epoch': 3} {'type': 'loss', 'content': 0.10072312504053116, 'timestamp': '2025-09-10 02:44:55.765568', 'step': 16007, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 3322488817984}, 'timestamp': '2025-09-10 02:44:55.807049', 'step': 16007, 'epoch': 3} {'type': 'loss', 'content': 0.06747133284807205, 'timestamp': '2025-09-10 02:44:55.841390', 'step': 16008, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 80], 'flops': 2373281365952}, 'timestamp': '2025-09-10 02:44:55.896855', 'step': 16008, 'epoch': 3} {'type': 'loss', 'content': 0.05004734918475151, 'timestamp': '2025-09-10 02:44:55.903749', 'step': 16009, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 2847885091968}, 'timestamp': '2025-09-10 02:44:55.957371', 'step': 16009, 'epoch': 3} {'type': 'loss', 'content': 0.10439164936542511, 'timestamp': '2025-09-10 02:44:55.965416', 'step': 16010, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 3322488817984}, 'timestamp': '2025-09-10 02:44:56.012874', 'step': 16010, 'epoch': 3} {'type': 'loss', 'content': 0.062105461955070496, 'timestamp': '2025-09-10 02:44:56.023098', 'step': 16011, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 160], 'flops': 4746299996032}, 'timestamp': '2025-09-10 02:44:56.072305', 'step': 16011, 'epoch': 3} {'type': 'loss', 'content': 0.06911828368902206, 'timestamp': '2025-09-10 02:44:56.102912', 'step': 16012, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 3322488817984}, 'timestamp': '2025-09-10 02:44:56.156439', 'step': 16012, 'epoch': 3} {'type': 'loss', 'content': 0.08039256185293198, 'timestamp': '2025-09-10 02:44:56.161842', 'step': 16013, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 2847885091968}, 'timestamp': '2025-09-10 02:44:56.210309', 'step': 16013, 'epoch': 3} {'type': 'loss', 'content': 0.05736725032329559, 'timestamp': '2025-09-10 02:44:56.217325', 'step': 16014, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 2847885091968}, 'timestamp': '2025-09-10 02:44:56.271037', 'step': 16014, 'epoch': 3} {'type': 'loss', 'content': 0.06980238109827042, 'timestamp': '2025-09-10 02:44:56.275735', 'step': 16015, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 128], 'flops': 3797092544000}, 'timestamp': '2025-09-10 02:44:56.314214', 'step': 16015, 'epoch': 3} {'type': 'loss', 'content': 0.07144460827112198, 'timestamp': '2025-09-10 02:44:56.348439', 'step': 16016, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 80], 'flops': 2373281365952}, 'timestamp': '2025-09-10 02:44:56.391549', 'step': 16016, 'epoch': 3} {'type': 'loss', 'content': 0.10194772481918335, 'timestamp': '2025-09-10 02:44:56.396779', 'step': 16017, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 144], 'flops': 4271696270016}, 'timestamp': '2025-09-10 02:44:56.446420', 'step': 16017, 'epoch': 3} {'type': 'loss', 'content': 0.08332239836454391, 'timestamp': '2025-09-10 02:44:56.453976', 'step': 16018, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 128], 'flops': 3797092544000}, 'timestamp': '2025-09-10 02:44:56.502296', 'step': 16018, 'epoch': 3} {'type': 'loss', 'content': 0.07422374188899994, 'timestamp': '2025-09-10 02:44:56.519487', 'step': 16019, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 2847885091968}, 'timestamp': '2025-09-10 02:44:56.560181', 'step': 16019, 'epoch': 3} {'type': 'loss', 'content': 0.07469898462295532, 'timestamp': '2025-09-10 02:44:56.586563', 'step': 16020, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 144], 'flops': 4271696270016}, 'timestamp': '2025-09-10 02:44:56.644457', 'step': 16020, 'epoch': 3} {'type': 'loss', 'content': 0.03735532984137535, 'timestamp': '2025-09-10 02:44:56.651766', 'step': 16021, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 128], 'flops': 3797092544000}, 'timestamp': '2025-09-10 02:44:56.701017', 'step': 16021, 'epoch': 3} {'type': 'loss', 'content': 0.03616264835000038, 'timestamp': '2025-09-10 02:44:56.706648', 'step': 16022, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 80], 'flops': 2373281365952}, 'timestamp': '2025-09-10 02:44:56.761652', 'step': 16022, 'epoch': 3} {'type': 'loss', 'content': 0.11149482429027557, 'timestamp': '2025-09-10 02:44:56.770587', 'step': 16023, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 3322488817984}, 'timestamp': '2025-09-10 02:44:56.825791', 'step': 16023, 'epoch': 3} {'type': 'loss', 'content': 0.04982108250260353, 'timestamp': '2025-09-10 02:44:56.857103', 'step': 16024, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 128], 'flops': 3797092544000}, 'timestamp': '2025-09-10 02:44:56.906322', 'step': 16024, 'epoch': 3} {'type': 'loss', 'content': 0.07587599754333496, 'timestamp': '2025-09-10 02:44:56.923182', 'step': 16025, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 128], 'flops': 3797092544000}, 'timestamp': '2025-09-10 02:44:56.979209', 'step': 16025, 'epoch': 3} {'type': 'loss', 'content': 0.06812840700149536, 'timestamp': '2025-09-10 02:44:56.990518', 'step': 16026, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 2847885091968}, 'timestamp': '2025-09-10 02:44:57.041997', 'step': 16026, 'epoch': 3} {'type': 'loss', 'content': 0.08209340274333954, 'timestamp': '2025-09-10 02:44:57.049223', 'step': 16027, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 144], 'flops': 4271696270016}, 'timestamp': '2025-09-10 02:44:57.115627', 'step': 16027, 'epoch': 3} {'type': 'loss', 'content': 0.10780255496501923, 'timestamp': '2025-09-10 02:44:57.142593', 'step': 16028, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 128], 'flops': 3797092544000}, 'timestamp': '2025-09-10 02:44:57.189652', 'step': 16028, 'epoch': 3} {'type': 'loss', 'content': 0.08435641974210739, 'timestamp': '2025-09-10 02:44:57.195740', 'step': 16029, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 2847885091968}, 'timestamp': '2025-09-10 02:44:57.248430', 'step': 16029, 'epoch': 3} {'type': 'loss', 'content': 0.07424074411392212, 'timestamp': '2025-09-10 02:44:57.257913', 'step': 16030, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 160], 'flops': 4746299996032}, 'timestamp': '2025-09-10 02:44:57.308886', 'step': 16030, 'epoch': 3} {'type': 'loss', 'content': 0.10320273041725159, 'timestamp': '2025-09-10 02:44:57.317697', 'step': 16031, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 144], 'flops': 4271696270016}, 'timestamp': '2025-09-10 02:44:57.379257', 'step': 16031, 'epoch': 3} {'type': 'loss', 'content': 0.05673126131296158, 'timestamp': '2025-09-10 02:44:57.407672', 'step': 16032, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 2847885091968}, 'timestamp': '2025-09-10 02:44:57.470216', 'step': 16032, 'epoch': 3} {'type': 'loss', 'content': 0.1002499982714653, 'timestamp': '2025-09-10 02:44:57.479541', 'step': 16033, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 3322488817984}, 'timestamp': '2025-09-10 02:44:57.539000', 'step': 16033, 'epoch': 3} {'type': 'loss', 'content': 0.06594493240118027, 'timestamp': '2025-09-10 02:44:57.548427', 'step': 16034, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 80], 'flops': 2373281365952}, 'timestamp': '2025-09-10 02:44:57.600780', 'step': 16034, 'epoch': 3} {'type': 'loss', 'content': 0.05910586565732956, 'timestamp': '2025-09-10 02:44:57.614498', 'step': 16035, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 128], 'flops': 3797092544000}, 'timestamp': '2025-09-10 02:44:57.660542', 'step': 16035, 'epoch': 3} {'type': 'loss', 'content': 0.06960926949977875, 'timestamp': '2025-09-10 02:44:57.698579', 'step': 16036, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 128], 'flops': 3797092544000}, 'timestamp': '2025-09-10 02:44:57.764288', 'step': 16036, 'epoch': 3} {'type': 'loss', 'content': 0.05245120823383331, 'timestamp': '2025-09-10 02:44:57.772967', 'step': 16037, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 3322488817984}, 'timestamp': '2025-09-10 02:44:57.817767', 'step': 16037, 'epoch': 3} {'type': 'loss', 'content': 0.15903699398040771, 'timestamp': '2025-09-10 02:44:57.827363', 'step': 16038, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 128], 'flops': 3797092544000}, 'timestamp': '2025-09-10 02:44:57.874231', 'step': 16038, 'epoch': 3} {'type': 'loss', 'content': 0.11220542341470718, 'timestamp': '2025-09-10 02:44:57.879694', 'step': 16039, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 3322488817984}, 'timestamp': '2025-09-10 02:44:57.930652', 'step': 16039, 'epoch': 3} {'type': 'loss', 'content': 0.05970983952283859, 'timestamp': '2025-09-10 02:44:57.959349', 'step': 16040, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 2847885091968}, 'timestamp': '2025-09-10 02:44:58.016709', 'step': 16040, 'epoch': 3} {'type': 'loss', 'content': 0.11449168622493744, 'timestamp': '2025-09-10 02:44:58.027456', 'step': 16041, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 128], 'flops': 3797092544000}, 'timestamp': '2025-09-10 02:44:58.073495', 'step': 16041, 'epoch': 3} {'type': 'loss', 'content': 0.10884932428598404, 'timestamp': '2025-09-10 02:44:58.079272', 'step': 16042, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 128], 'flops': 3797092544000}, 'timestamp': '2025-09-10 02:44:58.130902', 'step': 16042, 'epoch': 3} {'type': 'loss', 'content': 0.06794953346252441, 'timestamp': '2025-09-10 02:44:58.135099', 'step': 16043, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 2847885091968}, 'timestamp': '2025-09-10 02:44:58.178941', 'step': 16043, 'epoch': 3} {'type': 'loss', 'content': 0.1485120803117752, 'timestamp': '2025-09-10 02:44:58.205548', 'step': 16044, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 128], 'flops': 3797092544000}, 'timestamp': '2025-09-10 02:44:58.262326', 'step': 16044, 'epoch': 3} {'type': 'loss', 'content': 0.07722306251525879, 'timestamp': '2025-09-10 02:44:58.269726', 'step': 16045, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 3322488817984}, 'timestamp': '2025-09-10 02:44:58.317658', 'step': 16045, 'epoch': 3} {'type': 'loss', 'content': 0.1024857759475708, 'timestamp': '2025-09-10 02:44:58.333095', 'step': 16046, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 160], 'flops': 4746299996032}, 'timestamp': '2025-09-10 02:44:58.380367', 'step': 16046, 'epoch': 3} {'type': 'loss', 'content': 0.10233526676893234, 'timestamp': '2025-09-10 02:44:58.386185', 'step': 16047, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 3322488817984}, 'timestamp': '2025-09-10 02:44:58.427186', 'step': 16047, 'epoch': 3} {'type': 'loss', 'content': 0.09560087323188782, 'timestamp': '2025-09-10 02:44:58.456679', 'step': 16048, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 3322488817984}, 'timestamp': '2025-09-10 02:44:58.521626', 'step': 16048, 'epoch': 3} {'type': 'loss', 'content': 0.08595655858516693, 'timestamp': '2025-09-10 02:44:58.528375', 'step': 16049, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 128], 'flops': 3797092544000}, 'timestamp': '2025-09-10 02:44:58.575685', 'step': 16049, 'epoch': 3} {'type': 'loss', 'content': 0.06532825529575348, 'timestamp': '2025-09-10 02:44:58.584107', 'step': 16050, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 80], 'flops': 2373281365952}, 'timestamp': '2025-09-10 02:44:58.623816', 'step': 16050, 'epoch': 3} {'type': 'loss', 'content': 0.1277337521314621, 'timestamp': '2025-09-10 02:44:58.635285', 'step': 16051, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 3322488817984}, 'timestamp': '2025-09-10 02:44:58.679164', 'step': 16051, 'epoch': 3} {'type': 'loss', 'content': 0.08580014854669571, 'timestamp': '2025-09-10 02:44:58.707540', 'step': 16052, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 2847885091968}, 'timestamp': '2025-09-10 02:44:58.766020', 'step': 16052, 'epoch': 3} {'type': 'loss', 'content': 0.14846384525299072, 'timestamp': '2025-09-10 02:44:58.778705', 'step': 16053, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 2847885091968}, 'timestamp': '2025-09-10 02:44:58.834773', 'step': 16053, 'epoch': 3} {'type': 'loss', 'content': 0.0451848991215229, 'timestamp': '2025-09-10 02:44:58.840719', 'step': 16054, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 80], 'flops': 2373281365952}, 'timestamp': '2025-09-10 02:44:58.884489', 'step': 16054, 'epoch': 3} {'type': 'loss', 'content': 0.07939019799232483, 'timestamp': '2025-09-10 02:44:58.889062', 'step': 16055, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 2847885091968}, 'timestamp': '2025-09-10 02:44:58.933960', 'step': 16055, 'epoch': 3} {'type': 'loss', 'content': 0.17060568928718567, 'timestamp': '2025-09-10 02:44:58.960277', 'step': 16056, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 3322488817984}, 'timestamp': '2025-09-10 02:44:59.016216', 'step': 16056, 'epoch': 3} {'type': 'loss', 'content': 0.055202268064022064, 'timestamp': '2025-09-10 02:44:59.021248', 'step': 16057, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 3322488817984}, 'timestamp': '2025-09-10 02:44:59.083217', 'step': 16057, 'epoch': 3} {'type': 'loss', 'content': 0.09723438322544098, 'timestamp': '2025-09-10 02:44:59.090242', 'step': 16058, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 80], 'flops': 2373281365952}, 'timestamp': '2025-09-10 02:44:59.149347', 'step': 16058, 'epoch': 3} {'type': 'loss', 'content': 0.06073928624391556, 'timestamp': '2025-09-10 02:44:59.155102', 'step': 16059, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 2847885091968}, 'timestamp': '2025-09-10 02:44:59.198393', 'step': 16059, 'epoch': 3} {'type': 'loss', 'content': 0.0392291434109211, 'timestamp': '2025-09-10 02:44:59.231221', 'step': 16060, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 3322488817984}, 'timestamp': '2025-09-10 02:44:59.301089', 'step': 16060, 'epoch': 3} {'type': 'loss', 'content': 0.052165575325489044, 'timestamp': '2025-09-10 02:44:59.308839', 'step': 16061, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 128], 'flops': 3797092544000}, 'timestamp': '2025-09-10 02:44:59.363720', 'step': 16061, 'epoch': 3} {'type': 'loss', 'content': 0.07157809287309647, 'timestamp': '2025-09-10 02:44:59.372255', 'step': 16062, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 80], 'flops': 2373281365952}, 'timestamp': '2025-09-10 02:44:59.432158', 'step': 16062, 'epoch': 3} {'type': 'loss', 'content': 0.06045098230242729, 'timestamp': '2025-09-10 02:44:59.439180', 'step': 16063, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 128], 'flops': 3797092544000}, 'timestamp': '2025-09-10 02:44:59.486643', 'step': 16063, 'epoch': 3} {'type': 'loss', 'content': 0.10059919208288193, 'timestamp': '2025-09-10 02:44:59.516370', 'step': 16064, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 176], 'flops': 5220903722048}, 'timestamp': '2025-09-10 02:44:59.565449', 'step': 16064, 'epoch': 3} {'type': 'loss', 'content': 0.065886490046978, 'timestamp': '2025-09-10 02:44:59.576311', 'step': 16065, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 128], 'flops': 3797092544000}, 'timestamp': '2025-09-10 02:44:59.625255', 'step': 16065, 'epoch': 3} {'type': 'loss', 'content': 0.048693202435970306, 'timestamp': '2025-09-10 02:44:59.631593', 'step': 16066, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 128], 'flops': 3797092544000}, 'timestamp': '2025-09-10 02:44:59.685495', 'step': 16066, 'epoch': 3} {'type': 'loss', 'content': 0.04998907074332237, 'timestamp': '2025-09-10 02:44:59.693546', 'step': 16067, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 3322488817984}, 'timestamp': '2025-09-10 02:44:59.743476', 'step': 16067, 'epoch': 3} {'type': 'loss', 'content': 0.052592337131500244, 'timestamp': '2025-09-10 02:44:59.770654', 'step': 16068, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 3322488817984}, 'timestamp': '2025-09-10 02:44:59.817167', 'step': 16068, 'epoch': 3} {'type': 'loss', 'content': 0.14150887727737427, 'timestamp': '2025-09-10 02:44:59.830641', 'step': 16069, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 128], 'flops': 3797092544000}, 'timestamp': '2025-09-10 02:44:59.913454', 'step': 16069, 'epoch': 3} {'type': 'loss', 'content': 0.0819624587893486, 'timestamp': '2025-09-10 02:44:59.921205', 'step': 16070, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 3322488817984}, 'timestamp': '2025-09-10 02:44:59.993787', 'step': 16070, 'epoch': 3} {'type': 'loss', 'content': 0.07226592302322388, 'timestamp': '2025-09-10 02:44:59.998161', 'step': 16071, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 3322488817984}, 'timestamp': '2025-09-10 02:45:00.038928', 'step': 16071, 'epoch': 3} {'type': 'loss', 'content': 0.04092409089207649, 'timestamp': '2025-09-10 02:45:00.070618', 'step': 16072, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 128], 'flops': 3797092544000}, 'timestamp': '2025-09-10 02:45:00.129737', 'step': 16072, 'epoch': 3} {'type': 'loss', 'content': 0.06938852369785309, 'timestamp': '2025-09-10 02:45:00.138226', 'step': 16073, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 128], 'flops': 3797092544000}, 'timestamp': '2025-09-10 02:45:00.192073', 'step': 16073, 'epoch': 3} {'type': 'loss', 'content': 0.06587250530719757, 'timestamp': '2025-09-10 02:45:00.198707', 'step': 16074, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 128], 'flops': 3797092544000}, 'timestamp': '2025-09-10 02:45:00.244452', 'step': 16074, 'epoch': 3} {'type': 'loss', 'content': 0.06189088523387909, 'timestamp': '2025-09-10 02:45:00.252558', 'step': 16075, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 224], 'flops': 6644714900096}, 'timestamp': '2025-09-10 02:45:00.302504', 'step': 16075, 'epoch': 3} {'type': 'loss', 'content': 0.06160655990242958, 'timestamp': '2025-09-10 02:45:00.331255', 'step': 16076, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 144], 'flops': 4271696270016}, 'timestamp': '2025-09-10 02:45:00.393363', 'step': 16076, 'epoch': 3} {'type': 'loss', 'content': 0.09340560436248779, 'timestamp': '2025-09-10 02:45:00.404253', 'step': 16077, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 2847885091968}, 'timestamp': '2025-09-10 02:45:00.459109', 'step': 16077, 'epoch': 3} {'type': 'loss', 'content': 0.09374026209115982, 'timestamp': '2025-09-10 02:45:00.475610', 'step': 16078, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 3322488817984}, 'timestamp': '2025-09-10 02:45:00.546096', 'step': 16078, 'epoch': 3} {'type': 'loss', 'content': 0.02204962633550167, 'timestamp': '2025-09-10 02:45:00.557420', 'step': 16079, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 2847885091968}, 'timestamp': '2025-09-10 02:45:00.623111', 'step': 16079, 'epoch': 3} {'type': 'loss', 'content': 0.04055829346179962, 'timestamp': '2025-09-10 02:45:00.660875', 'step': 16080, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 128], 'flops': 3797092544000}, 'timestamp': '2025-09-10 02:45:00.727523', 'step': 16080, 'epoch': 3} {'type': 'loss', 'content': 0.08594532310962677, 'timestamp': '2025-09-10 02:45:00.747826', 'step': 16081, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 144], 'flops': 4271696270016}, 'timestamp': '2025-09-10 02:45:00.798877', 'step': 16081, 'epoch': 3} {'type': 'loss', 'content': 0.0924702137708664, 'timestamp': '2025-09-10 02:45:00.804326', 'step': 16082, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 2847885091968}, 'timestamp': '2025-09-10 02:45:00.857703', 'step': 16082, 'epoch': 3} {'type': 'loss', 'content': 0.03992979973554611, 'timestamp': '2025-09-10 02:45:00.869070', 'step': 16083, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 144], 'flops': 4271696270016}, 'timestamp': '2025-09-10 02:45:00.924002', 'step': 16083, 'epoch': 3} {'type': 'loss', 'content': 0.029682401567697525, 'timestamp': '2025-09-10 02:45:00.953045', 'step': 16084, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 128], 'flops': 3797092544000}, 'timestamp': '2025-09-10 02:45:00.998356', 'step': 16084, 'epoch': 3} {'type': 'loss', 'content': 0.08634582906961441, 'timestamp': '2025-09-10 02:45:01.013111', 'step': 16085, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 3322488817984}, 'timestamp': '2025-09-10 02:45:01.097778', 'step': 16085, 'epoch': 3} {'type': 'loss', 'content': 0.07106416672468185, 'timestamp': '2025-09-10 02:45:01.105308', 'step': 16086, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 3322488817984}, 'timestamp': '2025-09-10 02:45:01.167636', 'step': 16086, 'epoch': 3} {'type': 'loss', 'content': 0.07924263924360275, 'timestamp': '2025-09-10 02:45:01.171419', 'step': 16087, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 128], 'flops': 3797092544000}, 'timestamp': '2025-09-10 02:45:01.222245', 'step': 16087, 'epoch': 3} {'type': 'loss', 'content': 0.025988850742578506, 'timestamp': '2025-09-10 02:45:01.268836', 'step': 16088, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 3322488817984}, 'timestamp': '2025-09-10 02:45:01.319153', 'step': 16088, 'epoch': 3} {'type': 'loss', 'content': 0.07152943313121796, 'timestamp': '2025-09-10 02:45:01.336166', 'step': 16089, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 3322488817984}, 'timestamp': '2025-09-10 02:45:01.396271', 'step': 16089, 'epoch': 3} {'type': 'loss', 'content': 0.13389039039611816, 'timestamp': '2025-09-10 02:45:01.406516', 'step': 16090, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 2847885091968}, 'timestamp': '2025-09-10 02:45:01.465398', 'step': 16090, 'epoch': 3} {'type': 'loss', 'content': 0.06535201519727707, 'timestamp': '2025-09-10 02:45:01.474851', 'step': 16091, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 2847885091968}, 'timestamp': '2025-09-10 02:45:01.531993', 'step': 16091, 'epoch': 3} {'type': 'loss', 'content': 0.08461697399616241, 'timestamp': '2025-09-10 02:45:01.575345', 'step': 16092, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 2847885091968}, 'timestamp': '2025-09-10 02:45:01.640858', 'step': 16092, 'epoch': 3} {'type': 'loss', 'content': 0.030029296875, 'timestamp': '2025-09-10 02:45:01.651526', 'step': 16093, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 144], 'flops': 4271696270016}, 'timestamp': '2025-09-10 02:45:01.725243', 'step': 16093, 'epoch': 3} {'type': 'loss', 'content': 0.10485994815826416, 'timestamp': '2025-09-10 02:45:01.736362', 'step': 16094, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 3322488817984}, 'timestamp': '2025-09-10 02:45:01.794447', 'step': 16094, 'epoch': 3} {'type': 'loss', 'content': 0.08062493801116943, 'timestamp': '2025-09-10 02:45:01.804923', 'step': 16095, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 128], 'flops': 3797092544000}, 'timestamp': '2025-09-10 02:45:01.860725', 'step': 16095, 'epoch': 3} {'type': 'loss', 'content': 0.12305966764688492, 'timestamp': '2025-09-10 02:45:01.888841', 'step': 16096, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 128], 'flops': 3797092544000}, 'timestamp': '2025-09-10 02:45:01.943446', 'step': 16096, 'epoch': 3} {'type': 'loss', 'content': 0.03503512218594551, 'timestamp': '2025-09-10 02:45:01.953436', 'step': 16097, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 128], 'flops': 3797092544000}, 'timestamp': '2025-09-10 02:45:02.004045', 'step': 16097, 'epoch': 3} {'type': 'loss', 'content': 0.09449012577533722, 'timestamp': '2025-09-10 02:45:02.013072', 'step': 16098, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 3322488817984}, 'timestamp': '2025-09-10 02:45:02.067709', 'step': 16098, 'epoch': 3} {'type': 'loss', 'content': 0.09262941032648087, 'timestamp': '2025-09-10 02:45:02.076069', 'step': 16099, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 3322488817984}, 'timestamp': '2025-09-10 02:45:02.126095', 'step': 16099, 'epoch': 3} {'type': 'loss', 'content': 0.11807116121053696, 'timestamp': '2025-09-10 02:45:02.155834', 'step': 16100, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 2847885091968}, 'timestamp': '2025-09-10 02:45:02.207567', 'step': 16100, 'epoch': 3} {'type': 'loss', 'content': 0.07045240700244904, 'timestamp': '2025-09-10 02:45:02.220498', 'step': 16101, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 2847885091968}, 'timestamp': '2025-09-10 02:45:02.274417', 'step': 16101, 'epoch': 3} {'type': 'loss', 'content': 0.06438953429460526, 'timestamp': '2025-09-10 02:45:02.292411', 'step': 16102, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 3322488817984}, 'timestamp': '2025-09-10 02:45:02.351490', 'step': 16102, 'epoch': 3} {'type': 'loss', 'content': 0.07260292023420334, 'timestamp': '2025-09-10 02:45:02.367292', 'step': 16103, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 144], 'flops': 4271696270016}, 'timestamp': '2025-09-10 02:45:02.426927', 'step': 16103, 'epoch': 3} {'type': 'loss', 'content': 0.04791249707341194, 'timestamp': '2025-09-10 02:45:02.457126', 'step': 16104, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 144], 'flops': 4271696270016}, 'timestamp': '2025-09-10 02:45:02.519145', 'step': 16104, 'epoch': 3} {'type': 'loss', 'content': 0.03319016844034195, 'timestamp': '2025-09-10 02:45:02.530540', 'step': 16105, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 3322488817984}, 'timestamp': '2025-09-10 02:45:02.587468', 'step': 16105, 'epoch': 3} {'type': 'loss', 'content': 0.03470202162861824, 'timestamp': '2025-09-10 02:45:02.604841', 'step': 16106, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 128], 'flops': 3797092544000}, 'timestamp': '2025-09-10 02:45:02.657791', 'step': 16106, 'epoch': 3} {'type': 'loss', 'content': 0.0867089182138443, 'timestamp': '2025-09-10 02:45:02.670025', 'step': 16107, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 3322488817984}, 'timestamp': '2025-09-10 02:45:02.752196', 'step': 16107, 'epoch': 3} {'type': 'loss', 'content': 0.08875498175621033, 'timestamp': '2025-09-10 02:45:02.782602', 'step': 16108, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 3322488817984}, 'timestamp': '2025-09-10 02:45:02.834127', 'step': 16108, 'epoch': 3} {'type': 'loss', 'content': 0.04592246189713478, 'timestamp': '2025-09-10 02:45:02.847934', 'step': 16109, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 128], 'flops': 3797092544000}, 'timestamp': '2025-09-10 02:45:02.914846', 'step': 16109, 'epoch': 3} {'type': 'loss', 'content': 0.054491132497787476, 'timestamp': '2025-09-10 02:45:02.926814', 'step': 16110, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 128], 'flops': 3797092544000}, 'timestamp': '2025-09-10 02:45:02.991035', 'step': 16110, 'epoch': 3} {'type': 'loss', 'content': 0.10580933839082718, 'timestamp': '2025-09-10 02:45:02.997392', 'step': 16111, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 2847885091968}, 'timestamp': '2025-09-10 02:45:03.051754', 'step': 16111, 'epoch': 3} {'type': 'loss', 'content': 0.037897463887929916, 'timestamp': '2025-09-10 02:45:03.083219', 'step': 16112, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 3322488817984}, 'timestamp': '2025-09-10 02:45:03.132417', 'step': 16112, 'epoch': 3} {'type': 'loss', 'content': 0.17704348266124725, 'timestamp': '2025-09-10 02:45:03.142460', 'step': 16113, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 3322488817984}, 'timestamp': '2025-09-10 02:45:03.189787', 'step': 16113, 'epoch': 3} {'type': 'loss', 'content': 0.09423010051250458, 'timestamp': '2025-09-10 02:45:03.202440', 'step': 16114, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 3322488817984}, 'timestamp': '2025-09-10 02:45:03.242922', 'step': 16114, 'epoch': 3} {'type': 'loss', 'content': 0.058673981577157974, 'timestamp': '2025-09-10 02:45:03.248035', 'step': 16115, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 3322488817984}, 'timestamp': '2025-09-10 02:45:03.299438', 'step': 16115, 'epoch': 3} {'type': 'loss', 'content': 0.040916748344898224, 'timestamp': '2025-09-10 02:45:03.328337', 'step': 16116, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 3322488817984}, 'timestamp': '2025-09-10 02:45:03.383017', 'step': 16116, 'epoch': 3} {'type': 'loss', 'content': 0.046142131090164185, 'timestamp': '2025-09-10 02:45:03.390775', 'step': 16117, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 3322488817984}, 'timestamp': '2025-09-10 02:45:03.436468', 'step': 16117, 'epoch': 3} {'type': 'loss', 'content': 0.09103444218635559, 'timestamp': '2025-09-10 02:45:03.443876', 'step': 16118, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 3322488817984}, 'timestamp': '2025-09-10 02:45:03.497666', 'step': 16118, 'epoch': 3} {'type': 'loss', 'content': 0.03093726746737957, 'timestamp': '2025-09-10 02:45:03.504396', 'step': 16119, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 2847885091968}, 'timestamp': '2025-09-10 02:45:03.551198', 'step': 16119, 'epoch': 3} {'type': 'loss', 'content': 0.08285655081272125, 'timestamp': '2025-09-10 02:45:03.583980', 'step': 16120, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 3322488817984}, 'timestamp': '2025-09-10 02:45:03.625918', 'step': 16120, 'epoch': 3} {'type': 'loss', 'content': 0.012831080704927444, 'timestamp': '2025-09-10 02:45:03.634456', 'step': 16121, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 128], 'flops': 3797092544000}, 'timestamp': '2025-09-10 02:45:03.680701', 'step': 16121, 'epoch': 3} {'type': 'loss', 'content': 0.033148862421512604, 'timestamp': '2025-09-10 02:45:03.690106', 'step': 16122, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 128], 'flops': 3797092544000}, 'timestamp': '2025-09-10 02:45:03.743028', 'step': 16122, 'epoch': 3} {'type': 'loss', 'content': 0.05720454081892967, 'timestamp': '2025-09-10 02:45:03.756545', 'step': 16123, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 2847885091968}, 'timestamp': '2025-09-10 02:45:03.809713', 'step': 16123, 'epoch': 3} {'type': 'loss', 'content': 0.06731045991182327, 'timestamp': '2025-09-10 02:45:03.852851', 'step': 16124, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 3322488817984}, 'timestamp': '2025-09-10 02:45:03.917003', 'step': 16124, 'epoch': 3} {'type': 'loss', 'content': 0.07632578164339066, 'timestamp': '2025-09-10 02:45:03.930270', 'step': 16125, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 80], 'flops': 2373281365952}, 'timestamp': '2025-09-10 02:45:03.988093', 'step': 16125, 'epoch': 3} {'type': 'loss', 'content': 0.1840052753686905, 'timestamp': '2025-09-10 02:45:03.993392', 'step': 16126, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 3322488817984}, 'timestamp': '2025-09-10 02:45:04.040146', 'step': 16126, 'epoch': 3} {'type': 'loss', 'content': 0.0718827024102211, 'timestamp': '2025-09-10 02:45:04.050004', 'step': 16127, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 3322488817984}, 'timestamp': '2025-09-10 02:45:04.104112', 'step': 16127, 'epoch': 3} {'type': 'loss', 'content': 0.05136409029364586, 'timestamp': '2025-09-10 02:45:04.133612', 'step': 16128, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 128], 'flops': 3797092544000}, 'timestamp': '2025-09-10 02:45:04.180943', 'step': 16128, 'epoch': 3} {'type': 'loss', 'content': 0.0718524381518364, 'timestamp': '2025-09-10 02:45:04.190601', 'step': 16129, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 144], 'flops': 4271696270016}, 'timestamp': '2025-09-10 02:45:04.244787', 'step': 16129, 'epoch': 3} {'type': 'loss', 'content': 0.16647526621818542, 'timestamp': '2025-09-10 02:45:04.249135', 'step': 16130, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 3322488817984}, 'timestamp': '2025-09-10 02:45:04.299739', 'step': 16130, 'epoch': 3} {'type': 'loss', 'content': 0.02338067628443241, 'timestamp': '2025-09-10 02:45:04.309923', 'step': 16131, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 208], 'flops': 6170111174080}, 'timestamp': '2025-09-10 02:45:04.361748', 'step': 16131, 'epoch': 3} {'type': 'loss', 'content': 0.11183367669582367, 'timestamp': '2025-09-10 02:45:04.393370', 'step': 16132, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 128], 'flops': 3797092544000}, 'timestamp': '2025-09-10 02:45:04.445547', 'step': 16132, 'epoch': 3} {'type': 'loss', 'content': 0.05360327661037445, 'timestamp': '2025-09-10 02:45:04.451788', 'step': 16133, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 2847885091968}, 'timestamp': '2025-09-10 02:45:04.504890', 'step': 16133, 'epoch': 3} {'type': 'loss', 'content': 0.051292091608047485, 'timestamp': '2025-09-10 02:45:04.513493', 'step': 16134, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 3322488817984}, 'timestamp': '2025-09-10 02:45:04.567778', 'step': 16134, 'epoch': 3} {'type': 'loss', 'content': 0.07757412642240524, 'timestamp': '2025-09-10 02:45:04.575376', 'step': 16135, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 3322488817984}, 'timestamp': '2025-09-10 02:45:04.614878', 'step': 16135, 'epoch': 3} {'type': 'loss', 'content': 0.06201094016432762, 'timestamp': '2025-09-10 02:45:04.641885', 'step': 16136, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 3322488817984}, 'timestamp': '2025-09-10 02:45:04.688281', 'step': 16136, 'epoch': 3} {'type': 'loss', 'content': 0.08904364705085754, 'timestamp': '2025-09-10 02:45:04.700116', 'step': 16137, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 3322488817984}, 'timestamp': '2025-09-10 02:45:04.752215', 'step': 16137, 'epoch': 3} {'type': 'loss', 'content': 0.057005904614925385, 'timestamp': '2025-09-10 02:45:04.765460', 'step': 16138, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 128], 'flops': 3797092544000}, 'timestamp': '2025-09-10 02:45:04.809430', 'step': 16138, 'epoch': 3} {'type': 'loss', 'content': 0.09856558591127396, 'timestamp': '2025-09-10 02:45:04.820891', 'step': 16139, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 144], 'flops': 4271696270016}, 'timestamp': '2025-09-10 02:45:04.862687', 'step': 16139, 'epoch': 3} {'type': 'loss', 'content': 0.06541604548692703, 'timestamp': '2025-09-10 02:45:04.893873', 'step': 16140, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 2847885091968}, 'timestamp': '2025-09-10 02:45:04.932813', 'step': 16140, 'epoch': 3} {'type': 'loss', 'content': 0.11469794809818268, 'timestamp': '2025-09-10 02:45:04.941897', 'step': 16141, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 2847885091968}, 'timestamp': '2025-09-10 02:45:04.987699', 'step': 16141, 'epoch': 3} {'type': 'loss', 'content': 0.04780229926109314, 'timestamp': '2025-09-10 02:45:04.993316', 'step': 16142, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 3322488817984}, 'timestamp': '2025-09-10 02:45:05.034304', 'step': 16142, 'epoch': 3} {'type': 'loss', 'content': 0.046237390488386154, 'timestamp': '2025-09-10 02:45:05.039866', 'step': 16143, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 128], 'flops': 3797092544000}, 'timestamp': '2025-09-10 02:45:05.093301', 'step': 16143, 'epoch': 3} {'type': 'loss', 'content': 0.06717398017644882, 'timestamp': '2025-09-10 02:45:05.121958', 'step': 16144, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 3322488817984}, 'timestamp': '2025-09-10 02:45:05.191372', 'step': 16144, 'epoch': 3} {'type': 'loss', 'content': 0.0567585825920105, 'timestamp': '2025-09-10 02:45:05.201326', 'step': 16145, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 3322488817984}, 'timestamp': '2025-09-10 02:45:05.249366', 'step': 16145, 'epoch': 3} {'type': 'loss', 'content': 0.15174536406993866, 'timestamp': '2025-09-10 02:45:05.257169', 'step': 16146, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 2847885091968}, 'timestamp': '2025-09-10 02:45:05.316011', 'step': 16146, 'epoch': 3} {'type': 'loss', 'content': 0.08019834011793137, 'timestamp': '2025-09-10 02:45:05.326469', 'step': 16147, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 128], 'flops': 3797092544000}, 'timestamp': '2025-09-10 02:45:05.398725', 'step': 16147, 'epoch': 3} {'type': 'loss', 'content': 0.06625688076019287, 'timestamp': '2025-09-10 02:45:05.430066', 'step': 16148, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 3322488817984}, 'timestamp': '2025-09-10 02:45:05.472158', 'step': 16148, 'epoch': 3} {'type': 'loss', 'content': 0.035405971109867096, 'timestamp': '2025-09-10 02:45:05.478414', 'step': 16149, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 2847885091968}, 'timestamp': '2025-09-10 02:45:05.527187', 'step': 16149, 'epoch': 3} {'type': 'loss', 'content': 0.10265697538852692, 'timestamp': '2025-09-10 02:45:05.535195', 'step': 16150, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 144], 'flops': 4271696270016}, 'timestamp': '2025-09-10 02:45:05.581908', 'step': 16150, 'epoch': 3} {'type': 'loss', 'content': 0.17375007271766663, 'timestamp': '2025-09-10 02:45:05.590889', 'step': 16151, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 128], 'flops': 3797092544000}, 'timestamp': '2025-09-10 02:45:05.633824', 'step': 16151, 'epoch': 3} {'type': 'loss', 'content': 0.10860580950975418, 'timestamp': '2025-09-10 02:45:05.666850', 'step': 16152, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 3322488817984}, 'timestamp': '2025-09-10 02:45:05.713578', 'step': 16152, 'epoch': 3} {'type': 'loss', 'content': 0.07730738818645477, 'timestamp': '2025-09-10 02:45:05.724163', 'step': 16153, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 128], 'flops': 3797092544000}, 'timestamp': '2025-09-10 02:45:05.776786', 'step': 16153, 'epoch': 3} {'type': 'loss', 'content': 0.08357973396778107, 'timestamp': '2025-09-10 02:45:05.783809', 'step': 16154, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 3322488817984}, 'timestamp': '2025-09-10 02:45:05.830447', 'step': 16154, 'epoch': 3} {'type': 'loss', 'content': 0.03541329503059387, 'timestamp': '2025-09-10 02:45:05.842028', 'step': 16155, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 3322488817984}, 'timestamp': '2025-09-10 02:45:05.890164', 'step': 16155, 'epoch': 3} {'type': 'loss', 'content': 0.036146365106105804, 'timestamp': '2025-09-10 02:45:05.916075', 'step': 16156, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 128], 'flops': 3797092544000}, 'timestamp': '2025-09-10 02:45:05.964047', 'step': 16156, 'epoch': 3} {'type': 'loss', 'content': 0.15542486310005188, 'timestamp': '2025-09-10 02:45:05.990654', 'step': 16157, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 272], 'flops': 8068526078144}, 'timestamp': '2025-09-10 02:45:06.046610', 'step': 16157, 'epoch': 3} {'type': 'loss', 'content': 0.08993624895811081, 'timestamp': '2025-09-10 02:45:06.060852', 'step': 16158, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 2847885091968}, 'timestamp': '2025-09-10 02:45:06.115312', 'step': 16158, 'epoch': 3} {'type': 'loss', 'content': 0.07500971108675003, 'timestamp': '2025-09-10 02:45:06.123419', 'step': 16159, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 3322488817984}, 'timestamp': '2025-09-10 02:45:06.176041', 'step': 16159, 'epoch': 3} {'type': 'loss', 'content': 0.07832825928926468, 'timestamp': '2025-09-10 02:45:06.207750', 'step': 16160, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 2847885091968}, 'timestamp': '2025-09-10 02:45:06.260131', 'step': 16160, 'epoch': 3} {'type': 'loss', 'content': 0.16550961136817932, 'timestamp': '2025-09-10 02:45:06.268077', 'step': 16161, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 2847885091968}, 'timestamp': '2025-09-10 02:45:06.322347', 'step': 16161, 'epoch': 3} {'type': 'loss', 'content': 0.07605572789907455, 'timestamp': '2025-09-10 02:45:06.330050', 'step': 16162, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 2847885091968}, 'timestamp': '2025-09-10 02:45:06.383298', 'step': 16162, 'epoch': 3} {'type': 'loss', 'content': 0.04129762947559357, 'timestamp': '2025-09-10 02:45:06.391472', 'step': 16163, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 3322488817984}, 'timestamp': '2025-09-10 02:45:06.443127', 'step': 16163, 'epoch': 3} {'type': 'loss', 'content': 0.08957527577877045, 'timestamp': '2025-09-10 02:45:06.479797', 'step': 16164, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 3322488817984}, 'timestamp': '2025-09-10 02:45:06.532404', 'step': 16164, 'epoch': 3} {'type': 'loss', 'content': 0.06553491204977036, 'timestamp': '2025-09-10 02:45:06.545097', 'step': 16165, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 144], 'flops': 4271696270016}, 'timestamp': '2025-09-10 02:45:06.591125', 'step': 16165, 'epoch': 3} {'type': 'loss', 'content': 0.056906234472990036, 'timestamp': '2025-09-10 02:45:06.602047', 'step': 16166, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 2847885091968}, 'timestamp': '2025-09-10 02:45:06.665947', 'step': 16166, 'epoch': 3} {'type': 'loss', 'content': 0.06133062765002251, 'timestamp': '2025-09-10 02:45:06.675013', 'step': 16167, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 3322488817984}, 'timestamp': '2025-09-10 02:45:06.737655', 'step': 16167, 'epoch': 3} {'type': 'loss', 'content': 0.0467725470662117, 'timestamp': '2025-09-10 02:45:06.769727', 'step': 16168, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 3322488817984}, 'timestamp': '2025-09-10 02:45:06.836012', 'step': 16168, 'epoch': 3} {'type': 'loss', 'content': 0.04381100460886955, 'timestamp': '2025-09-10 02:45:06.845665', 'step': 16169, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 3322488817984}, 'timestamp': '2025-09-10 02:45:06.887648', 'step': 16169, 'epoch': 3} {'type': 'loss', 'content': 0.09845112264156342, 'timestamp': '2025-09-10 02:45:06.898081', 'step': 16170, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 128], 'flops': 3797092544000}, 'timestamp': '2025-09-10 02:45:06.949562', 'step': 16170, 'epoch': 3} {'type': 'loss', 'content': 0.05106566473841667, 'timestamp': '2025-09-10 02:45:06.959841', 'step': 16171, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 128], 'flops': 3797092544000}, 'timestamp': '2025-09-10 02:45:07.011009', 'step': 16171, 'epoch': 3} {'type': 'loss', 'content': 0.083203986287117, 'timestamp': '2025-09-10 02:45:07.045709', 'step': 16172, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 3322488817984}, 'timestamp': '2025-09-10 02:45:07.104864', 'step': 16172, 'epoch': 3} {'type': 'loss', 'content': 0.08297743648290634, 'timestamp': '2025-09-10 02:45:07.114794', 'step': 16173, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 3322488817984}, 'timestamp': '2025-09-10 02:45:07.177283', 'step': 16173, 'epoch': 3} {'type': 'loss', 'content': 0.0644821748137474, 'timestamp': '2025-09-10 02:45:07.187635', 'step': 16174, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 3322488817984}, 'timestamp': '2025-09-10 02:45:07.242433', 'step': 16174, 'epoch': 3} {'type': 'loss', 'content': 0.06815585494041443, 'timestamp': '2025-09-10 02:45:07.257007', 'step': 16175, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 128], 'flops': 3797092544000}, 'timestamp': '2025-09-10 02:45:07.317874', 'step': 16175, 'epoch': 3} {'type': 'loss', 'content': 0.0843014195561409, 'timestamp': '2025-09-10 02:45:07.351079', 'step': 16176, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 3322488817984}, 'timestamp': '2025-09-10 02:45:07.393329', 'step': 16176, 'epoch': 3} {'type': 'loss', 'content': 0.04006458446383476, 'timestamp': '2025-09-10 02:45:07.399450', 'step': 16177, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 3322488817984}, 'timestamp': '2025-09-10 02:45:07.449040', 'step': 16177, 'epoch': 3} {'type': 'loss', 'content': 0.12919127941131592, 'timestamp': '2025-09-10 02:45:07.457604', 'step': 16178, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 3322488817984}, 'timestamp': '2025-09-10 02:45:07.522849', 'step': 16178, 'epoch': 3} {'type': 'loss', 'content': 0.11824185401201248, 'timestamp': '2025-09-10 02:45:07.529219', 'step': 16179, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 3322488817984}, 'timestamp': '2025-09-10 02:45:07.587484', 'step': 16179, 'epoch': 3} {'type': 'loss', 'content': 0.1105651780962944, 'timestamp': '2025-09-10 02:45:07.615704', 'step': 16180, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 144], 'flops': 4271696270016}, 'timestamp': '2025-09-10 02:45:07.673465', 'step': 16180, 'epoch': 3} {'type': 'loss', 'content': 0.06387051194906235, 'timestamp': '2025-09-10 02:45:07.678492', 'step': 16181, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 128], 'flops': 3797092544000}, 'timestamp': '2025-09-10 02:45:07.725538', 'step': 16181, 'epoch': 3} {'type': 'loss', 'content': 0.14219732582569122, 'timestamp': '2025-09-10 02:45:07.733639', 'step': 16182, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 3322488817984}, 'timestamp': '2025-09-10 02:45:07.785529', 'step': 16182, 'epoch': 3} {'type': 'loss', 'content': 0.06965083628892899, 'timestamp': '2025-09-10 02:45:07.796877', 'step': 16183, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 3322488817984}, 'timestamp': '2025-09-10 02:45:07.845856', 'step': 16183, 'epoch': 3} {'type': 'loss', 'content': 0.04592152684926987, 'timestamp': '2025-09-10 02:45:07.871827', 'step': 16184, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 3322488817984}, 'timestamp': '2025-09-10 02:45:07.918838', 'step': 16184, 'epoch': 3} {'type': 'loss', 'content': 0.038774751126766205, 'timestamp': '2025-09-10 02:45:07.939139', 'step': 16185, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 2847885091968}, 'timestamp': '2025-09-10 02:45:07.992821', 'step': 16185, 'epoch': 3} {'type': 'loss', 'content': 0.03931581974029541, 'timestamp': '2025-09-10 02:45:08.010882', 'step': 16186, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 3322488817984}, 'timestamp': '2025-09-10 02:45:08.058082', 'step': 16186, 'epoch': 3} {'type': 'loss', 'content': 0.15153686702251434, 'timestamp': '2025-09-10 02:45:08.068263', 'step': 16187, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 2847885091968}, 'timestamp': '2025-09-10 02:45:08.127820', 'step': 16187, 'epoch': 3} {'type': 'loss', 'content': 0.021809589117765427, 'timestamp': '2025-09-10 02:45:08.157533', 'step': 16188, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 2847885091968}, 'timestamp': '2025-09-10 02:45:08.196138', 'step': 16188, 'epoch': 3} {'type': 'loss', 'content': 0.06856617331504822, 'timestamp': '2025-09-10 02:45:08.202476', 'step': 16189, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 128], 'flops': 3797092544000}, 'timestamp': '2025-09-10 02:45:08.255035', 'step': 16189, 'epoch': 3} {'type': 'loss', 'content': 0.06284315139055252, 'timestamp': '2025-09-10 02:45:08.261679', 'step': 16190, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 3322488817984}, 'timestamp': '2025-09-10 02:45:08.309248', 'step': 16190, 'epoch': 3} {'type': 'loss', 'content': 0.0849328488111496, 'timestamp': '2025-09-10 02:45:08.317635', 'step': 16191, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 144], 'flops': 4271696270016}, 'timestamp': '2025-09-10 02:45:08.367744', 'step': 16191, 'epoch': 3} {'type': 'loss', 'content': 0.06042364612221718, 'timestamp': '2025-09-10 02:45:08.401226', 'step': 16192, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 3322488817984}, 'timestamp': '2025-09-10 02:45:08.446888', 'step': 16192, 'epoch': 3} {'type': 'loss', 'content': 0.10557039082050323, 'timestamp': '2025-09-10 02:45:08.455887', 'step': 16193, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 2847885091968}, 'timestamp': '2025-09-10 02:45:08.498683', 'step': 16193, 'epoch': 3} {'type': 'loss', 'content': 0.051930490881204605, 'timestamp': '2025-09-10 02:45:08.504836', 'step': 16194, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 2847885091968}, 'timestamp': '2025-09-10 02:45:08.558131', 'step': 16194, 'epoch': 3} {'type': 'loss', 'content': 0.06382482498884201, 'timestamp': '2025-09-10 02:45:08.561663', 'step': 16195, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 3322488817984}, 'timestamp': '2025-09-10 02:45:08.605807', 'step': 16195, 'epoch': 3} {'type': 'loss', 'content': 0.09564503282308578, 'timestamp': '2025-09-10 02:45:08.632634', 'step': 16196, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 2847885091968}, 'timestamp': '2025-09-10 02:45:08.674616', 'step': 16196, 'epoch': 3} {'type': 'loss', 'content': 0.12435900419950485, 'timestamp': '2025-09-10 02:45:08.678536', 'step': 16197, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 3322488817984}, 'timestamp': '2025-09-10 02:45:08.718703', 'step': 16197, 'epoch': 3} {'type': 'loss', 'content': 0.08324743062257767, 'timestamp': '2025-09-10 02:45:08.727332', 'step': 16198, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 128], 'flops': 3797092544000}, 'timestamp': '2025-09-10 02:45:08.769445', 'step': 16198, 'epoch': 3} {'type': 'loss', 'content': 0.07481595873832703, 'timestamp': '2025-09-10 02:45:08.773218', 'step': 16199, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 3322488817984}, 'timestamp': '2025-09-10 02:45:08.808980', 'step': 16199, 'epoch': 3} {'type': 'loss', 'content': 0.0966593325138092, 'timestamp': '2025-09-10 02:45:08.838817', 'step': 16200, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 128], 'flops': 3797092544000}, 'timestamp': '2025-09-10 02:45:08.877663', 'step': 16200, 'epoch': 3} {'type': 'loss', 'content': 0.09810657799243927, 'timestamp': '2025-09-10 02:45:08.881993', 'step': 16201, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 2847885091968}, 'timestamp': '2025-09-10 02:45:08.918379', 'step': 16201, 'epoch': 3} {'type': 'loss', 'content': 0.0765579417347908, 'timestamp': '2025-09-10 02:45:08.922907', 'step': 16202, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 128], 'flops': 3797092544000}, 'timestamp': '2025-09-10 02:45:08.957907', 'step': 16202, 'epoch': 3} {'type': 'loss', 'content': 0.11263630539178848, 'timestamp': '2025-09-10 02:45:08.966122', 'step': 16203, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 2847885091968}, 'timestamp': '2025-09-10 02:45:09.001843', 'step': 16203, 'epoch': 3} {'type': 'loss', 'content': 0.08008851855993271, 'timestamp': '2025-09-10 02:45:09.026690', 'step': 16204, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 144], 'flops': 4271696270016}, 'timestamp': '2025-09-10 02:45:09.070712', 'step': 16204, 'epoch': 3} {'type': 'loss', 'content': 0.03166399523615837, 'timestamp': '2025-09-10 02:45:09.078461', 'step': 16205, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 2847885091968}, 'timestamp': '2025-09-10 02:45:09.120964', 'step': 16205, 'epoch': 3} {'type': 'loss', 'content': 0.11086264997720718, 'timestamp': '2025-09-10 02:45:09.126306', 'step': 16206, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 2847885091968}, 'timestamp': '2025-09-10 02:45:09.162311', 'step': 16206, 'epoch': 3} {'type': 'loss', 'content': 0.14421555399894714, 'timestamp': '2025-09-10 02:45:09.167171', 'step': 16207, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 2847885091968}, 'timestamp': '2025-09-10 02:45:09.209406', 'step': 16207, 'epoch': 3} {'type': 'loss', 'content': 0.038478538393974304, 'timestamp': '2025-09-10 02:45:09.235253', 'step': 16208, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 2847885091968}, 'timestamp': '2025-09-10 02:45:09.272567', 'step': 16208, 'epoch': 3} {'type': 'loss', 'content': 0.09110090881586075, 'timestamp': '2025-09-10 02:45:09.276175', 'step': 16209, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 128], 'flops': 3797092544000}, 'timestamp': '2025-09-10 02:45:09.319839', 'step': 16209, 'epoch': 3} {'type': 'loss', 'content': 0.08196868002414703, 'timestamp': '2025-09-10 02:45:09.326025', 'step': 16210, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 2847885091968}, 'timestamp': '2025-09-10 02:45:09.368526', 'step': 16210, 'epoch': 3} {'type': 'loss', 'content': 0.09574538469314575, 'timestamp': '2025-09-10 02:45:09.374722', 'step': 16211, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 2847885091968}, 'timestamp': '2025-09-10 02:45:09.417521', 'step': 16211, 'epoch': 3} {'type': 'loss', 'content': 0.03739353269338608, 'timestamp': '2025-09-10 02:45:09.444871', 'step': 16212, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 3322488817984}, 'timestamp': '2025-09-10 02:45:09.491718', 'step': 16212, 'epoch': 3} {'type': 'loss', 'content': 0.04191860184073448, 'timestamp': '2025-09-10 02:45:09.498893', 'step': 16213, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 2847885091968}, 'timestamp': '2025-09-10 02:45:09.539448', 'step': 16213, 'epoch': 3} {'type': 'loss', 'content': 0.05699728801846504, 'timestamp': '2025-09-10 02:45:09.545021', 'step': 16214, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 80], 'flops': 2373281365952}, 'timestamp': '2025-09-10 02:45:09.592364', 'step': 16214, 'epoch': 3} {'type': 'loss', 'content': 0.06417197734117508, 'timestamp': '2025-09-10 02:45:09.597100', 'step': 16215, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 3322488817984}, 'timestamp': '2025-09-10 02:45:09.637550', 'step': 16215, 'epoch': 3} {'type': 'loss', 'content': 0.11426427960395813, 'timestamp': '2025-09-10 02:45:09.668032', 'step': 16216, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 3322488817984}, 'timestamp': '2025-09-10 02:45:09.716263', 'step': 16216, 'epoch': 3} {'type': 'loss', 'content': 0.028919363394379616, 'timestamp': '2025-09-10 02:45:09.732017', 'step': 16217, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 3322488817984}, 'timestamp': '2025-09-10 02:45:09.781585', 'step': 16217, 'epoch': 3} {'type': 'loss', 'content': 0.041396141052246094, 'timestamp': '2025-09-10 02:45:09.788539', 'step': 16218, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 3322488817984}, 'timestamp': '2025-09-10 02:45:09.832328', 'step': 16218, 'epoch': 3} {'type': 'loss', 'content': 0.08726070076227188, 'timestamp': '2025-09-10 02:45:09.840779', 'step': 16219, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 128], 'flops': 3797092544000}, 'timestamp': '2025-09-10 02:45:09.876064', 'step': 16219, 'epoch': 3} {'type': 'loss', 'content': 0.03741971775889397, 'timestamp': '2025-09-10 02:45:09.902286', 'step': 16220, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 3322488817984}, 'timestamp': '2025-09-10 02:45:09.942707', 'step': 16220, 'epoch': 3} {'type': 'loss', 'content': 0.14853033423423767, 'timestamp': '2025-09-10 02:45:09.949192', 'step': 16221, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 3322488817984}, 'timestamp': '2025-09-10 02:45:09.986918', 'step': 16221, 'epoch': 3} {'type': 'loss', 'content': 0.09279056638479233, 'timestamp': '2025-09-10 02:45:09.997146', 'step': 16222, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 2847885091968}, 'timestamp': '2025-09-10 02:45:10.033876', 'step': 16222, 'epoch': 3} {'type': 'loss', 'content': 0.11759214103221893, 'timestamp': '2025-09-10 02:45:10.038488', 'step': 16223, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 2847885091968}, 'timestamp': '2025-09-10 02:45:10.079124', 'step': 16223, 'epoch': 3} {'type': 'loss', 'content': 0.11283718794584274, 'timestamp': '2025-09-10 02:45:10.108652', 'step': 16224, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 128], 'flops': 3797092544000}, 'timestamp': '2025-09-10 02:45:10.145330', 'step': 16224, 'epoch': 3} {'type': 'loss', 'content': 0.07518978416919708, 'timestamp': '2025-09-10 02:45:10.166172', 'step': 16225, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 128], 'flops': 3797092544000}, 'timestamp': '2025-09-10 02:45:10.201974', 'step': 16225, 'epoch': 3} {'type': 'loss', 'content': 0.1285005807876587, 'timestamp': '2025-09-10 02:45:10.208974', 'step': 16226, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 3322488817984}, 'timestamp': '2025-09-10 02:45:10.246239', 'step': 16226, 'epoch': 3} {'type': 'loss', 'content': 0.11934427917003632, 'timestamp': '2025-09-10 02:45:10.250802', 'step': 16227, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 128], 'flops': 3797092544000}, 'timestamp': '2025-09-10 02:45:10.287451', 'step': 16227, 'epoch': 3} {'type': 'loss', 'content': 0.06862130761146545, 'timestamp': '2025-09-10 02:45:10.314348', 'step': 16228, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 3322488817984}, 'timestamp': '2025-09-10 02:45:10.354279', 'step': 16228, 'epoch': 3} {'type': 'loss', 'content': 0.06690710783004761, 'timestamp': '2025-09-10 02:45:10.357740', 'step': 16229, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 128], 'flops': 3797092544000}, 'timestamp': '2025-09-10 02:45:10.394663', 'step': 16229, 'epoch': 3} {'type': 'loss', 'content': 0.049034301191568375, 'timestamp': '2025-09-10 02:45:10.400249', 'step': 16230, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 2847885091968}, 'timestamp': '2025-09-10 02:45:10.434177', 'step': 16230, 'epoch': 3} {'type': 'loss', 'content': 0.06053398922085762, 'timestamp': '2025-09-10 02:45:10.440384', 'step': 16231, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 160], 'flops': 4746299996032}, 'timestamp': '2025-09-10 02:45:10.483620', 'step': 16231, 'epoch': 3} {'type': 'loss', 'content': 0.055692724883556366, 'timestamp': '2025-09-10 02:45:10.510504', 'step': 16232, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 3322488817984}, 'timestamp': '2025-09-10 02:45:10.550105', 'step': 16232, 'epoch': 3} {'type': 'loss', 'content': 0.10682904720306396, 'timestamp': '2025-09-10 02:45:10.554912', 'step': 16233, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 144], 'flops': 4271696270016}, 'timestamp': '2025-09-10 02:45:10.604760', 'step': 16233, 'epoch': 3} {'type': 'loss', 'content': 0.06271696090698242, 'timestamp': '2025-09-10 02:45:10.614265', 'step': 16234, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 144], 'flops': 4271696270016}, 'timestamp': '2025-09-10 02:45:10.658300', 'step': 16234, 'epoch': 3} {'type': 'loss', 'content': 0.031142136082053185, 'timestamp': '2025-09-10 02:45:10.662697', 'step': 16235, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 3322488817984}, 'timestamp': '2025-09-10 02:45:10.704897', 'step': 16235, 'epoch': 3} {'type': 'loss', 'content': 0.12074515223503113, 'timestamp': '2025-09-10 02:45:10.737474', 'step': 16236, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 3322488817984}, 'timestamp': '2025-09-10 02:45:10.783175', 'step': 16236, 'epoch': 3} {'type': 'loss', 'content': 0.04461240768432617, 'timestamp': '2025-09-10 02:45:10.788942', 'step': 16237, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 3322488817984}, 'timestamp': '2025-09-10 02:45:10.824074', 'step': 16237, 'epoch': 3} {'type': 'loss', 'content': 0.06392204016447067, 'timestamp': '2025-09-10 02:45:10.831045', 'step': 16238, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 144], 'flops': 4271696270016}, 'timestamp': '2025-09-10 02:45:10.871204', 'step': 16238, 'epoch': 3} {'type': 'loss', 'content': 0.03531834855675697, 'timestamp': '2025-09-10 02:45:10.888604', 'step': 16239, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 3322488817984}, 'timestamp': '2025-09-10 02:45:10.930367', 'step': 16239, 'epoch': 3} {'type': 'loss', 'content': 0.030548755079507828, 'timestamp': '2025-09-10 02:45:10.955951', 'step': 16240, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 128], 'flops': 3797092544000}, 'timestamp': '2025-09-10 02:45:10.992218', 'step': 16240, 'epoch': 3} {'type': 'loss', 'content': 0.10425736010074615, 'timestamp': '2025-09-10 02:45:10.996636', 'step': 16241, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 176], 'flops': 5220903722048}, 'timestamp': '2025-09-10 02:45:11.047601', 'step': 16241, 'epoch': 3} {'type': 'loss', 'content': 0.05562947317957878, 'timestamp': '2025-09-10 02:45:11.058597', 'step': 16242, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 3322488817984}, 'timestamp': '2025-09-10 02:45:11.092897', 'step': 16242, 'epoch': 3} {'type': 'loss', 'content': 0.07703038305044174, 'timestamp': '2025-09-10 02:45:11.097455', 'step': 16243, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 144], 'flops': 4271696270016}, 'timestamp': '2025-09-10 02:45:11.142552', 'step': 16243, 'epoch': 3} {'type': 'loss', 'content': 0.06704328954219818, 'timestamp': '2025-09-10 02:45:11.170793', 'step': 16244, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 128], 'flops': 3797092544000}, 'timestamp': '2025-09-10 02:45:11.214884', 'step': 16244, 'epoch': 3} {'type': 'loss', 'content': 0.08211351186037064, 'timestamp': '2025-09-10 02:45:11.220652', 'step': 16245, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 2847885091968}, 'timestamp': '2025-09-10 02:45:11.255895', 'step': 16245, 'epoch': 3} {'type': 'loss', 'content': 0.05676751583814621, 'timestamp': '2025-09-10 02:45:11.264436', 'step': 16246, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 3322488817984}, 'timestamp': '2025-09-10 02:45:11.307357', 'step': 16246, 'epoch': 3} {'type': 'loss', 'content': 0.10658462345600128, 'timestamp': '2025-09-10 02:45:11.314233', 'step': 16247, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 2847885091968}, 'timestamp': '2025-09-10 02:45:11.351204', 'step': 16247, 'epoch': 3} {'type': 'loss', 'content': 0.05260062962770462, 'timestamp': '2025-09-10 02:45:11.378674', 'step': 16248, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 2847885091968}, 'timestamp': '2025-09-10 02:45:11.421555', 'step': 16248, 'epoch': 3} {'type': 'loss', 'content': 0.07143696397542953, 'timestamp': '2025-09-10 02:45:11.427780', 'step': 16249, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 128], 'flops': 3797092544000}, 'timestamp': '2025-09-10 02:45:11.468032', 'step': 16249, 'epoch': 3} {'type': 'loss', 'content': 0.027698110789060593, 'timestamp': '2025-09-10 02:45:11.478886', 'step': 16250, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 2847885091968}, 'timestamp': '2025-09-10 02:45:11.517528', 'step': 16250, 'epoch': 3} {'type': 'loss', 'content': 0.0715957060456276, 'timestamp': '2025-09-10 02:45:11.530457', 'step': 16251, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 128], 'flops': 3797092544000}, 'timestamp': '2025-09-10 02:45:11.569861', 'step': 16251, 'epoch': 3} {'type': 'loss', 'content': 0.13176822662353516, 'timestamp': '2025-09-10 02:45:11.596507', 'step': 16252, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 3322488817984}, 'timestamp': '2025-09-10 02:45:11.635926', 'step': 16252, 'epoch': 3} {'type': 'loss', 'content': 0.09593719244003296, 'timestamp': '2025-09-10 02:45:11.639677', 'step': 16253, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 80], 'flops': 2373281365952}, 'timestamp': '2025-09-10 02:45:11.680619', 'step': 16253, 'epoch': 3} {'type': 'loss', 'content': 0.040243133902549744, 'timestamp': '2025-09-10 02:45:11.688327', 'step': 16254, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 2847885091968}, 'timestamp': '2025-09-10 02:45:11.727205', 'step': 16254, 'epoch': 3} {'type': 'loss', 'content': 0.07562693953514099, 'timestamp': '2025-09-10 02:45:11.733355', 'step': 16255, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 3322488817984}, 'timestamp': '2025-09-10 02:45:11.769738', 'step': 16255, 'epoch': 3} {'type': 'loss', 'content': 0.03936674818396568, 'timestamp': '2025-09-10 02:45:11.797023', 'step': 16256, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 144], 'flops': 4271696270016}, 'timestamp': '2025-09-10 02:45:11.839197', 'step': 16256, 'epoch': 3} {'type': 'loss', 'content': 0.041642047464847565, 'timestamp': '2025-09-10 02:45:11.843460', 'step': 16257, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 2847885091968}, 'timestamp': '2025-09-10 02:45:11.893008', 'step': 16257, 'epoch': 3} {'type': 'loss', 'content': 0.038035403937101364, 'timestamp': '2025-09-10 02:45:11.895470', 'step': 16258, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 128], 'flops': 3797092544000}, 'timestamp': '2025-09-10 02:45:11.925684', 'step': 16258, 'epoch': 3} {'type': 'loss', 'content': 0.07856753468513489, 'timestamp': '2025-09-10 02:45:11.928124', 'step': 16259, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 2847885091968}, 'timestamp': '2025-09-10 02:45:11.958502', 'step': 16259, 'epoch': 3} {'type': 'loss', 'content': 0.03931426256895065, 'timestamp': '2025-09-10 02:45:11.982195', 'step': 16260, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 3322488817984}, 'timestamp': '2025-09-10 02:45:12.012974', 'step': 16260, 'epoch': 3} {'type': 'loss', 'content': 0.07538620382547379, 'timestamp': '2025-09-10 02:45:12.015357', 'step': 16261, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 3322488817984}, 'timestamp': '2025-09-10 02:45:12.046420', 'step': 16261, 'epoch': 3} {'type': 'loss', 'content': 0.04941689968109131, 'timestamp': '2025-09-10 02:45:12.049174', 'step': 16262, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 128], 'flops': 3797092544000}, 'timestamp': '2025-09-10 02:45:12.080384', 'step': 16262, 'epoch': 3} {'type': 'loss', 'content': 0.031245088204741478, 'timestamp': '2025-09-10 02:45:12.083101', 'step': 16263, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 2847885091968}, 'timestamp': '2025-09-10 02:45:12.114252', 'step': 16263, 'epoch': 3} {'type': 'loss', 'content': 0.04586334526538849, 'timestamp': '2025-09-10 02:45:12.140536', 'step': 16264, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 2847885091968}, 'timestamp': '2025-09-10 02:45:12.171453', 'step': 16264, 'epoch': 3} {'type': 'loss', 'content': 0.13700103759765625, 'timestamp': '2025-09-10 02:45:12.174013', 'step': 16265, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 3322488817984}, 'timestamp': '2025-09-10 02:45:12.204230', 'step': 16265, 'epoch': 3} {'type': 'loss', 'content': 0.03386921063065529, 'timestamp': '2025-09-10 02:45:12.207147', 'step': 16266, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 128], 'flops': 3797092544000}, 'timestamp': '2025-09-10 02:45:12.237469', 'step': 16266, 'epoch': 3} {'type': 'loss', 'content': 0.1105753630399704, 'timestamp': '2025-09-10 02:45:12.240144', 'step': 16267, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 3322488817984}, 'timestamp': '2025-09-10 02:45:12.270233', 'step': 16267, 'epoch': 3} {'type': 'loss', 'content': 0.1287592649459839, 'timestamp': '2025-09-10 02:45:12.293915', 'step': 16268, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 3322488817984}, 'timestamp': '2025-09-10 02:45:12.325294', 'step': 16268, 'epoch': 3} {'type': 'loss', 'content': 0.11740393191576004, 'timestamp': '2025-09-10 02:45:12.327925', 'step': 16269, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 144], 'flops': 4271696270016}, 'timestamp': '2025-09-10 02:45:12.358841', 'step': 16269, 'epoch': 3} {'type': 'loss', 'content': 0.05636836215853691, 'timestamp': '2025-09-10 02:45:12.361252', 'step': 16270, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 3322488817984}, 'timestamp': '2025-09-10 02:45:12.394320', 'step': 16270, 'epoch': 3} {'type': 'loss', 'content': 0.08602647483348846, 'timestamp': '2025-09-10 02:45:12.396817', 'step': 16271, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 128], 'flops': 3797092544000}, 'timestamp': '2025-09-10 02:45:12.431597', 'step': 16271, 'epoch': 3} {'type': 'loss', 'content': 0.042722370475530624, 'timestamp': '2025-09-10 02:45:12.455584', 'step': 16272, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 3322488817984}, 'timestamp': '2025-09-10 02:45:12.486408', 'step': 16272, 'epoch': 3} {'type': 'loss', 'content': 0.06682796776294708, 'timestamp': '2025-09-10 02:45:12.489181', 'step': 16273, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 3322488817984}, 'timestamp': '2025-09-10 02:45:12.520972', 'step': 16273, 'epoch': 3} {'type': 'loss', 'content': 0.08654864132404327, 'timestamp': '2025-09-10 02:45:12.523358', 'step': 16274, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 3322488817984}, 'timestamp': '2025-09-10 02:45:12.553634', 'step': 16274, 'epoch': 3} {'type': 'loss', 'content': 0.08944040536880493, 'timestamp': '2025-09-10 02:45:12.555891', 'step': 16275, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 2847885091968}, 'timestamp': '2025-09-10 02:45:12.587475', 'step': 16275, 'epoch': 3} {'type': 'loss', 'content': 0.05668390914797783, 'timestamp': '2025-09-10 02:45:12.611256', 'step': 16276, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 144], 'flops': 4271696270016}, 'timestamp': '2025-09-10 02:45:12.642174', 'step': 16276, 'epoch': 3} {'type': 'loss', 'content': 0.09733856469392776, 'timestamp': '2025-09-10 02:45:12.644367', 'step': 16277, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 3322488817984}, 'timestamp': '2025-09-10 02:45:12.675616', 'step': 16277, 'epoch': 3} {'type': 'loss', 'content': 0.1168607696890831, 'timestamp': '2025-09-10 02:45:12.678843', 'step': 16278, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 144], 'flops': 4271696270016}, 'timestamp': '2025-09-10 02:45:12.709372', 'step': 16278, 'epoch': 3} {'type': 'loss', 'content': 0.15161272883415222, 'timestamp': '2025-09-10 02:45:12.715272', 'step': 16279, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 2847885091968}, 'timestamp': '2025-09-10 02:45:12.752761', 'step': 16279, 'epoch': 3} {'type': 'loss', 'content': 0.09058599174022675, 'timestamp': '2025-09-10 02:45:12.776865', 'step': 16280, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 3322488817984}, 'timestamp': '2025-09-10 02:45:12.808210', 'step': 16280, 'epoch': 3} {'type': 'loss', 'content': 0.06612160056829453, 'timestamp': '2025-09-10 02:45:12.810427', 'step': 16281, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 2847885091968}, 'timestamp': '2025-09-10 02:45:12.841159', 'step': 16281, 'epoch': 3} {'type': 'loss', 'content': 0.053323209285736084, 'timestamp': '2025-09-10 02:45:12.843836', 'step': 16282, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 160], 'flops': 4746299996032}, 'timestamp': '2025-09-10 02:45:12.875822', 'step': 16282, 'epoch': 3} {'type': 'loss', 'content': 0.07240589708089828, 'timestamp': '2025-09-10 02:45:12.878515', 'step': 16283, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 3322488817984}, 'timestamp': '2025-09-10 02:45:12.908820', 'step': 16283, 'epoch': 3} {'type': 'loss', 'content': 0.10308042913675308, 'timestamp': '2025-09-10 02:45:12.932929', 'step': 16284, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 2847885091968}, 'timestamp': '2025-09-10 02:45:12.964234', 'step': 16284, 'epoch': 3} {'type': 'loss', 'content': 0.025992069393396378, 'timestamp': '2025-09-10 02:45:12.967753', 'step': 16285, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 160], 'flops': 4746299996032}, 'timestamp': '2025-09-10 02:45:13.001554', 'step': 16285, 'epoch': 3} {'type': 'loss', 'content': 0.08294703811407089, 'timestamp': '2025-09-10 02:45:13.004440', 'step': 16286, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 3322488817984}, 'timestamp': '2025-09-10 02:45:13.034786', 'step': 16286, 'epoch': 3} {'type': 'loss', 'content': 0.0905020534992218, 'timestamp': '2025-09-10 02:45:13.037062', 'step': 16287, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 2847885091968}, 'timestamp': '2025-09-10 02:45:13.066757', 'step': 16287, 'epoch': 3} {'type': 'loss', 'content': 0.05522450804710388, 'timestamp': '2025-09-10 02:45:13.090836', 'step': 16288, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 3322488817984}, 'timestamp': '2025-09-10 02:45:13.121328', 'step': 16288, 'epoch': 3} {'type': 'loss', 'content': 0.07934367656707764, 'timestamp': '2025-09-10 02:45:13.123740', 'step': 16289, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 128], 'flops': 3797092544000}, 'timestamp': '2025-09-10 02:45:13.153468', 'step': 16289, 'epoch': 3} {'type': 'loss', 'content': 0.06198595464229584, 'timestamp': '2025-09-10 02:45:13.155415', 'step': 16290, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 2847885091968}, 'timestamp': '2025-09-10 02:45:13.185457', 'step': 16290, 'epoch': 3} {'type': 'loss', 'content': 0.0637565329670906, 'timestamp': '2025-09-10 02:45:13.189301', 'step': 16291, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 128], 'flops': 3797092544000}, 'timestamp': '2025-09-10 02:45:13.220283', 'step': 16291, 'epoch': 3} {'type': 'loss', 'content': 0.08093947172164917, 'timestamp': '2025-09-10 02:45:13.244361', 'step': 16292, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 3322488817984}, 'timestamp': '2025-09-10 02:45:13.279064', 'step': 16292, 'epoch': 3} {'type': 'loss', 'content': 0.022725850343704224, 'timestamp': '2025-09-10 02:45:13.287403', 'step': 16293, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 2847885091968}, 'timestamp': '2025-09-10 02:45:13.323669', 'step': 16293, 'epoch': 3} {'type': 'loss', 'content': 0.07007746398448944, 'timestamp': '2025-09-10 02:45:13.326254', 'step': 16294, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 160], 'flops': 4746299996032}, 'timestamp': '2025-09-10 02:45:13.356859', 'step': 16294, 'epoch': 3} {'type': 'loss', 'content': 0.03495801240205765, 'timestamp': '2025-09-10 02:45:13.361609', 'step': 16295, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 3322488817984}, 'timestamp': '2025-09-10 02:45:13.391928', 'step': 16295, 'epoch': 3} {'type': 'loss', 'content': 0.0931067168712616, 'timestamp': '2025-09-10 02:45:13.415746', 'step': 16296, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 160], 'flops': 4746299996032}, 'timestamp': '2025-09-10 02:45:13.471689', 'step': 16296, 'epoch': 3} {'type': 'loss', 'content': 0.061990395188331604, 'timestamp': '2025-09-10 02:45:13.475060', 'step': 16297, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 3322488817984}, 'timestamp': '2025-09-10 02:45:13.509517', 'step': 16297, 'epoch': 3} {'type': 'loss', 'content': 0.1433066427707672, 'timestamp': '2025-09-10 02:45:13.512213', 'step': 16298, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 128], 'flops': 3797092544000}, 'timestamp': '2025-09-10 02:45:13.555172', 'step': 16298, 'epoch': 3} {'type': 'loss', 'content': 0.08184058219194412, 'timestamp': '2025-09-10 02:45:13.557994', 'step': 16299, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 3322488817984}, 'timestamp': '2025-09-10 02:45:13.591802', 'step': 16299, 'epoch': 3} {'type': 'loss', 'content': 0.023253971710801125, 'timestamp': '2025-09-10 02:45:13.622395', 'step': 16300, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 144], 'flops': 4271696270016}, 'timestamp': '2025-09-10 02:45:13.653917', 'step': 16300, 'epoch': 3} {'type': 'loss', 'content': 0.07357665151357651, 'timestamp': '2025-09-10 02:45:13.656923', 'step': 16301, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 128], 'flops': 3797092544000}, 'timestamp': '2025-09-10 02:45:13.688958', 'step': 16301, 'epoch': 3} {'type': 'loss', 'content': 0.06275679916143417, 'timestamp': '2025-09-10 02:45:13.693003', 'step': 16302, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 3322488817984}, 'timestamp': '2025-09-10 02:45:13.725655', 'step': 16302, 'epoch': 3} {'type': 'loss', 'content': 0.07466866075992584, 'timestamp': '2025-09-10 02:45:13.729384', 'step': 16303, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 3322488817984}, 'timestamp': '2025-09-10 02:45:13.762043', 'step': 16303, 'epoch': 3} {'type': 'loss', 'content': 0.029065871611237526, 'timestamp': '2025-09-10 02:45:13.785982', 'step': 16304, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 2847885091968}, 'timestamp': '2025-09-10 02:45:13.837321', 'step': 16304, 'epoch': 3} {'type': 'loss', 'content': 0.09627249091863632, 'timestamp': '2025-09-10 02:45:13.857600', 'step': 16305, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 144], 'flops': 4271696270016}, 'timestamp': '2025-09-10 02:45:13.890747', 'step': 16305, 'epoch': 3} {'type': 'loss', 'content': 0.1207568496465683, 'timestamp': '2025-09-10 02:45:13.894184', 'step': 16306, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 128], 'flops': 3797092544000}, 'timestamp': '2025-09-10 02:45:13.933097', 'step': 16306, 'epoch': 3} {'type': 'loss', 'content': 0.04419818893074989, 'timestamp': '2025-09-10 02:45:13.936905', 'step': 16307, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 2847885091968}, 'timestamp': '2025-09-10 02:45:13.984218', 'step': 16307, 'epoch': 3} {'type': 'loss', 'content': 0.11192388832569122, 'timestamp': '2025-09-10 02:45:14.008431', 'step': 16308, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 3322488817984}, 'timestamp': '2025-09-10 02:45:14.045891', 'step': 16308, 'epoch': 3} {'type': 'loss', 'content': 0.09007396548986435, 'timestamp': '2025-09-10 02:45:14.061780', 'step': 16309, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 3322488817984}, 'timestamp': '2025-09-10 02:45:14.098699', 'step': 16309, 'epoch': 3} {'type': 'loss', 'content': 0.07486050575971603, 'timestamp': '2025-09-10 02:45:14.101319', 'step': 16310, 'epoch': 3} {'type': 'flops', 'content': [{'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1582003754624}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1898404492032}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1898404492032}, {'type': 'perplexity', 'in_batch_dim': [8, 144], 'batch_size': 8, 'flops': 2847606704256}, {'type': 'perplexity', 'in_batch_dim': [8, 64], 'batch_size': 8, 'flops': 1265603017216}, {'type': 'perplexity', 'in_batch_dim': [8, 112], 'batch_size': 8, 'flops': 2214805229440}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1898404492032}, {'type': 'perplexity', 'in_batch_dim': [8, 112], 'batch_size': 8, 'flops': 2214805229440}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1898404492032}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1898404492032}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1898404492032}, {'type': 'perplexity', 'in_batch_dim': [8, 112], 'batch_size': 8, 'flops': 2214805229440}, {'type': 'perplexity', 'in_batch_dim': [8, 112], 'batch_size': 8, 'flops': 2214805229440}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1582003754624}, {'type': 'perplexity', 'in_batch_dim': [8, 144], 'batch_size': 8, 'flops': 2847606704256}, {'type': 'perplexity', 'in_batch_dim': [8, 112], 'batch_size': 8, 'flops': 2214805229440}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1582003754624}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1898404492032}, {'type': 'perplexity', 'in_batch_dim': [8, 64], 'batch_size': 8, 'flops': 1265603017216}, {'type': 'perplexity', 'in_batch_dim': [8, 64], 'batch_size': 8, 'flops': 1265603017216}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1898404492032}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1582003754624}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1582003754624}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1582003754624}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1582003754624}, {'type': 'perplexity', 'in_batch_dim': [8, 64], 'batch_size': 8, 'flops': 1265603017216}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1898404492032}, {'type': 'perplexity', 'in_batch_dim': [8, 64], 'batch_size': 8, 'flops': 1265603017216}, {'type': 'perplexity', 'in_batch_dim': [8, 64], 'batch_size': 8, 'flops': 1265603017216}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1582003754624}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1582003754624}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1582003754624}, {'type': 'perplexity', 'in_batch_dim': [8, 112], 'batch_size': 8, 'flops': 2214805229440}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1582003754624}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1582003754624}, {'type': 'perplexity', 'in_batch_dim': [8, 160], 'batch_size': 8, 'flops': 3164007441664}, {'type': 'perplexity', 'in_batch_dim': [8, 64], 'batch_size': 8, 'flops': 1265603017216}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1898404492032}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1898404492032}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1898404492032}, {'type': 'perplexity', 'in_batch_dim': [8, 64], 'batch_size': 8, 'flops': 1265603017216}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1582003754624}, {'type': 'perplexity', 'in_batch_dim': [8, 64], 'batch_size': 8, 'flops': 1265603017216}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1582003754624}, {'type': 'perplexity', 'in_batch_dim': [8, 64], 'batch_size': 8, 'flops': 1265603017216}, {'type': 'perplexity', 'in_batch_dim': [8, 112], 'batch_size': 8, 'flops': 2214805229440}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1582003754624}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1898404492032}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1898404492032}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1898404492032}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1898404492032}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1898404492032}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1898404492032}, {'type': 'perplexity', 'in_batch_dim': [8, 64], 'batch_size': 8, 'flops': 1265603017216}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1898404492032}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1582003754624}, {'type': 'perplexity', 'in_batch_dim': [8, 64], 'batch_size': 8, 'flops': 1265603017216}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1898404492032}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1898404492032}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1898404492032}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1898404492032}, {'type': 'perplexity', 'in_batch_dim': [8, 128], 'batch_size': 8, 'flops': 2531205966848}, {'type': 'perplexity', 'in_batch_dim': [8, 112], 'batch_size': 8, 'flops': 2214805229440}, {'type': 'perplexity', 'in_batch_dim': [8, 64], 'batch_size': 8, 'flops': 1265603017216}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1582003754624}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1898404492032}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1582003754624}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1898404492032}, {'type': 'perplexity', 'in_batch_dim': [8, 64], 'batch_size': 8, 'flops': 1265603017216}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1898404492032}, {'type': 'perplexity', 'in_batch_dim': [8, 112], 'batch_size': 8, 'flops': 2214805229440}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1898404492032}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1582003754624}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1582003754624}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1582003754624}, {'type': 'perplexity', 'in_batch_dim': [8, 64], 'batch_size': 8, 'flops': 1265603017216}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1582003754624}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1898404492032}, {'type': 'perplexity', 'in_batch_dim': [8, 64], 'batch_size': 8, 'flops': 1265603017216}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1582003754624}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1898404492032}, {'type': 'perplexity', 'in_batch_dim': [8, 64], 'batch_size': 8, 'flops': 1265603017216}, {'type': 'perplexity', 'in_batch_dim': [8, 112], 'batch_size': 8, 'flops': 2214805229440}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1582003754624}, {'type': 'perplexity', 'in_batch_dim': [8, 144], 'batch_size': 8, 'flops': 2847606704256}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1582003754624}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1582003754624}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1582003754624}, {'type': 'perplexity', 'in_batch_dim': [8, 64], 'batch_size': 8, 'flops': 1265603017216}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1898404492032}, {'type': 'perplexity', 'in_batch_dim': [8, 112], 'batch_size': 8, 'flops': 2214805229440}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1898404492032}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1582003754624}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1582003754624}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1582003754624}, {'type': 'perplexity', 'in_batch_dim': [8, 112], 'batch_size': 8, 'flops': 2214805229440}, {'type': 'perplexity', 'in_batch_dim': [8, 64], 'batch_size': 8, 'flops': 1265603017216}, {'type': 'perplexity', 'in_batch_dim': [8, 112], 'batch_size': 8, 'flops': 2214805229440}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1582003754624}, {'type': 'perplexity', 'in_batch_dim': [8, 64], 'batch_size': 8, 'flops': 1265603017216}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1582003754624}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1582003754624}, {'type': 'perplexity', 'in_batch_dim': [8, 64], 'batch_size': 8, 'flops': 1265603017216}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1582003754624}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1582003754624}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1582003754624}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1898404492032}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1582003754624}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1582003754624}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1898404492032}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1898404492032}, {'type': 'perplexity', 'in_batch_dim': [8, 112], 'batch_size': 8, 'flops': 2214805229440}, {'type': 'perplexity', 'in_batch_dim': [8, 64], 'batch_size': 8, 'flops': 1265603017216}, {'type': 'perplexity', 'in_batch_dim': [8, 112], 'batch_size': 8, 'flops': 2214805229440}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1898404492032}, {'type': 'perplexity', 'in_batch_dim': [8, 112], 'batch_size': 8, 'flops': 2214805229440}, {'type': 'perplexity', 'in_batch_dim': [8, 128], 'batch_size': 8, 'flops': 2531205966848}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1582003754624}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1898404492032}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1898404492032}, {'type': 'perplexity', 'in_batch_dim': [8, 192], 'batch_size': 8, 'flops': 3796808916480}, {'type': 'perplexity', 'in_batch_dim': [8, 64], 'batch_size': 8, 'flops': 1265603017216}, {'type': 'perplexity', 'in_batch_dim': [8, 144], 'batch_size': 8, 'flops': 2847606704256}, {'type': 'perplexity', 'in_batch_dim': [8, 64], 'batch_size': 8, 'flops': 1265603017216}, {'type': 'perplexity', 'in_batch_dim': [8, 144], 'batch_size': 8, 'flops': 2847606704256}, {'type': 'perplexity', 'in_batch_dim': [8, 64], 'batch_size': 8, 'flops': 1265603017216}, {'type': 'perplexity', 'in_batch_dim': [8, 64], 'batch_size': 8, 'flops': 1265603017216}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1898404492032}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1898404492032}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1582003754624}, {'type': 'perplexity', 'in_batch_dim': [8, 128], 'batch_size': 8, 'flops': 2531205966848}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1898404492032}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1898404492032}, {'type': 'perplexity', 'in_batch_dim': [8, 112], 'batch_size': 8, 'flops': 2214805229440}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1582003754624}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1582003754624}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1898404492032}, {'type': 'perplexity', 'in_batch_dim': [8, 64], 'batch_size': 8, 'flops': 1265603017216}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1898404492032}, {'type': 'perplexity', 'in_batch_dim': [8, 112], 'batch_size': 8, 'flops': 2214805229440}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1898404492032}, {'type': 'perplexity', 'in_batch_dim': [8, 64], 'batch_size': 8, 'flops': 1265603017216}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1582003754624}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1898404492032}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1898404492032}, {'type': 'perplexity', 'in_batch_dim': [8, 64], 'batch_size': 8, 'flops': 1265603017216}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1898404492032}, {'type': 'perplexity', 'in_batch_dim': [8, 64], 'batch_size': 8, 'flops': 1265603017216}, {'type': 'perplexity', 'in_batch_dim': [8, 112], 'batch_size': 8, 'flops': 2214805229440}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1582003754624}, {'type': 'perplexity', 'in_batch_dim': [8, 128], 'batch_size': 8, 'flops': 2531205966848}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1582003754624}, {'type': 'perplexity', 'in_batch_dim': [8, 112], 'batch_size': 8, 'flops': 2214805229440}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1898404492032}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1582003754624}, {'type': 'perplexity', 'in_batch_dim': [8, 112], 'batch_size': 8, 'flops': 2214805229440}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1582003754624}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1898404492032}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1898404492032}, {'type': 'perplexity', 'in_batch_dim': [8, 128], 'batch_size': 8, 'flops': 2531205966848}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1898404492032}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1898404492032}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1582003754624}, {'type': 'perplexity', 'in_batch_dim': [8, 112], 'batch_size': 8, 'flops': 2214805229440}, {'type': 'perplexity', 'in_batch_dim': [8, 128], 'batch_size': 8, 'flops': 2531205966848}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1898404492032}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1582003754624}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1582003754624}, {'type': 'perplexity', 'in_batch_dim': [8, 128], 'batch_size': 8, 'flops': 2531205966848}, {'type': 'perplexity', 'in_batch_dim': [8, 112], 'batch_size': 8, 'flops': 2214805229440}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1582003754624}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1582003754624}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1898404492032}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1582003754624}, {'type': 'perplexity', 'in_batch_dim': [8, 64], 'batch_size': 8, 'flops': 1265603017216}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1582003754624}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1582003754624}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1582003754624}, {'type': 'perplexity', 'in_batch_dim': [8, 112], 'batch_size': 8, 'flops': 2214805229440}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1582003754624}, {'type': 'perplexity', 'in_batch_dim': [8, 64], 'batch_size': 8, 'flops': 1265603017216}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1582003754624}, {'type': 'perplexity', 'in_batch_dim': [8, 128], 'batch_size': 8, 'flops': 2531205966848}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1898404492032}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1898404492032}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1582003754624}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1898404492032}, {'type': 'perplexity', 'in_batch_dim': [8, 64], 'batch_size': 8, 'flops': 1265603017216}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1898404492032}, {'type': 'perplexity', 'in_batch_dim': [8, 112], 'batch_size': 8, 'flops': 2214805229440}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1898404492032}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1582003754624}, {'type': 'perplexity', 'in_batch_dim': [8, 112], 'batch_size': 8, 'flops': 2214805229440}, {'type': 'perplexity', 'in_batch_dim': [8, 112], 'batch_size': 8, 'flops': 2214805229440}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1898404492032}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1898404492032}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1898404492032}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1898404492032}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1582003754624}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1582003754624}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1582003754624}, {'type': 'perplexity', 'in_batch_dim': [8, 112], 'batch_size': 8, 'flops': 2214805229440}, {'type': 'perplexity', 'in_batch_dim': [8, 64], 'batch_size': 8, 'flops': 1265603017216}, {'type': 'perplexity', 'in_batch_dim': [8, 144], 'batch_size': 8, 'flops': 2847606704256}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1898404492032}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1898404492032}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1898404492032}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1898404492032}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1898404492032}, {'type': 'perplexity', 'in_batch_dim': [8, 112], 'batch_size': 8, 'flops': 2214805229440}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1582003754624}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1582003754624}, {'type': 'perplexity', 'in_batch_dim': [8, 64], 'batch_size': 8, 'flops': 1265603017216}, {'type': 'perplexity', 'in_batch_dim': [8, 112], 'batch_size': 8, 'flops': 2214805229440}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1582003754624}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1898404492032}, {'type': 'perplexity', 'in_batch_dim': [8, 64], 'batch_size': 8, 'flops': 1265603017216}, {'type': 'perplexity', 'in_batch_dim': [8, 112], 'batch_size': 8, 'flops': 2214805229440}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1898404492032}, {'type': 'perplexity', 'in_batch_dim': [8, 64], 'batch_size': 8, 'flops': 1265603017216}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1582003754624}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1582003754624}, {'type': 'perplexity', 'in_batch_dim': [8, 64], 'batch_size': 8, 'flops': 1265603017216}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1898404492032}, {'type': 'perplexity', 'in_batch_dim': [8, 64], 'batch_size': 8, 'flops': 1265603017216}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1582003754624}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1898404492032}, {'type': 'perplexity', 'in_batch_dim': [8, 112], 'batch_size': 8, 'flops': 2214805229440}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1582003754624}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1898404492032}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1582003754624}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1898404492032}, {'type': 'perplexity', 'in_batch_dim': [8, 112], 'batch_size': 8, 'flops': 2214805229440}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1898404492032}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1898404492032}, {'type': 'perplexity', 'in_batch_dim': [8, 64], 'batch_size': 8, 'flops': 1265603017216}, {'type': 'perplexity', 'in_batch_dim': [8, 64], 'batch_size': 8, 'flops': 1265603017216}, {'type': 'perplexity', 'in_batch_dim': [8, 128], 'batch_size': 8, 'flops': 2531205966848}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1582003754624}, {'type': 'perplexity', 'in_batch_dim': [8, 64], 'batch_size': 8, 'flops': 1265603017216}, {'type': 'perplexity', 'in_batch_dim': [8, 112], 'batch_size': 8, 'flops': 2214805229440}, {'type': 'perplexity', 'in_batch_dim': [8, 112], 'batch_size': 8, 'flops': 2214805229440}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1898404492032}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1582003754624}, {'type': 'perplexity', 'in_batch_dim': [8, 128], 'batch_size': 8, 'flops': 2531205966848}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1898404492032}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1582003754624}, {'type': 'perplexity', 'in_batch_dim': [8, 112], 'batch_size': 8, 'flops': 2214805229440}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1898404492032}, {'type': 'perplexity', 'in_batch_dim': [8, 64], 'batch_size': 8, 'flops': 1265603017216}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1582003754624}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1898404492032}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1898404492032}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1582003754624}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1582003754624}, {'type': 'perplexity', 'in_batch_dim': [8, 64], 'batch_size': 8, 'flops': 1265603017216}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1898404492032}, {'type': 'perplexity', 'in_batch_dim': [8, 112], 'batch_size': 8, 'flops': 2214805229440}, {'type': 'perplexity', 'in_batch_dim': [8, 64], 'batch_size': 8, 'flops': 1265603017216}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1898404492032}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1582003754624}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1582003754624}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1898404492032}, {'type': 'perplexity', 'in_batch_dim': [8, 112], 'batch_size': 8, 'flops': 2214805229440}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1898404492032}, {'type': 'perplexity', 'in_batch_dim': [8, 112], 'batch_size': 8, 'flops': 2214805229440}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1582003754624}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1582003754624}, {'type': 'perplexity', 'in_batch_dim': [8, 64], 'batch_size': 8, 'flops': 1265603017216}, {'type': 'perplexity', 'in_batch_dim': [8, 112], 'batch_size': 8, 'flops': 2214805229440}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1582003754624}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1898404492032}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1582003754624}, {'type': 'perplexity', 'in_batch_dim': [8, 112], 'batch_size': 8, 'flops': 2214805229440}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1898404492032}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1898404492032}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1582003754624}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1898404492032}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1582003754624}, {'type': 'perplexity', 'in_batch_dim': [8, 112], 'batch_size': 8, 'flops': 2214805229440}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1582003754624}, {'type': 'perplexity', 'in_batch_dim': [8, 64], 'batch_size': 8, 'flops': 1265603017216}, {'type': 'perplexity', 'in_batch_dim': [8, 64], 'batch_size': 8, 'flops': 1265603017216}, {'type': 'perplexity', 'in_batch_dim': [8, 48], 'batch_size': 8, 'flops': 949202279808}, {'type': 'perplexity', 'in_batch_dim': [8, 112], 'batch_size': 8, 'flops': 2214805229440}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1582003754624}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1898404492032}, {'type': 'perplexity', 'in_batch_dim': [8, 128], 'batch_size': 8, 'flops': 2531205966848}, {'type': 'perplexity', 'in_batch_dim': [8, 112], 'batch_size': 8, 'flops': 2214805229440}, {'type': 'perplexity', 'in_batch_dim': [8, 128], 'batch_size': 8, 'flops': 2531205966848}, {'type': 'perplexity', 'in_batch_dim': [8, 112], 'batch_size': 8, 'flops': 2214805229440}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1582003754624}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1898404492032}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1582003754624}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1582003754624}, {'type': 'perplexity', 'in_batch_dim': [8, 112], 'batch_size': 8, 'flops': 2214805229440}, {'type': 'perplexity', 'in_batch_dim': [8, 112], 'batch_size': 8, 'flops': 2214805229440}, {'type': 'perplexity', 'in_batch_dim': [8, 64], 'batch_size': 8, 'flops': 1265603017216}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1898404492032}, {'type': 'perplexity', 'in_batch_dim': [8, 112], 'batch_size': 8, 'flops': 2214805229440}, {'type': 'perplexity', 'in_batch_dim': [8, 64], 'batch_size': 8, 'flops': 1265603017216}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1898404492032}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1582003754624}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1898404492032}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1898404492032}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1898404492032}, {'type': 'perplexity', 'in_batch_dim': [8, 48], 'batch_size': 8, 'flops': 949202279808}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1898404492032}, {'type': 'perplexity', 'in_batch_dim': [8, 112], 'batch_size': 8, 'flops': 2214805229440}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1582003754624}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1898404492032}, {'type': 'perplexity', 'in_batch_dim': [8, 64], 'batch_size': 8, 'flops': 1265603017216}, {'type': 'perplexity', 'in_batch_dim': [8, 112], 'batch_size': 8, 'flops': 2214805229440}, {'type': 'perplexity', 'in_batch_dim': [8, 64], 'batch_size': 8, 'flops': 1265603017216}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1582003754624}, {'type': 'perplexity', 'in_batch_dim': [8, 64], 'batch_size': 8, 'flops': 1265603017216}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1582003754624}, {'type': 'perplexity', 'in_batch_dim': [8, 112], 'batch_size': 8, 'flops': 2214805229440}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1582003754624}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1582003754624}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1582003754624}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1582003754624}, {'type': 'perplexity', 'in_batch_dim': [8, 112], 'batch_size': 8, 'flops': 2214805229440}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1582003754624}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1898404492032}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1898404492032}, {'type': 'perplexity', 'in_batch_dim': [8, 112], 'batch_size': 8, 'flops': 2214805229440}, {'type': 'perplexity', 'in_batch_dim': [8, 144], 'batch_size': 8, 'flops': 2847606704256}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1582003754624}, {'type': 'perplexity', 'in_batch_dim': [8, 64], 'batch_size': 8, 'flops': 1265603017216}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1898404492032}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1898404492032}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1582003754624}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1898404492032}, {'type': 'perplexity', 'in_batch_dim': [8, 128], 'batch_size': 8, 'flops': 2531205966848}, {'type': 'perplexity', 'in_batch_dim': [8, 112], 'batch_size': 8, 'flops': 2214805229440}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1898404492032}, {'type': 'perplexity', 'in_batch_dim': [8, 64], 'batch_size': 8, 'flops': 1265603017216}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1898404492032}, {'type': 'perplexity', 'in_batch_dim': [8, 160], 'batch_size': 8, 'flops': 3164007441664}, {'type': 'perplexity', 'in_batch_dim': [8, 64], 'batch_size': 8, 'flops': 1265603017216}, {'type': 'perplexity', 'in_batch_dim': [8, 112], 'batch_size': 8, 'flops': 2214805229440}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1898404492032}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1898404492032}, {'type': 'perplexity', 'in_batch_dim': [8, 144], 'batch_size': 8, 'flops': 2847606704256}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1582003754624}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1898404492032}, {'type': 'perplexity', 'in_batch_dim': [8, 64], 'batch_size': 8, 'flops': 1265603017216}, {'type': 'perplexity', 'in_batch_dim': [8, 64], 'batch_size': 8, 'flops': 1265603017216}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1898404492032}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1898404492032}, {'type': 'perplexity', 'in_batch_dim': [8, 112], 'batch_size': 8, 'flops': 2214805229440}, {'type': 'perplexity', 'in_batch_dim': [8, 112], 'batch_size': 8, 'flops': 2214805229440}, {'type': 'perplexity', 'in_batch_dim': [8, 128], 'batch_size': 8, 'flops': 2531205966848}, {'type': 'perplexity', 'in_batch_dim': [8, 112], 'batch_size': 8, 'flops': 2214805229440}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1582003754624}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1582003754624}, {'type': 'perplexity', 'in_batch_dim': [8, 112], 'batch_size': 8, 'flops': 2214805229440}, {'type': 'perplexity', 'in_batch_dim': [8, 128], 'batch_size': 8, 'flops': 2531205966848}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1582003754624}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1582003754624}, {'type': 'perplexity', 'in_batch_dim': [8, 64], 'batch_size': 8, 'flops': 1265603017216}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1898404492032}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1582003754624}, {'type': 'perplexity', 'in_batch_dim': [8, 112], 'batch_size': 8, 'flops': 2214805229440}, {'type': 'perplexity', 'in_batch_dim': [8, 64], 'batch_size': 8, 'flops': 1265603017216}, {'type': 'perplexity', 'in_batch_dim': [8, 64], 'batch_size': 8, 'flops': 1265603017216}, {'type': 'perplexity', 'in_batch_dim': [8, 128], 'batch_size': 8, 'flops': 2531205966848}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1898404492032}, {'type': 'perplexity', 'in_batch_dim': [8, 112], 'batch_size': 8, 'flops': 2214805229440}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1582003754624}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1582003754624}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1582003754624}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1582003754624}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1582003754624}, {'type': 'perplexity', 'in_batch_dim': [8, 112], 'batch_size': 8, 'flops': 2214805229440}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1898404492032}, {'type': 'perplexity', 'in_batch_dim': [8, 112], 'batch_size': 8, 'flops': 2214805229440}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1898404492032}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1582003754624}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1582003754624}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1898404492032}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1898404492032}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1582003754624}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1582003754624}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1582003754624}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1582003754624}, {'type': 'perplexity', 'in_batch_dim': [8, 48], 'batch_size': 8, 'flops': 949202279808}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1582003754624}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1898404492032}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1582003754624}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1898404492032}, {'type': 'perplexity', 'in_batch_dim': [8, 112], 'batch_size': 8, 'flops': 2214805229440}, {'type': 'perplexity', 'in_batch_dim': [8, 128], 'batch_size': 8, 'flops': 2531205966848}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1898404492032}, {'type': 'perplexity', 'in_batch_dim': [8, 64], 'batch_size': 8, 'flops': 1265603017216}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1898404492032}, {'type': 'perplexity', 'in_batch_dim': [8, 64], 'batch_size': 8, 'flops': 1265603017216}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1582003754624}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1582003754624}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1582003754624}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1582003754624}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1898404492032}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1582003754624}, {'type': 'perplexity', 'in_batch_dim': [8, 112], 'batch_size': 8, 'flops': 2214805229440}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1898404492032}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1582003754624}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1898404492032}, {'type': 'perplexity', 'in_batch_dim': [8, 64], 'batch_size': 8, 'flops': 1265603017216}, {'type': 'perplexity', 'in_batch_dim': [8, 64], 'batch_size': 8, 'flops': 1265603017216}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1898404492032}, {'type': 'perplexity', 'in_batch_dim': [8, 64], 'batch_size': 8, 'flops': 1265603017216}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1582003754624}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1582003754624}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1582003754624}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1582003754624}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1582003754624}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1582003754624}, {'type': 'perplexity', 'in_batch_dim': [8, 64], 'batch_size': 8, 'flops': 1265603017216}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1582003754624}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1582003754624}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1898404492032}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1898404492032}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1898404492032}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1898404492032}, {'type': 'perplexity', 'in_batch_dim': [8, 128], 'batch_size': 8, 'flops': 2531205966848}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1582003754624}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1898404492032}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1898404492032}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1898404492032}, {'type': 'perplexity', 'in_batch_dim': [8, 64], 'batch_size': 8, 'flops': 1265603017216}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1898404492032}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1898404492032}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1582003754624}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1898404492032}, {'type': 'perplexity', 'in_batch_dim': [8, 112], 'batch_size': 8, 'flops': 2214805229440}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1898404492032}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1582003754624}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1898404492032}, {'type': 'perplexity', 'in_batch_dim': [8, 64], 'batch_size': 8, 'flops': 1265603017216}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1898404492032}, {'type': 'perplexity', 'in_batch_dim': [8, 112], 'batch_size': 8, 'flops': 2214805229440}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1898404492032}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1898404492032}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1582003754624}, {'type': 'perplexity', 'in_batch_dim': [8, 64], 'batch_size': 8, 'flops': 1265603017216}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1582003754624}, {'type': 'perplexity', 'in_batch_dim': [8, 128], 'batch_size': 8, 'flops': 2531205966848}, {'type': 'perplexity', 'in_batch_dim': [8, 64], 'batch_size': 8, 'flops': 1265603017216}, {'type': 'perplexity', 'in_batch_dim': [8, 128], 'batch_size': 8, 'flops': 2531205966848}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1898404492032}, {'type': 'perplexity', 'in_batch_dim': [8, 112], 'batch_size': 8, 'flops': 2214805229440}, {'type': 'perplexity', 'in_batch_dim': [8, 112], 'batch_size': 8, 'flops': 2214805229440}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1898404492032}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1898404492032}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1582003754624}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1582003754624}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1582003754624}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1898404492032}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1582003754624}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1582003754624}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1898404492032}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1582003754624}, {'type': 'perplexity', 'in_batch_dim': [8, 64], 'batch_size': 8, 'flops': 1265603017216}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1898404492032}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1898404492032}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1898404492032}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1898404492032}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1582003754624}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1898404492032}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1582003754624}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1582003754624}, {'type': 'perplexity', 'in_batch_dim': [8, 112], 'batch_size': 8, 'flops': 2214805229440}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1582003754624}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1898404492032}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1582003754624}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1582003754624}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1582003754624}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1898404492032}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1582003754624}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1898404492032}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1582003754624}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1582003754624}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1898404492032}, {'type': 'perplexity', 'in_batch_dim': [8, 112], 'batch_size': 8, 'flops': 2214805229440}, {'type': 'perplexity', 'in_batch_dim': [8, 128], 'batch_size': 8, 'flops': 2531205966848}, {'type': 'perplexity', 'in_batch_dim': [8, 112], 'batch_size': 8, 'flops': 2214805229440}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1582003754624}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1898404492032}, {'type': 'perplexity', 'in_batch_dim': [8, 64], 'batch_size': 8, 'flops': 1265603017216}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1898404492032}, {'type': 'perplexity', 'in_batch_dim': [8, 64], 'batch_size': 8, 'flops': 1265603017216}, {'type': 'perplexity', 'in_batch_dim': [8, 144], 'batch_size': 8, 'flops': 2847606704256}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1582003754624}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1898404492032}, {'type': 'perplexity', 'in_batch_dim': [8, 112], 'batch_size': 8, 'flops': 2214805229440}, {'type': 'perplexity', 'in_batch_dim': [8, 144], 'batch_size': 8, 'flops': 2847606704256}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1582003754624}, {'type': 'perplexity', 'in_batch_dim': [8, 112], 'batch_size': 8, 'flops': 2214805229440}, {'type': 'perplexity', 'in_batch_dim': [8, 64], 'batch_size': 8, 'flops': 1265603017216}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1582003754624}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1898404492032}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1898404492032}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1582003754624}, {'type': 'perplexity', 'in_batch_dim': [8, 112], 'batch_size': 8, 'flops': 2214805229440}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1582003754624}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1582003754624}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1582003754624}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1582003754624}, {'type': 'perplexity', 'in_batch_dim': [8, 112], 'batch_size': 8, 'flops': 2214805229440}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1898404492032}, {'type': 'perplexity', 'in_batch_dim': [8, 128], 'batch_size': 8, 'flops': 2531205966848}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1582003754624}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1582003754624}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1898404492032}, {'type': 'perplexity', 'in_batch_dim': [8, 64], 'batch_size': 8, 'flops': 1265603017216}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1898404492032}, {'type': 'perplexity', 'in_batch_dim': [8, 64], 'batch_size': 8, 'flops': 1265603017216}, {'type': 'perplexity', 'in_batch_dim': [8, 112], 'batch_size': 8, 'flops': 2214805229440}, {'type': 'perplexity', 'in_batch_dim': [8, 112], 'batch_size': 8, 'flops': 2214805229440}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1582003754624}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1898404492032}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1582003754624}, {'type': 'perplexity', 'in_batch_dim': [8, 64], 'batch_size': 8, 'flops': 1265603017216}, {'type': 'perplexity', 'in_batch_dim': [8, 64], 'batch_size': 8, 'flops': 1265603017216}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1898404492032}, {'type': 'perplexity', 'in_batch_dim': [8, 64], 'batch_size': 8, 'flops': 1265603017216}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1898404492032}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1898404492032}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1582003754624}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1898404492032}, {'type': 'perplexity', 'in_batch_dim': [8, 144], 'batch_size': 8, 'flops': 2847606704256}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1898404492032}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1582003754624}, {'type': 'perplexity', 'in_batch_dim': [8, 128], 'batch_size': 8, 'flops': 2531205966848}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1898404492032}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1898404492032}, {'type': 'perplexity', 'in_batch_dim': [8, 112], 'batch_size': 8, 'flops': 2214805229440}, {'type': 'perplexity', 'in_batch_dim': [8, 112], 'batch_size': 8, 'flops': 2214805229440}, {'type': 'perplexity', 'in_batch_dim': [8, 64], 'batch_size': 8, 'flops': 1265603017216}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1582003754624}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1898404492032}, {'type': 'perplexity', 'in_batch_dim': [8, 128], 'batch_size': 8, 'flops': 2531205966848}, {'type': 'perplexity', 'in_batch_dim': [8, 144], 'batch_size': 8, 'flops': 2847606704256}, {'type': 'perplexity', 'in_batch_dim': [8, 112], 'batch_size': 8, 'flops': 2214805229440}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1582003754624}, {'type': 'perplexity', 'in_batch_dim': [8, 64], 'batch_size': 8, 'flops': 1265603017216}, {'type': 'perplexity', 'in_batch_dim': [8, 64], 'batch_size': 8, 'flops': 1265603017216}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1898404492032}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1582003754624}, {'type': 'perplexity', 'in_batch_dim': [8, 64], 'batch_size': 8, 'flops': 1265603017216}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1898404492032}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1582003754624}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1898404492032}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1898404492032}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1582003754624}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1582003754624}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1582003754624}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1582003754624}, {'type': 'perplexity', 'in_batch_dim': [3, 48], 'batch_size': 8, 'flops': 949202279808}], 'timestamp': '2025-09-10 02:45:21.972857', 'step': 16310, 'epoch': 3} {'type': 'pplx', 'content': 9191.859023407089, 'timestamp': '2025-09-10 02:45:21.975563', 'step': 16310, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 144], 'flops': 4271696270016}, 'timestamp': '2025-09-10 02:45:22.006841', 'step': 16310, 'epoch': 3} {'type': 'loss', 'content': 0.0844251960515976, 'timestamp': '2025-09-10 02:45:22.009230', 'step': 16311, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 80], 'flops': 2373281365952}, 'timestamp': '2025-09-10 02:45:22.039935', 'step': 16311, 'epoch': 3} {'type': 'loss', 'content': 0.02919047884643078, 'timestamp': '2025-09-10 02:45:22.064365', 'step': 16312, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 3322488817984}, 'timestamp': '2025-09-10 02:45:22.103249', 'step': 16312, 'epoch': 3} {'type': 'loss', 'content': 0.052063439041376114, 'timestamp': '2025-09-10 02:45:22.108976', 'step': 16313, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 3322488817984}, 'timestamp': '2025-09-10 02:45:22.140771', 'step': 16313, 'epoch': 3} {'type': 'loss', 'content': 0.07843843102455139, 'timestamp': '2025-09-10 02:45:22.143118', 'step': 16314, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 3322488817984}, 'timestamp': '2025-09-10 02:45:22.172938', 'step': 16314, 'epoch': 3} {'type': 'loss', 'content': 0.0728052407503128, 'timestamp': '2025-09-10 02:45:22.175322', 'step': 16315, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 2847885091968}, 'timestamp': '2025-09-10 02:45:22.212956', 'step': 16315, 'epoch': 3} {'type': 'loss', 'content': 0.14593249559402466, 'timestamp': '2025-09-10 02:45:22.236646', 'step': 16316, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 128], 'flops': 3797092544000}, 'timestamp': '2025-09-10 02:45:22.270287', 'step': 16316, 'epoch': 3} {'type': 'loss', 'content': 0.08752795308828354, 'timestamp': '2025-09-10 02:45:22.272753', 'step': 16317, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 2847885091968}, 'timestamp': '2025-09-10 02:45:22.309124', 'step': 16317, 'epoch': 3} {'type': 'loss', 'content': 0.044932544231414795, 'timestamp': '2025-09-10 02:45:22.311445', 'step': 16318, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 3322488817984}, 'timestamp': '2025-09-10 02:45:22.346813', 'step': 16318, 'epoch': 3} {'type': 'loss', 'content': 0.05410391092300415, 'timestamp': '2025-09-10 02:45:22.349476', 'step': 16319, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 128], 'flops': 3797092544000}, 'timestamp': '2025-09-10 02:45:22.379921', 'step': 16319, 'epoch': 3} {'type': 'loss', 'content': 0.0900387316942215, 'timestamp': '2025-09-10 02:45:22.406290', 'step': 16320, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 160], 'flops': 4746299996032}, 'timestamp': '2025-09-10 02:45:22.437155', 'step': 16320, 'epoch': 3} {'type': 'loss', 'content': 0.06523264944553375, 'timestamp': '2025-09-10 02:45:22.439645', 'step': 16321, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 128], 'flops': 3797092544000}, 'timestamp': '2025-09-10 02:45:22.470461', 'step': 16321, 'epoch': 3} {'type': 'loss', 'content': 0.05511446297168732, 'timestamp': '2025-09-10 02:45:22.473020', 'step': 16322, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 80], 'flops': 2373281365952}, 'timestamp': '2025-09-10 02:45:22.505961', 'step': 16322, 'epoch': 3} {'type': 'loss', 'content': 0.03987858444452286, 'timestamp': '2025-09-10 02:45:22.508473', 'step': 16323, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 3322488817984}, 'timestamp': '2025-09-10 02:45:22.539813', 'step': 16323, 'epoch': 3} {'type': 'loss', 'content': 0.06361646950244904, 'timestamp': '2025-09-10 02:45:22.564935', 'step': 16324, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 128], 'flops': 3797092544000}, 'timestamp': '2025-09-10 02:45:22.597427', 'step': 16324, 'epoch': 3} {'type': 'loss', 'content': 0.07287875562906265, 'timestamp': '2025-09-10 02:45:22.600873', 'step': 16325, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 160], 'flops': 4746299996032}, 'timestamp': '2025-09-10 02:45:22.631888', 'step': 16325, 'epoch': 3} {'type': 'loss', 'content': 0.046526502817869186, 'timestamp': '2025-09-10 02:45:22.634757', 'step': 16326, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 144], 'flops': 4271696270016}, 'timestamp': '2025-09-10 02:45:22.666767', 'step': 16326, 'epoch': 3} {'type': 'loss', 'content': 0.0627041757106781, 'timestamp': '2025-09-10 02:45:22.670928', 'step': 16327, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 128], 'flops': 3797092544000}, 'timestamp': '2025-09-10 02:45:22.702678', 'step': 16327, 'epoch': 3} {'type': 'loss', 'content': 0.12136942893266678, 'timestamp': '2025-09-10 02:45:22.729453', 'step': 16328, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 3322488817984}, 'timestamp': '2025-09-10 02:45:22.768328', 'step': 16328, 'epoch': 3} {'type': 'loss', 'content': 0.0390365794301033, 'timestamp': '2025-09-10 02:45:22.770765', 'step': 16329, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 3322488817984}, 'timestamp': '2025-09-10 02:45:22.801023', 'step': 16329, 'epoch': 3} {'type': 'loss', 'content': 0.04005055129528046, 'timestamp': '2025-09-10 02:45:22.803907', 'step': 16330, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 3322488817984}, 'timestamp': '2025-09-10 02:45:22.835105', 'step': 16330, 'epoch': 3} {'type': 'loss', 'content': 0.03061184659600258, 'timestamp': '2025-09-10 02:45:22.837628', 'step': 16331, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 3322488817984}, 'timestamp': '2025-09-10 02:45:22.868581', 'step': 16331, 'epoch': 3} {'type': 'loss', 'content': 0.06000766530632973, 'timestamp': '2025-09-10 02:45:22.893727', 'step': 16332, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 128], 'flops': 3797092544000}, 'timestamp': '2025-09-10 02:45:22.928203', 'step': 16332, 'epoch': 3} {'type': 'loss', 'content': 0.09731780737638474, 'timestamp': '2025-09-10 02:45:22.930616', 'step': 16333, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 144], 'flops': 4271696270016}, 'timestamp': '2025-09-10 02:45:22.963086', 'step': 16333, 'epoch': 3} {'type': 'loss', 'content': 0.0639098733663559, 'timestamp': '2025-09-10 02:45:22.966112', 'step': 16334, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 128], 'flops': 3797092544000}, 'timestamp': '2025-09-10 02:45:22.996942', 'step': 16334, 'epoch': 3} {'type': 'loss', 'content': 0.09867452830076218, 'timestamp': '2025-09-10 02:45:22.999119', 'step': 16335, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 128], 'flops': 3797092544000}, 'timestamp': '2025-09-10 02:45:23.035739', 'step': 16335, 'epoch': 3} {'type': 'loss', 'content': 0.05500543862581253, 'timestamp': '2025-09-10 02:45:23.062866', 'step': 16336, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 3322488817984}, 'timestamp': '2025-09-10 02:45:23.105697', 'step': 16336, 'epoch': 3} {'type': 'loss', 'content': 0.1826334148645401, 'timestamp': '2025-09-10 02:45:23.108127', 'step': 16337, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 160], 'flops': 4746299996032}, 'timestamp': '2025-09-10 02:45:23.139130', 'step': 16337, 'epoch': 3} {'type': 'loss', 'content': 0.11791469901800156, 'timestamp': '2025-09-10 02:45:23.141722', 'step': 16338, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 2847885091968}, 'timestamp': '2025-09-10 02:45:23.172343', 'step': 16338, 'epoch': 3} {'type': 'loss', 'content': 0.12635068595409393, 'timestamp': '2025-09-10 02:45:23.174505', 'step': 16339, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 80], 'flops': 2373281365952}, 'timestamp': '2025-09-10 02:45:23.204620', 'step': 16339, 'epoch': 3} {'type': 'loss', 'content': 0.16437917947769165, 'timestamp': '2025-09-10 02:45:23.228713', 'step': 16340, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 128], 'flops': 3797092544000}, 'timestamp': '2025-09-10 02:45:23.259323', 'step': 16340, 'epoch': 3} {'type': 'loss', 'content': 0.0227250624448061, 'timestamp': '2025-09-10 02:45:23.261788', 'step': 16341, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 160], 'flops': 4746299996032}, 'timestamp': '2025-09-10 02:45:23.292886', 'step': 16341, 'epoch': 3} {'type': 'loss', 'content': 0.1677331179380417, 'timestamp': '2025-09-10 02:45:23.298928', 'step': 16342, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 3322488817984}, 'timestamp': '2025-09-10 02:45:23.340937', 'step': 16342, 'epoch': 3} {'type': 'loss', 'content': 0.04340877756476402, 'timestamp': '2025-09-10 02:45:23.346586', 'step': 16343, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 3322488817984}, 'timestamp': '2025-09-10 02:45:23.389035', 'step': 16343, 'epoch': 3} {'type': 'loss', 'content': 0.10238098353147507, 'timestamp': '2025-09-10 02:45:23.412541', 'step': 16344, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 3322488817984}, 'timestamp': '2025-09-10 02:45:23.443997', 'step': 16344, 'epoch': 3} {'type': 'loss', 'content': 0.06919088959693909, 'timestamp': '2025-09-10 02:45:23.446268', 'step': 16345, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 128], 'flops': 3797092544000}, 'timestamp': '2025-09-10 02:45:23.476793', 'step': 16345, 'epoch': 3} {'type': 'loss', 'content': 0.11702567338943481, 'timestamp': '2025-09-10 02:45:23.479158', 'step': 16346, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 160], 'flops': 4746299996032}, 'timestamp': '2025-09-10 02:45:23.510655', 'step': 16346, 'epoch': 3} {'type': 'loss', 'content': 0.04795268177986145, 'timestamp': '2025-09-10 02:45:23.513350', 'step': 16347, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 3322488817984}, 'timestamp': '2025-09-10 02:45:23.543777', 'step': 16347, 'epoch': 3} {'type': 'loss', 'content': 0.061550162732601166, 'timestamp': '2025-09-10 02:45:23.567520', 'step': 16348, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 2847885091968}, 'timestamp': '2025-09-10 02:45:23.599307', 'step': 16348, 'epoch': 3} {'type': 'loss', 'content': 0.0237430352717638, 'timestamp': '2025-09-10 02:45:23.602886', 'step': 16349, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 128], 'flops': 3797092544000}, 'timestamp': '2025-09-10 02:45:23.635969', 'step': 16349, 'epoch': 3} {'type': 'loss', 'content': 0.08541451394557953, 'timestamp': '2025-09-10 02:45:23.638637', 'step': 16350, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 2847885091968}, 'timestamp': '2025-09-10 02:45:23.685592', 'step': 16350, 'epoch': 3} {'type': 'loss', 'content': 0.08588358014822006, 'timestamp': '2025-09-10 02:45:23.687842', 'step': 16351, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 80], 'flops': 2373281365952}, 'timestamp': '2025-09-10 02:45:23.718512', 'step': 16351, 'epoch': 3} {'type': 'loss', 'content': 0.05933732911944389, 'timestamp': '2025-09-10 02:45:23.742431', 'step': 16352, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 3322488817984}, 'timestamp': '2025-09-10 02:45:23.773918', 'step': 16352, 'epoch': 3} {'type': 'loss', 'content': 0.06039898470044136, 'timestamp': '2025-09-10 02:45:23.776339', 'step': 16353, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 144], 'flops': 4271696270016}, 'timestamp': '2025-09-10 02:45:23.806891', 'step': 16353, 'epoch': 3} {'type': 'loss', 'content': 0.0979226678609848, 'timestamp': '2025-09-10 02:45:23.809535', 'step': 16354, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 2847885091968}, 'timestamp': '2025-09-10 02:45:23.840673', 'step': 16354, 'epoch': 3} {'type': 'loss', 'content': 0.036294981837272644, 'timestamp': '2025-09-10 02:45:23.842757', 'step': 16355, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 2847885091968}, 'timestamp': '2025-09-10 02:45:23.873510', 'step': 16355, 'epoch': 3} {'type': 'loss', 'content': 0.04609052091836929, 'timestamp': '2025-09-10 02:45:23.897471', 'step': 16356, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 3322488817984}, 'timestamp': '2025-09-10 02:45:23.928401', 'step': 16356, 'epoch': 3} {'type': 'loss', 'content': 0.05920855328440666, 'timestamp': '2025-09-10 02:45:23.931146', 'step': 16357, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 3322488817984}, 'timestamp': '2025-09-10 02:45:23.961750', 'step': 16357, 'epoch': 3} {'type': 'loss', 'content': 0.06151909381151199, 'timestamp': '2025-09-10 02:45:23.964673', 'step': 16358, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 160], 'flops': 4746299996032}, 'timestamp': '2025-09-10 02:45:23.996053', 'step': 16358, 'epoch': 3} {'type': 'loss', 'content': 0.0661168098449707, 'timestamp': '2025-09-10 02:45:23.999472', 'step': 16359, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 128], 'flops': 3797092544000}, 'timestamp': '2025-09-10 02:45:24.030125', 'step': 16359, 'epoch': 3} {'type': 'loss', 'content': 0.08416157960891724, 'timestamp': '2025-09-10 02:45:24.054087', 'step': 16360, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 2847885091968}, 'timestamp': '2025-09-10 02:45:24.084800', 'step': 16360, 'epoch': 3} {'type': 'loss', 'content': 0.052177879959344864, 'timestamp': '2025-09-10 02:45:24.088754', 'step': 16361, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 3322488817984}, 'timestamp': '2025-09-10 02:45:24.121836', 'step': 16361, 'epoch': 3} {'type': 'loss', 'content': 0.1246507540345192, 'timestamp': '2025-09-10 02:45:24.124382', 'step': 16362, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 2847885091968}, 'timestamp': '2025-09-10 02:45:24.155128', 'step': 16362, 'epoch': 3} {'type': 'loss', 'content': 0.09182402491569519, 'timestamp': '2025-09-10 02:45:24.157642', 'step': 16363, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 3322488817984}, 'timestamp': '2025-09-10 02:45:24.187759', 'step': 16363, 'epoch': 3} {'type': 'loss', 'content': 0.1157040074467659, 'timestamp': '2025-09-10 02:45:24.211689', 'step': 16364, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 2847885091968}, 'timestamp': '2025-09-10 02:45:24.242550', 'step': 16364, 'epoch': 3} {'type': 'loss', 'content': 0.15833145380020142, 'timestamp': '2025-09-10 02:45:24.255743', 'step': 16365, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 128], 'flops': 3797092544000}, 'timestamp': '2025-09-10 02:45:24.287151', 'step': 16365, 'epoch': 3} {'type': 'loss', 'content': 0.025916870683431625, 'timestamp': '2025-09-10 02:45:24.289355', 'step': 16366, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 128], 'flops': 3797092544000}, 'timestamp': '2025-09-10 02:45:24.319012', 'step': 16366, 'epoch': 3} {'type': 'loss', 'content': 0.0652959942817688, 'timestamp': '2025-09-10 02:45:24.321507', 'step': 16367, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 3322488817984}, 'timestamp': '2025-09-10 02:45:24.351493', 'step': 16367, 'epoch': 3} {'type': 'loss', 'content': 0.08636466413736343, 'timestamp': '2025-09-10 02:45:24.376378', 'step': 16368, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 3322488817984}, 'timestamp': '2025-09-10 02:45:24.407104', 'step': 16368, 'epoch': 3} {'type': 'loss', 'content': 0.04596492648124695, 'timestamp': '2025-09-10 02:45:24.409256', 'step': 16369, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 128], 'flops': 3797092544000}, 'timestamp': '2025-09-10 02:45:24.439630', 'step': 16369, 'epoch': 3} {'type': 'loss', 'content': 0.09498336166143417, 'timestamp': '2025-09-10 02:45:24.442482', 'step': 16370, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 3322488817984}, 'timestamp': '2025-09-10 02:45:24.472505', 'step': 16370, 'epoch': 3} {'type': 'loss', 'content': 0.1383747160434723, 'timestamp': '2025-09-10 02:45:24.474596', 'step': 16371, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 3322488817984}, 'timestamp': '2025-09-10 02:45:24.506381', 'step': 16371, 'epoch': 3} {'type': 'loss', 'content': 0.06682250648736954, 'timestamp': '2025-09-10 02:45:24.530089', 'step': 16372, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 3322488817984}, 'timestamp': '2025-09-10 02:45:24.560646', 'step': 16372, 'epoch': 3} {'type': 'loss', 'content': 0.08659164607524872, 'timestamp': '2025-09-10 02:45:24.562977', 'step': 16373, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 2847885091968}, 'timestamp': '2025-09-10 02:45:24.592888', 'step': 16373, 'epoch': 3} {'type': 'loss', 'content': 0.024600759148597717, 'timestamp': '2025-09-10 02:45:24.595141', 'step': 16374, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 3322488817984}, 'timestamp': '2025-09-10 02:45:24.626041', 'step': 16374, 'epoch': 3} {'type': 'loss', 'content': 0.0936967134475708, 'timestamp': '2025-09-10 02:45:24.628314', 'step': 16375, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 2847885091968}, 'timestamp': '2025-09-10 02:45:24.658391', 'step': 16375, 'epoch': 3} {'type': 'loss', 'content': 0.02886292338371277, 'timestamp': '2025-09-10 02:45:24.682109', 'step': 16376, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 2847885091968}, 'timestamp': '2025-09-10 02:45:24.713240', 'step': 16376, 'epoch': 3} {'type': 'loss', 'content': 0.01450169924646616, 'timestamp': '2025-09-10 02:45:24.716051', 'step': 16377, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 2847885091968}, 'timestamp': '2025-09-10 02:45:24.748739', 'step': 16377, 'epoch': 3} {'type': 'loss', 'content': 0.025289716199040413, 'timestamp': '2025-09-10 02:45:24.751108', 'step': 16378, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 2847885091968}, 'timestamp': '2025-09-10 02:45:24.781772', 'step': 16378, 'epoch': 3} {'type': 'loss', 'content': 0.06215835362672806, 'timestamp': '2025-09-10 02:45:24.784113', 'step': 16379, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 3322488817984}, 'timestamp': '2025-09-10 02:45:24.814324', 'step': 16379, 'epoch': 3} {'type': 'loss', 'content': 0.05477185919880867, 'timestamp': '2025-09-10 02:45:24.838263', 'step': 16380, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 128], 'flops': 3797092544000}, 'timestamp': '2025-09-10 02:45:24.870920', 'step': 16380, 'epoch': 3} {'type': 'loss', 'content': 0.045992787927389145, 'timestamp': '2025-09-10 02:45:24.873247', 'step': 16381, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 3322488817984}, 'timestamp': '2025-09-10 02:45:24.903976', 'step': 16381, 'epoch': 3} {'type': 'loss', 'content': 0.09487935155630112, 'timestamp': '2025-09-10 02:45:24.906437', 'step': 16382, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 2847885091968}, 'timestamp': '2025-09-10 02:45:24.936760', 'step': 16382, 'epoch': 3} {'type': 'loss', 'content': 0.12191278487443924, 'timestamp': '2025-09-10 02:45:24.939540', 'step': 16383, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 3322488817984}, 'timestamp': '2025-09-10 02:45:24.971939', 'step': 16383, 'epoch': 3} {'type': 'loss', 'content': 0.11228670179843903, 'timestamp': '2025-09-10 02:45:24.995268', 'step': 16384, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 128], 'flops': 3797092544000}, 'timestamp': '2025-09-10 02:45:25.026487', 'step': 16384, 'epoch': 3} {'type': 'loss', 'content': 0.056968577206134796, 'timestamp': '2025-09-10 02:45:25.029040', 'step': 16385, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 3322488817984}, 'timestamp': '2025-09-10 02:45:25.059454', 'step': 16385, 'epoch': 3} {'type': 'loss', 'content': 0.05058707669377327, 'timestamp': '2025-09-10 02:45:25.061842', 'step': 16386, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 2847885091968}, 'timestamp': '2025-09-10 02:45:25.092210', 'step': 16386, 'epoch': 3} {'type': 'loss', 'content': 0.06935645639896393, 'timestamp': '2025-09-10 02:45:25.094430', 'step': 16387, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 3322488817984}, 'timestamp': '2025-09-10 02:45:25.124423', 'step': 16387, 'epoch': 3} {'type': 'loss', 'content': 0.14063264429569244, 'timestamp': '2025-09-10 02:45:25.148421', 'step': 16388, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 2847885091968}, 'timestamp': '2025-09-10 02:45:25.179494', 'step': 16388, 'epoch': 3} {'type': 'loss', 'content': 0.15284788608551025, 'timestamp': '2025-09-10 02:45:25.181688', 'step': 16389, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 2847885091968}, 'timestamp': '2025-09-10 02:45:25.211634', 'step': 16389, 'epoch': 3} {'type': 'loss', 'content': 0.08004792034626007, 'timestamp': '2025-09-10 02:45:25.213974', 'step': 16390, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 128], 'flops': 3797092544000}, 'timestamp': '2025-09-10 02:45:25.244735', 'step': 16390, 'epoch': 3} {'type': 'loss', 'content': 0.041123420000076294, 'timestamp': '2025-09-10 02:45:25.247258', 'step': 16391, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 3322488817984}, 'timestamp': '2025-09-10 02:45:25.277515', 'step': 16391, 'epoch': 3} {'type': 'loss', 'content': 0.03400047868490219, 'timestamp': '2025-09-10 02:45:25.301919', 'step': 16392, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 2847885091968}, 'timestamp': '2025-09-10 02:45:25.333754', 'step': 16392, 'epoch': 3} {'type': 'loss', 'content': 0.01563744619488716, 'timestamp': '2025-09-10 02:45:25.336463', 'step': 16393, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 128], 'flops': 3797092544000}, 'timestamp': '2025-09-10 02:45:25.366691', 'step': 16393, 'epoch': 3} {'type': 'loss', 'content': 0.07716257870197296, 'timestamp': '2025-09-10 02:45:25.369332', 'step': 16394, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 128], 'flops': 3797092544000}, 'timestamp': '2025-09-10 02:45:25.403729', 'step': 16394, 'epoch': 3} {'type': 'loss', 'content': 0.05670928210020065, 'timestamp': '2025-09-10 02:45:25.406080', 'step': 16395, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 2847885091968}, 'timestamp': '2025-09-10 02:45:25.436145', 'step': 16395, 'epoch': 3} {'type': 'loss', 'content': 0.08492351323366165, 'timestamp': '2025-09-10 02:45:25.459722', 'step': 16396, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 3322488817984}, 'timestamp': '2025-09-10 02:45:25.490484', 'step': 16396, 'epoch': 3} {'type': 'loss', 'content': 0.1196252778172493, 'timestamp': '2025-09-10 02:45:25.492691', 'step': 16397, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 128], 'flops': 3797092544000}, 'timestamp': '2025-09-10 02:45:25.522927', 'step': 16397, 'epoch': 3} {'type': 'loss', 'content': 0.030507788062095642, 'timestamp': '2025-09-10 02:45:25.525335', 'step': 16398, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 2847885091968}, 'timestamp': '2025-09-10 02:45:25.556361', 'step': 16398, 'epoch': 3} {'type': 'loss', 'content': 0.09220761060714722, 'timestamp': '2025-09-10 02:45:25.558490', 'step': 16399, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 3322488817984}, 'timestamp': '2025-09-10 02:45:25.588899', 'step': 16399, 'epoch': 3} {'type': 'loss', 'content': 0.06289110332727432, 'timestamp': '2025-09-10 02:45:25.612460', 'step': 16400, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 128], 'flops': 3797092544000}, 'timestamp': '2025-09-10 02:45:25.642326', 'step': 16400, 'epoch': 3} {'type': 'loss', 'content': 0.07121946662664413, 'timestamp': '2025-09-10 02:45:25.644661', 'step': 16401, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 3322488817984}, 'timestamp': '2025-09-10 02:45:25.675542', 'step': 16401, 'epoch': 3} {'type': 'loss', 'content': 0.07427618652582169, 'timestamp': '2025-09-10 02:45:25.677817', 'step': 16402, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 128], 'flops': 3797092544000}, 'timestamp': '2025-09-10 02:45:25.708899', 'step': 16402, 'epoch': 3} {'type': 'loss', 'content': 0.044931136071681976, 'timestamp': '2025-09-10 02:45:25.711443', 'step': 16403, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 3322488817984}, 'timestamp': '2025-09-10 02:45:25.741598', 'step': 16403, 'epoch': 3} {'type': 'loss', 'content': 0.052238937467336655, 'timestamp': '2025-09-10 02:45:25.765692', 'step': 16404, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 3322488817984}, 'timestamp': '2025-09-10 02:45:25.801435', 'step': 16404, 'epoch': 3} {'type': 'loss', 'content': 0.02236735261976719, 'timestamp': '2025-09-10 02:45:25.803994', 'step': 16405, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 3322488817984}, 'timestamp': '2025-09-10 02:45:25.834704', 'step': 16405, 'epoch': 3} {'type': 'loss', 'content': 0.04947327822446823, 'timestamp': '2025-09-10 02:45:25.839043', 'step': 16406, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 128], 'flops': 3797092544000}, 'timestamp': '2025-09-10 02:45:25.869770', 'step': 16406, 'epoch': 3} {'type': 'loss', 'content': 0.06684721261262894, 'timestamp': '2025-09-10 02:45:25.872580', 'step': 16407, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 128], 'flops': 3797092544000}, 'timestamp': '2025-09-10 02:45:25.903777', 'step': 16407, 'epoch': 3} {'type': 'loss', 'content': 0.07825805991888046, 'timestamp': '2025-09-10 02:45:25.927195', 'step': 16408, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 160], 'flops': 4746299996032}, 'timestamp': '2025-09-10 02:45:25.957816', 'step': 16408, 'epoch': 3} {'type': 'loss', 'content': 0.08238199353218079, 'timestamp': '2025-09-10 02:45:25.959797', 'step': 16409, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 128], 'flops': 3797092544000}, 'timestamp': '2025-09-10 02:45:25.989611', 'step': 16409, 'epoch': 3} {'type': 'loss', 'content': 0.07975862920284271, 'timestamp': '2025-09-10 02:45:25.991993', 'step': 16410, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 128], 'flops': 3797092544000}, 'timestamp': '2025-09-10 02:45:26.022652', 'step': 16410, 'epoch': 3} {'type': 'loss', 'content': 0.067600317299366, 'timestamp': '2025-09-10 02:45:26.025158', 'step': 16411, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 2847885091968}, 'timestamp': '2025-09-10 02:45:26.055201', 'step': 16411, 'epoch': 3} {'type': 'loss', 'content': 0.04115147516131401, 'timestamp': '2025-09-10 02:45:26.078899', 'step': 16412, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 2847885091968}, 'timestamp': '2025-09-10 02:45:26.109828', 'step': 16412, 'epoch': 3} {'type': 'loss', 'content': 0.0381997711956501, 'timestamp': '2025-09-10 02:45:26.112291', 'step': 16413, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 3322488817984}, 'timestamp': '2025-09-10 02:45:26.142884', 'step': 16413, 'epoch': 3} {'type': 'loss', 'content': 0.039638470858335495, 'timestamp': '2025-09-10 02:45:26.145427', 'step': 16414, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 128], 'flops': 3797092544000}, 'timestamp': '2025-09-10 02:45:26.175632', 'step': 16414, 'epoch': 3} {'type': 'loss', 'content': 0.05457378551363945, 'timestamp': '2025-09-10 02:45:26.178019', 'step': 16415, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 3322488817984}, 'timestamp': '2025-09-10 02:45:26.208315', 'step': 16415, 'epoch': 3} {'type': 'loss', 'content': 0.1463472545146942, 'timestamp': '2025-09-10 02:45:26.232277', 'step': 16416, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 2847885091968}, 'timestamp': '2025-09-10 02:45:26.264714', 'step': 16416, 'epoch': 3} {'type': 'loss', 'content': 0.06348631531000137, 'timestamp': '2025-09-10 02:45:26.266945', 'step': 16417, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 144], 'flops': 4271696270016}, 'timestamp': '2025-09-10 02:45:26.297595', 'step': 16417, 'epoch': 3} {'type': 'loss', 'content': 0.06898701936006546, 'timestamp': '2025-09-10 02:45:26.299996', 'step': 16418, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 80], 'flops': 2373281365952}, 'timestamp': '2025-09-10 02:45:26.329814', 'step': 16418, 'epoch': 3} {'type': 'loss', 'content': 0.022396035492420197, 'timestamp': '2025-09-10 02:45:26.332279', 'step': 16419, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 3322488817984}, 'timestamp': '2025-09-10 02:45:26.362860', 'step': 16419, 'epoch': 3} {'type': 'loss', 'content': 0.09544293582439423, 'timestamp': '2025-09-10 02:45:26.386555', 'step': 16420, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 3322488817984}, 'timestamp': '2025-09-10 02:45:26.417288', 'step': 16420, 'epoch': 3} {'type': 'loss', 'content': 0.1155749037861824, 'timestamp': '2025-09-10 02:45:26.419732', 'step': 16421, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 3322488817984}, 'timestamp': '2025-09-10 02:45:26.449421', 'step': 16421, 'epoch': 3} {'type': 'loss', 'content': 0.0928027555346489, 'timestamp': '2025-09-10 02:45:26.452460', 'step': 16422, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 3322488817984}, 'timestamp': '2025-09-10 02:45:26.483813', 'step': 16422, 'epoch': 3} {'type': 'loss', 'content': 0.09349766373634338, 'timestamp': '2025-09-10 02:45:26.486249', 'step': 16423, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 2847885091968}, 'timestamp': '2025-09-10 02:45:26.515994', 'step': 16423, 'epoch': 3} {'type': 'loss', 'content': 0.09182487428188324, 'timestamp': '2025-09-10 02:45:26.539665', 'step': 16424, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 2847885091968}, 'timestamp': '2025-09-10 02:45:26.571846', 'step': 16424, 'epoch': 3} {'type': 'loss', 'content': 0.09506900608539581, 'timestamp': '2025-09-10 02:45:26.574389', 'step': 16425, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 3322488817984}, 'timestamp': '2025-09-10 02:45:26.606456', 'step': 16425, 'epoch': 3} {'type': 'loss', 'content': 0.07071958482265472, 'timestamp': '2025-09-10 02:45:26.610779', 'step': 16426, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 2847885091968}, 'timestamp': '2025-09-10 02:45:26.640860', 'step': 16426, 'epoch': 3} {'type': 'loss', 'content': 0.011352752335369587, 'timestamp': '2025-09-10 02:45:26.642996', 'step': 16427, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 3322488817984}, 'timestamp': '2025-09-10 02:45:26.675129', 'step': 16427, 'epoch': 3} {'type': 'loss', 'content': 0.035059213638305664, 'timestamp': '2025-09-10 02:45:26.698779', 'step': 16428, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 128], 'flops': 3797092544000}, 'timestamp': '2025-09-10 02:45:26.729705', 'step': 16428, 'epoch': 3} {'type': 'loss', 'content': 0.10337710380554199, 'timestamp': '2025-09-10 02:45:26.732081', 'step': 16429, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 3322488817984}, 'timestamp': '2025-09-10 02:45:26.762962', 'step': 16429, 'epoch': 3} {'type': 'loss', 'content': 0.0740964263677597, 'timestamp': '2025-09-10 02:45:26.765095', 'step': 16430, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 176], 'flops': 5220903722048}, 'timestamp': '2025-09-10 02:45:26.795690', 'step': 16430, 'epoch': 3} {'type': 'loss', 'content': 0.0779910758137703, 'timestamp': '2025-09-10 02:45:26.799681', 'step': 16431, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 2847885091968}, 'timestamp': '2025-09-10 02:45:26.831085', 'step': 16431, 'epoch': 3} {'type': 'loss', 'content': 0.06874050199985504, 'timestamp': '2025-09-10 02:45:26.854862', 'step': 16432, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 2847885091968}, 'timestamp': '2025-09-10 02:45:26.886786', 'step': 16432, 'epoch': 3} {'type': 'loss', 'content': 0.053408149629831314, 'timestamp': '2025-09-10 02:45:26.889488', 'step': 16433, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 2847885091968}, 'timestamp': '2025-09-10 02:45:26.919881', 'step': 16433, 'epoch': 3} {'type': 'loss', 'content': 0.12955564260482788, 'timestamp': '2025-09-10 02:45:26.922449', 'step': 16434, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 80], 'flops': 2373281365952}, 'timestamp': '2025-09-10 02:45:26.952700', 'step': 16434, 'epoch': 3} {'type': 'loss', 'content': 0.04135581851005554, 'timestamp': '2025-09-10 02:45:26.955261', 'step': 16435, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 2847885091968}, 'timestamp': '2025-09-10 02:45:26.985692', 'step': 16435, 'epoch': 3} {'type': 'loss', 'content': 0.06280671060085297, 'timestamp': '2025-09-10 02:45:27.009646', 'step': 16436, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 2847885091968}, 'timestamp': '2025-09-10 02:45:27.041311', 'step': 16436, 'epoch': 3} {'type': 'loss', 'content': 0.057500217109918594, 'timestamp': '2025-09-10 02:45:27.043290', 'step': 16437, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 2847885091968}, 'timestamp': '2025-09-10 02:45:27.073670', 'step': 16437, 'epoch': 3} {'type': 'loss', 'content': 0.05455949902534485, 'timestamp': '2025-09-10 02:45:27.075749', 'step': 16438, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 144], 'flops': 4271696270016}, 'timestamp': '2025-09-10 02:45:27.107038', 'step': 16438, 'epoch': 3} {'type': 'loss', 'content': 0.11753717064857483, 'timestamp': '2025-09-10 02:45:27.110233', 'step': 16439, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 2847885091968}, 'timestamp': '2025-09-10 02:45:27.141219', 'step': 16439, 'epoch': 3} {'type': 'loss', 'content': 0.024036943912506104, 'timestamp': '2025-09-10 02:45:27.164903', 'step': 16440, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 2847885091968}, 'timestamp': '2025-09-10 02:45:27.195725', 'step': 16440, 'epoch': 3} {'type': 'loss', 'content': 0.021846231073141098, 'timestamp': '2025-09-10 02:45:27.197992', 'step': 16441, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 128], 'flops': 3797092544000}, 'timestamp': '2025-09-10 02:45:27.231021', 'step': 16441, 'epoch': 3} {'type': 'loss', 'content': 0.05703497305512428, 'timestamp': '2025-09-10 02:45:27.233282', 'step': 16442, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 160], 'flops': 4746299996032}, 'timestamp': '2025-09-10 02:45:27.265162', 'step': 16442, 'epoch': 3} {'type': 'loss', 'content': 0.04462335631251335, 'timestamp': '2025-09-10 02:45:27.267536', 'step': 16443, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 144], 'flops': 4271696270016}, 'timestamp': '2025-09-10 02:45:27.298736', 'step': 16443, 'epoch': 3} {'type': 'loss', 'content': 0.058883436024188995, 'timestamp': '2025-09-10 02:45:27.322730', 'step': 16444, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 3322488817984}, 'timestamp': '2025-09-10 02:45:27.353896', 'step': 16444, 'epoch': 3} {'type': 'loss', 'content': 0.09951790422201157, 'timestamp': '2025-09-10 02:45:27.358050', 'step': 16445, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 128], 'flops': 3797092544000}, 'timestamp': '2025-09-10 02:45:27.389149', 'step': 16445, 'epoch': 3} {'type': 'loss', 'content': 0.08940421789884567, 'timestamp': '2025-09-10 02:45:27.391039', 'step': 16446, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 2847885091968}, 'timestamp': '2025-09-10 02:45:27.420797', 'step': 16446, 'epoch': 3} {'type': 'loss', 'content': 0.06635668128728867, 'timestamp': '2025-09-10 02:45:27.423192', 'step': 16447, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 144], 'flops': 4271696270016}, 'timestamp': '2025-09-10 02:45:27.454043', 'step': 16447, 'epoch': 3} {'type': 'loss', 'content': 0.083440862596035, 'timestamp': '2025-09-10 02:45:27.477805', 'step': 16448, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 2847885091968}, 'timestamp': '2025-09-10 02:45:27.508238', 'step': 16448, 'epoch': 3} {'type': 'loss', 'content': 0.06482890993356705, 'timestamp': '2025-09-10 02:45:27.510521', 'step': 16449, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 3322488817984}, 'timestamp': '2025-09-10 02:45:27.540420', 'step': 16449, 'epoch': 3} {'type': 'loss', 'content': 0.040105901658535004, 'timestamp': '2025-09-10 02:45:27.543422', 'step': 16450, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 3322488817984}, 'timestamp': '2025-09-10 02:45:27.573368', 'step': 16450, 'epoch': 3} {'type': 'loss', 'content': 0.05581315606832504, 'timestamp': '2025-09-10 02:45:27.575704', 'step': 16451, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 3322488817984}, 'timestamp': '2025-09-10 02:45:27.606195', 'step': 16451, 'epoch': 3} {'type': 'loss', 'content': 0.0716022476553917, 'timestamp': '2025-09-10 02:45:27.629766', 'step': 16452, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 128], 'flops': 3797092544000}, 'timestamp': '2025-09-10 02:45:27.660031', 'step': 16452, 'epoch': 3} {'type': 'loss', 'content': 0.060882773250341415, 'timestamp': '2025-09-10 02:45:27.663400', 'step': 16453, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 3322488817984}, 'timestamp': '2025-09-10 02:45:27.694118', 'step': 16453, 'epoch': 3} {'type': 'loss', 'content': 0.038102202117443085, 'timestamp': '2025-09-10 02:45:27.696206', 'step': 16454, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 2847885091968}, 'timestamp': '2025-09-10 02:45:27.725744', 'step': 16454, 'epoch': 3} {'type': 'loss', 'content': 0.07757041603326797, 'timestamp': '2025-09-10 02:45:27.727730', 'step': 16455, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 144], 'flops': 4271696270016}, 'timestamp': '2025-09-10 02:45:27.759259', 'step': 16455, 'epoch': 3} {'type': 'loss', 'content': 0.0374784879386425, 'timestamp': '2025-09-10 02:45:27.784530', 'step': 16456, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 2847885091968}, 'timestamp': '2025-09-10 02:45:27.815618', 'step': 16456, 'epoch': 3} {'type': 'loss', 'content': 0.026419833302497864, 'timestamp': '2025-09-10 02:45:27.818123', 'step': 16457, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 128], 'flops': 3797092544000}, 'timestamp': '2025-09-10 02:45:27.848914', 'step': 16457, 'epoch': 3} {'type': 'loss', 'content': 0.0984291136264801, 'timestamp': '2025-09-10 02:45:27.850918', 'step': 16458, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 3322488817984}, 'timestamp': '2025-09-10 02:45:27.880612', 'step': 16458, 'epoch': 3} {'type': 'loss', 'content': 0.04312717169523239, 'timestamp': '2025-09-10 02:45:27.883309', 'step': 16459, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 128], 'flops': 3797092544000}, 'timestamp': '2025-09-10 02:45:27.913916', 'step': 16459, 'epoch': 3} {'type': 'loss', 'content': 0.02881544455885887, 'timestamp': '2025-09-10 02:45:27.937332', 'step': 16460, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 3322488817984}, 'timestamp': '2025-09-10 02:45:27.968488', 'step': 16460, 'epoch': 3} {'type': 'loss', 'content': 0.13374631106853485, 'timestamp': '2025-09-10 02:45:27.970733', 'step': 16461, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 128], 'flops': 3797092544000}, 'timestamp': '2025-09-10 02:45:28.001785', 'step': 16461, 'epoch': 3} {'type': 'loss', 'content': 0.06062011420726776, 'timestamp': '2025-09-10 02:45:28.004040', 'step': 16462, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 3322488817984}, 'timestamp': '2025-09-10 02:45:28.034609', 'step': 16462, 'epoch': 3} {'type': 'loss', 'content': 0.03560938686132431, 'timestamp': '2025-09-10 02:45:28.036824', 'step': 16463, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 3322488817984}, 'timestamp': '2025-09-10 02:45:28.067039', 'step': 16463, 'epoch': 3} {'type': 'loss', 'content': 0.08237864077091217, 'timestamp': '2025-09-10 02:45:28.090824', 'step': 16464, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 3322488817984}, 'timestamp': '2025-09-10 02:45:28.122151', 'step': 16464, 'epoch': 3} {'type': 'loss', 'content': 0.08844549208879471, 'timestamp': '2025-09-10 02:45:28.124814', 'step': 16465, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 3322488817984}, 'timestamp': '2025-09-10 02:45:28.155360', 'step': 16465, 'epoch': 3} {'type': 'loss', 'content': 0.02439633011817932, 'timestamp': '2025-09-10 02:45:28.157891', 'step': 16466, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 2847885091968}, 'timestamp': '2025-09-10 02:45:28.188249', 'step': 16466, 'epoch': 3} {'type': 'loss', 'content': 0.04385026544332504, 'timestamp': '2025-09-10 02:45:28.191261', 'step': 16467, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 128], 'flops': 3797092544000}, 'timestamp': '2025-09-10 02:45:28.221313', 'step': 16467, 'epoch': 3} {'type': 'loss', 'content': 0.0696028620004654, 'timestamp': '2025-09-10 02:45:28.244966', 'step': 16468, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 2847885091968}, 'timestamp': '2025-09-10 02:45:28.276192', 'step': 16468, 'epoch': 3} {'type': 'loss', 'content': 0.0638137087225914, 'timestamp': '2025-09-10 02:45:28.278791', 'step': 16469, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 2847885091968}, 'timestamp': '2025-09-10 02:45:28.308383', 'step': 16469, 'epoch': 3} {'type': 'loss', 'content': 0.05573953315615654, 'timestamp': '2025-09-10 02:45:28.310585', 'step': 16470, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 144], 'flops': 4271696270016}, 'timestamp': '2025-09-10 02:45:28.341477', 'step': 16470, 'epoch': 3} {'type': 'loss', 'content': 0.052716586738824844, 'timestamp': '2025-09-10 02:45:28.343736', 'step': 16471, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 3322488817984}, 'timestamp': '2025-09-10 02:45:28.374407', 'step': 16471, 'epoch': 3} {'type': 'loss', 'content': 0.034432362765073776, 'timestamp': '2025-09-10 02:45:28.398054', 'step': 16472, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 3322488817984}, 'timestamp': '2025-09-10 02:45:28.429034', 'step': 16472, 'epoch': 3} {'type': 'loss', 'content': 0.08244027942419052, 'timestamp': '2025-09-10 02:45:28.431667', 'step': 16473, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 2847885091968}, 'timestamp': '2025-09-10 02:45:28.462166', 'step': 16473, 'epoch': 3} {'type': 'loss', 'content': 0.07085876166820526, 'timestamp': '2025-09-10 02:45:28.464439', 'step': 16474, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 3322488817984}, 'timestamp': '2025-09-10 02:45:28.495737', 'step': 16474, 'epoch': 3} {'type': 'loss', 'content': 0.11362621188163757, 'timestamp': '2025-09-10 02:45:28.498274', 'step': 16475, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 2847885091968}, 'timestamp': '2025-09-10 02:45:28.527867', 'step': 16475, 'epoch': 3} {'type': 'loss', 'content': 0.07452113926410675, 'timestamp': '2025-09-10 02:45:28.551599', 'step': 16476, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 3322488817984}, 'timestamp': '2025-09-10 02:45:28.582198', 'step': 16476, 'epoch': 3} {'type': 'loss', 'content': 0.052755922079086304, 'timestamp': '2025-09-10 02:45:28.584627', 'step': 16477, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 3322488817984}, 'timestamp': '2025-09-10 02:45:28.615375', 'step': 16477, 'epoch': 3} {'type': 'loss', 'content': 0.03081599809229374, 'timestamp': '2025-09-10 02:45:28.619481', 'step': 16478, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 144], 'flops': 4271696270016}, 'timestamp': '2025-09-10 02:45:28.649725', 'step': 16478, 'epoch': 3} {'type': 'loss', 'content': 0.03372617065906525, 'timestamp': '2025-09-10 02:45:28.652497', 'step': 16479, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 3322488817984}, 'timestamp': '2025-09-10 02:45:28.683651', 'step': 16479, 'epoch': 3} {'type': 'loss', 'content': 0.06787820160388947, 'timestamp': '2025-09-10 02:45:28.707590', 'step': 16480, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 3322488817984}, 'timestamp': '2025-09-10 02:45:28.738171', 'step': 16480, 'epoch': 3} {'type': 'loss', 'content': 0.04692886769771576, 'timestamp': '2025-09-10 02:45:28.741645', 'step': 16481, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 2847885091968}, 'timestamp': '2025-09-10 02:45:28.775032', 'step': 16481, 'epoch': 3} {'type': 'loss', 'content': 0.0069813234731554985, 'timestamp': '2025-09-10 02:45:28.778284', 'step': 16482, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 144], 'flops': 4271696270016}, 'timestamp': '2025-09-10 02:45:28.815721', 'step': 16482, 'epoch': 3} {'type': 'loss', 'content': 0.07504550367593765, 'timestamp': '2025-09-10 02:45:28.818523', 'step': 16483, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 3322488817984}, 'timestamp': '2025-09-10 02:45:28.850574', 'step': 16483, 'epoch': 3} {'type': 'loss', 'content': 0.035215068608522415, 'timestamp': '2025-09-10 02:45:28.875215', 'step': 16484, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 2847885091968}, 'timestamp': '2025-09-10 02:45:28.911949', 'step': 16484, 'epoch': 3} {'type': 'loss', 'content': 0.0564410537481308, 'timestamp': '2025-09-10 02:45:28.915215', 'step': 16485, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 128], 'flops': 3797092544000}, 'timestamp': '2025-09-10 02:45:28.947205', 'step': 16485, 'epoch': 3} {'type': 'loss', 'content': 0.06759624183177948, 'timestamp': '2025-09-10 02:45:28.949764', 'step': 16486, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 128], 'flops': 3797092544000}, 'timestamp': '2025-09-10 02:45:28.981413', 'step': 16486, 'epoch': 3} {'type': 'loss', 'content': 0.03681303933262825, 'timestamp': '2025-09-10 02:45:28.983887', 'step': 16487, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 3322488817984}, 'timestamp': '2025-09-10 02:45:29.014056', 'step': 16487, 'epoch': 3} {'type': 'loss', 'content': 0.048152752220630646, 'timestamp': '2025-09-10 02:45:29.037740', 'step': 16488, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 128], 'flops': 3797092544000}, 'timestamp': '2025-09-10 02:45:29.068454', 'step': 16488, 'epoch': 3} {'type': 'loss', 'content': 0.08015197515487671, 'timestamp': '2025-09-10 02:45:29.070483', 'step': 16489, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 2847885091968}, 'timestamp': '2025-09-10 02:45:29.100547', 'step': 16489, 'epoch': 3} {'type': 'loss', 'content': 0.10238650441169739, 'timestamp': '2025-09-10 02:45:29.102973', 'step': 16490, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 128], 'flops': 3797092544000}, 'timestamp': '2025-09-10 02:45:29.133534', 'step': 16490, 'epoch': 3} {'type': 'loss', 'content': 0.04411182552576065, 'timestamp': '2025-09-10 02:45:29.136061', 'step': 16491, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 2847885091968}, 'timestamp': '2025-09-10 02:45:29.166167', 'step': 16491, 'epoch': 3} {'type': 'loss', 'content': 0.06555788218975067, 'timestamp': '2025-09-10 02:45:29.190490', 'step': 16492, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 160], 'flops': 4746299996032}, 'timestamp': '2025-09-10 02:45:29.221875', 'step': 16492, 'epoch': 3} {'type': 'loss', 'content': 0.023923177272081375, 'timestamp': '2025-09-10 02:45:29.224278', 'step': 16493, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 176], 'flops': 5220903722048}, 'timestamp': '2025-09-10 02:45:29.254847', 'step': 16493, 'epoch': 3} {'type': 'loss', 'content': 0.06687016785144806, 'timestamp': '2025-09-10 02:45:29.258933', 'step': 16494, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 3322488817984}, 'timestamp': '2025-09-10 02:45:29.289664', 'step': 16494, 'epoch': 3} {'type': 'loss', 'content': 0.10738768428564072, 'timestamp': '2025-09-10 02:45:29.291700', 'step': 16495, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 2847885091968}, 'timestamp': '2025-09-10 02:45:29.323032', 'step': 16495, 'epoch': 3} {'type': 'loss', 'content': 0.09903176873922348, 'timestamp': '2025-09-10 02:45:29.346844', 'step': 16496, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 3322488817984}, 'timestamp': '2025-09-10 02:45:29.377143', 'step': 16496, 'epoch': 3} {'type': 'loss', 'content': 0.06582595407962799, 'timestamp': '2025-09-10 02:45:29.379379', 'step': 16497, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 3322488817984}, 'timestamp': '2025-09-10 02:45:29.411533', 'step': 16497, 'epoch': 3} {'type': 'loss', 'content': 0.09826728701591492, 'timestamp': '2025-09-10 02:45:29.413967', 'step': 16498, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 3322488817984}, 'timestamp': '2025-09-10 02:45:29.444562', 'step': 16498, 'epoch': 3} {'type': 'loss', 'content': 0.023181000724434853, 'timestamp': '2025-09-10 02:45:29.450701', 'step': 16499, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 3322488817984}, 'timestamp': '2025-09-10 02:45:29.488526', 'step': 16499, 'epoch': 3} {'type': 'loss', 'content': 0.0778219997882843, 'timestamp': '2025-09-10 02:45:29.512254', 'step': 16500, 'epoch': 3} {'type': 'info', 'content': 'Checkpoint saved at step 16500', 'timestamp': '2025-09-10 02:45:34.876301', 'step': 16500, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 80], 'flops': 2373281365952}, 'timestamp': '2025-09-10 02:45:34.925726', 'step': 16500, 'epoch': 3} {'type': 'loss', 'content': 0.046912215650081635, 'timestamp': '2025-09-10 02:45:34.930688', 'step': 16501, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 128], 'flops': 3797092544000}, 'timestamp': '2025-09-10 02:45:34.966836', 'step': 16501, 'epoch': 3} {'type': 'loss', 'content': 0.1121315136551857, 'timestamp': '2025-09-10 02:45:34.974901', 'step': 16502, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 128], 'flops': 3797092544000}, 'timestamp': '2025-09-10 02:45:35.020388', 'step': 16502, 'epoch': 3} {'type': 'loss', 'content': 0.08650581538677216, 'timestamp': '2025-09-10 02:45:35.022620', 'step': 16503, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 128], 'flops': 3797092544000}, 'timestamp': '2025-09-10 02:45:35.055534', 'step': 16503, 'epoch': 3} {'type': 'loss', 'content': 0.0330238938331604, 'timestamp': '2025-09-10 02:45:35.079678', 'step': 16504, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 2847885091968}, 'timestamp': '2025-09-10 02:45:35.110127', 'step': 16504, 'epoch': 3} {'type': 'loss', 'content': 0.059050045907497406, 'timestamp': '2025-09-10 02:45:35.112331', 'step': 16505, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 2847885091968}, 'timestamp': '2025-09-10 02:45:35.142974', 'step': 16505, 'epoch': 3} {'type': 'loss', 'content': 0.06178189069032669, 'timestamp': '2025-09-10 02:45:35.145574', 'step': 16506, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 3322488817984}, 'timestamp': '2025-09-10 02:45:35.177263', 'step': 16506, 'epoch': 3} {'type': 'loss', 'content': 0.1216878890991211, 'timestamp': '2025-09-10 02:45:35.179448', 'step': 16507, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 128], 'flops': 3797092544000}, 'timestamp': '2025-09-10 02:45:35.209353', 'step': 16507, 'epoch': 3} {'type': 'loss', 'content': 0.027072319760918617, 'timestamp': '2025-09-10 02:45:35.232542', 'step': 16508, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 3322488817984}, 'timestamp': '2025-09-10 02:45:35.268675', 'step': 16508, 'epoch': 3} {'type': 'loss', 'content': 0.02622699923813343, 'timestamp': '2025-09-10 02:45:35.273699', 'step': 16509, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 128], 'flops': 3797092544000}, 'timestamp': '2025-09-10 02:45:35.309209', 'step': 16509, 'epoch': 3} {'type': 'loss', 'content': 0.1005750447511673, 'timestamp': '2025-09-10 02:45:35.311768', 'step': 16510, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 128], 'flops': 3797092544000}, 'timestamp': '2025-09-10 02:45:35.348667', 'step': 16510, 'epoch': 3} {'type': 'loss', 'content': 0.08469251543283463, 'timestamp': '2025-09-10 02:45:35.350886', 'step': 16511, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 3322488817984}, 'timestamp': '2025-09-10 02:45:35.382532', 'step': 16511, 'epoch': 3} {'type': 'loss', 'content': 0.08619577437639236, 'timestamp': '2025-09-10 02:45:35.405956', 'step': 16512, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 2847885091968}, 'timestamp': '2025-09-10 02:45:35.436392', 'step': 16512, 'epoch': 3} {'type': 'loss', 'content': 0.0864161029458046, 'timestamp': '2025-09-10 02:45:35.438302', 'step': 16513, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 2847885091968}, 'timestamp': '2025-09-10 02:45:35.471674', 'step': 16513, 'epoch': 3} {'type': 'loss', 'content': 0.0555950403213501, 'timestamp': '2025-09-10 02:45:35.477213', 'step': 16514, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 128], 'flops': 3797092544000}, 'timestamp': '2025-09-10 02:45:35.512477', 'step': 16514, 'epoch': 3} {'type': 'loss', 'content': 0.047227777540683746, 'timestamp': '2025-09-10 02:45:35.515523', 'step': 16515, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 3322488817984}, 'timestamp': '2025-09-10 02:45:35.562985', 'step': 16515, 'epoch': 3} {'type': 'loss', 'content': 0.06270121783018112, 'timestamp': '2025-09-10 02:45:35.591044', 'step': 16516, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 2847885091968}, 'timestamp': '2025-09-10 02:45:35.638885', 'step': 16516, 'epoch': 3} {'type': 'loss', 'content': 0.0936010479927063, 'timestamp': '2025-09-10 02:45:35.643934', 'step': 16517, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 2847885091968}, 'timestamp': '2025-09-10 02:45:35.705652', 'step': 16517, 'epoch': 3} {'type': 'loss', 'content': 0.031128695234656334, 'timestamp': '2025-09-10 02:45:35.709337', 'step': 16518, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 2847885091968}, 'timestamp': '2025-09-10 02:45:35.764021', 'step': 16518, 'epoch': 3} {'type': 'loss', 'content': 0.05216261371970177, 'timestamp': '2025-09-10 02:45:35.776016', 'step': 16519, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 3322488817984}, 'timestamp': '2025-09-10 02:45:35.839424', 'step': 16519, 'epoch': 3} {'type': 'loss', 'content': 0.1293768435716629, 'timestamp': '2025-09-10 02:45:35.874264', 'step': 16520, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 128], 'flops': 3797092544000}, 'timestamp': '2025-09-10 02:45:35.920368', 'step': 16520, 'epoch': 3} {'type': 'loss', 'content': 0.07198399305343628, 'timestamp': '2025-09-10 02:45:35.930975', 'step': 16521, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 128], 'flops': 3797092544000}, 'timestamp': '2025-09-10 02:45:35.982571', 'step': 16521, 'epoch': 3} {'type': 'loss', 'content': 0.10443881154060364, 'timestamp': '2025-09-10 02:45:35.985962', 'step': 16522, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 176], 'flops': 5220903722048}, 'timestamp': '2025-09-10 02:45:36.034774', 'step': 16522, 'epoch': 3} {'type': 'loss', 'content': 0.062484871596097946, 'timestamp': '2025-09-10 02:45:36.037980', 'step': 16523, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 128], 'flops': 3797092544000}, 'timestamp': '2025-09-10 02:45:36.069760', 'step': 16523, 'epoch': 3} {'type': 'loss', 'content': 0.085066057741642, 'timestamp': '2025-09-10 02:45:36.093260', 'step': 16524, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 3322488817984}, 'timestamp': '2025-09-10 02:45:36.127660', 'step': 16524, 'epoch': 3} {'type': 'loss', 'content': 0.02268039993941784, 'timestamp': '2025-09-10 02:45:36.132050', 'step': 16525, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 2847885091968}, 'timestamp': '2025-09-10 02:45:36.165650', 'step': 16525, 'epoch': 3} {'type': 'loss', 'content': 0.020096903666853905, 'timestamp': '2025-09-10 02:45:36.167871', 'step': 16526, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 144], 'flops': 4271696270016}, 'timestamp': '2025-09-10 02:45:36.201131', 'step': 16526, 'epoch': 3} {'type': 'loss', 'content': 0.018627937883138657, 'timestamp': '2025-09-10 02:45:36.204995', 'step': 16527, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 128], 'flops': 3797092544000}, 'timestamp': '2025-09-10 02:45:36.243785', 'step': 16527, 'epoch': 3} {'type': 'loss', 'content': 0.06730113178491592, 'timestamp': '2025-09-10 02:45:36.269253', 'step': 16528, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 128], 'flops': 3797092544000}, 'timestamp': '2025-09-10 02:45:36.306808', 'step': 16528, 'epoch': 3} {'type': 'loss', 'content': 0.04984349384903908, 'timestamp': '2025-09-10 02:45:36.309943', 'step': 16529, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 2847885091968}, 'timestamp': '2025-09-10 02:45:36.343749', 'step': 16529, 'epoch': 3} {'type': 'loss', 'content': 0.05955473333597183, 'timestamp': '2025-09-10 02:45:36.346156', 'step': 16530, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 2847885091968}, 'timestamp': '2025-09-10 02:45:36.382011', 'step': 16530, 'epoch': 3} {'type': 'loss', 'content': 0.13655248284339905, 'timestamp': '2025-09-10 02:45:36.384902', 'step': 16531, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 128], 'flops': 3797092544000}, 'timestamp': '2025-09-10 02:45:36.419372', 'step': 16531, 'epoch': 3} {'type': 'loss', 'content': 0.054791953414678574, 'timestamp': '2025-09-10 02:45:36.446613', 'step': 16532, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 3322488817984}, 'timestamp': '2025-09-10 02:45:36.479271', 'step': 16532, 'epoch': 3} {'type': 'loss', 'content': 0.08011115342378616, 'timestamp': '2025-09-10 02:45:36.481495', 'step': 16533, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 128], 'flops': 3797092544000}, 'timestamp': '2025-09-10 02:45:36.520211', 'step': 16533, 'epoch': 3} {'type': 'loss', 'content': 0.040491167455911636, 'timestamp': '2025-09-10 02:45:36.522813', 'step': 16534, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 3322488817984}, 'timestamp': '2025-09-10 02:45:36.555552', 'step': 16534, 'epoch': 3} {'type': 'loss', 'content': 0.21060584485530853, 'timestamp': '2025-09-10 02:45:36.561036', 'step': 16535, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 3322488817984}, 'timestamp': '2025-09-10 02:45:36.595954', 'step': 16535, 'epoch': 3} {'type': 'loss', 'content': 0.07657862454652786, 'timestamp': '2025-09-10 02:45:36.620121', 'step': 16536, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 3322488817984}, 'timestamp': '2025-09-10 02:45:36.655785', 'step': 16536, 'epoch': 3} {'type': 'loss', 'content': 0.07511113584041595, 'timestamp': '2025-09-10 02:45:36.658418', 'step': 16537, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 3322488817984}, 'timestamp': '2025-09-10 02:45:36.692093', 'step': 16537, 'epoch': 3} {'type': 'loss', 'content': 0.05225488916039467, 'timestamp': '2025-09-10 02:45:36.694356', 'step': 16538, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 2847885091968}, 'timestamp': '2025-09-10 02:45:36.728146', 'step': 16538, 'epoch': 3} {'type': 'loss', 'content': 0.04969972372055054, 'timestamp': '2025-09-10 02:45:36.730215', 'step': 16539, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 128], 'flops': 3797092544000}, 'timestamp': '2025-09-10 02:45:36.761655', 'step': 16539, 'epoch': 3} {'type': 'loss', 'content': 0.054560136049985886, 'timestamp': '2025-09-10 02:45:36.787802', 'step': 16540, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 2847885091968}, 'timestamp': '2025-09-10 02:45:36.825766', 'step': 16540, 'epoch': 3} {'type': 'loss', 'content': 0.12138909846544266, 'timestamp': '2025-09-10 02:45:36.833107', 'step': 16541, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 2847885091968}, 'timestamp': '2025-09-10 02:45:36.870655', 'step': 16541, 'epoch': 3} {'type': 'loss', 'content': 0.053922463208436966, 'timestamp': '2025-09-10 02:45:36.886149', 'step': 16542, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 160], 'flops': 4746299996032}, 'timestamp': '2025-09-10 02:45:36.964946', 'step': 16542, 'epoch': 3} {'type': 'loss', 'content': 0.1124648004770279, 'timestamp': '2025-09-10 02:45:36.979334', 'step': 16543, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 3322488817984}, 'timestamp': '2025-09-10 02:45:37.075606', 'step': 16543, 'epoch': 3} {'type': 'loss', 'content': 0.09485289454460144, 'timestamp': '2025-09-10 02:45:37.121494', 'step': 16544, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 3322488817984}, 'timestamp': '2025-09-10 02:45:37.205513', 'step': 16544, 'epoch': 3} {'type': 'loss', 'content': 0.05498224124312401, 'timestamp': '2025-09-10 02:45:37.214344', 'step': 16545, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 3322488817984}, 'timestamp': '2025-09-10 02:45:37.272023', 'step': 16545, 'epoch': 3} {'type': 'loss', 'content': 0.06457497179508209, 'timestamp': '2025-09-10 02:45:37.282847', 'step': 16546, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 144], 'flops': 4271696270016}, 'timestamp': '2025-09-10 02:45:37.335628', 'step': 16546, 'epoch': 3} {'type': 'loss', 'content': 0.08292416483163834, 'timestamp': '2025-09-10 02:45:37.351167', 'step': 16547, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 3322488817984}, 'timestamp': '2025-09-10 02:45:37.420111', 'step': 16547, 'epoch': 3} {'type': 'loss', 'content': 0.050546273589134216, 'timestamp': '2025-09-10 02:45:37.466618', 'step': 16548, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 3322488817984}, 'timestamp': '2025-09-10 02:45:37.529781', 'step': 16548, 'epoch': 3} {'type': 'loss', 'content': 0.043261490762233734, 'timestamp': '2025-09-10 02:45:37.544393', 'step': 16549, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 80], 'flops': 2373281365952}, 'timestamp': '2025-09-10 02:45:37.601429', 'step': 16549, 'epoch': 3} {'type': 'loss', 'content': 0.11954241991043091, 'timestamp': '2025-09-10 02:45:37.614979', 'step': 16550, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 2847885091968}, 'timestamp': '2025-09-10 02:45:37.675801', 'step': 16550, 'epoch': 3} {'type': 'loss', 'content': 0.0448232963681221, 'timestamp': '2025-09-10 02:45:37.689293', 'step': 16551, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 144], 'flops': 4271696270016}, 'timestamp': '2025-09-10 02:45:37.761546', 'step': 16551, 'epoch': 3} {'type': 'loss', 'content': 0.06285277754068375, 'timestamp': '2025-09-10 02:45:37.796874', 'step': 16552, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 144], 'flops': 4271696270016}, 'timestamp': '2025-09-10 02:45:37.877367', 'step': 16552, 'epoch': 3} {'type': 'loss', 'content': 0.06792163103818893, 'timestamp': '2025-09-10 02:45:37.887451', 'step': 16553, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 3322488817984}, 'timestamp': '2025-09-10 02:45:37.972694', 'step': 16553, 'epoch': 3} {'type': 'loss', 'content': 0.12034894526004791, 'timestamp': '2025-09-10 02:45:37.987321', 'step': 16554, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 3322488817984}, 'timestamp': '2025-09-10 02:45:38.073235', 'step': 16554, 'epoch': 3} {'type': 'loss', 'content': 0.04763831943273544, 'timestamp': '2025-09-10 02:45:38.088094', 'step': 16555, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 2847885091968}, 'timestamp': '2025-09-10 02:45:38.162380', 'step': 16555, 'epoch': 3} {'type': 'loss', 'content': 0.04837390407919884, 'timestamp': '2025-09-10 02:45:38.202123', 'step': 16556, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 2847885091968}, 'timestamp': '2025-09-10 02:45:38.289011', 'step': 16556, 'epoch': 3} {'type': 'loss', 'content': 0.11800047010183334, 'timestamp': '2025-09-10 02:45:38.301700', 'step': 16557, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 2847885091968}, 'timestamp': '2025-09-10 02:45:38.378529', 'step': 16557, 'epoch': 3} {'type': 'loss', 'content': 0.07125194370746613, 'timestamp': '2025-09-10 02:45:38.398928', 'step': 16558, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 3322488817984}, 'timestamp': '2025-09-10 02:45:38.509458', 'step': 16558, 'epoch': 3} {'type': 'loss', 'content': 0.11311432719230652, 'timestamp': '2025-09-10 02:45:38.525933', 'step': 16559, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 128], 'flops': 3797092544000}, 'timestamp': '2025-09-10 02:45:38.597769', 'step': 16559, 'epoch': 3} {'type': 'loss', 'content': 0.03826683387160301, 'timestamp': '2025-09-10 02:45:38.641060', 'step': 16560, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 3322488817984}, 'timestamp': '2025-09-10 02:45:38.689014', 'step': 16560, 'epoch': 3} {'type': 'loss', 'content': 0.1289638876914978, 'timestamp': '2025-09-10 02:45:38.700988', 'step': 16561, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 128], 'flops': 3797092544000}, 'timestamp': '2025-09-10 02:45:38.790382', 'step': 16561, 'epoch': 3} {'type': 'loss', 'content': 0.06125668063759804, 'timestamp': '2025-09-10 02:45:38.830067', 'step': 16562, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 2847885091968}, 'timestamp': '2025-09-10 02:45:38.883342', 'step': 16562, 'epoch': 3} {'type': 'loss', 'content': 0.025805804878473282, 'timestamp': '2025-09-10 02:45:38.895659', 'step': 16563, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 128], 'flops': 3797092544000}, 'timestamp': '2025-09-10 02:45:38.969773', 'step': 16563, 'epoch': 3} {'type': 'loss', 'content': 0.11245324462652206, 'timestamp': '2025-09-10 02:45:39.006189', 'step': 16564, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 128], 'flops': 3797092544000}, 'timestamp': '2025-09-10 02:45:39.069319', 'step': 16564, 'epoch': 3} {'type': 'loss', 'content': 0.022852489724755287, 'timestamp': '2025-09-10 02:45:39.080205', 'step': 16565, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 2847885091968}, 'timestamp': '2025-09-10 02:45:39.137759', 'step': 16565, 'epoch': 3} {'type': 'loss', 'content': 0.02647457830607891, 'timestamp': '2025-09-10 02:45:39.151001', 'step': 16566, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 2847885091968}, 'timestamp': '2025-09-10 02:45:39.194907', 'step': 16566, 'epoch': 3} {'type': 'loss', 'content': 0.051397569477558136, 'timestamp': '2025-09-10 02:45:39.208798', 'step': 16567, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 160], 'flops': 4746299996032}, 'timestamp': '2025-09-10 02:45:39.267845', 'step': 16567, 'epoch': 3} {'type': 'loss', 'content': 0.06980273127555847, 'timestamp': '2025-09-10 02:45:39.300301', 'step': 16568, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 2847885091968}, 'timestamp': '2025-09-10 02:45:39.358241', 'step': 16568, 'epoch': 3} {'type': 'loss', 'content': 0.07480400055646896, 'timestamp': '2025-09-10 02:45:39.368269', 'step': 16569, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 2847885091968}, 'timestamp': '2025-09-10 02:45:39.444400', 'step': 16569, 'epoch': 3} {'type': 'loss', 'content': 0.12257015705108643, 'timestamp': '2025-09-10 02:45:39.460911', 'step': 16570, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 2847885091968}, 'timestamp': '2025-09-10 02:45:39.529888', 'step': 16570, 'epoch': 3} {'type': 'loss', 'content': 0.06695367395877838, 'timestamp': '2025-09-10 02:45:39.546582', 'step': 16571, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 2847885091968}, 'timestamp': '2025-09-10 02:45:39.594227', 'step': 16571, 'epoch': 3} {'type': 'loss', 'content': 0.03724139556288719, 'timestamp': '2025-09-10 02:45:39.626245', 'step': 16572, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 128], 'flops': 3797092544000}, 'timestamp': '2025-09-10 02:45:39.677577', 'step': 16572, 'epoch': 3} {'type': 'loss', 'content': 0.11484823375940323, 'timestamp': '2025-09-10 02:45:39.693842', 'step': 16573, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 2847885091968}, 'timestamp': '2025-09-10 02:45:39.784430', 'step': 16573, 'epoch': 3} {'type': 'loss', 'content': 0.03022974357008934, 'timestamp': '2025-09-10 02:45:39.797416', 'step': 16574, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 2847885091968}, 'timestamp': '2025-09-10 02:45:39.878734', 'step': 16574, 'epoch': 3} {'type': 'loss', 'content': 0.1159164309501648, 'timestamp': '2025-09-10 02:45:39.891347', 'step': 16575, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 128], 'flops': 3797092544000}, 'timestamp': '2025-09-10 02:45:39.949353', 'step': 16575, 'epoch': 3} {'type': 'loss', 'content': 0.05278732255101204, 'timestamp': '2025-09-10 02:45:39.983031', 'step': 16576, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 3322488817984}, 'timestamp': '2025-09-10 02:45:40.060762', 'step': 16576, 'epoch': 3} {'type': 'loss', 'content': 0.03163648024201393, 'timestamp': '2025-09-10 02:45:40.087297', 'step': 16577, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 128], 'flops': 3797092544000}, 'timestamp': '2025-09-10 02:45:40.158080', 'step': 16577, 'epoch': 3} {'type': 'loss', 'content': 0.05141692981123924, 'timestamp': '2025-09-10 02:45:40.170866', 'step': 16578, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 2847885091968}, 'timestamp': '2025-09-10 02:45:40.225293', 'step': 16578, 'epoch': 3} {'type': 'loss', 'content': 0.09593061357736588, 'timestamp': '2025-09-10 02:45:40.232656', 'step': 16579, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 2847885091968}, 'timestamp': '2025-09-10 02:45:40.306461', 'step': 16579, 'epoch': 3} {'type': 'loss', 'content': 0.10495780408382416, 'timestamp': '2025-09-10 02:45:40.335021', 'step': 16580, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 80], 'flops': 2373281365952}, 'timestamp': '2025-09-10 02:45:40.395566', 'step': 16580, 'epoch': 3} {'type': 'loss', 'content': 0.026804160326719284, 'timestamp': '2025-09-10 02:45:40.421454', 'step': 16581, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 144], 'flops': 4271696270016}, 'timestamp': '2025-09-10 02:45:40.472595', 'step': 16581, 'epoch': 3} {'type': 'loss', 'content': 0.06260289251804352, 'timestamp': '2025-09-10 02:45:40.479531', 'step': 16582, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 2847885091968}, 'timestamp': '2025-09-10 02:45:40.537489', 'step': 16582, 'epoch': 3} {'type': 'loss', 'content': 0.05556781217455864, 'timestamp': '2025-09-10 02:45:40.542434', 'step': 16583, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 160], 'flops': 4746299996032}, 'timestamp': '2025-09-10 02:45:40.609721', 'step': 16583, 'epoch': 3} {'type': 'loss', 'content': 0.05366713926196098, 'timestamp': '2025-09-10 02:45:40.643601', 'step': 16584, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 128], 'flops': 3797092544000}, 'timestamp': '2025-09-10 02:45:40.727749', 'step': 16584, 'epoch': 3} {'type': 'loss', 'content': 0.06256234645843506, 'timestamp': '2025-09-10 02:45:40.742267', 'step': 16585, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 2847885091968}, 'timestamp': '2025-09-10 02:45:40.785325', 'step': 16585, 'epoch': 3} {'type': 'loss', 'content': 0.04595236852765083, 'timestamp': '2025-09-10 02:45:40.793477', 'step': 16586, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 3322488817984}, 'timestamp': '2025-09-10 02:45:40.851961', 'step': 16586, 'epoch': 3} {'type': 'loss', 'content': 0.09734570980072021, 'timestamp': '2025-09-10 02:45:40.858978', 'step': 16587, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 2847885091968}, 'timestamp': '2025-09-10 02:45:40.906407', 'step': 16587, 'epoch': 3} {'type': 'loss', 'content': 0.10737954825162888, 'timestamp': '2025-09-10 02:45:40.951608', 'step': 16588, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 128], 'flops': 3797092544000}, 'timestamp': '2025-09-10 02:45:41.009214', 'step': 16588, 'epoch': 3} {'type': 'loss', 'content': 0.010531272739171982, 'timestamp': '2025-09-10 02:45:41.035619', 'step': 16589, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 128], 'flops': 3797092544000}, 'timestamp': '2025-09-10 02:45:41.088346', 'step': 16589, 'epoch': 3} {'type': 'loss', 'content': 0.07297681272029877, 'timestamp': '2025-09-10 02:45:41.093637', 'step': 16590, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 3322488817984}, 'timestamp': '2025-09-10 02:45:41.171012', 'step': 16590, 'epoch': 3} {'type': 'loss', 'content': 0.08542583137750626, 'timestamp': '2025-09-10 02:45:41.202757', 'step': 16591, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 144], 'flops': 4271696270016}, 'timestamp': '2025-09-10 02:45:41.282147', 'step': 16591, 'epoch': 3} {'type': 'loss', 'content': 0.049082186073064804, 'timestamp': '2025-09-10 02:45:41.312696', 'step': 16592, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 160], 'flops': 4746299996032}, 'timestamp': '2025-09-10 02:45:41.390226', 'step': 16592, 'epoch': 3} {'type': 'loss', 'content': 0.0739528238773346, 'timestamp': '2025-09-10 02:45:41.411672', 'step': 16593, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 128], 'flops': 3797092544000}, 'timestamp': '2025-09-10 02:45:41.467991', 'step': 16593, 'epoch': 3} {'type': 'loss', 'content': 0.07000987231731415, 'timestamp': '2025-09-10 02:45:41.476117', 'step': 16594, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 2847885091968}, 'timestamp': '2025-09-10 02:45:41.533448', 'step': 16594, 'epoch': 3} {'type': 'loss', 'content': 0.0910581573843956, 'timestamp': '2025-09-10 02:45:41.539637', 'step': 16595, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 3322488817984}, 'timestamp': '2025-09-10 02:45:41.614683', 'step': 16595, 'epoch': 3} {'type': 'loss', 'content': 0.07738291472196579, 'timestamp': '2025-09-10 02:45:41.648860', 'step': 16596, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 3322488817984}, 'timestamp': '2025-09-10 02:45:41.708912', 'step': 16596, 'epoch': 3} {'type': 'loss', 'content': 0.03398202359676361, 'timestamp': '2025-09-10 02:45:41.718418', 'step': 16597, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 3322488817984}, 'timestamp': '2025-09-10 02:45:41.779929', 'step': 16597, 'epoch': 3} {'type': 'loss', 'content': 0.09085562080144882, 'timestamp': '2025-09-10 02:45:41.795613', 'step': 16598, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 208], 'flops': 6170111174080}, 'timestamp': '2025-09-10 02:45:41.869781', 'step': 16598, 'epoch': 3} {'type': 'loss', 'content': 0.05451316386461258, 'timestamp': '2025-09-10 02:45:41.894649', 'step': 16599, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 3322488817984}, 'timestamp': '2025-09-10 02:45:41.952972', 'step': 16599, 'epoch': 3} {'type': 'loss', 'content': 0.06737273931503296, 'timestamp': '2025-09-10 02:45:42.014221', 'step': 16600, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 128], 'flops': 3797092544000}, 'timestamp': '2025-09-10 02:45:42.077462', 'step': 16600, 'epoch': 3} {'type': 'loss', 'content': 0.09213300794363022, 'timestamp': '2025-09-10 02:45:42.088319', 'step': 16601, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 128], 'flops': 3797092544000}, 'timestamp': '2025-09-10 02:45:42.141519', 'step': 16601, 'epoch': 3} {'type': 'loss', 'content': 0.05406787246465683, 'timestamp': '2025-09-10 02:45:42.153542', 'step': 16602, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 80], 'flops': 2373281365952}, 'timestamp': '2025-09-10 02:45:42.212988', 'step': 16602, 'epoch': 3} {'type': 'loss', 'content': 0.017993206158280373, 'timestamp': '2025-09-10 02:45:42.221689', 'step': 16603, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 3322488817984}, 'timestamp': '2025-09-10 02:45:42.274567', 'step': 16603, 'epoch': 3} {'type': 'loss', 'content': 0.09918394684791565, 'timestamp': '2025-09-10 02:45:42.321438', 'step': 16604, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 3322488817984}, 'timestamp': '2025-09-10 02:45:42.382679', 'step': 16604, 'epoch': 3} {'type': 'loss', 'content': 0.0987083837389946, 'timestamp': '2025-09-10 02:45:42.393178', 'step': 16605, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 3322488817984}, 'timestamp': '2025-09-10 02:45:42.464820', 'step': 16605, 'epoch': 3} {'type': 'loss', 'content': 0.04614674299955368, 'timestamp': '2025-09-10 02:45:42.479948', 'step': 16606, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 2847885091968}, 'timestamp': '2025-09-10 02:45:42.541315', 'step': 16606, 'epoch': 3} {'type': 'loss', 'content': 0.15340812504291534, 'timestamp': '2025-09-10 02:45:42.567357', 'step': 16607, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 2847885091968}, 'timestamp': '2025-09-10 02:45:42.621869', 'step': 16607, 'epoch': 3} {'type': 'loss', 'content': 0.00833862368017435, 'timestamp': '2025-09-10 02:45:42.673046', 'step': 16608, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 3322488817984}, 'timestamp': '2025-09-10 02:45:42.716970', 'step': 16608, 'epoch': 3} {'type': 'loss', 'content': 0.04326682537794113, 'timestamp': '2025-09-10 02:45:42.733255', 'step': 16609, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 128], 'flops': 3797092544000}, 'timestamp': '2025-09-10 02:45:42.811722', 'step': 16609, 'epoch': 3} {'type': 'loss', 'content': 0.11653636395931244, 'timestamp': '2025-09-10 02:45:42.819643', 'step': 16610, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 80], 'flops': 2373281365952}, 'timestamp': '2025-09-10 02:45:42.884418', 'step': 16610, 'epoch': 3} {'type': 'loss', 'content': 0.08164002001285553, 'timestamp': '2025-09-10 02:45:42.896285', 'step': 16611, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 2847885091968}, 'timestamp': '2025-09-10 02:45:42.950930', 'step': 16611, 'epoch': 3} {'type': 'loss', 'content': 0.04173203930258751, 'timestamp': '2025-09-10 02:45:42.991117', 'step': 16612, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 144], 'flops': 4271696270016}, 'timestamp': '2025-09-10 02:45:43.054145', 'step': 16612, 'epoch': 3} {'type': 'loss', 'content': 0.12273299694061279, 'timestamp': '2025-09-10 02:45:43.080867', 'step': 16613, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 128], 'flops': 3797092544000}, 'timestamp': '2025-09-10 02:45:43.141801', 'step': 16613, 'epoch': 3} {'type': 'loss', 'content': 0.0628158301115036, 'timestamp': '2025-09-10 02:45:43.153163', 'step': 16614, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 160], 'flops': 4746299996032}, 'timestamp': '2025-09-10 02:45:43.201897', 'step': 16614, 'epoch': 3} {'type': 'loss', 'content': 0.060541920363903046, 'timestamp': '2025-09-10 02:45:43.217380', 'step': 16615, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 128], 'flops': 3797092544000}, 'timestamp': '2025-09-10 02:45:43.275009', 'step': 16615, 'epoch': 3} {'type': 'loss', 'content': 0.03463247790932655, 'timestamp': '2025-09-10 02:45:43.309489', 'step': 16616, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 144], 'flops': 4271696270016}, 'timestamp': '2025-09-10 02:45:43.356767', 'step': 16616, 'epoch': 3} {'type': 'loss', 'content': 0.05596140772104263, 'timestamp': '2025-09-10 02:45:43.365253', 'step': 16617, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 2847885091968}, 'timestamp': '2025-09-10 02:45:43.418741', 'step': 16617, 'epoch': 3} {'type': 'loss', 'content': 0.04507230594754219, 'timestamp': '2025-09-10 02:45:43.435623', 'step': 16618, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 144], 'flops': 4271696270016}, 'timestamp': '2025-09-10 02:45:43.488963', 'step': 16618, 'epoch': 3} {'type': 'loss', 'content': 0.03496601805090904, 'timestamp': '2025-09-10 02:45:43.513452', 'step': 16619, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 144], 'flops': 4271696270016}, 'timestamp': '2025-09-10 02:45:43.574476', 'step': 16619, 'epoch': 3} {'type': 'loss', 'content': 0.12206359207630157, 'timestamp': '2025-09-10 02:45:43.606006', 'step': 16620, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 80], 'flops': 2373281365952}, 'timestamp': '2025-09-10 02:45:43.672759', 'step': 16620, 'epoch': 3} {'type': 'loss', 'content': 0.025148525834083557, 'timestamp': '2025-09-10 02:45:43.681852', 'step': 16621, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 128], 'flops': 3797092544000}, 'timestamp': '2025-09-10 02:45:43.773760', 'step': 16621, 'epoch': 3} {'type': 'loss', 'content': 0.02574198879301548, 'timestamp': '2025-09-10 02:45:43.799944', 'step': 16622, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 2847885091968}, 'timestamp': '2025-09-10 02:45:43.855966', 'step': 16622, 'epoch': 3} {'type': 'loss', 'content': 0.033153895288705826, 'timestamp': '2025-09-10 02:45:43.884616', 'step': 16623, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 3322488817984}, 'timestamp': '2025-09-10 02:45:43.936661', 'step': 16623, 'epoch': 3} {'type': 'loss', 'content': 0.02183462306857109, 'timestamp': '2025-09-10 02:45:43.969346', 'step': 16624, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 3322488817984}, 'timestamp': '2025-09-10 02:45:44.050871', 'step': 16624, 'epoch': 3} {'type': 'loss', 'content': 0.13975994288921356, 'timestamp': '2025-09-10 02:45:44.055162', 'step': 16625, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 3322488817984}, 'timestamp': '2025-09-10 02:45:44.105192', 'step': 16625, 'epoch': 3} {'type': 'loss', 'content': 0.10833368450403214, 'timestamp': '2025-09-10 02:45:44.124562', 'step': 16626, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 160], 'flops': 4746299996032}, 'timestamp': '2025-09-10 02:45:44.179225', 'step': 16626, 'epoch': 3} {'type': 'loss', 'content': 0.025732366368174553, 'timestamp': '2025-09-10 02:45:44.188705', 'step': 16627, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 128], 'flops': 3797092544000}, 'timestamp': '2025-09-10 02:45:44.242982', 'step': 16627, 'epoch': 3} {'type': 'loss', 'content': 0.06605471670627594, 'timestamp': '2025-09-10 02:45:44.280526', 'step': 16628, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 144], 'flops': 4271696270016}, 'timestamp': '2025-09-10 02:45:44.350525', 'step': 16628, 'epoch': 3} {'type': 'loss', 'content': 0.06708454340696335, 'timestamp': '2025-09-10 02:45:44.370170', 'step': 16629, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 128], 'flops': 3797092544000}, 'timestamp': '2025-09-10 02:45:44.427148', 'step': 16629, 'epoch': 3} {'type': 'loss', 'content': 0.0647716298699379, 'timestamp': '2025-09-10 02:45:44.443995', 'step': 16630, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 176], 'flops': 5220903722048}, 'timestamp': '2025-09-10 02:45:44.501985', 'step': 16630, 'epoch': 3} {'type': 'loss', 'content': 0.08248093724250793, 'timestamp': '2025-09-10 02:45:44.517521', 'step': 16631, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 2847885091968}, 'timestamp': '2025-09-10 02:45:44.577275', 'step': 16631, 'epoch': 3} {'type': 'loss', 'content': 0.05862267315387726, 'timestamp': '2025-09-10 02:45:44.608662', 'step': 16632, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 2847885091968}, 'timestamp': '2025-09-10 02:45:44.655432', 'step': 16632, 'epoch': 3} {'type': 'loss', 'content': 0.02050068788230419, 'timestamp': '2025-09-10 02:45:44.673658', 'step': 16633, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 3322488817984}, 'timestamp': '2025-09-10 02:45:44.723586', 'step': 16633, 'epoch': 3} {'type': 'loss', 'content': 0.11715279519557953, 'timestamp': '2025-09-10 02:45:44.729783', 'step': 16634, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 128], 'flops': 3797092544000}, 'timestamp': '2025-09-10 02:45:44.781197', 'step': 16634, 'epoch': 3} {'type': 'loss', 'content': 0.13293181359767914, 'timestamp': '2025-09-10 02:45:44.799541', 'step': 16635, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 3322488817984}, 'timestamp': '2025-09-10 02:45:44.863948', 'step': 16635, 'epoch': 3} {'type': 'loss', 'content': 0.02152642048895359, 'timestamp': '2025-09-10 02:45:44.903978', 'step': 16636, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 2847885091968}, 'timestamp': '2025-09-10 02:45:44.972398', 'step': 16636, 'epoch': 3} {'type': 'loss', 'content': 0.018674898892641068, 'timestamp': '2025-09-10 02:45:44.987791', 'step': 16637, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 3322488817984}, 'timestamp': '2025-09-10 02:45:45.052071', 'step': 16637, 'epoch': 3} {'type': 'loss', 'content': 0.08490784466266632, 'timestamp': '2025-09-10 02:45:45.059437', 'step': 16638, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 128], 'flops': 3797092544000}, 'timestamp': '2025-09-10 02:45:45.130863', 'step': 16638, 'epoch': 3} {'type': 'loss', 'content': 0.15911562740802765, 'timestamp': '2025-09-10 02:45:45.144611', 'step': 16639, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 2847885091968}, 'timestamp': '2025-09-10 02:45:45.205177', 'step': 16639, 'epoch': 3} {'type': 'loss', 'content': 0.039656125009059906, 'timestamp': '2025-09-10 02:45:45.230952', 'step': 16640, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 128], 'flops': 3797092544000}, 'timestamp': '2025-09-10 02:45:45.299789', 'step': 16640, 'epoch': 3} {'type': 'loss', 'content': 0.03341465815901756, 'timestamp': '2025-09-10 02:45:45.310824', 'step': 16641, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 2847885091968}, 'timestamp': '2025-09-10 02:45:45.351941', 'step': 16641, 'epoch': 3} {'type': 'loss', 'content': 0.040236108005046844, 'timestamp': '2025-09-10 02:45:45.361239', 'step': 16642, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 144], 'flops': 4271696270016}, 'timestamp': '2025-09-10 02:45:45.444435', 'step': 16642, 'epoch': 3} {'type': 'loss', 'content': 0.11349885165691376, 'timestamp': '2025-09-10 02:45:45.453169', 'step': 16643, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 128], 'flops': 3797092544000}, 'timestamp': '2025-09-10 02:45:45.576991', 'step': 16643, 'epoch': 3} {'type': 'loss', 'content': 0.059984613209962845, 'timestamp': '2025-09-10 02:45:45.611893', 'step': 16644, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 144], 'flops': 4271696270016}, 'timestamp': '2025-09-10 02:45:45.667398', 'step': 16644, 'epoch': 3} {'type': 'loss', 'content': 0.02444751001894474, 'timestamp': '2025-09-10 02:45:45.677479', 'step': 16645, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 2847885091968}, 'timestamp': '2025-09-10 02:45:45.736559', 'step': 16645, 'epoch': 3} {'type': 'loss', 'content': 0.06882653385400772, 'timestamp': '2025-09-10 02:45:45.754783', 'step': 16646, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 128], 'flops': 3797092544000}, 'timestamp': '2025-09-10 02:45:45.799900', 'step': 16646, 'epoch': 3} {'type': 'loss', 'content': 0.10724024474620819, 'timestamp': '2025-09-10 02:45:45.802410', 'step': 16647, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 128], 'flops': 3797092544000}, 'timestamp': '2025-09-10 02:45:45.833636', 'step': 16647, 'epoch': 3} {'type': 'loss', 'content': 0.031599100679159164, 'timestamp': '2025-09-10 02:45:45.857559', 'step': 16648, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 128], 'flops': 3797092544000}, 'timestamp': '2025-09-10 02:45:45.900957', 'step': 16648, 'epoch': 3} {'type': 'loss', 'content': 0.10970267653465271, 'timestamp': '2025-09-10 02:45:45.907577', 'step': 16649, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 3322488817984}, 'timestamp': '2025-09-10 02:45:45.941842', 'step': 16649, 'epoch': 3} {'type': 'loss', 'content': 0.042807258665561676, 'timestamp': '2025-09-10 02:45:45.946749', 'step': 16650, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 3322488817984}, 'timestamp': '2025-09-10 02:45:45.981322', 'step': 16650, 'epoch': 3} {'type': 'loss', 'content': 0.05947679281234741, 'timestamp': '2025-09-10 02:45:45.985772', 'step': 16651, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 3322488817984}, 'timestamp': '2025-09-10 02:45:46.020295', 'step': 16651, 'epoch': 3} {'type': 'loss', 'content': 0.0659298449754715, 'timestamp': '2025-09-10 02:45:46.047332', 'step': 16652, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 80], 'flops': 2373281365952}, 'timestamp': '2025-09-10 02:45:46.078443', 'step': 16652, 'epoch': 3} {'type': 'loss', 'content': 0.0923740565776825, 'timestamp': '2025-09-10 02:45:46.081585', 'step': 16653, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 2847885091968}, 'timestamp': '2025-09-10 02:45:46.115571', 'step': 16653, 'epoch': 3} {'type': 'loss', 'content': 0.06624589115381241, 'timestamp': '2025-09-10 02:45:46.118200', 'step': 16654, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 3322488817984}, 'timestamp': '2025-09-10 02:45:46.152603', 'step': 16654, 'epoch': 3} {'type': 'loss', 'content': 0.09105058759450912, 'timestamp': '2025-09-10 02:45:46.155013', 'step': 16655, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 3322488817984}, 'timestamp': '2025-09-10 02:45:46.186154', 'step': 16655, 'epoch': 3} {'type': 'loss', 'content': 0.07325523346662521, 'timestamp': '2025-09-10 02:45:46.210261', 'step': 16656, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 128], 'flops': 3797092544000}, 'timestamp': '2025-09-10 02:45:46.241358', 'step': 16656, 'epoch': 3} {'type': 'loss', 'content': 0.0406428724527359, 'timestamp': '2025-09-10 02:45:46.243495', 'step': 16657, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 144], 'flops': 4271696270016}, 'timestamp': '2025-09-10 02:45:46.274256', 'step': 16657, 'epoch': 3} {'type': 'loss', 'content': 0.07799487560987473, 'timestamp': '2025-09-10 02:45:46.276719', 'step': 16658, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 3322488817984}, 'timestamp': '2025-09-10 02:45:46.307111', 'step': 16658, 'epoch': 3} {'type': 'loss', 'content': 0.11124008893966675, 'timestamp': '2025-09-10 02:45:46.310319', 'step': 16659, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 3322488817984}, 'timestamp': '2025-09-10 02:45:46.343033', 'step': 16659, 'epoch': 3} {'type': 'loss', 'content': 0.11611449718475342, 'timestamp': '2025-09-10 02:45:46.366569', 'step': 16660, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 2847885091968}, 'timestamp': '2025-09-10 02:45:46.397256', 'step': 16660, 'epoch': 3} {'type': 'loss', 'content': 0.046331461519002914, 'timestamp': '2025-09-10 02:45:46.399650', 'step': 16661, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 128], 'flops': 3797092544000}, 'timestamp': '2025-09-10 02:45:46.431915', 'step': 16661, 'epoch': 3} {'type': 'loss', 'content': 0.02601080760359764, 'timestamp': '2025-09-10 02:45:46.435091', 'step': 16662, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 2847885091968}, 'timestamp': '2025-09-10 02:45:46.467630', 'step': 16662, 'epoch': 3} {'type': 'loss', 'content': 0.07221449911594391, 'timestamp': '2025-09-10 02:45:46.470356', 'step': 16663, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 2847885091968}, 'timestamp': '2025-09-10 02:45:46.500510', 'step': 16663, 'epoch': 3} {'type': 'loss', 'content': 0.04219481721520424, 'timestamp': '2025-09-10 02:45:46.525282', 'step': 16664, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 128], 'flops': 3797092544000}, 'timestamp': '2025-09-10 02:45:46.556710', 'step': 16664, 'epoch': 3} {'type': 'loss', 'content': 0.13305117189884186, 'timestamp': '2025-09-10 02:45:46.559784', 'step': 16665, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 128], 'flops': 3797092544000}, 'timestamp': '2025-09-10 02:45:46.591033', 'step': 16665, 'epoch': 3} {'type': 'loss', 'content': 0.09795578569173813, 'timestamp': '2025-09-10 02:45:46.594188', 'step': 16666, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 2847885091968}, 'timestamp': '2025-09-10 02:45:46.623983', 'step': 16666, 'epoch': 3} {'type': 'loss', 'content': 0.11053826659917831, 'timestamp': '2025-09-10 02:45:46.628068', 'step': 16667, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 2847885091968}, 'timestamp': '2025-09-10 02:45:46.661539', 'step': 16667, 'epoch': 3} {'type': 'loss', 'content': 0.047547388821840286, 'timestamp': '2025-09-10 02:45:46.685136', 'step': 16668, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 2847885091968}, 'timestamp': '2025-09-10 02:45:46.715033', 'step': 16668, 'epoch': 3} {'type': 'loss', 'content': 0.05155131220817566, 'timestamp': '2025-09-10 02:45:46.717215', 'step': 16669, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 3322488817984}, 'timestamp': '2025-09-10 02:45:46.747796', 'step': 16669, 'epoch': 3} {'type': 'loss', 'content': 0.08463948965072632, 'timestamp': '2025-09-10 02:45:46.750315', 'step': 16670, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 2847885091968}, 'timestamp': '2025-09-10 02:45:46.782124', 'step': 16670, 'epoch': 3} {'type': 'loss', 'content': 0.09965409338474274, 'timestamp': '2025-09-10 02:45:46.784886', 'step': 16671, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 2847885091968}, 'timestamp': '2025-09-10 02:45:46.815711', 'step': 16671, 'epoch': 3} {'type': 'loss', 'content': 0.07327497005462646, 'timestamp': '2025-09-10 02:45:46.839138', 'step': 16672, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 3322488817984}, 'timestamp': '2025-09-10 02:45:46.869619', 'step': 16672, 'epoch': 3} {'type': 'loss', 'content': 0.06654544174671173, 'timestamp': '2025-09-10 02:45:46.874269', 'step': 16673, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 3322488817984}, 'timestamp': '2025-09-10 02:45:46.908406', 'step': 16673, 'epoch': 3} {'type': 'loss', 'content': 0.07002734392881393, 'timestamp': '2025-09-10 02:45:46.910909', 'step': 16674, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 2847885091968}, 'timestamp': '2025-09-10 02:45:46.944944', 'step': 16674, 'epoch': 3} {'type': 'loss', 'content': 0.016757791861891747, 'timestamp': '2025-09-10 02:45:46.947415', 'step': 16675, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 3322488817984}, 'timestamp': '2025-09-10 02:45:46.978348', 'step': 16675, 'epoch': 3} {'type': 'loss', 'content': 0.03403396159410477, 'timestamp': '2025-09-10 02:45:47.002859', 'step': 16676, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 128], 'flops': 3797092544000}, 'timestamp': '2025-09-10 02:45:47.033404', 'step': 16676, 'epoch': 3} {'type': 'loss', 'content': 0.04097570478916168, 'timestamp': '2025-09-10 02:45:47.035945', 'step': 16677, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 3322488817984}, 'timestamp': '2025-09-10 02:45:47.065823', 'step': 16677, 'epoch': 3} {'type': 'loss', 'content': 0.04492006078362465, 'timestamp': '2025-09-10 02:45:47.069111', 'step': 16678, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 2847885091968}, 'timestamp': '2025-09-10 02:45:47.101177', 'step': 16678, 'epoch': 3} {'type': 'loss', 'content': 0.038810763508081436, 'timestamp': '2025-09-10 02:45:47.103933', 'step': 16679, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 160], 'flops': 4746299996032}, 'timestamp': '2025-09-10 02:45:47.134569', 'step': 16679, 'epoch': 3} {'type': 'loss', 'content': 0.06423575431108475, 'timestamp': '2025-09-10 02:45:47.158727', 'step': 16680, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 2847885091968}, 'timestamp': '2025-09-10 02:45:47.189864', 'step': 16680, 'epoch': 3} {'type': 'loss', 'content': 0.06458258628845215, 'timestamp': '2025-09-10 02:45:47.193674', 'step': 16681, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 2847885091968}, 'timestamp': '2025-09-10 02:45:47.225605', 'step': 16681, 'epoch': 3} {'type': 'loss', 'content': 0.04995441436767578, 'timestamp': '2025-09-10 02:45:47.228615', 'step': 16682, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 3322488817984}, 'timestamp': '2025-09-10 02:45:47.260350', 'step': 16682, 'epoch': 3} {'type': 'loss', 'content': 0.10255224257707596, 'timestamp': '2025-09-10 02:45:47.264364', 'step': 16683, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 3322488817984}, 'timestamp': '2025-09-10 02:45:47.295486', 'step': 16683, 'epoch': 3} {'type': 'loss', 'content': 0.11588487029075623, 'timestamp': '2025-09-10 02:45:47.319237', 'step': 16684, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 128], 'flops': 3797092544000}, 'timestamp': '2025-09-10 02:45:47.350316', 'step': 16684, 'epoch': 3} {'type': 'loss', 'content': 0.07568199932575226, 'timestamp': '2025-09-10 02:45:47.352803', 'step': 16685, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 80], 'flops': 2373281365952}, 'timestamp': '2025-09-10 02:45:47.383737', 'step': 16685, 'epoch': 3} {'type': 'loss', 'content': 0.04944335296750069, 'timestamp': '2025-09-10 02:45:47.387119', 'step': 16686, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 2847885091968}, 'timestamp': '2025-09-10 02:45:47.419759', 'step': 16686, 'epoch': 3} {'type': 'loss', 'content': 0.061372481286525726, 'timestamp': '2025-09-10 02:45:47.423531', 'step': 16687, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 128], 'flops': 3797092544000}, 'timestamp': '2025-09-10 02:45:47.454971', 'step': 16687, 'epoch': 3} {'type': 'loss', 'content': 0.06225926801562309, 'timestamp': '2025-09-10 02:45:47.478984', 'step': 16688, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 2847885091968}, 'timestamp': '2025-09-10 02:45:47.509636', 'step': 16688, 'epoch': 3} {'type': 'loss', 'content': 0.04844668135046959, 'timestamp': '2025-09-10 02:45:47.512463', 'step': 16689, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 3322488817984}, 'timestamp': '2025-09-10 02:45:47.546513', 'step': 16689, 'epoch': 3} {'type': 'loss', 'content': 0.047619324177503586, 'timestamp': '2025-09-10 02:45:47.548778', 'step': 16690, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 128], 'flops': 3797092544000}, 'timestamp': '2025-09-10 02:45:47.580142', 'step': 16690, 'epoch': 3} {'type': 'loss', 'content': 0.05327915772795677, 'timestamp': '2025-09-10 02:45:47.582875', 'step': 16691, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 3322488817984}, 'timestamp': '2025-09-10 02:45:47.614741', 'step': 16691, 'epoch': 3} {'type': 'loss', 'content': 0.043619345873594284, 'timestamp': '2025-09-10 02:45:47.639370', 'step': 16692, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 2847885091968}, 'timestamp': '2025-09-10 02:45:47.669807', 'step': 16692, 'epoch': 3} {'type': 'loss', 'content': 0.1177634671330452, 'timestamp': '2025-09-10 02:45:47.672024', 'step': 16693, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 144], 'flops': 4271696270016}, 'timestamp': '2025-09-10 02:45:47.702288', 'step': 16693, 'epoch': 3} {'type': 'loss', 'content': 0.11557076126337051, 'timestamp': '2025-09-10 02:45:47.705090', 'step': 16694, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 128], 'flops': 3797092544000}, 'timestamp': '2025-09-10 02:45:47.736627', 'step': 16694, 'epoch': 3} {'type': 'loss', 'content': 0.11210449039936066, 'timestamp': '2025-09-10 02:45:47.739460', 'step': 16695, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 2847885091968}, 'timestamp': '2025-09-10 02:45:47.769887', 'step': 16695, 'epoch': 3} {'type': 'loss', 'content': 0.10891154408454895, 'timestamp': '2025-09-10 02:45:47.794003', 'step': 16696, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 2847885091968}, 'timestamp': '2025-09-10 02:45:47.828960', 'step': 16696, 'epoch': 3} {'type': 'loss', 'content': 0.07886475324630737, 'timestamp': '2025-09-10 02:45:47.832196', 'step': 16697, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 2847885091968}, 'timestamp': '2025-09-10 02:45:47.863358', 'step': 16697, 'epoch': 3} {'type': 'loss', 'content': 0.09753678739070892, 'timestamp': '2025-09-10 02:45:47.866619', 'step': 16698, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 2847885091968}, 'timestamp': '2025-09-10 02:45:47.901883', 'step': 16698, 'epoch': 3} {'type': 'loss', 'content': 0.06071384996175766, 'timestamp': '2025-09-10 02:45:47.904249', 'step': 16699, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 144], 'flops': 4271696270016}, 'timestamp': '2025-09-10 02:45:47.934259', 'step': 16699, 'epoch': 3} {'type': 'loss', 'content': 0.049417849630117416, 'timestamp': '2025-09-10 02:45:47.957842', 'step': 16700, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 3322488817984}, 'timestamp': '2025-09-10 02:45:47.988376', 'step': 16700, 'epoch': 3} {'type': 'loss', 'content': 0.09710095077753067, 'timestamp': '2025-09-10 02:45:47.990621', 'step': 16701, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 2847885091968}, 'timestamp': '2025-09-10 02:45:48.022971', 'step': 16701, 'epoch': 3} {'type': 'loss', 'content': 0.06057961657643318, 'timestamp': '2025-09-10 02:45:48.025522', 'step': 16702, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 3322488817984}, 'timestamp': '2025-09-10 02:45:48.056975', 'step': 16702, 'epoch': 3} {'type': 'loss', 'content': 0.031149592250585556, 'timestamp': '2025-09-10 02:45:48.060334', 'step': 16703, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 2847885091968}, 'timestamp': '2025-09-10 02:45:48.090749', 'step': 16703, 'epoch': 3} {'type': 'loss', 'content': 0.03672993183135986, 'timestamp': '2025-09-10 02:45:48.115022', 'step': 16704, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 144], 'flops': 4271696270016}, 'timestamp': '2025-09-10 02:45:48.145695', 'step': 16704, 'epoch': 3} {'type': 'loss', 'content': 0.07852162420749664, 'timestamp': '2025-09-10 02:45:48.148146', 'step': 16705, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 3322488817984}, 'timestamp': '2025-09-10 02:45:48.179593', 'step': 16705, 'epoch': 3} {'type': 'loss', 'content': 0.02710428275167942, 'timestamp': '2025-09-10 02:45:48.183961', 'step': 16706, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 3322488817984}, 'timestamp': '2025-09-10 02:45:48.223252', 'step': 16706, 'epoch': 3} {'type': 'loss', 'content': 0.08017998188734055, 'timestamp': '2025-09-10 02:45:48.228728', 'step': 16707, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 128], 'flops': 3797092544000}, 'timestamp': '2025-09-10 02:45:48.264332', 'step': 16707, 'epoch': 3} {'type': 'loss', 'content': 0.0638672262430191, 'timestamp': '2025-09-10 02:45:48.288294', 'step': 16708, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 3322488817984}, 'timestamp': '2025-09-10 02:45:48.324734', 'step': 16708, 'epoch': 3} {'type': 'loss', 'content': 0.035554755479097366, 'timestamp': '2025-09-10 02:45:48.328692', 'step': 16709, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 128], 'flops': 3797092544000}, 'timestamp': '2025-09-10 02:45:48.362999', 'step': 16709, 'epoch': 3} {'type': 'loss', 'content': 0.08053433895111084, 'timestamp': '2025-09-10 02:45:48.367450', 'step': 16710, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 128], 'flops': 3797092544000}, 'timestamp': '2025-09-10 02:45:48.402776', 'step': 16710, 'epoch': 3} {'type': 'loss', 'content': 0.07471832633018494, 'timestamp': '2025-09-10 02:45:48.405913', 'step': 16711, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 3322488817984}, 'timestamp': '2025-09-10 02:45:48.440019', 'step': 16711, 'epoch': 3} {'type': 'loss', 'content': 0.07110287994146347, 'timestamp': '2025-09-10 02:45:48.465147', 'step': 16712, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 3322488817984}, 'timestamp': '2025-09-10 02:45:48.495406', 'step': 16712, 'epoch': 3} {'type': 'loss', 'content': 0.05792193114757538, 'timestamp': '2025-09-10 02:45:48.498964', 'step': 16713, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 3322488817984}, 'timestamp': '2025-09-10 02:45:48.532198', 'step': 16713, 'epoch': 3} {'type': 'loss', 'content': 0.09046540409326553, 'timestamp': '2025-09-10 02:45:48.538275', 'step': 16714, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 2847885091968}, 'timestamp': '2025-09-10 02:45:48.571432', 'step': 16714, 'epoch': 3} {'type': 'loss', 'content': 0.07607220113277435, 'timestamp': '2025-09-10 02:45:48.574794', 'step': 16715, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 3322488817984}, 'timestamp': '2025-09-10 02:45:48.606779', 'step': 16715, 'epoch': 3} {'type': 'loss', 'content': 0.12014392018318176, 'timestamp': '2025-09-10 02:45:48.631571', 'step': 16716, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 3322488817984}, 'timestamp': '2025-09-10 02:45:48.665469', 'step': 16716, 'epoch': 3} {'type': 'loss', 'content': 0.17860503494739532, 'timestamp': '2025-09-10 02:45:48.669256', 'step': 16717, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 80], 'flops': 2373281365952}, 'timestamp': '2025-09-10 02:45:48.698555', 'step': 16717, 'epoch': 3} {'type': 'loss', 'content': 0.13140912353992462, 'timestamp': '2025-09-10 02:45:48.700774', 'step': 16718, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 3322488817984}, 'timestamp': '2025-09-10 02:45:48.734749', 'step': 16718, 'epoch': 3} {'type': 'loss', 'content': 0.0860753208398819, 'timestamp': '2025-09-10 02:45:48.739496', 'step': 16719, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 128], 'flops': 3797092544000}, 'timestamp': '2025-09-10 02:45:48.772956', 'step': 16719, 'epoch': 3} {'type': 'loss', 'content': 0.0553930327296257, 'timestamp': '2025-09-10 02:45:48.797420', 'step': 16720, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 2847885091968}, 'timestamp': '2025-09-10 02:45:48.832590', 'step': 16720, 'epoch': 3} {'type': 'loss', 'content': 0.0634286180138588, 'timestamp': '2025-09-10 02:45:48.835482', 'step': 16721, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 128], 'flops': 3797092544000}, 'timestamp': '2025-09-10 02:45:48.868186', 'step': 16721, 'epoch': 3} {'type': 'loss', 'content': 0.07461678981781006, 'timestamp': '2025-09-10 02:45:48.872020', 'step': 16722, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 3322488817984}, 'timestamp': '2025-09-10 02:45:48.912337', 'step': 16722, 'epoch': 3} {'type': 'loss', 'content': 0.08417200297117233, 'timestamp': '2025-09-10 02:45:48.914694', 'step': 16723, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 2847885091968}, 'timestamp': '2025-09-10 02:45:48.948776', 'step': 16723, 'epoch': 3} {'type': 'loss', 'content': 0.09652593731880188, 'timestamp': '2025-09-10 02:45:48.972799', 'step': 16724, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 144], 'flops': 4271696270016}, 'timestamp': '2025-09-10 02:45:49.004924', 'step': 16724, 'epoch': 3} {'type': 'loss', 'content': 0.1279994249343872, 'timestamp': '2025-09-10 02:45:49.007522', 'step': 16725, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 128], 'flops': 3797092544000}, 'timestamp': '2025-09-10 02:45:49.037662', 'step': 16725, 'epoch': 3} {'type': 'loss', 'content': 0.12136056274175644, 'timestamp': '2025-09-10 02:45:49.042185', 'step': 16726, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 2847885091968}, 'timestamp': '2025-09-10 02:45:49.075959', 'step': 16726, 'epoch': 3} {'type': 'loss', 'content': 0.10421760380268097, 'timestamp': '2025-09-10 02:45:49.080574', 'step': 16727, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 160], 'flops': 4746299996032}, 'timestamp': '2025-09-10 02:45:49.111974', 'step': 16727, 'epoch': 3} {'type': 'loss', 'content': 0.06294356286525726, 'timestamp': '2025-09-10 02:45:49.135543', 'step': 16728, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 2847885091968}, 'timestamp': '2025-09-10 02:45:49.169586', 'step': 16728, 'epoch': 3} {'type': 'loss', 'content': 0.0874946266412735, 'timestamp': '2025-09-10 02:45:49.172406', 'step': 16729, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 192], 'flops': 5695507448064}, 'timestamp': '2025-09-10 02:45:49.203241', 'step': 16729, 'epoch': 3} {'type': 'loss', 'content': 0.14242975413799286, 'timestamp': '2025-09-10 02:45:49.208009', 'step': 16730, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 128], 'flops': 3797092544000}, 'timestamp': '2025-09-10 02:45:49.240929', 'step': 16730, 'epoch': 3} {'type': 'loss', 'content': 0.025852037593722343, 'timestamp': '2025-09-10 02:45:49.244528', 'step': 16731, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 3322488817984}, 'timestamp': '2025-09-10 02:45:49.276615', 'step': 16731, 'epoch': 3} {'type': 'loss', 'content': 0.06402324140071869, 'timestamp': '2025-09-10 02:45:49.300479', 'step': 16732, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 128], 'flops': 3797092544000}, 'timestamp': '2025-09-10 02:45:49.332734', 'step': 16732, 'epoch': 3} {'type': 'loss', 'content': 0.06387776136398315, 'timestamp': '2025-09-10 02:45:49.335403', 'step': 16733, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 192], 'flops': 5695507448064}, 'timestamp': '2025-09-10 02:45:49.370392', 'step': 16733, 'epoch': 3} {'type': 'loss', 'content': 0.040550269186496735, 'timestamp': '2025-09-10 02:45:49.389539', 'step': 16734, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 2847885091968}, 'timestamp': '2025-09-10 02:45:49.421499', 'step': 16734, 'epoch': 3} {'type': 'loss', 'content': 0.03867442160844803, 'timestamp': '2025-09-10 02:45:49.424076', 'step': 16735, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 3322488817984}, 'timestamp': '2025-09-10 02:45:49.454945', 'step': 16735, 'epoch': 3} {'type': 'loss', 'content': 0.0821285992860794, 'timestamp': '2025-09-10 02:45:49.478840', 'step': 16736, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 2847885091968}, 'timestamp': '2025-09-10 02:45:49.510972', 'step': 16736, 'epoch': 3} {'type': 'loss', 'content': 0.09204693138599396, 'timestamp': '2025-09-10 02:45:49.513300', 'step': 16737, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 3322488817984}, 'timestamp': '2025-09-10 02:45:49.543775', 'step': 16737, 'epoch': 3} {'type': 'loss', 'content': 0.04271502047777176, 'timestamp': '2025-09-10 02:45:49.547568', 'step': 16738, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 2847885091968}, 'timestamp': '2025-09-10 02:45:49.578091', 'step': 16738, 'epoch': 3} {'type': 'loss', 'content': 0.10845523327589035, 'timestamp': '2025-09-10 02:45:49.581785', 'step': 16739, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 3322488817984}, 'timestamp': '2025-09-10 02:45:49.614971', 'step': 16739, 'epoch': 3} {'type': 'loss', 'content': 0.10654956102371216, 'timestamp': '2025-09-10 02:45:49.638605', 'step': 16740, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 3322488817984}, 'timestamp': '2025-09-10 02:45:49.672964', 'step': 16740, 'epoch': 3} {'type': 'loss', 'content': 0.03303033486008644, 'timestamp': '2025-09-10 02:45:49.675536', 'step': 16741, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 3322488817984}, 'timestamp': '2025-09-10 02:45:49.706208', 'step': 16741, 'epoch': 3} {'type': 'loss', 'content': 0.08295521140098572, 'timestamp': '2025-09-10 02:45:49.708681', 'step': 16742, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 128], 'flops': 3797092544000}, 'timestamp': '2025-09-10 02:45:49.739494', 'step': 16742, 'epoch': 3} {'type': 'loss', 'content': 0.07770221680402756, 'timestamp': '2025-09-10 02:45:49.742363', 'step': 16743, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 128], 'flops': 3797092544000}, 'timestamp': '2025-09-10 02:45:49.774759', 'step': 16743, 'epoch': 3} {'type': 'loss', 'content': 0.07751099020242691, 'timestamp': '2025-09-10 02:45:49.798461', 'step': 16744, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 160], 'flops': 4746299996032}, 'timestamp': '2025-09-10 02:45:49.831265', 'step': 16744, 'epoch': 3} {'type': 'loss', 'content': 0.08075389266014099, 'timestamp': '2025-09-10 02:45:49.833848', 'step': 16745, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 3322488817984}, 'timestamp': '2025-09-10 02:45:49.864433', 'step': 16745, 'epoch': 3} {'type': 'loss', 'content': 0.06205572187900543, 'timestamp': '2025-09-10 02:45:49.866867', 'step': 16746, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 2847885091968}, 'timestamp': '2025-09-10 02:45:49.897797', 'step': 16746, 'epoch': 3} {'type': 'loss', 'content': 0.1081862598657608, 'timestamp': '2025-09-10 02:45:49.901538', 'step': 16747, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 2847885091968}, 'timestamp': '2025-09-10 02:45:49.934592', 'step': 16747, 'epoch': 3} {'type': 'loss', 'content': 0.03168554976582527, 'timestamp': '2025-09-10 02:45:49.958266', 'step': 16748, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 2847885091968}, 'timestamp': '2025-09-10 02:45:49.988763', 'step': 16748, 'epoch': 3} {'type': 'loss', 'content': 0.0600602924823761, 'timestamp': '2025-09-10 02:45:49.991143', 'step': 16749, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 2847885091968}, 'timestamp': '2025-09-10 02:45:50.022433', 'step': 16749, 'epoch': 3} {'type': 'loss', 'content': 0.029097408056259155, 'timestamp': '2025-09-10 02:45:50.024701', 'step': 16750, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 2847885091968}, 'timestamp': '2025-09-10 02:45:50.054916', 'step': 16750, 'epoch': 3} {'type': 'loss', 'content': 0.052880238741636276, 'timestamp': '2025-09-10 02:45:50.057093', 'step': 16751, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 3322488817984}, 'timestamp': '2025-09-10 02:45:50.088929', 'step': 16751, 'epoch': 3} {'type': 'loss', 'content': 0.07942208647727966, 'timestamp': '2025-09-10 02:45:50.112529', 'step': 16752, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 144], 'flops': 4271696270016}, 'timestamp': '2025-09-10 02:45:50.143233', 'step': 16752, 'epoch': 3} {'type': 'loss', 'content': 0.08186636120080948, 'timestamp': '2025-09-10 02:45:50.145746', 'step': 16753, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 3322488817984}, 'timestamp': '2025-09-10 02:45:50.175972', 'step': 16753, 'epoch': 3} {'type': 'loss', 'content': 0.09413067251443863, 'timestamp': '2025-09-10 02:45:50.179233', 'step': 16754, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 3322488817984}, 'timestamp': '2025-09-10 02:45:50.209866', 'step': 16754, 'epoch': 3} {'type': 'loss', 'content': 0.03388813138008118, 'timestamp': '2025-09-10 02:45:50.216354', 'step': 16755, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 128], 'flops': 3797092544000}, 'timestamp': '2025-09-10 02:45:50.247243', 'step': 16755, 'epoch': 3} {'type': 'loss', 'content': 0.10515902191400528, 'timestamp': '2025-09-10 02:45:50.271096', 'step': 16756, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 3322488817984}, 'timestamp': '2025-09-10 02:45:50.302255', 'step': 16756, 'epoch': 3} {'type': 'loss', 'content': 0.01780327409505844, 'timestamp': '2025-09-10 02:45:50.304605', 'step': 16757, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 80], 'flops': 2373281365952}, 'timestamp': '2025-09-10 02:45:50.335196', 'step': 16757, 'epoch': 3} {'type': 'loss', 'content': 0.09581834822893143, 'timestamp': '2025-09-10 02:45:50.337816', 'step': 16758, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 128], 'flops': 3797092544000}, 'timestamp': '2025-09-10 02:45:50.370489', 'step': 16758, 'epoch': 3} {'type': 'loss', 'content': 0.09303493797779083, 'timestamp': '2025-09-10 02:45:50.373124', 'step': 16759, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 128], 'flops': 3797092544000}, 'timestamp': '2025-09-10 02:45:50.402975', 'step': 16759, 'epoch': 3} {'type': 'loss', 'content': 0.13132932782173157, 'timestamp': '2025-09-10 02:45:50.426707', 'step': 16760, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 2847885091968}, 'timestamp': '2025-09-10 02:45:50.459308', 'step': 16760, 'epoch': 3} {'type': 'loss', 'content': 0.022830596193671227, 'timestamp': '2025-09-10 02:45:50.462281', 'step': 16761, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 128], 'flops': 3797092544000}, 'timestamp': '2025-09-10 02:45:50.492545', 'step': 16761, 'epoch': 3} {'type': 'loss', 'content': 0.04243037849664688, 'timestamp': '2025-09-10 02:45:50.495660', 'step': 16762, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 3322488817984}, 'timestamp': '2025-09-10 02:45:50.529103', 'step': 16762, 'epoch': 3} {'type': 'loss', 'content': 0.031503841280937195, 'timestamp': '2025-09-10 02:45:50.531410', 'step': 16763, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 2847885091968}, 'timestamp': '2025-09-10 02:45:50.561979', 'step': 16763, 'epoch': 3} {'type': 'loss', 'content': 0.0735417976975441, 'timestamp': '2025-09-10 02:45:50.586055', 'step': 16764, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 128], 'flops': 3797092544000}, 'timestamp': '2025-09-10 02:45:50.617261', 'step': 16764, 'epoch': 3} {'type': 'loss', 'content': 0.09422112256288528, 'timestamp': '2025-09-10 02:45:50.620362', 'step': 16765, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 2847885091968}, 'timestamp': '2025-09-10 02:45:50.651557', 'step': 16765, 'epoch': 3} {'type': 'loss', 'content': 0.046134836971759796, 'timestamp': '2025-09-10 02:45:50.655033', 'step': 16766, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 144], 'flops': 4271696270016}, 'timestamp': '2025-09-10 02:45:50.688971', 'step': 16766, 'epoch': 3} {'type': 'loss', 'content': 0.060926634818315506, 'timestamp': '2025-09-10 02:45:50.694453', 'step': 16767, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 128], 'flops': 3797092544000}, 'timestamp': '2025-09-10 02:45:50.726703', 'step': 16767, 'epoch': 3} {'type': 'loss', 'content': 0.07244286686182022, 'timestamp': '2025-09-10 02:45:50.750758', 'step': 16768, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 128], 'flops': 3797092544000}, 'timestamp': '2025-09-10 02:45:50.781582', 'step': 16768, 'epoch': 3} {'type': 'loss', 'content': 0.07586821913719177, 'timestamp': '2025-09-10 02:45:50.784018', 'step': 16769, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 2847885091968}, 'timestamp': '2025-09-10 02:45:50.813864', 'step': 16769, 'epoch': 3} {'type': 'loss', 'content': 0.0648575946688652, 'timestamp': '2025-09-10 02:45:50.816538', 'step': 16770, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 2847885091968}, 'timestamp': '2025-09-10 02:45:50.848157', 'step': 16770, 'epoch': 3} {'type': 'loss', 'content': 0.035844773054122925, 'timestamp': '2025-09-10 02:45:50.851387', 'step': 16771, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 128], 'flops': 3797092544000}, 'timestamp': '2025-09-10 02:45:50.882570', 'step': 16771, 'epoch': 3} {'type': 'loss', 'content': 0.08215022087097168, 'timestamp': '2025-09-10 02:45:50.913002', 'step': 16772, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 2847885091968}, 'timestamp': '2025-09-10 02:45:50.946254', 'step': 16772, 'epoch': 3} {'type': 'loss', 'content': 0.08810373395681381, 'timestamp': '2025-09-10 02:45:50.948473', 'step': 16773, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 128], 'flops': 3797092544000}, 'timestamp': '2025-09-10 02:45:50.978506', 'step': 16773, 'epoch': 3} {'type': 'loss', 'content': 0.06770754605531693, 'timestamp': '2025-09-10 02:45:50.980837', 'step': 16774, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 2847885091968}, 'timestamp': '2025-09-10 02:45:51.010897', 'step': 16774, 'epoch': 3} {'type': 'loss', 'content': 0.06628385186195374, 'timestamp': '2025-09-10 02:45:51.013700', 'step': 16775, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 2847885091968}, 'timestamp': '2025-09-10 02:45:51.046583', 'step': 16775, 'epoch': 3} {'type': 'loss', 'content': 0.0633913055062294, 'timestamp': '2025-09-10 02:45:51.069968', 'step': 16776, 'epoch': 3} {'type': 'flops', 'content': [{'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1582003754624}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1898404492032}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1898404492032}, {'type': 'perplexity', 'in_batch_dim': [8, 144], 'batch_size': 8, 'flops': 2847606704256}, {'type': 'perplexity', 'in_batch_dim': [8, 64], 'batch_size': 8, 'flops': 1265603017216}, {'type': 'perplexity', 'in_batch_dim': [8, 112], 'batch_size': 8, 'flops': 2214805229440}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1898404492032}, {'type': 'perplexity', 'in_batch_dim': [8, 112], 'batch_size': 8, 'flops': 2214805229440}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1898404492032}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1898404492032}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1898404492032}, {'type': 'perplexity', 'in_batch_dim': [8, 112], 'batch_size': 8, 'flops': 2214805229440}, {'type': 'perplexity', 'in_batch_dim': [8, 112], 'batch_size': 8, 'flops': 2214805229440}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1582003754624}, {'type': 'perplexity', 'in_batch_dim': [8, 144], 'batch_size': 8, 'flops': 2847606704256}, {'type': 'perplexity', 'in_batch_dim': [8, 112], 'batch_size': 8, 'flops': 2214805229440}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1582003754624}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1898404492032}, {'type': 'perplexity', 'in_batch_dim': [8, 64], 'batch_size': 8, 'flops': 1265603017216}, {'type': 'perplexity', 'in_batch_dim': [8, 64], 'batch_size': 8, 'flops': 1265603017216}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1898404492032}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1582003754624}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1582003754624}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1582003754624}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1582003754624}, {'type': 'perplexity', 'in_batch_dim': [8, 64], 'batch_size': 8, 'flops': 1265603017216}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1898404492032}, {'type': 'perplexity', 'in_batch_dim': [8, 64], 'batch_size': 8, 'flops': 1265603017216}, {'type': 'perplexity', 'in_batch_dim': [8, 64], 'batch_size': 8, 'flops': 1265603017216}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1582003754624}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1582003754624}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1582003754624}, {'type': 'perplexity', 'in_batch_dim': [8, 112], 'batch_size': 8, 'flops': 2214805229440}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1582003754624}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1582003754624}, {'type': 'perplexity', 'in_batch_dim': [8, 160], 'batch_size': 8, 'flops': 3164007441664}, {'type': 'perplexity', 'in_batch_dim': [8, 64], 'batch_size': 8, 'flops': 1265603017216}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1898404492032}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1898404492032}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1898404492032}, {'type': 'perplexity', 'in_batch_dim': [8, 64], 'batch_size': 8, 'flops': 1265603017216}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1582003754624}, {'type': 'perplexity', 'in_batch_dim': [8, 64], 'batch_size': 8, 'flops': 1265603017216}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1582003754624}, {'type': 'perplexity', 'in_batch_dim': [8, 64], 'batch_size': 8, 'flops': 1265603017216}, {'type': 'perplexity', 'in_batch_dim': [8, 112], 'batch_size': 8, 'flops': 2214805229440}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1582003754624}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1898404492032}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1898404492032}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1898404492032}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1898404492032}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1898404492032}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1898404492032}, {'type': 'perplexity', 'in_batch_dim': [8, 64], 'batch_size': 8, 'flops': 1265603017216}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1898404492032}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1582003754624}, {'type': 'perplexity', 'in_batch_dim': [8, 64], 'batch_size': 8, 'flops': 1265603017216}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1898404492032}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1898404492032}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1898404492032}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1898404492032}, {'type': 'perplexity', 'in_batch_dim': [8, 128], 'batch_size': 8, 'flops': 2531205966848}, {'type': 'perplexity', 'in_batch_dim': [8, 112], 'batch_size': 8, 'flops': 2214805229440}, {'type': 'perplexity', 'in_batch_dim': [8, 64], 'batch_size': 8, 'flops': 1265603017216}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1582003754624}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1898404492032}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1582003754624}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1898404492032}, {'type': 'perplexity', 'in_batch_dim': [8, 64], 'batch_size': 8, 'flops': 1265603017216}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1898404492032}, {'type': 'perplexity', 'in_batch_dim': [8, 112], 'batch_size': 8, 'flops': 2214805229440}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1898404492032}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1582003754624}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1582003754624}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1582003754624}, {'type': 'perplexity', 'in_batch_dim': [8, 64], 'batch_size': 8, 'flops': 1265603017216}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1582003754624}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1898404492032}, {'type': 'perplexity', 'in_batch_dim': [8, 64], 'batch_size': 8, 'flops': 1265603017216}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1582003754624}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1898404492032}, {'type': 'perplexity', 'in_batch_dim': [8, 64], 'batch_size': 8, 'flops': 1265603017216}, {'type': 'perplexity', 'in_batch_dim': [8, 112], 'batch_size': 8, 'flops': 2214805229440}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1582003754624}, {'type': 'perplexity', 'in_batch_dim': [8, 144], 'batch_size': 8, 'flops': 2847606704256}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1582003754624}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1582003754624}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1582003754624}, {'type': 'perplexity', 'in_batch_dim': [8, 64], 'batch_size': 8, 'flops': 1265603017216}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1898404492032}, {'type': 'perplexity', 'in_batch_dim': [8, 112], 'batch_size': 8, 'flops': 2214805229440}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1898404492032}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1582003754624}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1582003754624}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1582003754624}, {'type': 'perplexity', 'in_batch_dim': [8, 112], 'batch_size': 8, 'flops': 2214805229440}, {'type': 'perplexity', 'in_batch_dim': [8, 64], 'batch_size': 8, 'flops': 1265603017216}, {'type': 'perplexity', 'in_batch_dim': [8, 112], 'batch_size': 8, 'flops': 2214805229440}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1582003754624}, {'type': 'perplexity', 'in_batch_dim': [8, 64], 'batch_size': 8, 'flops': 1265603017216}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1582003754624}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1582003754624}, {'type': 'perplexity', 'in_batch_dim': [8, 64], 'batch_size': 8, 'flops': 1265603017216}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1582003754624}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1582003754624}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1582003754624}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1898404492032}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1582003754624}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1582003754624}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1898404492032}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1898404492032}, {'type': 'perplexity', 'in_batch_dim': [8, 112], 'batch_size': 8, 'flops': 2214805229440}, {'type': 'perplexity', 'in_batch_dim': [8, 64], 'batch_size': 8, 'flops': 1265603017216}, {'type': 'perplexity', 'in_batch_dim': [8, 112], 'batch_size': 8, 'flops': 2214805229440}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1898404492032}, {'type': 'perplexity', 'in_batch_dim': [8, 112], 'batch_size': 8, 'flops': 2214805229440}, {'type': 'perplexity', 'in_batch_dim': [8, 128], 'batch_size': 8, 'flops': 2531205966848}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1582003754624}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1898404492032}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1898404492032}, {'type': 'perplexity', 'in_batch_dim': [8, 192], 'batch_size': 8, 'flops': 3796808916480}, {'type': 'perplexity', 'in_batch_dim': [8, 64], 'batch_size': 8, 'flops': 1265603017216}, {'type': 'perplexity', 'in_batch_dim': [8, 144], 'batch_size': 8, 'flops': 2847606704256}, {'type': 'perplexity', 'in_batch_dim': [8, 64], 'batch_size': 8, 'flops': 1265603017216}, {'type': 'perplexity', 'in_batch_dim': [8, 144], 'batch_size': 8, 'flops': 2847606704256}, {'type': 'perplexity', 'in_batch_dim': [8, 64], 'batch_size': 8, 'flops': 1265603017216}, {'type': 'perplexity', 'in_batch_dim': [8, 64], 'batch_size': 8, 'flops': 1265603017216}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1898404492032}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1898404492032}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1582003754624}, {'type': 'perplexity', 'in_batch_dim': [8, 128], 'batch_size': 8, 'flops': 2531205966848}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1898404492032}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1898404492032}, {'type': 'perplexity', 'in_batch_dim': [8, 112], 'batch_size': 8, 'flops': 2214805229440}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1582003754624}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1582003754624}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1898404492032}, {'type': 'perplexity', 'in_batch_dim': [8, 64], 'batch_size': 8, 'flops': 1265603017216}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1898404492032}, {'type': 'perplexity', 'in_batch_dim': [8, 112], 'batch_size': 8, 'flops': 2214805229440}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1898404492032}, {'type': 'perplexity', 'in_batch_dim': [8, 64], 'batch_size': 8, 'flops': 1265603017216}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1582003754624}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1898404492032}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1898404492032}, {'type': 'perplexity', 'in_batch_dim': [8, 64], 'batch_size': 8, 'flops': 1265603017216}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1898404492032}, {'type': 'perplexity', 'in_batch_dim': [8, 64], 'batch_size': 8, 'flops': 1265603017216}, {'type': 'perplexity', 'in_batch_dim': [8, 112], 'batch_size': 8, 'flops': 2214805229440}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1582003754624}, {'type': 'perplexity', 'in_batch_dim': [8, 128], 'batch_size': 8, 'flops': 2531205966848}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1582003754624}, {'type': 'perplexity', 'in_batch_dim': [8, 112], 'batch_size': 8, 'flops': 2214805229440}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1898404492032}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1582003754624}, {'type': 'perplexity', 'in_batch_dim': [8, 112], 'batch_size': 8, 'flops': 2214805229440}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1582003754624}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1898404492032}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1898404492032}, {'type': 'perplexity', 'in_batch_dim': [8, 128], 'batch_size': 8, 'flops': 2531205966848}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1898404492032}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1898404492032}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1582003754624}, {'type': 'perplexity', 'in_batch_dim': [8, 112], 'batch_size': 8, 'flops': 2214805229440}, {'type': 'perplexity', 'in_batch_dim': [8, 128], 'batch_size': 8, 'flops': 2531205966848}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1898404492032}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1582003754624}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1582003754624}, {'type': 'perplexity', 'in_batch_dim': [8, 128], 'batch_size': 8, 'flops': 2531205966848}, {'type': 'perplexity', 'in_batch_dim': [8, 112], 'batch_size': 8, 'flops': 2214805229440}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1582003754624}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1582003754624}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1898404492032}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1582003754624}, {'type': 'perplexity', 'in_batch_dim': [8, 64], 'batch_size': 8, 'flops': 1265603017216}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1582003754624}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1582003754624}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1582003754624}, {'type': 'perplexity', 'in_batch_dim': [8, 112], 'batch_size': 8, 'flops': 2214805229440}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1582003754624}, {'type': 'perplexity', 'in_batch_dim': [8, 64], 'batch_size': 8, 'flops': 1265603017216}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1582003754624}, {'type': 'perplexity', 'in_batch_dim': [8, 128], 'batch_size': 8, 'flops': 2531205966848}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1898404492032}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1898404492032}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1582003754624}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1898404492032}, {'type': 'perplexity', 'in_batch_dim': [8, 64], 'batch_size': 8, 'flops': 1265603017216}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1898404492032}, {'type': 'perplexity', 'in_batch_dim': [8, 112], 'batch_size': 8, 'flops': 2214805229440}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1898404492032}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1582003754624}, {'type': 'perplexity', 'in_batch_dim': [8, 112], 'batch_size': 8, 'flops': 2214805229440}, {'type': 'perplexity', 'in_batch_dim': [8, 112], 'batch_size': 8, 'flops': 2214805229440}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1898404492032}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1898404492032}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1898404492032}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1898404492032}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1582003754624}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1582003754624}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1582003754624}, {'type': 'perplexity', 'in_batch_dim': [8, 112], 'batch_size': 8, 'flops': 2214805229440}, {'type': 'perplexity', 'in_batch_dim': [8, 64], 'batch_size': 8, 'flops': 1265603017216}, {'type': 'perplexity', 'in_batch_dim': [8, 144], 'batch_size': 8, 'flops': 2847606704256}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1898404492032}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1898404492032}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1898404492032}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1898404492032}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1898404492032}, {'type': 'perplexity', 'in_batch_dim': [8, 112], 'batch_size': 8, 'flops': 2214805229440}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1582003754624}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1582003754624}, {'type': 'perplexity', 'in_batch_dim': [8, 64], 'batch_size': 8, 'flops': 1265603017216}, {'type': 'perplexity', 'in_batch_dim': [8, 112], 'batch_size': 8, 'flops': 2214805229440}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1582003754624}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1898404492032}, {'type': 'perplexity', 'in_batch_dim': [8, 64], 'batch_size': 8, 'flops': 1265603017216}, {'type': 'perplexity', 'in_batch_dim': [8, 112], 'batch_size': 8, 'flops': 2214805229440}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1898404492032}, {'type': 'perplexity', 'in_batch_dim': [8, 64], 'batch_size': 8, 'flops': 1265603017216}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1582003754624}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1582003754624}, {'type': 'perplexity', 'in_batch_dim': [8, 64], 'batch_size': 8, 'flops': 1265603017216}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1898404492032}, {'type': 'perplexity', 'in_batch_dim': [8, 64], 'batch_size': 8, 'flops': 1265603017216}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1582003754624}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1898404492032}, {'type': 'perplexity', 'in_batch_dim': [8, 112], 'batch_size': 8, 'flops': 2214805229440}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1582003754624}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1898404492032}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1582003754624}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1898404492032}, {'type': 'perplexity', 'in_batch_dim': [8, 112], 'batch_size': 8, 'flops': 2214805229440}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1898404492032}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1898404492032}, {'type': 'perplexity', 'in_batch_dim': [8, 64], 'batch_size': 8, 'flops': 1265603017216}, {'type': 'perplexity', 'in_batch_dim': [8, 64], 'batch_size': 8, 'flops': 1265603017216}, {'type': 'perplexity', 'in_batch_dim': [8, 128], 'batch_size': 8, 'flops': 2531205966848}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1582003754624}, {'type': 'perplexity', 'in_batch_dim': [8, 64], 'batch_size': 8, 'flops': 1265603017216}, {'type': 'perplexity', 'in_batch_dim': [8, 112], 'batch_size': 8, 'flops': 2214805229440}, {'type': 'perplexity', 'in_batch_dim': [8, 112], 'batch_size': 8, 'flops': 2214805229440}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1898404492032}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1582003754624}, {'type': 'perplexity', 'in_batch_dim': [8, 128], 'batch_size': 8, 'flops': 2531205966848}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1898404492032}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1582003754624}, {'type': 'perplexity', 'in_batch_dim': [8, 112], 'batch_size': 8, 'flops': 2214805229440}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1898404492032}, {'type': 'perplexity', 'in_batch_dim': [8, 64], 'batch_size': 8, 'flops': 1265603017216}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1582003754624}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1898404492032}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1898404492032}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1582003754624}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1582003754624}, {'type': 'perplexity', 'in_batch_dim': [8, 64], 'batch_size': 8, 'flops': 1265603017216}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1898404492032}, {'type': 'perplexity', 'in_batch_dim': [8, 112], 'batch_size': 8, 'flops': 2214805229440}, {'type': 'perplexity', 'in_batch_dim': [8, 64], 'batch_size': 8, 'flops': 1265603017216}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1898404492032}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1582003754624}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1582003754624}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1898404492032}, {'type': 'perplexity', 'in_batch_dim': [8, 112], 'batch_size': 8, 'flops': 2214805229440}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1898404492032}, {'type': 'perplexity', 'in_batch_dim': [8, 112], 'batch_size': 8, 'flops': 2214805229440}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1582003754624}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1582003754624}, {'type': 'perplexity', 'in_batch_dim': [8, 64], 'batch_size': 8, 'flops': 1265603017216}, {'type': 'perplexity', 'in_batch_dim': [8, 112], 'batch_size': 8, 'flops': 2214805229440}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1582003754624}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1898404492032}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1582003754624}, {'type': 'perplexity', 'in_batch_dim': [8, 112], 'batch_size': 8, 'flops': 2214805229440}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1898404492032}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1898404492032}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1582003754624}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1898404492032}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1582003754624}, {'type': 'perplexity', 'in_batch_dim': [8, 112], 'batch_size': 8, 'flops': 2214805229440}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1582003754624}, {'type': 'perplexity', 'in_batch_dim': [8, 64], 'batch_size': 8, 'flops': 1265603017216}, {'type': 'perplexity', 'in_batch_dim': [8, 64], 'batch_size': 8, 'flops': 1265603017216}, {'type': 'perplexity', 'in_batch_dim': [8, 48], 'batch_size': 8, 'flops': 949202279808}, {'type': 'perplexity', 'in_batch_dim': [8, 112], 'batch_size': 8, 'flops': 2214805229440}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1582003754624}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1898404492032}, {'type': 'perplexity', 'in_batch_dim': [8, 128], 'batch_size': 8, 'flops': 2531205966848}, {'type': 'perplexity', 'in_batch_dim': [8, 112], 'batch_size': 8, 'flops': 2214805229440}, {'type': 'perplexity', 'in_batch_dim': [8, 128], 'batch_size': 8, 'flops': 2531205966848}, {'type': 'perplexity', 'in_batch_dim': [8, 112], 'batch_size': 8, 'flops': 2214805229440}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1582003754624}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1898404492032}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1582003754624}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1582003754624}, {'type': 'perplexity', 'in_batch_dim': [8, 112], 'batch_size': 8, 'flops': 2214805229440}, {'type': 'perplexity', 'in_batch_dim': [8, 112], 'batch_size': 8, 'flops': 2214805229440}, {'type': 'perplexity', 'in_batch_dim': [8, 64], 'batch_size': 8, 'flops': 1265603017216}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1898404492032}, {'type': 'perplexity', 'in_batch_dim': [8, 112], 'batch_size': 8, 'flops': 2214805229440}, {'type': 'perplexity', 'in_batch_dim': [8, 64], 'batch_size': 8, 'flops': 1265603017216}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1898404492032}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1582003754624}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1898404492032}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1898404492032}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1898404492032}, {'type': 'perplexity', 'in_batch_dim': [8, 48], 'batch_size': 8, 'flops': 949202279808}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1898404492032}, {'type': 'perplexity', 'in_batch_dim': [8, 112], 'batch_size': 8, 'flops': 2214805229440}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1582003754624}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1898404492032}, {'type': 'perplexity', 'in_batch_dim': [8, 64], 'batch_size': 8, 'flops': 1265603017216}, {'type': 'perplexity', 'in_batch_dim': [8, 112], 'batch_size': 8, 'flops': 2214805229440}, {'type': 'perplexity', 'in_batch_dim': [8, 64], 'batch_size': 8, 'flops': 1265603017216}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1582003754624}, {'type': 'perplexity', 'in_batch_dim': [8, 64], 'batch_size': 8, 'flops': 1265603017216}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1582003754624}, {'type': 'perplexity', 'in_batch_dim': [8, 112], 'batch_size': 8, 'flops': 2214805229440}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1582003754624}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1582003754624}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1582003754624}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1582003754624}, {'type': 'perplexity', 'in_batch_dim': [8, 112], 'batch_size': 8, 'flops': 2214805229440}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1582003754624}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1898404492032}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1898404492032}, {'type': 'perplexity', 'in_batch_dim': [8, 112], 'batch_size': 8, 'flops': 2214805229440}, {'type': 'perplexity', 'in_batch_dim': [8, 144], 'batch_size': 8, 'flops': 2847606704256}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1582003754624}, {'type': 'perplexity', 'in_batch_dim': [8, 64], 'batch_size': 8, 'flops': 1265603017216}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1898404492032}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1898404492032}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1582003754624}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1898404492032}, {'type': 'perplexity', 'in_batch_dim': [8, 128], 'batch_size': 8, 'flops': 2531205966848}, {'type': 'perplexity', 'in_batch_dim': [8, 112], 'batch_size': 8, 'flops': 2214805229440}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1898404492032}, {'type': 'perplexity', 'in_batch_dim': [8, 64], 'batch_size': 8, 'flops': 1265603017216}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1898404492032}, {'type': 'perplexity', 'in_batch_dim': [8, 160], 'batch_size': 8, 'flops': 3164007441664}, {'type': 'perplexity', 'in_batch_dim': [8, 64], 'batch_size': 8, 'flops': 1265603017216}, {'type': 'perplexity', 'in_batch_dim': [8, 112], 'batch_size': 8, 'flops': 2214805229440}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1898404492032}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1898404492032}, {'type': 'perplexity', 'in_batch_dim': [8, 144], 'batch_size': 8, 'flops': 2847606704256}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1582003754624}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1898404492032}, {'type': 'perplexity', 'in_batch_dim': [8, 64], 'batch_size': 8, 'flops': 1265603017216}, {'type': 'perplexity', 'in_batch_dim': [8, 64], 'batch_size': 8, 'flops': 1265603017216}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1898404492032}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1898404492032}, {'type': 'perplexity', 'in_batch_dim': [8, 112], 'batch_size': 8, 'flops': 2214805229440}, {'type': 'perplexity', 'in_batch_dim': [8, 112], 'batch_size': 8, 'flops': 2214805229440}, {'type': 'perplexity', 'in_batch_dim': [8, 128], 'batch_size': 8, 'flops': 2531205966848}, {'type': 'perplexity', 'in_batch_dim': [8, 112], 'batch_size': 8, 'flops': 2214805229440}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1582003754624}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1582003754624}, {'type': 'perplexity', 'in_batch_dim': [8, 112], 'batch_size': 8, 'flops': 2214805229440}, {'type': 'perplexity', 'in_batch_dim': [8, 128], 'batch_size': 8, 'flops': 2531205966848}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1582003754624}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1582003754624}, {'type': 'perplexity', 'in_batch_dim': [8, 64], 'batch_size': 8, 'flops': 1265603017216}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1898404492032}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1582003754624}, {'type': 'perplexity', 'in_batch_dim': [8, 112], 'batch_size': 8, 'flops': 2214805229440}, {'type': 'perplexity', 'in_batch_dim': [8, 64], 'batch_size': 8, 'flops': 1265603017216}, {'type': 'perplexity', 'in_batch_dim': [8, 64], 'batch_size': 8, 'flops': 1265603017216}, {'type': 'perplexity', 'in_batch_dim': [8, 128], 'batch_size': 8, 'flops': 2531205966848}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1898404492032}, {'type': 'perplexity', 'in_batch_dim': [8, 112], 'batch_size': 8, 'flops': 2214805229440}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1582003754624}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1582003754624}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1582003754624}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1582003754624}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1582003754624}, {'type': 'perplexity', 'in_batch_dim': [8, 112], 'batch_size': 8, 'flops': 2214805229440}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1898404492032}, {'type': 'perplexity', 'in_batch_dim': [8, 112], 'batch_size': 8, 'flops': 2214805229440}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1898404492032}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1582003754624}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1582003754624}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1898404492032}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1898404492032}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1582003754624}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1582003754624}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1582003754624}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1582003754624}, {'type': 'perplexity', 'in_batch_dim': [8, 48], 'batch_size': 8, 'flops': 949202279808}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1582003754624}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1898404492032}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1582003754624}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1898404492032}, {'type': 'perplexity', 'in_batch_dim': [8, 112], 'batch_size': 8, 'flops': 2214805229440}, {'type': 'perplexity', 'in_batch_dim': [8, 128], 'batch_size': 8, 'flops': 2531205966848}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1898404492032}, {'type': 'perplexity', 'in_batch_dim': [8, 64], 'batch_size': 8, 'flops': 1265603017216}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1898404492032}, {'type': 'perplexity', 'in_batch_dim': [8, 64], 'batch_size': 8, 'flops': 1265603017216}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1582003754624}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1582003754624}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1582003754624}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1582003754624}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1898404492032}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1582003754624}, {'type': 'perplexity', 'in_batch_dim': [8, 112], 'batch_size': 8, 'flops': 2214805229440}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1898404492032}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1582003754624}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1898404492032}, {'type': 'perplexity', 'in_batch_dim': [8, 64], 'batch_size': 8, 'flops': 1265603017216}, {'type': 'perplexity', 'in_batch_dim': [8, 64], 'batch_size': 8, 'flops': 1265603017216}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1898404492032}, {'type': 'perplexity', 'in_batch_dim': [8, 64], 'batch_size': 8, 'flops': 1265603017216}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1582003754624}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1582003754624}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1582003754624}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1582003754624}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1582003754624}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1582003754624}, {'type': 'perplexity', 'in_batch_dim': [8, 64], 'batch_size': 8, 'flops': 1265603017216}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1582003754624}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1582003754624}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1898404492032}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1898404492032}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1898404492032}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1898404492032}, {'type': 'perplexity', 'in_batch_dim': [8, 128], 'batch_size': 8, 'flops': 2531205966848}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1582003754624}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1898404492032}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1898404492032}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1898404492032}, {'type': 'perplexity', 'in_batch_dim': [8, 64], 'batch_size': 8, 'flops': 1265603017216}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1898404492032}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1898404492032}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1582003754624}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1898404492032}, {'type': 'perplexity', 'in_batch_dim': [8, 112], 'batch_size': 8, 'flops': 2214805229440}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1898404492032}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1582003754624}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1898404492032}, {'type': 'perplexity', 'in_batch_dim': [8, 64], 'batch_size': 8, 'flops': 1265603017216}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1898404492032}, {'type': 'perplexity', 'in_batch_dim': [8, 112], 'batch_size': 8, 'flops': 2214805229440}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1898404492032}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1898404492032}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1582003754624}, {'type': 'perplexity', 'in_batch_dim': [8, 64], 'batch_size': 8, 'flops': 1265603017216}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1582003754624}, {'type': 'perplexity', 'in_batch_dim': [8, 128], 'batch_size': 8, 'flops': 2531205966848}, {'type': 'perplexity', 'in_batch_dim': [8, 64], 'batch_size': 8, 'flops': 1265603017216}, {'type': 'perplexity', 'in_batch_dim': [8, 128], 'batch_size': 8, 'flops': 2531205966848}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1898404492032}, {'type': 'perplexity', 'in_batch_dim': [8, 112], 'batch_size': 8, 'flops': 2214805229440}, {'type': 'perplexity', 'in_batch_dim': [8, 112], 'batch_size': 8, 'flops': 2214805229440}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1898404492032}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1898404492032}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1582003754624}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1582003754624}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1582003754624}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1898404492032}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1582003754624}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1582003754624}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1898404492032}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1582003754624}, {'type': 'perplexity', 'in_batch_dim': [8, 64], 'batch_size': 8, 'flops': 1265603017216}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1898404492032}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1898404492032}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1898404492032}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1898404492032}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1582003754624}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1898404492032}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1582003754624}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1582003754624}, {'type': 'perplexity', 'in_batch_dim': [8, 112], 'batch_size': 8, 'flops': 2214805229440}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1582003754624}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1898404492032}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1582003754624}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1582003754624}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1582003754624}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1898404492032}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1582003754624}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1898404492032}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1582003754624}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1582003754624}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1898404492032}, {'type': 'perplexity', 'in_batch_dim': [8, 112], 'batch_size': 8, 'flops': 2214805229440}, {'type': 'perplexity', 'in_batch_dim': [8, 128], 'batch_size': 8, 'flops': 2531205966848}, {'type': 'perplexity', 'in_batch_dim': [8, 112], 'batch_size': 8, 'flops': 2214805229440}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1582003754624}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1898404492032}, {'type': 'perplexity', 'in_batch_dim': [8, 64], 'batch_size': 8, 'flops': 1265603017216}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1898404492032}, {'type': 'perplexity', 'in_batch_dim': [8, 64], 'batch_size': 8, 'flops': 1265603017216}, {'type': 'perplexity', 'in_batch_dim': [8, 144], 'batch_size': 8, 'flops': 2847606704256}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1582003754624}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1898404492032}, {'type': 'perplexity', 'in_batch_dim': [8, 112], 'batch_size': 8, 'flops': 2214805229440}, {'type': 'perplexity', 'in_batch_dim': [8, 144], 'batch_size': 8, 'flops': 2847606704256}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1582003754624}, {'type': 'perplexity', 'in_batch_dim': [8, 112], 'batch_size': 8, 'flops': 2214805229440}, {'type': 'perplexity', 'in_batch_dim': [8, 64], 'batch_size': 8, 'flops': 1265603017216}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1582003754624}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1898404492032}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1898404492032}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1582003754624}, {'type': 'perplexity', 'in_batch_dim': [8, 112], 'batch_size': 8, 'flops': 2214805229440}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1582003754624}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1582003754624}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1582003754624}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1582003754624}, {'type': 'perplexity', 'in_batch_dim': [8, 112], 'batch_size': 8, 'flops': 2214805229440}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1898404492032}, {'type': 'perplexity', 'in_batch_dim': [8, 128], 'batch_size': 8, 'flops': 2531205966848}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1582003754624}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1582003754624}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1898404492032}, {'type': 'perplexity', 'in_batch_dim': [8, 64], 'batch_size': 8, 'flops': 1265603017216}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1898404492032}, {'type': 'perplexity', 'in_batch_dim': [8, 64], 'batch_size': 8, 'flops': 1265603017216}, {'type': 'perplexity', 'in_batch_dim': [8, 112], 'batch_size': 8, 'flops': 2214805229440}, {'type': 'perplexity', 'in_batch_dim': [8, 112], 'batch_size': 8, 'flops': 2214805229440}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1582003754624}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1898404492032}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1582003754624}, {'type': 'perplexity', 'in_batch_dim': [8, 64], 'batch_size': 8, 'flops': 1265603017216}, {'type': 'perplexity', 'in_batch_dim': [8, 64], 'batch_size': 8, 'flops': 1265603017216}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1898404492032}, {'type': 'perplexity', 'in_batch_dim': [8, 64], 'batch_size': 8, 'flops': 1265603017216}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1898404492032}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1898404492032}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1582003754624}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1898404492032}, {'type': 'perplexity', 'in_batch_dim': [8, 144], 'batch_size': 8, 'flops': 2847606704256}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1898404492032}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1582003754624}, {'type': 'perplexity', 'in_batch_dim': [8, 128], 'batch_size': 8, 'flops': 2531205966848}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1898404492032}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1898404492032}, {'type': 'perplexity', 'in_batch_dim': [8, 112], 'batch_size': 8, 'flops': 2214805229440}, {'type': 'perplexity', 'in_batch_dim': [8, 112], 'batch_size': 8, 'flops': 2214805229440}, {'type': 'perplexity', 'in_batch_dim': [8, 64], 'batch_size': 8, 'flops': 1265603017216}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1582003754624}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1898404492032}, {'type': 'perplexity', 'in_batch_dim': [8, 128], 'batch_size': 8, 'flops': 2531205966848}, {'type': 'perplexity', 'in_batch_dim': [8, 144], 'batch_size': 8, 'flops': 2847606704256}, {'type': 'perplexity', 'in_batch_dim': [8, 112], 'batch_size': 8, 'flops': 2214805229440}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1582003754624}, {'type': 'perplexity', 'in_batch_dim': [8, 64], 'batch_size': 8, 'flops': 1265603017216}, {'type': 'perplexity', 'in_batch_dim': [8, 64], 'batch_size': 8, 'flops': 1265603017216}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1898404492032}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1582003754624}, {'type': 'perplexity', 'in_batch_dim': [8, 64], 'batch_size': 8, 'flops': 1265603017216}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1898404492032}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1582003754624}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1898404492032}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1898404492032}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1582003754624}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1582003754624}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1582003754624}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1582003754624}, {'type': 'perplexity', 'in_batch_dim': [3, 48], 'batch_size': 8, 'flops': 949202279808}], 'timestamp': '2025-09-10 02:45:59.086480', 'step': 16776, 'epoch': 3} {'type': 'pplx', 'content': 9946.920279761964, 'timestamp': '2025-09-10 02:45:59.090194', 'step': 16776, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 3322488817984}, 'timestamp': '2025-09-10 02:45:59.121024', 'step': 16776, 'epoch': 3} {'type': 'loss', 'content': 0.07612535357475281, 'timestamp': '2025-09-10 02:45:59.124098', 'step': 16777, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 2847885091968}, 'timestamp': '2025-09-10 02:45:59.155495', 'step': 16777, 'epoch': 3} {'type': 'loss', 'content': 0.036038633435964584, 'timestamp': '2025-09-10 02:45:59.157764', 'step': 16778, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 3322488817984}, 'timestamp': '2025-09-10 02:45:59.189896', 'step': 16778, 'epoch': 3} {'type': 'loss', 'content': 0.05482441186904907, 'timestamp': '2025-09-10 02:45:59.193824', 'step': 16779, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 3322488817984}, 'timestamp': '2025-09-10 02:45:59.225511', 'step': 16779, 'epoch': 3} {'type': 'loss', 'content': 0.09659811854362488, 'timestamp': '2025-09-10 02:45:59.249648', 'step': 16780, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 128], 'flops': 3797092544000}, 'timestamp': '2025-09-10 02:45:59.283586', 'step': 16780, 'epoch': 3} {'type': 'loss', 'content': 0.06894533336162567, 'timestamp': '2025-09-10 02:45:59.286456', 'step': 16781, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 3322488817984}, 'timestamp': '2025-09-10 02:45:59.316731', 'step': 16781, 'epoch': 3} {'type': 'loss', 'content': 0.059726107865571976, 'timestamp': '2025-09-10 02:45:59.319819', 'step': 16782, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 144], 'flops': 4271696270016}, 'timestamp': '2025-09-10 02:45:59.350479', 'step': 16782, 'epoch': 3} {'type': 'loss', 'content': 0.063126340508461, 'timestamp': '2025-09-10 02:45:59.352904', 'step': 16783, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 3322488817984}, 'timestamp': '2025-09-10 02:45:59.383245', 'step': 16783, 'epoch': 3} {'type': 'loss', 'content': 0.13614977896213531, 'timestamp': '2025-09-10 02:45:59.406992', 'step': 16784, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 2847885091968}, 'timestamp': '2025-09-10 02:45:59.437111', 'step': 16784, 'epoch': 3} {'type': 'loss', 'content': 0.07693906873464584, 'timestamp': '2025-09-10 02:45:59.439481', 'step': 16785, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 3322488817984}, 'timestamp': '2025-09-10 02:45:59.469621', 'step': 16785, 'epoch': 3} {'type': 'loss', 'content': 0.0661931112408638, 'timestamp': '2025-09-10 02:45:59.472268', 'step': 16786, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 2847885091968}, 'timestamp': '2025-09-10 02:45:59.504783', 'step': 16786, 'epoch': 3} {'type': 'loss', 'content': 0.07566100358963013, 'timestamp': '2025-09-10 02:45:59.507183', 'step': 16787, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 3322488817984}, 'timestamp': '2025-09-10 02:45:59.537076', 'step': 16787, 'epoch': 3} {'type': 'loss', 'content': 0.10515978932380676, 'timestamp': '2025-09-10 02:45:59.560637', 'step': 16788, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 2847885091968}, 'timestamp': '2025-09-10 02:45:59.590350', 'step': 16788, 'epoch': 3} {'type': 'loss', 'content': 0.10698746144771576, 'timestamp': '2025-09-10 02:45:59.592723', 'step': 16789, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 2847885091968}, 'timestamp': '2025-09-10 02:45:59.623189', 'step': 16789, 'epoch': 3} {'type': 'loss', 'content': 0.05206173658370972, 'timestamp': '2025-09-10 02:45:59.625464', 'step': 16790, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 2847885091968}, 'timestamp': '2025-09-10 02:45:59.655502', 'step': 16790, 'epoch': 3} {'type': 'loss', 'content': 0.09673169255256653, 'timestamp': '2025-09-10 02:45:59.658444', 'step': 16791, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 2847885091968}, 'timestamp': '2025-09-10 02:45:59.689396', 'step': 16791, 'epoch': 3} {'type': 'loss', 'content': 0.047400470823049545, 'timestamp': '2025-09-10 02:45:59.712920', 'step': 16792, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 3322488817984}, 'timestamp': '2025-09-10 02:45:59.742923', 'step': 16792, 'epoch': 3} {'type': 'loss', 'content': 0.1038895696401596, 'timestamp': '2025-09-10 02:45:59.745182', 'step': 16793, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 128], 'flops': 3797092544000}, 'timestamp': '2025-09-10 02:45:59.774646', 'step': 16793, 'epoch': 3} {'type': 'loss', 'content': 0.0083907600492239, 'timestamp': '2025-09-10 02:45:59.777301', 'step': 16794, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 3322488817984}, 'timestamp': '2025-09-10 02:45:59.808425', 'step': 16794, 'epoch': 3} {'type': 'loss', 'content': 0.030696123838424683, 'timestamp': '2025-09-10 02:45:59.810827', 'step': 16795, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 2847885091968}, 'timestamp': '2025-09-10 02:45:59.840960', 'step': 16795, 'epoch': 3} {'type': 'loss', 'content': 0.02674882858991623, 'timestamp': '2025-09-10 02:45:59.864589', 'step': 16796, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 144], 'flops': 4271696270016}, 'timestamp': '2025-09-10 02:45:59.895161', 'step': 16796, 'epoch': 3} {'type': 'loss', 'content': 0.08231183886528015, 'timestamp': '2025-09-10 02:45:59.897427', 'step': 16797, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 2847885091968}, 'timestamp': '2025-09-10 02:45:59.927325', 'step': 16797, 'epoch': 3} {'type': 'loss', 'content': 0.033470168709754944, 'timestamp': '2025-09-10 02:45:59.930252', 'step': 16798, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 2847885091968}, 'timestamp': '2025-09-10 02:45:59.963084', 'step': 16798, 'epoch': 3} {'type': 'loss', 'content': 0.022163353860378265, 'timestamp': '2025-09-10 02:45:59.965176', 'step': 16799, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 2847885091968}, 'timestamp': '2025-09-10 02:45:59.994861', 'step': 16799, 'epoch': 3} {'type': 'loss', 'content': 0.04922077804803848, 'timestamp': '2025-09-10 02:46:00.018825', 'step': 16800, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 3322488817984}, 'timestamp': '2025-09-10 02:46:00.050324', 'step': 16800, 'epoch': 3} {'type': 'loss', 'content': 0.07782413810491562, 'timestamp': '2025-09-10 02:46:00.053285', 'step': 16801, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 3322488817984}, 'timestamp': '2025-09-10 02:46:00.084186', 'step': 16801, 'epoch': 3} {'type': 'loss', 'content': 0.023891085758805275, 'timestamp': '2025-09-10 02:46:00.086532', 'step': 16802, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 3322488817984}, 'timestamp': '2025-09-10 02:46:00.115772', 'step': 16802, 'epoch': 3} {'type': 'loss', 'content': 0.0796828642487526, 'timestamp': '2025-09-10 02:46:00.118110', 'step': 16803, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 3322488817984}, 'timestamp': '2025-09-10 02:46:00.147770', 'step': 16803, 'epoch': 3} {'type': 'loss', 'content': 0.0743851438164711, 'timestamp': '2025-09-10 02:46:00.171631', 'step': 16804, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 3322488817984}, 'timestamp': '2025-09-10 02:46:00.202743', 'step': 16804, 'epoch': 3} {'type': 'loss', 'content': 0.03439708799123764, 'timestamp': '2025-09-10 02:46:00.205409', 'step': 16805, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 2847885091968}, 'timestamp': '2025-09-10 02:46:00.235376', 'step': 16805, 'epoch': 3} {'type': 'loss', 'content': 0.04171892628073692, 'timestamp': '2025-09-10 02:46:00.237901', 'step': 16806, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 144], 'flops': 4271696270016}, 'timestamp': '2025-09-10 02:46:00.268407', 'step': 16806, 'epoch': 3} {'type': 'loss', 'content': 0.0588349774479866, 'timestamp': '2025-09-10 02:46:00.271853', 'step': 16807, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 144], 'flops': 4271696270016}, 'timestamp': '2025-09-10 02:46:00.302257', 'step': 16807, 'epoch': 3} {'type': 'loss', 'content': 0.07824325561523438, 'timestamp': '2025-09-10 02:46:00.325679', 'step': 16808, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 128], 'flops': 3797092544000}, 'timestamp': '2025-09-10 02:46:00.355687', 'step': 16808, 'epoch': 3} {'type': 'loss', 'content': 0.03289434686303139, 'timestamp': '2025-09-10 02:46:00.358104', 'step': 16809, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 3322488817984}, 'timestamp': '2025-09-10 02:46:00.388287', 'step': 16809, 'epoch': 3} {'type': 'loss', 'content': 0.05242925509810448, 'timestamp': '2025-09-10 02:46:00.390419', 'step': 16810, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 160], 'flops': 4746299996032}, 'timestamp': '2025-09-10 02:46:00.419741', 'step': 16810, 'epoch': 3} {'type': 'loss', 'content': 0.03419485688209534, 'timestamp': '2025-09-10 02:46:00.422529', 'step': 16811, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 3322488817984}, 'timestamp': '2025-09-10 02:46:00.451976', 'step': 16811, 'epoch': 3} {'type': 'loss', 'content': 0.020671600475907326, 'timestamp': '2025-09-10 02:46:00.475551', 'step': 16812, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 3322488817984}, 'timestamp': '2025-09-10 02:46:00.508126', 'step': 16812, 'epoch': 3} {'type': 'loss', 'content': 0.032212886959314346, 'timestamp': '2025-09-10 02:46:00.510586', 'step': 16813, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 128], 'flops': 3797092544000}, 'timestamp': '2025-09-10 02:46:00.541143', 'step': 16813, 'epoch': 3} {'type': 'loss', 'content': 0.07073080539703369, 'timestamp': '2025-09-10 02:46:00.543964', 'step': 16814, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 144], 'flops': 4271696270016}, 'timestamp': '2025-09-10 02:46:00.574661', 'step': 16814, 'epoch': 3} {'type': 'loss', 'content': 0.05354376882314682, 'timestamp': '2025-09-10 02:46:00.577767', 'step': 16815, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 2847885091968}, 'timestamp': '2025-09-10 02:46:00.608294', 'step': 16815, 'epoch': 3} {'type': 'loss', 'content': 0.07606837898492813, 'timestamp': '2025-09-10 02:46:00.632111', 'step': 16816, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 128], 'flops': 3797092544000}, 'timestamp': '2025-09-10 02:46:00.662622', 'step': 16816, 'epoch': 3} {'type': 'loss', 'content': 0.04321306198835373, 'timestamp': '2025-09-10 02:46:00.665023', 'step': 16817, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 128], 'flops': 3797092544000}, 'timestamp': '2025-09-10 02:46:00.694929', 'step': 16817, 'epoch': 3} {'type': 'loss', 'content': 0.02932792715728283, 'timestamp': '2025-09-10 02:46:00.697591', 'step': 16818, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 3322488817984}, 'timestamp': '2025-09-10 02:46:00.728908', 'step': 16818, 'epoch': 3} {'type': 'loss', 'content': 0.07309501618146896, 'timestamp': '2025-09-10 02:46:00.731595', 'step': 16819, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 128], 'flops': 3797092544000}, 'timestamp': '2025-09-10 02:46:00.761678', 'step': 16819, 'epoch': 3} {'type': 'loss', 'content': 0.06196335703134537, 'timestamp': '2025-09-10 02:46:00.785060', 'step': 16820, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 2847885091968}, 'timestamp': '2025-09-10 02:46:00.815001', 'step': 16820, 'epoch': 3} {'type': 'loss', 'content': 0.0752611830830574, 'timestamp': '2025-09-10 02:46:00.817516', 'step': 16821, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 2847885091968}, 'timestamp': '2025-09-10 02:46:00.847216', 'step': 16821, 'epoch': 3} {'type': 'loss', 'content': 0.08700791001319885, 'timestamp': '2025-09-10 02:46:00.849819', 'step': 16822, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 3322488817984}, 'timestamp': '2025-09-10 02:46:00.880235', 'step': 16822, 'epoch': 3} {'type': 'loss', 'content': 0.09601426869630814, 'timestamp': '2025-09-10 02:46:00.882828', 'step': 16823, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 128], 'flops': 3797092544000}, 'timestamp': '2025-09-10 02:46:00.913216', 'step': 16823, 'epoch': 3} {'type': 'loss', 'content': 0.137538880109787, 'timestamp': '2025-09-10 02:46:00.936913', 'step': 16824, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 2847885091968}, 'timestamp': '2025-09-10 02:46:00.967156', 'step': 16824, 'epoch': 3} {'type': 'loss', 'content': 0.022834401577711105, 'timestamp': '2025-09-10 02:46:00.969607', 'step': 16825, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 2847885091968}, 'timestamp': '2025-09-10 02:46:00.999737', 'step': 16825, 'epoch': 3} {'type': 'loss', 'content': 0.058204978704452515, 'timestamp': '2025-09-10 02:46:01.002227', 'step': 16826, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 3322488817984}, 'timestamp': '2025-09-10 02:46:01.032469', 'step': 16826, 'epoch': 3} {'type': 'loss', 'content': 0.10535359382629395, 'timestamp': '2025-09-10 02:46:01.035339', 'step': 16827, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 3322488817984}, 'timestamp': '2025-09-10 02:46:01.066150', 'step': 16827, 'epoch': 3} {'type': 'loss', 'content': 0.0758870542049408, 'timestamp': '2025-09-10 02:46:01.089985', 'step': 16828, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 3322488817984}, 'timestamp': '2025-09-10 02:46:01.121417', 'step': 16828, 'epoch': 3} {'type': 'loss', 'content': 0.07703279703855515, 'timestamp': '2025-09-10 02:46:01.124012', 'step': 16829, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 144], 'flops': 4271696270016}, 'timestamp': '2025-09-10 02:46:01.154174', 'step': 16829, 'epoch': 3} {'type': 'loss', 'content': 0.06014792621135712, 'timestamp': '2025-09-10 02:46:01.156654', 'step': 16830, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 2847885091968}, 'timestamp': '2025-09-10 02:46:01.186796', 'step': 16830, 'epoch': 3} {'type': 'loss', 'content': 0.041462771594524384, 'timestamp': '2025-09-10 02:46:01.189464', 'step': 16831, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 2847885091968}, 'timestamp': '2025-09-10 02:46:01.219101', 'step': 16831, 'epoch': 3} {'type': 'loss', 'content': 0.05493904650211334, 'timestamp': '2025-09-10 02:46:01.244032', 'step': 16832, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 2847885091968}, 'timestamp': '2025-09-10 02:46:01.282593', 'step': 16832, 'epoch': 3} {'type': 'loss', 'content': 0.03624696284532547, 'timestamp': '2025-09-10 02:46:01.285103', 'step': 16833, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 2847885091968}, 'timestamp': '2025-09-10 02:46:01.325169', 'step': 16833, 'epoch': 3} {'type': 'loss', 'content': 0.08139370381832123, 'timestamp': '2025-09-10 02:46:01.330160', 'step': 16834, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 80], 'flops': 2373281365952}, 'timestamp': '2025-09-10 02:46:01.364967', 'step': 16834, 'epoch': 3} {'type': 'loss', 'content': 0.08785068243741989, 'timestamp': '2025-09-10 02:46:01.368455', 'step': 16835, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 144], 'flops': 4271696270016}, 'timestamp': '2025-09-10 02:46:01.403754', 'step': 16835, 'epoch': 3} {'type': 'loss', 'content': 0.07900731265544891, 'timestamp': '2025-09-10 02:46:01.427324', 'step': 16836, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 3322488817984}, 'timestamp': '2025-09-10 02:46:01.459733', 'step': 16836, 'epoch': 3} {'type': 'loss', 'content': 0.08852527290582657, 'timestamp': '2025-09-10 02:46:01.462421', 'step': 16837, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 144], 'flops': 4271696270016}, 'timestamp': '2025-09-10 02:46:01.493775', 'step': 16837, 'epoch': 3} {'type': 'loss', 'content': 0.09339144825935364, 'timestamp': '2025-09-10 02:46:01.496359', 'step': 16838, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 3322488817984}, 'timestamp': '2025-09-10 02:46:01.527892', 'step': 16838, 'epoch': 3} {'type': 'loss', 'content': 0.08889246731996536, 'timestamp': '2025-09-10 02:46:01.530900', 'step': 16839, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 2847885091968}, 'timestamp': '2025-09-10 02:46:01.561426', 'step': 16839, 'epoch': 3} {'type': 'loss', 'content': 0.04491916671395302, 'timestamp': '2025-09-10 02:46:01.585298', 'step': 16840, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 2847885091968}, 'timestamp': '2025-09-10 02:46:01.615974', 'step': 16840, 'epoch': 3} {'type': 'loss', 'content': 0.11413542926311493, 'timestamp': '2025-09-10 02:46:01.618740', 'step': 16841, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 2847885091968}, 'timestamp': '2025-09-10 02:46:01.648895', 'step': 16841, 'epoch': 3} {'type': 'loss', 'content': 0.02647116407752037, 'timestamp': '2025-09-10 02:46:01.651188', 'step': 16842, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 2847885091968}, 'timestamp': '2025-09-10 02:46:01.691523', 'step': 16842, 'epoch': 3} {'type': 'loss', 'content': 0.03348948806524277, 'timestamp': '2025-09-10 02:46:01.693652', 'step': 16843, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 128], 'flops': 3797092544000}, 'timestamp': '2025-09-10 02:46:01.724587', 'step': 16843, 'epoch': 3} {'type': 'loss', 'content': 0.08803541958332062, 'timestamp': '2025-09-10 02:46:01.748324', 'step': 16844, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 3322488817984}, 'timestamp': '2025-09-10 02:46:01.778401', 'step': 16844, 'epoch': 3} {'type': 'loss', 'content': 0.1151217669248581, 'timestamp': '2025-09-10 02:46:01.780730', 'step': 16845, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 144], 'flops': 4271696270016}, 'timestamp': '2025-09-10 02:46:01.811660', 'step': 16845, 'epoch': 3} {'type': 'loss', 'content': 0.09310279786586761, 'timestamp': '2025-09-10 02:46:01.814110', 'step': 16846, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 160], 'flops': 4746299996032}, 'timestamp': '2025-09-10 02:46:01.844874', 'step': 16846, 'epoch': 3} {'type': 'loss', 'content': 0.023996736854314804, 'timestamp': '2025-09-10 02:46:01.847616', 'step': 16847, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 3322488817984}, 'timestamp': '2025-09-10 02:46:01.878030', 'step': 16847, 'epoch': 3} {'type': 'loss', 'content': 0.054368846118450165, 'timestamp': '2025-09-10 02:46:01.901772', 'step': 16848, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 3322488817984}, 'timestamp': '2025-09-10 02:46:01.936240', 'step': 16848, 'epoch': 3} {'type': 'loss', 'content': 0.07430785894393921, 'timestamp': '2025-09-10 02:46:01.939418', 'step': 16849, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 3322488817984}, 'timestamp': '2025-09-10 02:46:01.969640', 'step': 16849, 'epoch': 3} {'type': 'loss', 'content': 0.027623716741800308, 'timestamp': '2025-09-10 02:46:01.972161', 'step': 16850, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 80], 'flops': 2373281365952}, 'timestamp': '2025-09-10 02:46:02.002198', 'step': 16850, 'epoch': 3} {'type': 'loss', 'content': 0.06945700943470001, 'timestamp': '2025-09-10 02:46:02.006886', 'step': 16851, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 3322488817984}, 'timestamp': '2025-09-10 02:46:02.050129', 'step': 16851, 'epoch': 3} {'type': 'loss', 'content': 0.06809655576944351, 'timestamp': '2025-09-10 02:46:02.076494', 'step': 16852, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 3322488817984}, 'timestamp': '2025-09-10 02:46:02.113206', 'step': 16852, 'epoch': 3} {'type': 'loss', 'content': 0.10388418287038803, 'timestamp': '2025-09-10 02:46:02.115758', 'step': 16853, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 128], 'flops': 3797092544000}, 'timestamp': '2025-09-10 02:46:02.146992', 'step': 16853, 'epoch': 3} {'type': 'loss', 'content': 0.04603726789355278, 'timestamp': '2025-09-10 02:46:02.149455', 'step': 16854, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 2847885091968}, 'timestamp': '2025-09-10 02:46:02.178834', 'step': 16854, 'epoch': 3} {'type': 'loss', 'content': 0.0859474316239357, 'timestamp': '2025-09-10 02:46:02.181717', 'step': 16855, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 128], 'flops': 3797092544000}, 'timestamp': '2025-09-10 02:46:02.215442', 'step': 16855, 'epoch': 3} {'type': 'loss', 'content': 0.12070005387067795, 'timestamp': '2025-09-10 02:46:02.238999', 'step': 16856, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 144], 'flops': 4271696270016}, 'timestamp': '2025-09-10 02:46:02.275084', 'step': 16856, 'epoch': 3} {'type': 'loss', 'content': 0.027409406378865242, 'timestamp': '2025-09-10 02:46:02.279110', 'step': 16857, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 128], 'flops': 3797092544000}, 'timestamp': '2025-09-10 02:46:02.310169', 'step': 16857, 'epoch': 3} {'type': 'loss', 'content': 0.11331994831562042, 'timestamp': '2025-09-10 02:46:02.313872', 'step': 16858, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 128], 'flops': 3797092544000}, 'timestamp': '2025-09-10 02:46:02.347947', 'step': 16858, 'epoch': 3} {'type': 'loss', 'content': 0.09039409458637238, 'timestamp': '2025-09-10 02:46:02.350663', 'step': 16859, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 3322488817984}, 'timestamp': '2025-09-10 02:46:02.381407', 'step': 16859, 'epoch': 3} {'type': 'loss', 'content': 0.07287640124559402, 'timestamp': '2025-09-10 02:46:02.407145', 'step': 16860, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 3322488817984}, 'timestamp': '2025-09-10 02:46:02.438352', 'step': 16860, 'epoch': 3} {'type': 'loss', 'content': 0.119805708527565, 'timestamp': '2025-09-10 02:46:02.440794', 'step': 16861, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 2847885091968}, 'timestamp': '2025-09-10 02:46:02.476191', 'step': 16861, 'epoch': 3} {'type': 'loss', 'content': 0.05158200487494469, 'timestamp': '2025-09-10 02:46:02.481221', 'step': 16862, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 3322488817984}, 'timestamp': '2025-09-10 02:46:02.512639', 'step': 16862, 'epoch': 3} {'type': 'loss', 'content': 0.054197508841753006, 'timestamp': '2025-09-10 02:46:02.517464', 'step': 16863, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 128], 'flops': 3797092544000}, 'timestamp': '2025-09-10 02:46:02.549319', 'step': 16863, 'epoch': 3} {'type': 'loss', 'content': 0.1417819708585739, 'timestamp': '2025-09-10 02:46:02.573161', 'step': 16864, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 128], 'flops': 3797092544000}, 'timestamp': '2025-09-10 02:46:02.603461', 'step': 16864, 'epoch': 3} {'type': 'loss', 'content': 0.13066305220127106, 'timestamp': '2025-09-10 02:46:02.606685', 'step': 16865, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 3322488817984}, 'timestamp': '2025-09-10 02:46:02.636860', 'step': 16865, 'epoch': 3} {'type': 'loss', 'content': 0.09042879194021225, 'timestamp': '2025-09-10 02:46:02.639261', 'step': 16866, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 128], 'flops': 3797092544000}, 'timestamp': '2025-09-10 02:46:02.669693', 'step': 16866, 'epoch': 3} {'type': 'loss', 'content': 0.06661731749773026, 'timestamp': '2025-09-10 02:46:02.672796', 'step': 16867, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 3322488817984}, 'timestamp': '2025-09-10 02:46:02.710718', 'step': 16867, 'epoch': 3} {'type': 'loss', 'content': 0.06581305712461472, 'timestamp': '2025-09-10 02:46:02.734380', 'step': 16868, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 2847885091968}, 'timestamp': '2025-09-10 02:46:02.766715', 'step': 16868, 'epoch': 3} {'type': 'loss', 'content': 0.23259924352169037, 'timestamp': '2025-09-10 02:46:02.769593', 'step': 16869, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 2847885091968}, 'timestamp': '2025-09-10 02:46:02.800023', 'step': 16869, 'epoch': 3} {'type': 'loss', 'content': 0.0824696272611618, 'timestamp': '2025-09-10 02:46:02.802441', 'step': 16870, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 128], 'flops': 3797092544000}, 'timestamp': '2025-09-10 02:46:02.833370', 'step': 16870, 'epoch': 3} {'type': 'loss', 'content': 0.12209028005599976, 'timestamp': '2025-09-10 02:46:02.835837', 'step': 16871, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 2847885091968}, 'timestamp': '2025-09-10 02:46:02.865789', 'step': 16871, 'epoch': 3} {'type': 'loss', 'content': 0.0386035293340683, 'timestamp': '2025-09-10 02:46:02.889326', 'step': 16872, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 160], 'flops': 4746299996032}, 'timestamp': '2025-09-10 02:46:02.921819', 'step': 16872, 'epoch': 3} {'type': 'loss', 'content': 0.056869108229875565, 'timestamp': '2025-09-10 02:46:02.924499', 'step': 16873, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 2847885091968}, 'timestamp': '2025-09-10 02:46:02.964514', 'step': 16873, 'epoch': 3} {'type': 'loss', 'content': 0.035713013261556625, 'timestamp': '2025-09-10 02:46:02.967074', 'step': 16874, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 128], 'flops': 3797092544000}, 'timestamp': '2025-09-10 02:46:02.997096', 'step': 16874, 'epoch': 3} {'type': 'loss', 'content': 0.09478124976158142, 'timestamp': '2025-09-10 02:46:03.000384', 'step': 16875, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 3322488817984}, 'timestamp': '2025-09-10 02:46:03.031612', 'step': 16875, 'epoch': 3} {'type': 'loss', 'content': 0.03726249188184738, 'timestamp': '2025-09-10 02:46:03.055672', 'step': 16876, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 2847885091968}, 'timestamp': '2025-09-10 02:46:03.089135', 'step': 16876, 'epoch': 3} {'type': 'loss', 'content': 0.14288397133350372, 'timestamp': '2025-09-10 02:46:03.091575', 'step': 16877, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 2847885091968}, 'timestamp': '2025-09-10 02:46:03.129581', 'step': 16877, 'epoch': 3} {'type': 'loss', 'content': 0.06573422998189926, 'timestamp': '2025-09-10 02:46:03.132036', 'step': 16878, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 3322488817984}, 'timestamp': '2025-09-10 02:46:03.161781', 'step': 16878, 'epoch': 3} {'type': 'loss', 'content': 0.05162844434380531, 'timestamp': '2025-09-10 02:46:03.164231', 'step': 16879, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 3322488817984}, 'timestamp': '2025-09-10 02:46:03.193918', 'step': 16879, 'epoch': 3} {'type': 'loss', 'content': 0.07695681601762772, 'timestamp': '2025-09-10 02:46:03.217558', 'step': 16880, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 2847885091968}, 'timestamp': '2025-09-10 02:46:03.249725', 'step': 16880, 'epoch': 3} {'type': 'loss', 'content': 0.045080024749040604, 'timestamp': '2025-09-10 02:46:03.252127', 'step': 16881, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 144], 'flops': 4271696270016}, 'timestamp': '2025-09-10 02:46:03.281753', 'step': 16881, 'epoch': 3} {'type': 'loss', 'content': 0.07590074837207794, 'timestamp': '2025-09-10 02:46:03.284479', 'step': 16882, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 2847885091968}, 'timestamp': '2025-09-10 02:46:03.315041', 'step': 16882, 'epoch': 3} {'type': 'loss', 'content': 0.04614412039518356, 'timestamp': '2025-09-10 02:46:03.318082', 'step': 16883, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 3322488817984}, 'timestamp': '2025-09-10 02:46:03.350192', 'step': 16883, 'epoch': 3} {'type': 'loss', 'content': 0.04605632647871971, 'timestamp': '2025-09-10 02:46:03.373842', 'step': 16884, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 128], 'flops': 3797092544000}, 'timestamp': '2025-09-10 02:46:03.403578', 'step': 16884, 'epoch': 3} {'type': 'loss', 'content': 0.02216842770576477, 'timestamp': '2025-09-10 02:46:03.406069', 'step': 16885, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 128], 'flops': 3797092544000}, 'timestamp': '2025-09-10 02:46:03.435994', 'step': 16885, 'epoch': 3} {'type': 'loss', 'content': 0.09359083324670792, 'timestamp': '2025-09-10 02:46:03.438477', 'step': 16886, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 128], 'flops': 3797092544000}, 'timestamp': '2025-09-10 02:46:03.468238', 'step': 16886, 'epoch': 3} {'type': 'loss', 'content': 0.1714298278093338, 'timestamp': '2025-09-10 02:46:03.470804', 'step': 16887, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 128], 'flops': 3797092544000}, 'timestamp': '2025-09-10 02:46:03.500529', 'step': 16887, 'epoch': 3} {'type': 'loss', 'content': 0.10382673144340515, 'timestamp': '2025-09-10 02:46:03.524074', 'step': 16888, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 160], 'flops': 4746299996032}, 'timestamp': '2025-09-10 02:46:03.557126', 'step': 16888, 'epoch': 3} {'type': 'loss', 'content': 0.048850174993276596, 'timestamp': '2025-09-10 02:46:03.559357', 'step': 16889, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 2847885091968}, 'timestamp': '2025-09-10 02:46:03.588805', 'step': 16889, 'epoch': 3} {'type': 'loss', 'content': 0.08053197711706161, 'timestamp': '2025-09-10 02:46:03.591252', 'step': 16890, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 2847885091968}, 'timestamp': '2025-09-10 02:46:03.620566', 'step': 16890, 'epoch': 3} {'type': 'loss', 'content': 0.06599827110767365, 'timestamp': '2025-09-10 02:46:03.623389', 'step': 16891, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 3322488817984}, 'timestamp': '2025-09-10 02:46:03.654446', 'step': 16891, 'epoch': 3} {'type': 'loss', 'content': 0.06155086308717728, 'timestamp': '2025-09-10 02:46:03.678365', 'step': 16892, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 3322488817984}, 'timestamp': '2025-09-10 02:46:03.708599', 'step': 16892, 'epoch': 3} {'type': 'loss', 'content': 0.07065378129482269, 'timestamp': '2025-09-10 02:46:03.711166', 'step': 16893, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 2847885091968}, 'timestamp': '2025-09-10 02:46:03.741520', 'step': 16893, 'epoch': 3} {'type': 'loss', 'content': 0.06695723533630371, 'timestamp': '2025-09-10 02:46:03.744071', 'step': 16894, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 128], 'flops': 3797092544000}, 'timestamp': '2025-09-10 02:46:03.774771', 'step': 16894, 'epoch': 3} {'type': 'loss', 'content': 0.05065435543656349, 'timestamp': '2025-09-10 02:46:03.777246', 'step': 16895, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 80], 'flops': 2373281365952}, 'timestamp': '2025-09-10 02:46:03.807317', 'step': 16895, 'epoch': 3} {'type': 'loss', 'content': 0.06710826605558395, 'timestamp': '2025-09-10 02:46:03.831036', 'step': 16896, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 128], 'flops': 3797092544000}, 'timestamp': '2025-09-10 02:46:03.863132', 'step': 16896, 'epoch': 3} {'type': 'loss', 'content': 0.033530645072460175, 'timestamp': '2025-09-10 02:46:03.866585', 'step': 16897, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 3322488817984}, 'timestamp': '2025-09-10 02:46:03.897662', 'step': 16897, 'epoch': 3} {'type': 'loss', 'content': 0.05623067915439606, 'timestamp': '2025-09-10 02:46:03.899917', 'step': 16898, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 2847885091968}, 'timestamp': '2025-09-10 02:46:03.929971', 'step': 16898, 'epoch': 3} {'type': 'loss', 'content': 0.06938136368989944, 'timestamp': '2025-09-10 02:46:03.933359', 'step': 16899, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 3322488817984}, 'timestamp': '2025-09-10 02:46:03.968024', 'step': 16899, 'epoch': 3} {'type': 'loss', 'content': 0.05897410586476326, 'timestamp': '2025-09-10 02:46:03.992563', 'step': 16900, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 3322488817984}, 'timestamp': '2025-09-10 02:46:04.023129', 'step': 16900, 'epoch': 3} {'type': 'loss', 'content': 0.07451876997947693, 'timestamp': '2025-09-10 02:46:04.025525', 'step': 16901, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 144], 'flops': 4271696270016}, 'timestamp': '2025-09-10 02:46:04.056907', 'step': 16901, 'epoch': 3} {'type': 'loss', 'content': 0.11224035918712616, 'timestamp': '2025-09-10 02:46:04.059184', 'step': 16902, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 2847885091968}, 'timestamp': '2025-09-10 02:46:04.089583', 'step': 16902, 'epoch': 3} {'type': 'loss', 'content': 0.03451421111822128, 'timestamp': '2025-09-10 02:46:04.092007', 'step': 16903, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 128], 'flops': 3797092544000}, 'timestamp': '2025-09-10 02:46:04.122378', 'step': 16903, 'epoch': 3} {'type': 'loss', 'content': 0.09518421441316605, 'timestamp': '2025-09-10 02:46:04.145611', 'step': 16904, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 3322488817984}, 'timestamp': '2025-09-10 02:46:04.176172', 'step': 16904, 'epoch': 3} {'type': 'loss', 'content': 0.06058561056852341, 'timestamp': '2025-09-10 02:46:04.178432', 'step': 16905, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 128], 'flops': 3797092544000}, 'timestamp': '2025-09-10 02:46:04.208425', 'step': 16905, 'epoch': 3} {'type': 'loss', 'content': 0.0427495539188385, 'timestamp': '2025-09-10 02:46:04.211213', 'step': 16906, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 3322488817984}, 'timestamp': '2025-09-10 02:46:04.241322', 'step': 16906, 'epoch': 3} {'type': 'loss', 'content': 0.02485349029302597, 'timestamp': '2025-09-10 02:46:04.243774', 'step': 16907, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 3322488817984}, 'timestamp': '2025-09-10 02:46:04.274252', 'step': 16907, 'epoch': 3} {'type': 'loss', 'content': 0.0648917630314827, 'timestamp': '2025-09-10 02:46:04.298053', 'step': 16908, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 2847885091968}, 'timestamp': '2025-09-10 02:46:04.329165', 'step': 16908, 'epoch': 3} {'type': 'loss', 'content': 0.17829889059066772, 'timestamp': '2025-09-10 02:46:04.331661', 'step': 16909, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 128], 'flops': 3797092544000}, 'timestamp': '2025-09-10 02:46:04.363506', 'step': 16909, 'epoch': 3} {'type': 'loss', 'content': 0.03184151276946068, 'timestamp': '2025-09-10 02:46:04.365944', 'step': 16910, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 160], 'flops': 4746299996032}, 'timestamp': '2025-09-10 02:46:04.396128', 'step': 16910, 'epoch': 3} {'type': 'loss', 'content': 0.05671365559101105, 'timestamp': '2025-09-10 02:46:04.398825', 'step': 16911, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 3322488817984}, 'timestamp': '2025-09-10 02:46:04.428889', 'step': 16911, 'epoch': 3} {'type': 'loss', 'content': 0.09791600704193115, 'timestamp': '2025-09-10 02:46:04.452511', 'step': 16912, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 3322488817984}, 'timestamp': '2025-09-10 02:46:04.483006', 'step': 16912, 'epoch': 3} {'type': 'loss', 'content': 0.017338579520583153, 'timestamp': '2025-09-10 02:46:04.486707', 'step': 16913, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 128], 'flops': 3797092544000}, 'timestamp': '2025-09-10 02:46:04.528553', 'step': 16913, 'epoch': 3} {'type': 'loss', 'content': 0.0646788701415062, 'timestamp': '2025-09-10 02:46:04.531564', 'step': 16914, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 3322488817984}, 'timestamp': '2025-09-10 02:46:04.574578', 'step': 16914, 'epoch': 3} {'type': 'loss', 'content': 0.12561506032943726, 'timestamp': '2025-09-10 02:46:04.577098', 'step': 16915, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 3322488817984}, 'timestamp': '2025-09-10 02:46:04.607357', 'step': 16915, 'epoch': 3} {'type': 'loss', 'content': 0.08415384590625763, 'timestamp': '2025-09-10 02:46:04.630795', 'step': 16916, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 80], 'flops': 2373281365952}, 'timestamp': '2025-09-10 02:46:04.662581', 'step': 16916, 'epoch': 3} {'type': 'loss', 'content': 0.11310666054487228, 'timestamp': '2025-09-10 02:46:04.664999', 'step': 16917, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 128], 'flops': 3797092544000}, 'timestamp': '2025-09-10 02:46:04.695346', 'step': 16917, 'epoch': 3} {'type': 'loss', 'content': 0.04410721734166145, 'timestamp': '2025-09-10 02:46:04.697798', 'step': 16918, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 2847885091968}, 'timestamp': '2025-09-10 02:46:04.727785', 'step': 16918, 'epoch': 3} {'type': 'loss', 'content': 0.0357731394469738, 'timestamp': '2025-09-10 02:46:04.730162', 'step': 16919, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 128], 'flops': 3797092544000}, 'timestamp': '2025-09-10 02:46:04.762249', 'step': 16919, 'epoch': 3} {'type': 'loss', 'content': 0.05794583633542061, 'timestamp': '2025-09-10 02:46:04.785863', 'step': 16920, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 2847885091968}, 'timestamp': '2025-09-10 02:46:04.815923', 'step': 16920, 'epoch': 3} {'type': 'loss', 'content': 0.018221447244286537, 'timestamp': '2025-09-10 02:46:04.818386', 'step': 16921, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 128], 'flops': 3797092544000}, 'timestamp': '2025-09-10 02:46:04.848176', 'step': 16921, 'epoch': 3} {'type': 'loss', 'content': 0.08282927423715591, 'timestamp': '2025-09-10 02:46:04.851259', 'step': 16922, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 2847885091968}, 'timestamp': '2025-09-10 02:46:04.882139', 'step': 16922, 'epoch': 3} {'type': 'loss', 'content': 0.09030205011367798, 'timestamp': '2025-09-10 02:46:04.884249', 'step': 16923, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 80], 'flops': 2373281365952}, 'timestamp': '2025-09-10 02:46:04.913655', 'step': 16923, 'epoch': 3} {'type': 'loss', 'content': 0.07657371461391449, 'timestamp': '2025-09-10 02:46:04.938197', 'step': 16924, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 3322488817984}, 'timestamp': '2025-09-10 02:46:04.971951', 'step': 16924, 'epoch': 3} {'type': 'loss', 'content': 0.02789684757590294, 'timestamp': '2025-09-10 02:46:04.974747', 'step': 16925, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 2847885091968}, 'timestamp': '2025-09-10 02:46:05.006139', 'step': 16925, 'epoch': 3} {'type': 'loss', 'content': 0.032954663038253784, 'timestamp': '2025-09-10 02:46:05.008594', 'step': 16926, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 80], 'flops': 2373281365952}, 'timestamp': '2025-09-10 02:46:05.038721', 'step': 16926, 'epoch': 3} {'type': 'loss', 'content': 0.06510347872972488, 'timestamp': '2025-09-10 02:46:05.041053', 'step': 16927, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 144], 'flops': 4271696270016}, 'timestamp': '2025-09-10 02:46:05.074614', 'step': 16927, 'epoch': 3} {'type': 'loss', 'content': 0.10256173461675644, 'timestamp': '2025-09-10 02:46:05.098611', 'step': 16928, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 2847885091968}, 'timestamp': '2025-09-10 02:46:05.128614', 'step': 16928, 'epoch': 3} {'type': 'loss', 'content': 0.03800961747765541, 'timestamp': '2025-09-10 02:46:05.132488', 'step': 16929, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 2847885091968}, 'timestamp': '2025-09-10 02:46:05.166134', 'step': 16929, 'epoch': 3} {'type': 'loss', 'content': 0.11564763635396957, 'timestamp': '2025-09-10 02:46:05.168599', 'step': 16930, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 3322488817984}, 'timestamp': '2025-09-10 02:46:05.199603', 'step': 16930, 'epoch': 3} {'type': 'loss', 'content': 0.03826659917831421, 'timestamp': '2025-09-10 02:46:05.203764', 'step': 16931, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 2847885091968}, 'timestamp': '2025-09-10 02:46:05.238861', 'step': 16931, 'epoch': 3} {'type': 'loss', 'content': 0.05452729016542435, 'timestamp': '2025-09-10 02:46:05.262996', 'step': 16932, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 160], 'flops': 4746299996032}, 'timestamp': '2025-09-10 02:46:05.293743', 'step': 16932, 'epoch': 3} {'type': 'loss', 'content': 0.11583230644464493, 'timestamp': '2025-09-10 02:46:05.297544', 'step': 16933, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 128], 'flops': 3797092544000}, 'timestamp': '2025-09-10 02:46:05.330185', 'step': 16933, 'epoch': 3} {'type': 'loss', 'content': 0.04144918918609619, 'timestamp': '2025-09-10 02:46:05.332694', 'step': 16934, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 2847885091968}, 'timestamp': '2025-09-10 02:46:05.363317', 'step': 16934, 'epoch': 3} {'type': 'loss', 'content': 0.06462247669696808, 'timestamp': '2025-09-10 02:46:05.365853', 'step': 16935, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 128], 'flops': 3797092544000}, 'timestamp': '2025-09-10 02:46:05.396646', 'step': 16935, 'epoch': 3} {'type': 'loss', 'content': 0.03689531236886978, 'timestamp': '2025-09-10 02:46:05.420463', 'step': 16936, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 128], 'flops': 3797092544000}, 'timestamp': '2025-09-10 02:46:05.451036', 'step': 16936, 'epoch': 3} {'type': 'loss', 'content': 0.044814273715019226, 'timestamp': '2025-09-10 02:46:05.454560', 'step': 16937, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 80], 'flops': 2373281365952}, 'timestamp': '2025-09-10 02:46:05.485132', 'step': 16937, 'epoch': 3} {'type': 'loss', 'content': 0.09239460527896881, 'timestamp': '2025-09-10 02:46:05.487768', 'step': 16938, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 3322488817984}, 'timestamp': '2025-09-10 02:46:05.517965', 'step': 16938, 'epoch': 3} {'type': 'loss', 'content': 0.045386698096990585, 'timestamp': '2025-09-10 02:46:05.520376', 'step': 16939, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 3322488817984}, 'timestamp': '2025-09-10 02:46:05.550778', 'step': 16939, 'epoch': 3} {'type': 'loss', 'content': 0.12243107706308365, 'timestamp': '2025-09-10 02:46:05.574341', 'step': 16940, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 3322488817984}, 'timestamp': '2025-09-10 02:46:05.605471', 'step': 16940, 'epoch': 3} {'type': 'loss', 'content': 0.057943567633628845, 'timestamp': '2025-09-10 02:46:05.607709', 'step': 16941, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 3322488817984}, 'timestamp': '2025-09-10 02:46:05.639845', 'step': 16941, 'epoch': 3} {'type': 'loss', 'content': 0.07658804208040237, 'timestamp': '2025-09-10 02:46:05.642242', 'step': 16942, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 3322488817984}, 'timestamp': '2025-09-10 02:46:05.671984', 'step': 16942, 'epoch': 3} {'type': 'loss', 'content': 0.08792903274297714, 'timestamp': '2025-09-10 02:46:05.674416', 'step': 16943, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 2847885091968}, 'timestamp': '2025-09-10 02:46:05.704036', 'step': 16943, 'epoch': 3} {'type': 'loss', 'content': 0.050349242985248566, 'timestamp': '2025-09-10 02:46:05.727639', 'step': 16944, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 160], 'flops': 4746299996032}, 'timestamp': '2025-09-10 02:46:05.758126', 'step': 16944, 'epoch': 3} {'type': 'loss', 'content': 0.08909407258033752, 'timestamp': '2025-09-10 02:46:05.760497', 'step': 16945, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 80], 'flops': 2373281365952}, 'timestamp': '2025-09-10 02:46:05.790921', 'step': 16945, 'epoch': 3} {'type': 'loss', 'content': 0.06882563978433609, 'timestamp': '2025-09-10 02:46:05.793111', 'step': 16946, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 3322488817984}, 'timestamp': '2025-09-10 02:46:05.822798', 'step': 16946, 'epoch': 3} {'type': 'loss', 'content': 0.053086306899785995, 'timestamp': '2025-09-10 02:46:05.825329', 'step': 16947, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 3322488817984}, 'timestamp': '2025-09-10 02:46:05.856064', 'step': 16947, 'epoch': 3} {'type': 'loss', 'content': 0.08083967119455338, 'timestamp': '2025-09-10 02:46:05.880870', 'step': 16948, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 3322488817984}, 'timestamp': '2025-09-10 02:46:05.911846', 'step': 16948, 'epoch': 3} {'type': 'loss', 'content': 0.10780134052038193, 'timestamp': '2025-09-10 02:46:05.914706', 'step': 16949, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 3322488817984}, 'timestamp': '2025-09-10 02:46:05.944965', 'step': 16949, 'epoch': 3} {'type': 'loss', 'content': 0.04499402642250061, 'timestamp': '2025-09-10 02:46:05.949744', 'step': 16950, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 128], 'flops': 3797092544000}, 'timestamp': '2025-09-10 02:46:05.980276', 'step': 16950, 'epoch': 3} {'type': 'loss', 'content': 0.007130583748221397, 'timestamp': '2025-09-10 02:46:05.982518', 'step': 16951, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 128], 'flops': 3797092544000}, 'timestamp': '2025-09-10 02:46:06.013133', 'step': 16951, 'epoch': 3} {'type': 'loss', 'content': 0.05643695220351219, 'timestamp': '2025-09-10 02:46:06.036979', 'step': 16952, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 3322488817984}, 'timestamp': '2025-09-10 02:46:06.067456', 'step': 16952, 'epoch': 3} {'type': 'loss', 'content': 0.10192901641130447, 'timestamp': '2025-09-10 02:46:06.070057', 'step': 16953, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 128], 'flops': 3797092544000}, 'timestamp': '2025-09-10 02:46:06.101820', 'step': 16953, 'epoch': 3} {'type': 'loss', 'content': 0.08637119829654694, 'timestamp': '2025-09-10 02:46:06.104296', 'step': 16954, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 2847885091968}, 'timestamp': '2025-09-10 02:46:06.134579', 'step': 16954, 'epoch': 3} {'type': 'loss', 'content': 0.11902441084384918, 'timestamp': '2025-09-10 02:46:06.136753', 'step': 16955, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 2847885091968}, 'timestamp': '2025-09-10 02:46:06.167737', 'step': 16955, 'epoch': 3} {'type': 'loss', 'content': 0.1040838360786438, 'timestamp': '2025-09-10 02:46:06.191555', 'step': 16956, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 3322488817984}, 'timestamp': '2025-09-10 02:46:06.221981', 'step': 16956, 'epoch': 3} {'type': 'loss', 'content': 0.06333231925964355, 'timestamp': '2025-09-10 02:46:06.224521', 'step': 16957, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 3322488817984}, 'timestamp': '2025-09-10 02:46:06.254235', 'step': 16957, 'epoch': 3} {'type': 'loss', 'content': 0.09125888347625732, 'timestamp': '2025-09-10 02:46:06.256633', 'step': 16958, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 3322488817984}, 'timestamp': '2025-09-10 02:46:06.288730', 'step': 16958, 'epoch': 3} {'type': 'loss', 'content': 0.08187825232744217, 'timestamp': '2025-09-10 02:46:06.291115', 'step': 16959, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 3322488817984}, 'timestamp': '2025-09-10 02:46:06.321138', 'step': 16959, 'epoch': 3} {'type': 'loss', 'content': 0.0631151869893074, 'timestamp': '2025-09-10 02:46:06.344711', 'step': 16960, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 128], 'flops': 3797092544000}, 'timestamp': '2025-09-10 02:46:06.374644', 'step': 16960, 'epoch': 3} {'type': 'loss', 'content': 0.12548741698265076, 'timestamp': '2025-09-10 02:46:06.377075', 'step': 16961, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 128], 'flops': 3797092544000}, 'timestamp': '2025-09-10 02:46:06.407442', 'step': 16961, 'epoch': 3} {'type': 'loss', 'content': 0.06694169342517853, 'timestamp': '2025-09-10 02:46:06.409846', 'step': 16962, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 128], 'flops': 3797092544000}, 'timestamp': '2025-09-10 02:46:06.439500', 'step': 16962, 'epoch': 3} {'type': 'loss', 'content': 0.061969924718141556, 'timestamp': '2025-09-10 02:46:06.442359', 'step': 16963, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 144], 'flops': 4271696270016}, 'timestamp': '2025-09-10 02:46:06.472380', 'step': 16963, 'epoch': 3} {'type': 'loss', 'content': 0.05749248340725899, 'timestamp': '2025-09-10 02:46:06.495784', 'step': 16964, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 144], 'flops': 4271696270016}, 'timestamp': '2025-09-10 02:46:06.529000', 'step': 16964, 'epoch': 3} {'type': 'loss', 'content': 0.06667645275592804, 'timestamp': '2025-09-10 02:46:06.531303', 'step': 16965, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 128], 'flops': 3797092544000}, 'timestamp': '2025-09-10 02:46:06.561643', 'step': 16965, 'epoch': 3} {'type': 'loss', 'content': 0.01699657179415226, 'timestamp': '2025-09-10 02:46:06.564071', 'step': 16966, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 128], 'flops': 3797092544000}, 'timestamp': '2025-09-10 02:46:06.595852', 'step': 16966, 'epoch': 3} {'type': 'loss', 'content': 0.1004701778292656, 'timestamp': '2025-09-10 02:46:06.598224', 'step': 16967, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 3322488817984}, 'timestamp': '2025-09-10 02:46:06.628559', 'step': 16967, 'epoch': 3} {'type': 'loss', 'content': 0.0714622288942337, 'timestamp': '2025-09-10 02:46:06.652813', 'step': 16968, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 128], 'flops': 3797092544000}, 'timestamp': '2025-09-10 02:46:06.685372', 'step': 16968, 'epoch': 3} {'type': 'loss', 'content': 0.05472981929779053, 'timestamp': '2025-09-10 02:46:06.688192', 'step': 16969, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 3322488817984}, 'timestamp': '2025-09-10 02:46:06.722788', 'step': 16969, 'epoch': 3} {'type': 'loss', 'content': 0.0691726952791214, 'timestamp': '2025-09-10 02:46:06.725942', 'step': 16970, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 3322488817984}, 'timestamp': '2025-09-10 02:46:06.763374', 'step': 16970, 'epoch': 3} {'type': 'loss', 'content': 0.060029394924640656, 'timestamp': '2025-09-10 02:46:06.766285', 'step': 16971, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 144], 'flops': 4271696270016}, 'timestamp': '2025-09-10 02:46:06.800154', 'step': 16971, 'epoch': 3} {'type': 'loss', 'content': 0.05745968967676163, 'timestamp': '2025-09-10 02:46:06.823657', 'step': 16972, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 3322488817984}, 'timestamp': '2025-09-10 02:46:06.857052', 'step': 16972, 'epoch': 3} {'type': 'loss', 'content': 0.17040805518627167, 'timestamp': '2025-09-10 02:46:06.859329', 'step': 16973, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 144], 'flops': 4271696270016}, 'timestamp': '2025-09-10 02:46:06.901940', 'step': 16973, 'epoch': 3} {'type': 'loss', 'content': 0.05944298207759857, 'timestamp': '2025-09-10 02:46:06.904314', 'step': 16974, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 128], 'flops': 3797092544000}, 'timestamp': '2025-09-10 02:46:06.933925', 'step': 16974, 'epoch': 3} {'type': 'loss', 'content': 0.06053847074508667, 'timestamp': '2025-09-10 02:46:06.936523', 'step': 16975, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 2847885091968}, 'timestamp': '2025-09-10 02:46:06.970910', 'step': 16975, 'epoch': 3} {'type': 'loss', 'content': 0.056324515491724014, 'timestamp': '2025-09-10 02:46:06.995806', 'step': 16976, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 128], 'flops': 3797092544000}, 'timestamp': '2025-09-10 02:46:07.025342', 'step': 16976, 'epoch': 3} {'type': 'loss', 'content': 0.027943309396505356, 'timestamp': '2025-09-10 02:46:07.027851', 'step': 16977, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 144], 'flops': 4271696270016}, 'timestamp': '2025-09-10 02:46:07.059645', 'step': 16977, 'epoch': 3} {'type': 'loss', 'content': 0.1495993435382843, 'timestamp': '2025-09-10 02:46:07.065095', 'step': 16978, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 2847885091968}, 'timestamp': '2025-09-10 02:46:07.096190', 'step': 16978, 'epoch': 3} {'type': 'loss', 'content': 0.047577351331710815, 'timestamp': '2025-09-10 02:46:07.098523', 'step': 16979, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 2847885091968}, 'timestamp': '2025-09-10 02:46:07.128706', 'step': 16979, 'epoch': 3} {'type': 'loss', 'content': 0.07689476758241653, 'timestamp': '2025-09-10 02:46:07.152132', 'step': 16980, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 3322488817984}, 'timestamp': '2025-09-10 02:46:07.183558', 'step': 16980, 'epoch': 3} {'type': 'loss', 'content': 0.04517551138997078, 'timestamp': '2025-09-10 02:46:07.185701', 'step': 16981, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 3322488817984}, 'timestamp': '2025-09-10 02:46:07.215188', 'step': 16981, 'epoch': 3} {'type': 'loss', 'content': 0.07139149308204651, 'timestamp': '2025-09-10 02:46:07.217800', 'step': 16982, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 3322488817984}, 'timestamp': '2025-09-10 02:46:07.249129', 'step': 16982, 'epoch': 3} {'type': 'loss', 'content': 0.10647578537464142, 'timestamp': '2025-09-10 02:46:07.251506', 'step': 16983, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 2847885091968}, 'timestamp': '2025-09-10 02:46:07.281660', 'step': 16983, 'epoch': 3} {'type': 'loss', 'content': 0.12739673256874084, 'timestamp': '2025-09-10 02:46:07.306003', 'step': 16984, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 160], 'flops': 4746299996032}, 'timestamp': '2025-09-10 02:46:07.335853', 'step': 16984, 'epoch': 3} {'type': 'loss', 'content': 0.09894835203886032, 'timestamp': '2025-09-10 02:46:07.338706', 'step': 16985, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 144], 'flops': 4271696270016}, 'timestamp': '2025-09-10 02:46:07.370617', 'step': 16985, 'epoch': 3} {'type': 'loss', 'content': 0.04725709557533264, 'timestamp': '2025-09-10 02:46:07.373223', 'step': 16986, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 128], 'flops': 3797092544000}, 'timestamp': '2025-09-10 02:46:07.407221', 'step': 16986, 'epoch': 3} {'type': 'loss', 'content': 0.08913104236125946, 'timestamp': '2025-09-10 02:46:07.410018', 'step': 16987, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 2847885091968}, 'timestamp': '2025-09-10 02:46:07.443338', 'step': 16987, 'epoch': 3} {'type': 'loss', 'content': 0.0682816430926323, 'timestamp': '2025-09-10 02:46:07.467040', 'step': 16988, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 2847885091968}, 'timestamp': '2025-09-10 02:46:07.498647', 'step': 16988, 'epoch': 3} {'type': 'loss', 'content': 0.03818285092711449, 'timestamp': '2025-09-10 02:46:07.501565', 'step': 16989, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 3322488817984}, 'timestamp': '2025-09-10 02:46:07.534208', 'step': 16989, 'epoch': 3} {'type': 'loss', 'content': 0.056663595139980316, 'timestamp': '2025-09-10 02:46:07.537467', 'step': 16990, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 3322488817984}, 'timestamp': '2025-09-10 02:46:07.574978', 'step': 16990, 'epoch': 3} {'type': 'loss', 'content': 0.07045963406562805, 'timestamp': '2025-09-10 02:46:07.583725', 'step': 16991, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 128], 'flops': 3797092544000}, 'timestamp': '2025-09-10 02:46:07.622183', 'step': 16991, 'epoch': 3} {'type': 'loss', 'content': 0.05647946149110794, 'timestamp': '2025-09-10 02:46:07.646314', 'step': 16992, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 128], 'flops': 3797092544000}, 'timestamp': '2025-09-10 02:46:07.679335', 'step': 16992, 'epoch': 3} {'type': 'loss', 'content': 0.0864570215344429, 'timestamp': '2025-09-10 02:46:07.683782', 'step': 16993, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 144], 'flops': 4271696270016}, 'timestamp': '2025-09-10 02:46:07.714653', 'step': 16993, 'epoch': 3} {'type': 'loss', 'content': 0.08340459316968918, 'timestamp': '2025-09-10 02:46:07.717131', 'step': 16994, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 128], 'flops': 3797092544000}, 'timestamp': '2025-09-10 02:46:07.746823', 'step': 16994, 'epoch': 3} {'type': 'loss', 'content': 0.053331516683101654, 'timestamp': '2025-09-10 02:46:07.749291', 'step': 16995, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 2847885091968}, 'timestamp': '2025-09-10 02:46:07.781467', 'step': 16995, 'epoch': 3} {'type': 'loss', 'content': 0.062446996569633484, 'timestamp': '2025-09-10 02:46:07.806846', 'step': 16996, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 3322488817984}, 'timestamp': '2025-09-10 02:46:07.837480', 'step': 16996, 'epoch': 3} {'type': 'loss', 'content': 0.08040492236614227, 'timestamp': '2025-09-10 02:46:07.839790', 'step': 16997, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 128], 'flops': 3797092544000}, 'timestamp': '2025-09-10 02:46:07.870451', 'step': 16997, 'epoch': 3} {'type': 'loss', 'content': 0.14078295230865479, 'timestamp': '2025-09-10 02:46:07.872920', 'step': 16998, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 144], 'flops': 4271696270016}, 'timestamp': '2025-09-10 02:46:07.903408', 'step': 16998, 'epoch': 3} {'type': 'loss', 'content': 0.0951433852314949, 'timestamp': '2025-09-10 02:46:07.905838', 'step': 16999, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 3322488817984}, 'timestamp': '2025-09-10 02:46:07.935925', 'step': 16999, 'epoch': 3} {'type': 'loss', 'content': 0.11081985384225845, 'timestamp': '2025-09-10 02:46:07.960003', 'step': 17000, 'epoch': 3} {'type': 'info', 'content': 'Checkpoint saved at step 17000', 'timestamp': '2025-09-10 02:46:12.744493', 'step': 17000, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 3322488817984}, 'timestamp': '2025-09-10 02:46:12.778722', 'step': 17000, 'epoch': 3} {'type': 'loss', 'content': 0.0850180983543396, 'timestamp': '2025-09-10 02:46:12.781239', 'step': 17001, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 3322488817984}, 'timestamp': '2025-09-10 02:46:12.812398', 'step': 17001, 'epoch': 3} {'type': 'loss', 'content': 0.08301489800214767, 'timestamp': '2025-09-10 02:46:12.814889', 'step': 17002, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 128], 'flops': 3797092544000}, 'timestamp': '2025-09-10 02:46:12.844924', 'step': 17002, 'epoch': 3} {'type': 'loss', 'content': 0.11703276634216309, 'timestamp': '2025-09-10 02:46:12.849614', 'step': 17003, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 128], 'flops': 3797092544000}, 'timestamp': '2025-09-10 02:46:12.880385', 'step': 17003, 'epoch': 3} {'type': 'loss', 'content': 0.043365515768527985, 'timestamp': '2025-09-10 02:46:12.904045', 'step': 17004, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 2847885091968}, 'timestamp': '2025-09-10 02:46:12.933901', 'step': 17004, 'epoch': 3} {'type': 'loss', 'content': 0.04283275082707405, 'timestamp': '2025-09-10 02:46:12.936184', 'step': 17005, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 3322488817984}, 'timestamp': '2025-09-10 02:46:12.966129', 'step': 17005, 'epoch': 3} {'type': 'loss', 'content': 0.02328045479953289, 'timestamp': '2025-09-10 02:46:12.974880', 'step': 17006, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 128], 'flops': 3797092544000}, 'timestamp': '2025-09-10 02:46:13.011859', 'step': 17006, 'epoch': 3} {'type': 'loss', 'content': 0.08268071711063385, 'timestamp': '2025-09-10 02:46:13.014473', 'step': 17007, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 3322488817984}, 'timestamp': '2025-09-10 02:46:13.045556', 'step': 17007, 'epoch': 3} {'type': 'loss', 'content': 0.04959286004304886, 'timestamp': '2025-09-10 02:46:13.069271', 'step': 17008, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 128], 'flops': 3797092544000}, 'timestamp': '2025-09-10 02:46:13.101500', 'step': 17008, 'epoch': 3} {'type': 'loss', 'content': 0.045347414910793304, 'timestamp': '2025-09-10 02:46:13.103713', 'step': 17009, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 128], 'flops': 3797092544000}, 'timestamp': '2025-09-10 02:46:13.140133', 'step': 17009, 'epoch': 3} {'type': 'loss', 'content': 0.020562097430229187, 'timestamp': '2025-09-10 02:46:13.144715', 'step': 17010, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 128], 'flops': 3797092544000}, 'timestamp': '2025-09-10 02:46:13.181457', 'step': 17010, 'epoch': 3} {'type': 'loss', 'content': 0.07748857140541077, 'timestamp': '2025-09-10 02:46:13.184085', 'step': 17011, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 128], 'flops': 3797092544000}, 'timestamp': '2025-09-10 02:46:13.215379', 'step': 17011, 'epoch': 3} {'type': 'loss', 'content': 0.1283964365720749, 'timestamp': '2025-09-10 02:46:13.239190', 'step': 17012, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 3322488817984}, 'timestamp': '2025-09-10 02:46:13.273588', 'step': 17012, 'epoch': 3} {'type': 'loss', 'content': 0.0716189295053482, 'timestamp': '2025-09-10 02:46:13.276253', 'step': 17013, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 128], 'flops': 3797092544000}, 'timestamp': '2025-09-10 02:46:13.313366', 'step': 17013, 'epoch': 3} {'type': 'loss', 'content': 0.18254658579826355, 'timestamp': '2025-09-10 02:46:13.315884', 'step': 17014, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 128], 'flops': 3797092544000}, 'timestamp': '2025-09-10 02:46:13.345983', 'step': 17014, 'epoch': 3} {'type': 'loss', 'content': 0.0466872975230217, 'timestamp': '2025-09-10 02:46:13.350857', 'step': 17015, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 2847885091968}, 'timestamp': '2025-09-10 02:46:13.383841', 'step': 17015, 'epoch': 3} {'type': 'loss', 'content': 0.06254193931818008, 'timestamp': '2025-09-10 02:46:13.407418', 'step': 17016, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 128], 'flops': 3797092544000}, 'timestamp': '2025-09-10 02:46:13.440159', 'step': 17016, 'epoch': 3} {'type': 'loss', 'content': 0.11020947247743607, 'timestamp': '2025-09-10 02:46:13.442501', 'step': 17017, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 128], 'flops': 3797092544000}, 'timestamp': '2025-09-10 02:46:13.479855', 'step': 17017, 'epoch': 3} {'type': 'loss', 'content': 0.11878661066293716, 'timestamp': '2025-09-10 02:46:13.482588', 'step': 17018, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 2847885091968}, 'timestamp': '2025-09-10 02:46:13.512642', 'step': 17018, 'epoch': 3} {'type': 'loss', 'content': 0.08495932817459106, 'timestamp': '2025-09-10 02:46:13.515321', 'step': 17019, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 3322488817984}, 'timestamp': '2025-09-10 02:46:13.547376', 'step': 17019, 'epoch': 3} {'type': 'loss', 'content': 0.11068010330200195, 'timestamp': '2025-09-10 02:46:13.571079', 'step': 17020, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 3322488817984}, 'timestamp': '2025-09-10 02:46:13.602049', 'step': 17020, 'epoch': 3} {'type': 'loss', 'content': 0.0792955532670021, 'timestamp': '2025-09-10 02:46:13.606116', 'step': 17021, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 3322488817984}, 'timestamp': '2025-09-10 02:46:13.636920', 'step': 17021, 'epoch': 3} {'type': 'loss', 'content': 0.0781174898147583, 'timestamp': '2025-09-10 02:46:13.639527', 'step': 17022, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 144], 'flops': 4271696270016}, 'timestamp': '2025-09-10 02:46:13.670591', 'step': 17022, 'epoch': 3} {'type': 'loss', 'content': 0.06477998942136765, 'timestamp': '2025-09-10 02:46:13.673123', 'step': 17023, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 144], 'flops': 4271696270016}, 'timestamp': '2025-09-10 02:46:13.705559', 'step': 17023, 'epoch': 3} {'type': 'loss', 'content': 0.03239283710718155, 'timestamp': '2025-09-10 02:46:13.728937', 'step': 17024, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 2847885091968}, 'timestamp': '2025-09-10 02:46:13.766225', 'step': 17024, 'epoch': 3} {'type': 'loss', 'content': 0.10192392766475677, 'timestamp': '2025-09-10 02:46:13.769614', 'step': 17025, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 2847885091968}, 'timestamp': '2025-09-10 02:46:13.803362', 'step': 17025, 'epoch': 3} {'type': 'loss', 'content': 0.1445913314819336, 'timestamp': '2025-09-10 02:46:13.805668', 'step': 17026, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 3322488817984}, 'timestamp': '2025-09-10 02:46:13.835918', 'step': 17026, 'epoch': 3} {'type': 'loss', 'content': 0.11797817796468735, 'timestamp': '2025-09-10 02:46:13.838513', 'step': 17027, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 80], 'flops': 2373281365952}, 'timestamp': '2025-09-10 02:46:13.868585', 'step': 17027, 'epoch': 3} {'type': 'loss', 'content': 0.051735129207372665, 'timestamp': '2025-09-10 02:46:13.894949', 'step': 17028, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 128], 'flops': 3797092544000}, 'timestamp': '2025-09-10 02:46:13.926201', 'step': 17028, 'epoch': 3} {'type': 'loss', 'content': 0.06118590384721756, 'timestamp': '2025-09-10 02:46:13.929801', 'step': 17029, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 2847885091968}, 'timestamp': '2025-09-10 02:46:13.962736', 'step': 17029, 'epoch': 3} {'type': 'loss', 'content': 0.0732705220580101, 'timestamp': '2025-09-10 02:46:13.969227', 'step': 17030, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 3322488817984}, 'timestamp': '2025-09-10 02:46:14.004966', 'step': 17030, 'epoch': 3} {'type': 'loss', 'content': 0.12291871011257172, 'timestamp': '2025-09-10 02:46:14.009825', 'step': 17031, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 128], 'flops': 3797092544000}, 'timestamp': '2025-09-10 02:46:14.041532', 'step': 17031, 'epoch': 3} {'type': 'loss', 'content': 0.07609562575817108, 'timestamp': '2025-09-10 02:46:14.064986', 'step': 17032, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 2847885091968}, 'timestamp': '2025-09-10 02:46:14.098406', 'step': 17032, 'epoch': 3} {'type': 'loss', 'content': 0.09844911843538284, 'timestamp': '2025-09-10 02:46:14.100983', 'step': 17033, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 2847885091968}, 'timestamp': '2025-09-10 02:46:14.132336', 'step': 17033, 'epoch': 3} {'type': 'loss', 'content': 0.08336036652326584, 'timestamp': '2025-09-10 02:46:14.134396', 'step': 17034, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 160], 'flops': 4746299996032}, 'timestamp': '2025-09-10 02:46:14.164526', 'step': 17034, 'epoch': 3} {'type': 'loss', 'content': 0.10003649443387985, 'timestamp': '2025-09-10 02:46:14.168511', 'step': 17035, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 128], 'flops': 3797092544000}, 'timestamp': '2025-09-10 02:46:14.199566', 'step': 17035, 'epoch': 3} {'type': 'loss', 'content': 0.09145351499319077, 'timestamp': '2025-09-10 02:46:14.223738', 'step': 17036, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 144], 'flops': 4271696270016}, 'timestamp': '2025-09-10 02:46:14.255665', 'step': 17036, 'epoch': 3} {'type': 'loss', 'content': 0.0802263393998146, 'timestamp': '2025-09-10 02:46:14.258934', 'step': 17037, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 3322488817984}, 'timestamp': '2025-09-10 02:46:14.292881', 'step': 17037, 'epoch': 3} {'type': 'loss', 'content': 0.07196078449487686, 'timestamp': '2025-09-10 02:46:14.296696', 'step': 17038, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 2847885091968}, 'timestamp': '2025-09-10 02:46:14.331690', 'step': 17038, 'epoch': 3} {'type': 'loss', 'content': 0.027802227064967155, 'timestamp': '2025-09-10 02:46:14.338513', 'step': 17039, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 2847885091968}, 'timestamp': '2025-09-10 02:46:14.375338', 'step': 17039, 'epoch': 3} {'type': 'loss', 'content': 0.07705385982990265, 'timestamp': '2025-09-10 02:46:14.399389', 'step': 17040, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 2847885091968}, 'timestamp': '2025-09-10 02:46:14.434336', 'step': 17040, 'epoch': 3} {'type': 'loss', 'content': 0.036326829344034195, 'timestamp': '2025-09-10 02:46:14.437083', 'step': 17041, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 80], 'flops': 2373281365952}, 'timestamp': '2025-09-10 02:46:14.468753', 'step': 17041, 'epoch': 3} {'type': 'loss', 'content': 0.05930962413549423, 'timestamp': '2025-09-10 02:46:14.471601', 'step': 17042, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 2847885091968}, 'timestamp': '2025-09-10 02:46:14.502500', 'step': 17042, 'epoch': 3} {'type': 'loss', 'content': 0.08309021592140198, 'timestamp': '2025-09-10 02:46:14.505000', 'step': 17043, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 128], 'flops': 3797092544000}, 'timestamp': '2025-09-10 02:46:14.535592', 'step': 17043, 'epoch': 3} {'type': 'loss', 'content': 0.03680267184972763, 'timestamp': '2025-09-10 02:46:14.559214', 'step': 17044, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 128], 'flops': 3797092544000}, 'timestamp': '2025-09-10 02:46:14.590771', 'step': 17044, 'epoch': 3} {'type': 'loss', 'content': 0.07073850184679031, 'timestamp': '2025-09-10 02:46:14.592975', 'step': 17045, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 3322488817984}, 'timestamp': '2025-09-10 02:46:14.623457', 'step': 17045, 'epoch': 3} {'type': 'loss', 'content': 0.09801211953163147, 'timestamp': '2025-09-10 02:46:14.625709', 'step': 17046, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 3322488817984}, 'timestamp': '2025-09-10 02:46:14.656251', 'step': 17046, 'epoch': 3} {'type': 'loss', 'content': 0.09098251909017563, 'timestamp': '2025-09-10 02:46:14.662048', 'step': 17047, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 2847885091968}, 'timestamp': '2025-09-10 02:46:14.692160', 'step': 17047, 'epoch': 3} {'type': 'loss', 'content': 0.2043776512145996, 'timestamp': '2025-09-10 02:46:14.715784', 'step': 17048, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 144], 'flops': 4271696270016}, 'timestamp': '2025-09-10 02:46:14.747093', 'step': 17048, 'epoch': 3} {'type': 'loss', 'content': 0.0776577740907669, 'timestamp': '2025-09-10 02:46:14.750318', 'step': 17049, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 2847885091968}, 'timestamp': '2025-09-10 02:46:14.781465', 'step': 17049, 'epoch': 3} {'type': 'loss', 'content': 0.054524973034858704, 'timestamp': '2025-09-10 02:46:14.783965', 'step': 17050, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 3322488817984}, 'timestamp': '2025-09-10 02:46:14.814280', 'step': 17050, 'epoch': 3} {'type': 'loss', 'content': 0.07036272436380386, 'timestamp': '2025-09-10 02:46:14.816875', 'step': 17051, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 3322488817984}, 'timestamp': '2025-09-10 02:46:14.846772', 'step': 17051, 'epoch': 3} {'type': 'loss', 'content': 0.1437087208032608, 'timestamp': '2025-09-10 02:46:14.870340', 'step': 17052, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 144], 'flops': 4271696270016}, 'timestamp': '2025-09-10 02:46:14.902031', 'step': 17052, 'epoch': 3} {'type': 'loss', 'content': 0.03509185090661049, 'timestamp': '2025-09-10 02:46:14.904218', 'step': 17053, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 3322488817984}, 'timestamp': '2025-09-10 02:46:14.934440', 'step': 17053, 'epoch': 3} {'type': 'loss', 'content': 0.03899297118186951, 'timestamp': '2025-09-10 02:46:14.936795', 'step': 17054, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 144], 'flops': 4271696270016}, 'timestamp': '2025-09-10 02:46:14.968907', 'step': 17054, 'epoch': 3} {'type': 'loss', 'content': 0.16840998828411102, 'timestamp': '2025-09-10 02:46:14.977550', 'step': 17055, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 2847885091968}, 'timestamp': '2025-09-10 02:46:15.010465', 'step': 17055, 'epoch': 3} {'type': 'loss', 'content': 0.02511374093592167, 'timestamp': '2025-09-10 02:46:15.034035', 'step': 17056, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 3322488817984}, 'timestamp': '2025-09-10 02:46:15.064576', 'step': 17056, 'epoch': 3} {'type': 'loss', 'content': 0.08388690650463104, 'timestamp': '2025-09-10 02:46:15.066858', 'step': 17057, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 128], 'flops': 3797092544000}, 'timestamp': '2025-09-10 02:46:15.100914', 'step': 17057, 'epoch': 3} {'type': 'loss', 'content': 0.18772032856941223, 'timestamp': '2025-09-10 02:46:15.103446', 'step': 17058, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 128], 'flops': 3797092544000}, 'timestamp': '2025-09-10 02:46:15.134228', 'step': 17058, 'epoch': 3} {'type': 'loss', 'content': 0.05205278843641281, 'timestamp': '2025-09-10 02:46:15.136314', 'step': 17059, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 3322488817984}, 'timestamp': '2025-09-10 02:46:15.166446', 'step': 17059, 'epoch': 3} {'type': 'loss', 'content': 0.030341852456331253, 'timestamp': '2025-09-10 02:46:15.190250', 'step': 17060, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 2847885091968}, 'timestamp': '2025-09-10 02:46:15.223315', 'step': 17060, 'epoch': 3} {'type': 'loss', 'content': 0.09530352801084518, 'timestamp': '2025-09-10 02:46:15.225998', 'step': 17061, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 3322488817984}, 'timestamp': '2025-09-10 02:46:15.256448', 'step': 17061, 'epoch': 3} {'type': 'loss', 'content': 0.07203031331300735, 'timestamp': '2025-09-10 02:46:15.258860', 'step': 17062, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 144], 'flops': 4271696270016}, 'timestamp': '2025-09-10 02:46:15.289784', 'step': 17062, 'epoch': 3} {'type': 'loss', 'content': 0.05711578205227852, 'timestamp': '2025-09-10 02:46:15.293591', 'step': 17063, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 3322488817984}, 'timestamp': '2025-09-10 02:46:15.327047', 'step': 17063, 'epoch': 3} {'type': 'loss', 'content': 0.07947645336389542, 'timestamp': '2025-09-10 02:46:15.351499', 'step': 17064, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 128], 'flops': 3797092544000}, 'timestamp': '2025-09-10 02:46:15.383455', 'step': 17064, 'epoch': 3} {'type': 'loss', 'content': 0.054078709334135056, 'timestamp': '2025-09-10 02:46:15.386405', 'step': 17065, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 2847885091968}, 'timestamp': '2025-09-10 02:46:15.418413', 'step': 17065, 'epoch': 3} {'type': 'loss', 'content': 0.07075077295303345, 'timestamp': '2025-09-10 02:46:15.420852', 'step': 17066, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 3322488817984}, 'timestamp': '2025-09-10 02:46:15.452943', 'step': 17066, 'epoch': 3} {'type': 'loss', 'content': 0.09195934236049652, 'timestamp': '2025-09-10 02:46:15.455524', 'step': 17067, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 2847885091968}, 'timestamp': '2025-09-10 02:46:15.487131', 'step': 17067, 'epoch': 3} {'type': 'loss', 'content': 0.0625237300992012, 'timestamp': '2025-09-10 02:46:15.511504', 'step': 17068, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 2847885091968}, 'timestamp': '2025-09-10 02:46:15.542744', 'step': 17068, 'epoch': 3} {'type': 'loss', 'content': 0.047608714550733566, 'timestamp': '2025-09-10 02:46:15.545287', 'step': 17069, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 2847885091968}, 'timestamp': '2025-09-10 02:46:15.577024', 'step': 17069, 'epoch': 3} {'type': 'loss', 'content': 0.08081836998462677, 'timestamp': '2025-09-10 02:46:15.579460', 'step': 17070, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 2847885091968}, 'timestamp': '2025-09-10 02:46:15.611530', 'step': 17070, 'epoch': 3} {'type': 'loss', 'content': 0.07947519421577454, 'timestamp': '2025-09-10 02:46:15.613805', 'step': 17071, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 128], 'flops': 3797092544000}, 'timestamp': '2025-09-10 02:46:15.645527', 'step': 17071, 'epoch': 3} {'type': 'loss', 'content': 0.08830156177282333, 'timestamp': '2025-09-10 02:46:15.669369', 'step': 17072, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 160], 'flops': 4746299996032}, 'timestamp': '2025-09-10 02:46:15.700633', 'step': 17072, 'epoch': 3} {'type': 'loss', 'content': 0.04883995279669762, 'timestamp': '2025-09-10 02:46:15.703192', 'step': 17073, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 128], 'flops': 3797092544000}, 'timestamp': '2025-09-10 02:46:15.735027', 'step': 17073, 'epoch': 3} {'type': 'loss', 'content': 0.05079967528581619, 'timestamp': '2025-09-10 02:46:15.737515', 'step': 17074, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 128], 'flops': 3797092544000}, 'timestamp': '2025-09-10 02:46:15.768025', 'step': 17074, 'epoch': 3} {'type': 'loss', 'content': 0.07137557119131088, 'timestamp': '2025-09-10 02:46:15.770191', 'step': 17075, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 3322488817984}, 'timestamp': '2025-09-10 02:46:15.800359', 'step': 17075, 'epoch': 3} {'type': 'loss', 'content': 0.11995159834623337, 'timestamp': '2025-09-10 02:46:15.824295', 'step': 17076, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 3322488817984}, 'timestamp': '2025-09-10 02:46:15.855313', 'step': 17076, 'epoch': 3} {'type': 'loss', 'content': 0.07673272490501404, 'timestamp': '2025-09-10 02:46:15.857870', 'step': 17077, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 144], 'flops': 4271696270016}, 'timestamp': '2025-09-10 02:46:15.888202', 'step': 17077, 'epoch': 3} {'type': 'loss', 'content': 0.028875481337308884, 'timestamp': '2025-09-10 02:46:15.890682', 'step': 17078, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 3322488817984}, 'timestamp': '2025-09-10 02:46:15.921427', 'step': 17078, 'epoch': 3} {'type': 'loss', 'content': 0.10937429219484329, 'timestamp': '2025-09-10 02:46:15.924286', 'step': 17079, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 2847885091968}, 'timestamp': '2025-09-10 02:46:15.954512', 'step': 17079, 'epoch': 3} {'type': 'loss', 'content': 0.07298921793699265, 'timestamp': '2025-09-10 02:46:15.978626', 'step': 17080, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 3322488817984}, 'timestamp': '2025-09-10 02:46:16.012381', 'step': 17080, 'epoch': 3} {'type': 'loss', 'content': 0.06498680263757706, 'timestamp': '2025-09-10 02:46:16.014569', 'step': 17081, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 2847885091968}, 'timestamp': '2025-09-10 02:46:16.044779', 'step': 17081, 'epoch': 3} {'type': 'loss', 'content': 0.11359928548336029, 'timestamp': '2025-09-10 02:46:16.047349', 'step': 17082, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 128], 'flops': 3797092544000}, 'timestamp': '2025-09-10 02:46:16.078200', 'step': 17082, 'epoch': 3} {'type': 'loss', 'content': 0.07529901713132858, 'timestamp': '2025-09-10 02:46:16.080403', 'step': 17083, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 3322488817984}, 'timestamp': '2025-09-10 02:46:16.110587', 'step': 17083, 'epoch': 3} {'type': 'loss', 'content': 0.1262710988521576, 'timestamp': '2025-09-10 02:46:16.133992', 'step': 17084, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 3322488817984}, 'timestamp': '2025-09-10 02:46:16.164446', 'step': 17084, 'epoch': 3} {'type': 'loss', 'content': 0.12603117525577545, 'timestamp': '2025-09-10 02:46:16.167533', 'step': 17085, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 128], 'flops': 3797092544000}, 'timestamp': '2025-09-10 02:46:16.199049', 'step': 17085, 'epoch': 3} {'type': 'loss', 'content': 0.03679637610912323, 'timestamp': '2025-09-10 02:46:16.201337', 'step': 17086, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 128], 'flops': 3797092544000}, 'timestamp': '2025-09-10 02:46:16.231614', 'step': 17086, 'epoch': 3} {'type': 'loss', 'content': 0.09273754060268402, 'timestamp': '2025-09-10 02:46:16.233814', 'step': 17087, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 3322488817984}, 'timestamp': '2025-09-10 02:46:16.263928', 'step': 17087, 'epoch': 3} {'type': 'loss', 'content': 0.14282430708408356, 'timestamp': '2025-09-10 02:46:16.287800', 'step': 17088, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 3322488817984}, 'timestamp': '2025-09-10 02:46:16.319661', 'step': 17088, 'epoch': 3} {'type': 'loss', 'content': 0.11551259458065033, 'timestamp': '2025-09-10 02:46:16.322113', 'step': 17089, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 144], 'flops': 4271696270016}, 'timestamp': '2025-09-10 02:46:16.352783', 'step': 17089, 'epoch': 3} {'type': 'loss', 'content': 0.09273418039083481, 'timestamp': '2025-09-10 02:46:16.355676', 'step': 17090, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 2847885091968}, 'timestamp': '2025-09-10 02:46:16.388373', 'step': 17090, 'epoch': 3} {'type': 'loss', 'content': 0.030711423605680466, 'timestamp': '2025-09-10 02:46:16.391101', 'step': 17091, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 2847885091968}, 'timestamp': '2025-09-10 02:46:16.423373', 'step': 17091, 'epoch': 3} {'type': 'loss', 'content': 0.1443694829940796, 'timestamp': '2025-09-10 02:46:16.446778', 'step': 17092, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 3322488817984}, 'timestamp': '2025-09-10 02:46:16.478181', 'step': 17092, 'epoch': 3} {'type': 'loss', 'content': 0.16819782555103302, 'timestamp': '2025-09-10 02:46:16.480914', 'step': 17093, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 2847885091968}, 'timestamp': '2025-09-10 02:46:16.513497', 'step': 17093, 'epoch': 3} {'type': 'loss', 'content': 0.09192698448896408, 'timestamp': '2025-09-10 02:46:16.516109', 'step': 17094, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 128], 'flops': 3797092544000}, 'timestamp': '2025-09-10 02:46:16.547058', 'step': 17094, 'epoch': 3} {'type': 'loss', 'content': 0.11441763490438461, 'timestamp': '2025-09-10 02:46:16.549416', 'step': 17095, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 160], 'flops': 4746299996032}, 'timestamp': '2025-09-10 02:46:16.580246', 'step': 17095, 'epoch': 3} {'type': 'loss', 'content': 0.09714534878730774, 'timestamp': '2025-09-10 02:46:16.604128', 'step': 17096, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 128], 'flops': 3797092544000}, 'timestamp': '2025-09-10 02:46:16.634183', 'step': 17096, 'epoch': 3} {'type': 'loss', 'content': 0.061834607273340225, 'timestamp': '2025-09-10 02:46:16.636847', 'step': 17097, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 3322488817984}, 'timestamp': '2025-09-10 02:46:16.667502', 'step': 17097, 'epoch': 3} {'type': 'loss', 'content': 0.04516097530722618, 'timestamp': '2025-09-10 02:46:16.670034', 'step': 17098, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 3322488817984}, 'timestamp': '2025-09-10 02:46:16.699576', 'step': 17098, 'epoch': 3} {'type': 'loss', 'content': 0.05129309371113777, 'timestamp': '2025-09-10 02:46:16.702409', 'step': 17099, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 128], 'flops': 3797092544000}, 'timestamp': '2025-09-10 02:46:16.733138', 'step': 17099, 'epoch': 3} {'type': 'loss', 'content': 0.1309991031885147, 'timestamp': '2025-09-10 02:46:16.756825', 'step': 17100, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 128], 'flops': 3797092544000}, 'timestamp': '2025-09-10 02:46:16.787330', 'step': 17100, 'epoch': 3} {'type': 'loss', 'content': 0.041547805070877075, 'timestamp': '2025-09-10 02:46:16.789722', 'step': 17101, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 3322488817984}, 'timestamp': '2025-09-10 02:46:16.820733', 'step': 17101, 'epoch': 3} {'type': 'loss', 'content': 0.16146515309810638, 'timestamp': '2025-09-10 02:46:16.823455', 'step': 17102, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 3322488817984}, 'timestamp': '2025-09-10 02:46:16.853485', 'step': 17102, 'epoch': 3} {'type': 'loss', 'content': 0.0390867218375206, 'timestamp': '2025-09-10 02:46:16.855750', 'step': 17103, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 3322488817984}, 'timestamp': '2025-09-10 02:46:16.886294', 'step': 17103, 'epoch': 3} {'type': 'loss', 'content': 0.03763009235262871, 'timestamp': '2025-09-10 02:46:16.910006', 'step': 17104, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 3322488817984}, 'timestamp': '2025-09-10 02:46:16.941822', 'step': 17104, 'epoch': 3} {'type': 'loss', 'content': 0.14207801222801208, 'timestamp': '2025-09-10 02:46:16.944452', 'step': 17105, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 2847885091968}, 'timestamp': '2025-09-10 02:46:16.974618', 'step': 17105, 'epoch': 3} {'type': 'loss', 'content': 0.0858151987195015, 'timestamp': '2025-09-10 02:46:16.981426', 'step': 17106, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 3322488817984}, 'timestamp': '2025-09-10 02:46:17.013197', 'step': 17106, 'epoch': 3} {'type': 'loss', 'content': 0.04729846864938736, 'timestamp': '2025-09-10 02:46:17.015624', 'step': 17107, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 2847885091968}, 'timestamp': '2025-09-10 02:46:17.046557', 'step': 17107, 'epoch': 3} {'type': 'loss', 'content': 0.16572025418281555, 'timestamp': '2025-09-10 02:46:17.070327', 'step': 17108, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 128], 'flops': 3797092544000}, 'timestamp': '2025-09-10 02:46:17.101141', 'step': 17108, 'epoch': 3} {'type': 'loss', 'content': 0.03737428039312363, 'timestamp': '2025-09-10 02:46:17.104919', 'step': 17109, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 3322488817984}, 'timestamp': '2025-09-10 02:46:17.135043', 'step': 17109, 'epoch': 3} {'type': 'loss', 'content': 0.06068556383252144, 'timestamp': '2025-09-10 02:46:17.137440', 'step': 17110, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 144], 'flops': 4271696270016}, 'timestamp': '2025-09-10 02:46:17.169045', 'step': 17110, 'epoch': 3} {'type': 'loss', 'content': 0.045599691569805145, 'timestamp': '2025-09-10 02:46:17.171885', 'step': 17111, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 2847885091968}, 'timestamp': '2025-09-10 02:46:17.202972', 'step': 17111, 'epoch': 3} {'type': 'loss', 'content': 0.05798327550292015, 'timestamp': '2025-09-10 02:46:17.227801', 'step': 17112, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 2847885091968}, 'timestamp': '2025-09-10 02:46:17.259165', 'step': 17112, 'epoch': 3} {'type': 'loss', 'content': 0.032632194459438324, 'timestamp': '2025-09-10 02:46:17.261395', 'step': 17113, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 2847885091968}, 'timestamp': '2025-09-10 02:46:17.291455', 'step': 17113, 'epoch': 3} {'type': 'loss', 'content': 0.07314534485340118, 'timestamp': '2025-09-10 02:46:17.293770', 'step': 17114, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 2847885091968}, 'timestamp': '2025-09-10 02:46:17.324701', 'step': 17114, 'epoch': 3} {'type': 'loss', 'content': 0.0848601683974266, 'timestamp': '2025-09-10 02:46:17.327349', 'step': 17115, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 128], 'flops': 3797092544000}, 'timestamp': '2025-09-10 02:46:17.357130', 'step': 17115, 'epoch': 3} {'type': 'loss', 'content': 0.08541297912597656, 'timestamp': '2025-09-10 02:46:17.380987', 'step': 17116, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 3322488817984}, 'timestamp': '2025-09-10 02:46:17.414540', 'step': 17116, 'epoch': 3} {'type': 'loss', 'content': 0.05027482658624649, 'timestamp': '2025-09-10 02:46:17.417111', 'step': 17117, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 128], 'flops': 3797092544000}, 'timestamp': '2025-09-10 02:46:17.448625', 'step': 17117, 'epoch': 3} {'type': 'loss', 'content': 0.051683854311704636, 'timestamp': '2025-09-10 02:46:17.451422', 'step': 17118, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 3322488817984}, 'timestamp': '2025-09-10 02:46:17.482516', 'step': 17118, 'epoch': 3} {'type': 'loss', 'content': 0.0826294869184494, 'timestamp': '2025-09-10 02:46:17.484686', 'step': 17119, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 144], 'flops': 4271696270016}, 'timestamp': '2025-09-10 02:46:17.515408', 'step': 17119, 'epoch': 3} {'type': 'loss', 'content': 0.04244321212172508, 'timestamp': '2025-09-10 02:46:17.539212', 'step': 17120, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 2847885091968}, 'timestamp': '2025-09-10 02:46:17.569406', 'step': 17120, 'epoch': 3} {'type': 'loss', 'content': 0.029154833406209946, 'timestamp': '2025-09-10 02:46:17.572111', 'step': 17121, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 192], 'flops': 5695507448064}, 'timestamp': '2025-09-10 02:46:17.603412', 'step': 17121, 'epoch': 3} {'type': 'loss', 'content': 0.0858379527926445, 'timestamp': '2025-09-10 02:46:17.608266', 'step': 17122, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 2847885091968}, 'timestamp': '2025-09-10 02:46:17.639441', 'step': 17122, 'epoch': 3} {'type': 'loss', 'content': 0.018115051090717316, 'timestamp': '2025-09-10 02:46:17.642647', 'step': 17123, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 3322488817984}, 'timestamp': '2025-09-10 02:46:17.673028', 'step': 17123, 'epoch': 3} {'type': 'loss', 'content': 0.039841488003730774, 'timestamp': '2025-09-10 02:46:17.696796', 'step': 17124, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 3322488817984}, 'timestamp': '2025-09-10 02:46:17.727187', 'step': 17124, 'epoch': 3} {'type': 'loss', 'content': 0.04782586172223091, 'timestamp': '2025-09-10 02:46:17.729434', 'step': 17125, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 128], 'flops': 3797092544000}, 'timestamp': '2025-09-10 02:46:17.759361', 'step': 17125, 'epoch': 3} {'type': 'loss', 'content': 0.13914036750793457, 'timestamp': '2025-09-10 02:46:17.761552', 'step': 17126, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 128], 'flops': 3797092544000}, 'timestamp': '2025-09-10 02:46:17.792094', 'step': 17126, 'epoch': 3} {'type': 'loss', 'content': 0.08563818037509918, 'timestamp': '2025-09-10 02:46:17.794525', 'step': 17127, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 80], 'flops': 2373281365952}, 'timestamp': '2025-09-10 02:46:17.825967', 'step': 17127, 'epoch': 3} {'type': 'loss', 'content': 0.07964085042476654, 'timestamp': '2025-09-10 02:46:17.849487', 'step': 17128, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 2847885091968}, 'timestamp': '2025-09-10 02:46:17.880205', 'step': 17128, 'epoch': 3} {'type': 'loss', 'content': 0.0762869343161583, 'timestamp': '2025-09-10 02:46:17.882430', 'step': 17129, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 3322488817984}, 'timestamp': '2025-09-10 02:46:17.912499', 'step': 17129, 'epoch': 3} {'type': 'loss', 'content': 0.09330642223358154, 'timestamp': '2025-09-10 02:46:17.915674', 'step': 17130, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 3322488817984}, 'timestamp': '2025-09-10 02:46:17.946727', 'step': 17130, 'epoch': 3} {'type': 'loss', 'content': 0.09468796849250793, 'timestamp': '2025-09-10 02:46:17.949301', 'step': 17131, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 3322488817984}, 'timestamp': '2025-09-10 02:46:17.979566', 'step': 17131, 'epoch': 3} {'type': 'loss', 'content': 0.08678708970546722, 'timestamp': '2025-09-10 02:46:18.007367', 'step': 17132, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 128], 'flops': 3797092544000}, 'timestamp': '2025-09-10 02:46:18.039484', 'step': 17132, 'epoch': 3} {'type': 'loss', 'content': 0.03900983929634094, 'timestamp': '2025-09-10 02:46:18.042099', 'step': 17133, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 2847885091968}, 'timestamp': '2025-09-10 02:46:18.072240', 'step': 17133, 'epoch': 3} {'type': 'loss', 'content': 0.05601120367646217, 'timestamp': '2025-09-10 02:46:18.074740', 'step': 17134, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 144], 'flops': 4271696270016}, 'timestamp': '2025-09-10 02:46:18.105693', 'step': 17134, 'epoch': 3} {'type': 'loss', 'content': 0.07068821042776108, 'timestamp': '2025-09-10 02:46:18.108253', 'step': 17135, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 128], 'flops': 3797092544000}, 'timestamp': '2025-09-10 02:46:18.138969', 'step': 17135, 'epoch': 3} {'type': 'loss', 'content': 0.10218276083469391, 'timestamp': '2025-09-10 02:46:18.162589', 'step': 17136, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 3322488817984}, 'timestamp': '2025-09-10 02:46:18.193049', 'step': 17136, 'epoch': 3} {'type': 'loss', 'content': 0.009045478887856007, 'timestamp': '2025-09-10 02:46:18.195349', 'step': 17137, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 128], 'flops': 3797092544000}, 'timestamp': '2025-09-10 02:46:18.225520', 'step': 17137, 'epoch': 3} {'type': 'loss', 'content': 0.13161849975585938, 'timestamp': '2025-09-10 02:46:18.227755', 'step': 17138, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 3322488817984}, 'timestamp': '2025-09-10 02:46:18.258367', 'step': 17138, 'epoch': 3} {'type': 'loss', 'content': 0.06988528370857239, 'timestamp': '2025-09-10 02:46:18.261040', 'step': 17139, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 176], 'flops': 5220903722048}, 'timestamp': '2025-09-10 02:46:18.291489', 'step': 17139, 'epoch': 3} {'type': 'loss', 'content': 0.037909816950559616, 'timestamp': '2025-09-10 02:46:18.316392', 'step': 17140, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 2847885091968}, 'timestamp': '2025-09-10 02:46:18.348452', 'step': 17140, 'epoch': 3} {'type': 'loss', 'content': 0.03035520762205124, 'timestamp': '2025-09-10 02:46:18.350918', 'step': 17141, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 128], 'flops': 3797092544000}, 'timestamp': '2025-09-10 02:46:18.381370', 'step': 17141, 'epoch': 3} {'type': 'loss', 'content': 0.0571342296898365, 'timestamp': '2025-09-10 02:46:18.383931', 'step': 17142, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 128], 'flops': 3797092544000}, 'timestamp': '2025-09-10 02:46:18.414278', 'step': 17142, 'epoch': 3} {'type': 'loss', 'content': 0.12055963277816772, 'timestamp': '2025-09-10 02:46:18.416930', 'step': 17143, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 2847885091968}, 'timestamp': '2025-09-10 02:46:18.448551', 'step': 17143, 'epoch': 3} {'type': 'loss', 'content': 0.10764969140291214, 'timestamp': '2025-09-10 02:46:18.472411', 'step': 17144, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 2847885091968}, 'timestamp': '2025-09-10 02:46:18.502642', 'step': 17144, 'epoch': 3} {'type': 'loss', 'content': 0.06521742790937424, 'timestamp': '2025-09-10 02:46:18.505266', 'step': 17145, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 128], 'flops': 3797092544000}, 'timestamp': '2025-09-10 02:46:18.535442', 'step': 17145, 'epoch': 3} {'type': 'loss', 'content': 0.10741039365530014, 'timestamp': '2025-09-10 02:46:18.537658', 'step': 17146, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 128], 'flops': 3797092544000}, 'timestamp': '2025-09-10 02:46:18.568335', 'step': 17146, 'epoch': 3} {'type': 'loss', 'content': 0.06175914779305458, 'timestamp': '2025-09-10 02:46:18.570841', 'step': 17147, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 144], 'flops': 4271696270016}, 'timestamp': '2025-09-10 02:46:18.601563', 'step': 17147, 'epoch': 3} {'type': 'loss', 'content': 0.0845867469906807, 'timestamp': '2025-09-10 02:46:18.625115', 'step': 17148, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 80], 'flops': 2373281365952}, 'timestamp': '2025-09-10 02:46:18.654914', 'step': 17148, 'epoch': 3} {'type': 'loss', 'content': 0.07490022480487823, 'timestamp': '2025-09-10 02:46:18.657768', 'step': 17149, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 3322488817984}, 'timestamp': '2025-09-10 02:46:18.689364', 'step': 17149, 'epoch': 3} {'type': 'loss', 'content': 0.07890468090772629, 'timestamp': '2025-09-10 02:46:18.691613', 'step': 17150, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 128], 'flops': 3797092544000}, 'timestamp': '2025-09-10 02:46:18.721410', 'step': 17150, 'epoch': 3} {'type': 'loss', 'content': 0.07766080647706985, 'timestamp': '2025-09-10 02:46:18.724885', 'step': 17151, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 3322488817984}, 'timestamp': '2025-09-10 02:46:18.755435', 'step': 17151, 'epoch': 3} {'type': 'loss', 'content': 0.07829397171735764, 'timestamp': '2025-09-10 02:46:18.779096', 'step': 17152, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 128], 'flops': 3797092544000}, 'timestamp': '2025-09-10 02:46:18.809944', 'step': 17152, 'epoch': 3} {'type': 'loss', 'content': 0.026133714243769646, 'timestamp': '2025-09-10 02:46:18.812408', 'step': 17153, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 3322488817984}, 'timestamp': '2025-09-10 02:46:18.842815', 'step': 17153, 'epoch': 3} {'type': 'loss', 'content': 0.08861741423606873, 'timestamp': '2025-09-10 02:46:18.845451', 'step': 17154, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 2847885091968}, 'timestamp': '2025-09-10 02:46:18.875757', 'step': 17154, 'epoch': 3} {'type': 'loss', 'content': 0.0893930047750473, 'timestamp': '2025-09-10 02:46:18.878459', 'step': 17155, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 3322488817984}, 'timestamp': '2025-09-10 02:46:18.908625', 'step': 17155, 'epoch': 3} {'type': 'loss', 'content': 0.07328829169273376, 'timestamp': '2025-09-10 02:46:18.932318', 'step': 17156, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 128], 'flops': 3797092544000}, 'timestamp': '2025-09-10 02:46:18.962410', 'step': 17156, 'epoch': 3} {'type': 'loss', 'content': 0.1461738795042038, 'timestamp': '2025-09-10 02:46:18.964591', 'step': 17157, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 2847885091968}, 'timestamp': '2025-09-10 02:46:18.995207', 'step': 17157, 'epoch': 3} {'type': 'loss', 'content': 0.06372387707233429, 'timestamp': '2025-09-10 02:46:18.999502', 'step': 17158, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 128], 'flops': 3797092544000}, 'timestamp': '2025-09-10 02:46:19.032047', 'step': 17158, 'epoch': 3} {'type': 'loss', 'content': 0.05264263600111008, 'timestamp': '2025-09-10 02:46:19.034396', 'step': 17159, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 3322488817984}, 'timestamp': '2025-09-10 02:46:19.064210', 'step': 17159, 'epoch': 3} {'type': 'loss', 'content': 0.02682936191558838, 'timestamp': '2025-09-10 02:46:19.087819', 'step': 17160, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 3322488817984}, 'timestamp': '2025-09-10 02:46:19.117727', 'step': 17160, 'epoch': 3} {'type': 'loss', 'content': 0.09493529051542282, 'timestamp': '2025-09-10 02:46:19.120094', 'step': 17161, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 128], 'flops': 3797092544000}, 'timestamp': '2025-09-10 02:46:19.150605', 'step': 17161, 'epoch': 3} {'type': 'loss', 'content': 0.03414619341492653, 'timestamp': '2025-09-10 02:46:19.152842', 'step': 17162, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 80], 'flops': 2373281365952}, 'timestamp': '2025-09-10 02:46:19.182957', 'step': 17162, 'epoch': 3} {'type': 'loss', 'content': 0.12857291102409363, 'timestamp': '2025-09-10 02:46:19.185563', 'step': 17163, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 80], 'flops': 2373281365952}, 'timestamp': '2025-09-10 02:46:19.216444', 'step': 17163, 'epoch': 3} {'type': 'loss', 'content': 0.027478376403450966, 'timestamp': '2025-09-10 02:46:19.240077', 'step': 17164, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 128], 'flops': 3797092544000}, 'timestamp': '2025-09-10 02:46:19.270801', 'step': 17164, 'epoch': 3} {'type': 'loss', 'content': 0.12866589426994324, 'timestamp': '2025-09-10 02:46:19.273486', 'step': 17165, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 160], 'flops': 4746299996032}, 'timestamp': '2025-09-10 02:46:19.303530', 'step': 17165, 'epoch': 3} {'type': 'loss', 'content': 0.08427423983812332, 'timestamp': '2025-09-10 02:46:19.306334', 'step': 17166, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 144], 'flops': 4271696270016}, 'timestamp': '2025-09-10 02:46:19.337030', 'step': 17166, 'epoch': 3} {'type': 'loss', 'content': 0.061297398060560226, 'timestamp': '2025-09-10 02:46:19.339461', 'step': 17167, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 144], 'flops': 4271696270016}, 'timestamp': '2025-09-10 02:46:19.370218', 'step': 17167, 'epoch': 3} {'type': 'loss', 'content': 0.05525035411119461, 'timestamp': '2025-09-10 02:46:19.395406', 'step': 17168, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 2847885091968}, 'timestamp': '2025-09-10 02:46:19.426129', 'step': 17168, 'epoch': 3} {'type': 'loss', 'content': 0.041093096137046814, 'timestamp': '2025-09-10 02:46:19.428743', 'step': 17169, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 144], 'flops': 4271696270016}, 'timestamp': '2025-09-10 02:46:19.460730', 'step': 17169, 'epoch': 3} {'type': 'loss', 'content': 0.09416685998439789, 'timestamp': '2025-09-10 02:46:19.463259', 'step': 17170, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 160], 'flops': 4746299996032}, 'timestamp': '2025-09-10 02:46:19.493175', 'step': 17170, 'epoch': 3} {'type': 'loss', 'content': 0.04494989290833473, 'timestamp': '2025-09-10 02:46:19.496142', 'step': 17171, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 3322488817984}, 'timestamp': '2025-09-10 02:46:19.526229', 'step': 17171, 'epoch': 3} {'type': 'loss', 'content': 0.07076192647218704, 'timestamp': '2025-09-10 02:46:19.550217', 'step': 17172, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 2847885091968}, 'timestamp': '2025-09-10 02:46:19.580668', 'step': 17172, 'epoch': 3} {'type': 'loss', 'content': 0.09522005170583725, 'timestamp': '2025-09-10 02:46:19.583322', 'step': 17173, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 3322488817984}, 'timestamp': '2025-09-10 02:46:19.613425', 'step': 17173, 'epoch': 3} {'type': 'loss', 'content': 0.07316906005144119, 'timestamp': '2025-09-10 02:46:19.617802', 'step': 17174, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 176], 'flops': 5220903722048}, 'timestamp': '2025-09-10 02:46:19.649463', 'step': 17174, 'epoch': 3} {'type': 'loss', 'content': 0.11366306990385056, 'timestamp': '2025-09-10 02:46:19.654191', 'step': 17175, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 128], 'flops': 3797092544000}, 'timestamp': '2025-09-10 02:46:19.687372', 'step': 17175, 'epoch': 3} {'type': 'loss', 'content': 0.04300208017230034, 'timestamp': '2025-09-10 02:46:19.710805', 'step': 17176, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 3322488817984}, 'timestamp': '2025-09-10 02:46:19.740809', 'step': 17176, 'epoch': 3} {'type': 'loss', 'content': 0.12322532385587692, 'timestamp': '2025-09-10 02:46:19.742927', 'step': 17177, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 2847885091968}, 'timestamp': '2025-09-10 02:46:19.773574', 'step': 17177, 'epoch': 3} {'type': 'loss', 'content': 0.015248644165694714, 'timestamp': '2025-09-10 02:46:19.776069', 'step': 17178, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 128], 'flops': 3797092544000}, 'timestamp': '2025-09-10 02:46:19.809728', 'step': 17178, 'epoch': 3} {'type': 'loss', 'content': 0.08408647030591965, 'timestamp': '2025-09-10 02:46:19.812213', 'step': 17179, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 128], 'flops': 3797092544000}, 'timestamp': '2025-09-10 02:46:19.849038', 'step': 17179, 'epoch': 3} {'type': 'loss', 'content': 0.020913271233439445, 'timestamp': '2025-09-10 02:46:19.872425', 'step': 17180, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 3322488817984}, 'timestamp': '2025-09-10 02:46:19.906025', 'step': 17180, 'epoch': 3} {'type': 'loss', 'content': 0.09472031146287918, 'timestamp': '2025-09-10 02:46:19.908283', 'step': 17181, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 2847885091968}, 'timestamp': '2025-09-10 02:46:19.938578', 'step': 17181, 'epoch': 3} {'type': 'loss', 'content': 0.11411924660205841, 'timestamp': '2025-09-10 02:46:19.940959', 'step': 17182, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 128], 'flops': 3797092544000}, 'timestamp': '2025-09-10 02:46:19.970897', 'step': 17182, 'epoch': 3} {'type': 'loss', 'content': 0.04424312710762024, 'timestamp': '2025-09-10 02:46:19.973330', 'step': 17183, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 2847885091968}, 'timestamp': '2025-09-10 02:46:20.005595', 'step': 17183, 'epoch': 3} {'type': 'loss', 'content': 0.060028862208127975, 'timestamp': '2025-09-10 02:46:20.029541', 'step': 17184, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 128], 'flops': 3797092544000}, 'timestamp': '2025-09-10 02:46:20.059665', 'step': 17184, 'epoch': 3} {'type': 'loss', 'content': 0.06758903712034225, 'timestamp': '2025-09-10 02:46:20.062081', 'step': 17185, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 3322488817984}, 'timestamp': '2025-09-10 02:46:20.092290', 'step': 17185, 'epoch': 3} {'type': 'loss', 'content': 0.062315043061971664, 'timestamp': '2025-09-10 02:46:20.097971', 'step': 17186, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 128], 'flops': 3797092544000}, 'timestamp': '2025-09-10 02:46:20.129588', 'step': 17186, 'epoch': 3} {'type': 'loss', 'content': 0.0843522772192955, 'timestamp': '2025-09-10 02:46:20.131819', 'step': 17187, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 3322488817984}, 'timestamp': '2025-09-10 02:46:20.162129', 'step': 17187, 'epoch': 3} {'type': 'loss', 'content': 0.021763568744063377, 'timestamp': '2025-09-10 02:46:20.190594', 'step': 17188, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 3322488817984}, 'timestamp': '2025-09-10 02:46:20.229306', 'step': 17188, 'epoch': 3} {'type': 'loss', 'content': 0.11461420357227325, 'timestamp': '2025-09-10 02:46:20.232251', 'step': 17189, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 2847885091968}, 'timestamp': '2025-09-10 02:46:20.266413', 'step': 17189, 'epoch': 3} {'type': 'loss', 'content': 0.022851506248116493, 'timestamp': '2025-09-10 02:46:20.268597', 'step': 17190, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 3322488817984}, 'timestamp': '2025-09-10 02:46:20.299127', 'step': 17190, 'epoch': 3} {'type': 'loss', 'content': 0.10517656803131104, 'timestamp': '2025-09-10 02:46:20.301515', 'step': 17191, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 3322488817984}, 'timestamp': '2025-09-10 02:46:20.332536', 'step': 17191, 'epoch': 3} {'type': 'loss', 'content': 0.07728872448205948, 'timestamp': '2025-09-10 02:46:20.357337', 'step': 17192, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 128], 'flops': 3797092544000}, 'timestamp': '2025-09-10 02:46:20.388207', 'step': 17192, 'epoch': 3} {'type': 'loss', 'content': 0.06642577797174454, 'timestamp': '2025-09-10 02:46:20.390192', 'step': 17193, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 3322488817984}, 'timestamp': '2025-09-10 02:46:20.420441', 'step': 17193, 'epoch': 3} {'type': 'loss', 'content': 0.03894082084298134, 'timestamp': '2025-09-10 02:46:20.423072', 'step': 17194, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 2847885091968}, 'timestamp': '2025-09-10 02:46:20.454851', 'step': 17194, 'epoch': 3} {'type': 'loss', 'content': 0.03382774442434311, 'timestamp': '2025-09-10 02:46:20.457850', 'step': 17195, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 2847885091968}, 'timestamp': '2025-09-10 02:46:20.491305', 'step': 17195, 'epoch': 3} {'type': 'loss', 'content': 0.12604600191116333, 'timestamp': '2025-09-10 02:46:20.515179', 'step': 17196, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 2847885091968}, 'timestamp': '2025-09-10 02:46:20.548195', 'step': 17196, 'epoch': 3} {'type': 'loss', 'content': 0.04837220534682274, 'timestamp': '2025-09-10 02:46:20.555452', 'step': 17197, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 2847885091968}, 'timestamp': '2025-09-10 02:46:20.604972', 'step': 17197, 'epoch': 3} {'type': 'loss', 'content': 0.08004014939069748, 'timestamp': '2025-09-10 02:46:20.607380', 'step': 17198, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 2847885091968}, 'timestamp': '2025-09-10 02:46:20.637521', 'step': 17198, 'epoch': 3} {'type': 'loss', 'content': 0.07070821523666382, 'timestamp': '2025-09-10 02:46:20.639755', 'step': 17199, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 128], 'flops': 3797092544000}, 'timestamp': '2025-09-10 02:46:20.673036', 'step': 17199, 'epoch': 3} {'type': 'loss', 'content': 0.10925482958555222, 'timestamp': '2025-09-10 02:46:20.697345', 'step': 17200, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 2847885091968}, 'timestamp': '2025-09-10 02:46:20.727938', 'step': 17200, 'epoch': 3} {'type': 'loss', 'content': 0.1071557030081749, 'timestamp': '2025-09-10 02:46:20.730362', 'step': 17201, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 2847885091968}, 'timestamp': '2025-09-10 02:46:20.759816', 'step': 17201, 'epoch': 3} {'type': 'loss', 'content': 0.043424054980278015, 'timestamp': '2025-09-10 02:46:20.762276', 'step': 17202, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 3322488817984}, 'timestamp': '2025-09-10 02:46:20.792684', 'step': 17202, 'epoch': 3} {'type': 'loss', 'content': 0.05782434344291687, 'timestamp': '2025-09-10 02:46:20.797483', 'step': 17203, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 3322488817984}, 'timestamp': '2025-09-10 02:46:20.827580', 'step': 17203, 'epoch': 3} {'type': 'loss', 'content': 0.06567633152008057, 'timestamp': '2025-09-10 02:46:20.852380', 'step': 17204, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 2847885091968}, 'timestamp': '2025-09-10 02:46:20.883218', 'step': 17204, 'epoch': 3} {'type': 'loss', 'content': 0.06316923350095749, 'timestamp': '2025-09-10 02:46:20.890552', 'step': 17205, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 80], 'flops': 2373281365952}, 'timestamp': '2025-09-10 02:46:20.926297', 'step': 17205, 'epoch': 3} {'type': 'loss', 'content': 0.0361924022436142, 'timestamp': '2025-09-10 02:46:20.928416', 'step': 17206, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 128], 'flops': 3797092544000}, 'timestamp': '2025-09-10 02:46:20.959329', 'step': 17206, 'epoch': 3} {'type': 'loss', 'content': 0.03618604317307472, 'timestamp': '2025-09-10 02:46:20.961886', 'step': 17207, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 3322488817984}, 'timestamp': '2025-09-10 02:46:20.997622', 'step': 17207, 'epoch': 3} {'type': 'loss', 'content': 0.08938122540712357, 'timestamp': '2025-09-10 02:46:21.022813', 'step': 17208, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 80], 'flops': 2373281365952}, 'timestamp': '2025-09-10 02:46:21.062697', 'step': 17208, 'epoch': 3} {'type': 'loss', 'content': 0.08249732106924057, 'timestamp': '2025-09-10 02:46:21.065180', 'step': 17209, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 144], 'flops': 4271696270016}, 'timestamp': '2025-09-10 02:46:21.105859', 'step': 17209, 'epoch': 3} {'type': 'loss', 'content': 0.060490682721138, 'timestamp': '2025-09-10 02:46:21.109680', 'step': 17210, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 2847885091968}, 'timestamp': '2025-09-10 02:46:21.140202', 'step': 17210, 'epoch': 3} {'type': 'loss', 'content': 0.10695338249206543, 'timestamp': '2025-09-10 02:46:21.142664', 'step': 17211, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 3322488817984}, 'timestamp': '2025-09-10 02:46:21.172599', 'step': 17211, 'epoch': 3} {'type': 'loss', 'content': 0.08695857226848602, 'timestamp': '2025-09-10 02:46:21.198015', 'step': 17212, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 2847885091968}, 'timestamp': '2025-09-10 02:46:21.228124', 'step': 17212, 'epoch': 3} {'type': 'loss', 'content': 0.048616159707307816, 'timestamp': '2025-09-10 02:46:21.230466', 'step': 17213, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 3322488817984}, 'timestamp': '2025-09-10 02:46:21.261198', 'step': 17213, 'epoch': 3} {'type': 'loss', 'content': 0.08354858309030533, 'timestamp': '2025-09-10 02:46:21.264030', 'step': 17214, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 128], 'flops': 3797092544000}, 'timestamp': '2025-09-10 02:46:21.296336', 'step': 17214, 'epoch': 3} {'type': 'loss', 'content': 0.09077199548482895, 'timestamp': '2025-09-10 02:46:21.298919', 'step': 17215, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 3322488817984}, 'timestamp': '2025-09-10 02:46:21.329554', 'step': 17215, 'epoch': 3} {'type': 'loss', 'content': 0.01525347214192152, 'timestamp': '2025-09-10 02:46:21.353191', 'step': 17216, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 3322488817984}, 'timestamp': '2025-09-10 02:46:21.384912', 'step': 17216, 'epoch': 3} {'type': 'loss', 'content': 0.08713692426681519, 'timestamp': '2025-09-10 02:46:21.387325', 'step': 17217, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 144], 'flops': 4271696270016}, 'timestamp': '2025-09-10 02:46:21.417640', 'step': 17217, 'epoch': 3} {'type': 'loss', 'content': 0.029310094192624092, 'timestamp': '2025-09-10 02:46:21.420168', 'step': 17218, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 3322488817984}, 'timestamp': '2025-09-10 02:46:21.450952', 'step': 17218, 'epoch': 3} {'type': 'loss', 'content': 0.04517174884676933, 'timestamp': '2025-09-10 02:46:21.453810', 'step': 17219, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 128], 'flops': 3797092544000}, 'timestamp': '2025-09-10 02:46:21.487480', 'step': 17219, 'epoch': 3} {'type': 'loss', 'content': 0.06293854117393494, 'timestamp': '2025-09-10 02:46:21.511754', 'step': 17220, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 2847885091968}, 'timestamp': '2025-09-10 02:46:21.543141', 'step': 17220, 'epoch': 3} {'type': 'loss', 'content': 0.047413572669029236, 'timestamp': '2025-09-10 02:46:21.545743', 'step': 17221, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 2847885091968}, 'timestamp': '2025-09-10 02:46:21.576040', 'step': 17221, 'epoch': 3} {'type': 'loss', 'content': 0.029032092541456223, 'timestamp': '2025-09-10 02:46:21.578448', 'step': 17222, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 128], 'flops': 3797092544000}, 'timestamp': '2025-09-10 02:46:21.608786', 'step': 17222, 'epoch': 3} {'type': 'loss', 'content': 0.11042165756225586, 'timestamp': '2025-09-10 02:46:21.611657', 'step': 17223, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 2847885091968}, 'timestamp': '2025-09-10 02:46:21.642584', 'step': 17223, 'epoch': 3} {'type': 'loss', 'content': 0.11106203496456146, 'timestamp': '2025-09-10 02:46:21.667986', 'step': 17224, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 128], 'flops': 3797092544000}, 'timestamp': '2025-09-10 02:46:21.699821', 'step': 17224, 'epoch': 3} {'type': 'loss', 'content': 0.07498527318239212, 'timestamp': '2025-09-10 02:46:21.702340', 'step': 17225, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 3322488817984}, 'timestamp': '2025-09-10 02:46:21.733864', 'step': 17225, 'epoch': 3} {'type': 'loss', 'content': 0.06426941603422165, 'timestamp': '2025-09-10 02:46:21.736734', 'step': 17226, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 2847885091968}, 'timestamp': '2025-09-10 02:46:21.768616', 'step': 17226, 'epoch': 3} {'type': 'loss', 'content': 0.033811815083026886, 'timestamp': '2025-09-10 02:46:21.771296', 'step': 17227, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 2847885091968}, 'timestamp': '2025-09-10 02:46:21.802490', 'step': 17227, 'epoch': 3} {'type': 'loss', 'content': 0.07582247257232666, 'timestamp': '2025-09-10 02:46:21.826183', 'step': 17228, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 144], 'flops': 4271696270016}, 'timestamp': '2025-09-10 02:46:21.857877', 'step': 17228, 'epoch': 3} {'type': 'loss', 'content': 0.0867130383849144, 'timestamp': '2025-09-10 02:46:21.862329', 'step': 17229, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 2847885091968}, 'timestamp': '2025-09-10 02:46:21.897682', 'step': 17229, 'epoch': 3} {'type': 'loss', 'content': 0.1479087620973587, 'timestamp': '2025-09-10 02:46:21.900299', 'step': 17230, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 3322488817984}, 'timestamp': '2025-09-10 02:46:21.930933', 'step': 17230, 'epoch': 3} {'type': 'loss', 'content': 0.03264462202787399, 'timestamp': '2025-09-10 02:46:21.933803', 'step': 17231, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 3322488817984}, 'timestamp': '2025-09-10 02:46:21.964605', 'step': 17231, 'epoch': 3} {'type': 'loss', 'content': 0.013596657663583755, 'timestamp': '2025-09-10 02:46:21.988956', 'step': 17232, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 128], 'flops': 3797092544000}, 'timestamp': '2025-09-10 02:46:22.026823', 'step': 17232, 'epoch': 3} {'type': 'loss', 'content': 0.03548668324947357, 'timestamp': '2025-09-10 02:46:22.031748', 'step': 17233, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 3322488817984}, 'timestamp': '2025-09-10 02:46:22.065164', 'step': 17233, 'epoch': 3} {'type': 'loss', 'content': 0.09014055132865906, 'timestamp': '2025-09-10 02:46:22.071068', 'step': 17234, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 144], 'flops': 4271696270016}, 'timestamp': '2025-09-10 02:46:22.106017', 'step': 17234, 'epoch': 3} {'type': 'loss', 'content': 0.05924557149410248, 'timestamp': '2025-09-10 02:46:22.109259', 'step': 17235, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 160], 'flops': 4746299996032}, 'timestamp': '2025-09-10 02:46:22.142675', 'step': 17235, 'epoch': 3} {'type': 'loss', 'content': 0.021542849019169807, 'timestamp': '2025-09-10 02:46:22.167568', 'step': 17236, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 144], 'flops': 4271696270016}, 'timestamp': '2025-09-10 02:46:22.201602', 'step': 17236, 'epoch': 3} {'type': 'loss', 'content': 0.03083645924925804, 'timestamp': '2025-09-10 02:46:22.205237', 'step': 17237, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 3322488817984}, 'timestamp': '2025-09-10 02:46:22.238430', 'step': 17237, 'epoch': 3} {'type': 'loss', 'content': 0.04943332076072693, 'timestamp': '2025-09-10 02:46:22.241119', 'step': 17238, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 2847885091968}, 'timestamp': '2025-09-10 02:46:22.272910', 'step': 17238, 'epoch': 3} {'type': 'loss', 'content': 0.06639821827411652, 'timestamp': '2025-09-10 02:46:22.276522', 'step': 17239, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 192], 'flops': 5695507448064}, 'timestamp': '2025-09-10 02:46:22.308476', 'step': 17239, 'epoch': 3} {'type': 'loss', 'content': 0.01317373663187027, 'timestamp': '2025-09-10 02:46:22.333729', 'step': 17240, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 2847885091968}, 'timestamp': '2025-09-10 02:46:22.363926', 'step': 17240, 'epoch': 3} {'type': 'loss', 'content': 0.06834076344966888, 'timestamp': '2025-09-10 02:46:22.366719', 'step': 17241, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 3322488817984}, 'timestamp': '2025-09-10 02:46:22.398756', 'step': 17241, 'epoch': 3} {'type': 'loss', 'content': 0.0529819056391716, 'timestamp': '2025-09-10 02:46:22.401341', 'step': 17242, 'epoch': 3} {'type': 'flops', 'content': [{'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1582003754624}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1898404492032}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1898404492032}, {'type': 'perplexity', 'in_batch_dim': [8, 144], 'batch_size': 8, 'flops': 2847606704256}, {'type': 'perplexity', 'in_batch_dim': [8, 64], 'batch_size': 8, 'flops': 1265603017216}, {'type': 'perplexity', 'in_batch_dim': [8, 112], 'batch_size': 8, 'flops': 2214805229440}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1898404492032}, {'type': 'perplexity', 'in_batch_dim': [8, 112], 'batch_size': 8, 'flops': 2214805229440}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1898404492032}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1898404492032}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1898404492032}, {'type': 'perplexity', 'in_batch_dim': [8, 112], 'batch_size': 8, 'flops': 2214805229440}, {'type': 'perplexity', 'in_batch_dim': [8, 112], 'batch_size': 8, 'flops': 2214805229440}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1582003754624}, {'type': 'perplexity', 'in_batch_dim': [8, 144], 'batch_size': 8, 'flops': 2847606704256}, {'type': 'perplexity', 'in_batch_dim': [8, 112], 'batch_size': 8, 'flops': 2214805229440}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1582003754624}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1898404492032}, {'type': 'perplexity', 'in_batch_dim': [8, 64], 'batch_size': 8, 'flops': 1265603017216}, {'type': 'perplexity', 'in_batch_dim': [8, 64], 'batch_size': 8, 'flops': 1265603017216}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1898404492032}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1582003754624}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1582003754624}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1582003754624}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1582003754624}, {'type': 'perplexity', 'in_batch_dim': [8, 64], 'batch_size': 8, 'flops': 1265603017216}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1898404492032}, {'type': 'perplexity', 'in_batch_dim': [8, 64], 'batch_size': 8, 'flops': 1265603017216}, {'type': 'perplexity', 'in_batch_dim': [8, 64], 'batch_size': 8, 'flops': 1265603017216}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1582003754624}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1582003754624}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1582003754624}, {'type': 'perplexity', 'in_batch_dim': [8, 112], 'batch_size': 8, 'flops': 2214805229440}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1582003754624}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1582003754624}, {'type': 'perplexity', 'in_batch_dim': [8, 160], 'batch_size': 8, 'flops': 3164007441664}, {'type': 'perplexity', 'in_batch_dim': [8, 64], 'batch_size': 8, 'flops': 1265603017216}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1898404492032}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1898404492032}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1898404492032}, {'type': 'perplexity', 'in_batch_dim': [8, 64], 'batch_size': 8, 'flops': 1265603017216}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1582003754624}, {'type': 'perplexity', 'in_batch_dim': [8, 64], 'batch_size': 8, 'flops': 1265603017216}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1582003754624}, {'type': 'perplexity', 'in_batch_dim': [8, 64], 'batch_size': 8, 'flops': 1265603017216}, {'type': 'perplexity', 'in_batch_dim': [8, 112], 'batch_size': 8, 'flops': 2214805229440}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1582003754624}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1898404492032}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1898404492032}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1898404492032}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1898404492032}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1898404492032}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1898404492032}, {'type': 'perplexity', 'in_batch_dim': [8, 64], 'batch_size': 8, 'flops': 1265603017216}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1898404492032}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1582003754624}, {'type': 'perplexity', 'in_batch_dim': [8, 64], 'batch_size': 8, 'flops': 1265603017216}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1898404492032}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1898404492032}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1898404492032}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1898404492032}, {'type': 'perplexity', 'in_batch_dim': [8, 128], 'batch_size': 8, 'flops': 2531205966848}, {'type': 'perplexity', 'in_batch_dim': [8, 112], 'batch_size': 8, 'flops': 2214805229440}, {'type': 'perplexity', 'in_batch_dim': [8, 64], 'batch_size': 8, 'flops': 1265603017216}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1582003754624}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1898404492032}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1582003754624}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1898404492032}, {'type': 'perplexity', 'in_batch_dim': [8, 64], 'batch_size': 8, 'flops': 1265603017216}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1898404492032}, {'type': 'perplexity', 'in_batch_dim': [8, 112], 'batch_size': 8, 'flops': 2214805229440}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1898404492032}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1582003754624}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1582003754624}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1582003754624}, {'type': 'perplexity', 'in_batch_dim': [8, 64], 'batch_size': 8, 'flops': 1265603017216}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1582003754624}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1898404492032}, {'type': 'perplexity', 'in_batch_dim': [8, 64], 'batch_size': 8, 'flops': 1265603017216}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1582003754624}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1898404492032}, {'type': 'perplexity', 'in_batch_dim': [8, 64], 'batch_size': 8, 'flops': 1265603017216}, {'type': 'perplexity', 'in_batch_dim': [8, 112], 'batch_size': 8, 'flops': 2214805229440}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1582003754624}, {'type': 'perplexity', 'in_batch_dim': [8, 144], 'batch_size': 8, 'flops': 2847606704256}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1582003754624}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1582003754624}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1582003754624}, {'type': 'perplexity', 'in_batch_dim': [8, 64], 'batch_size': 8, 'flops': 1265603017216}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1898404492032}, {'type': 'perplexity', 'in_batch_dim': [8, 112], 'batch_size': 8, 'flops': 2214805229440}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1898404492032}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1582003754624}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1582003754624}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1582003754624}, {'type': 'perplexity', 'in_batch_dim': [8, 112], 'batch_size': 8, 'flops': 2214805229440}, {'type': 'perplexity', 'in_batch_dim': [8, 64], 'batch_size': 8, 'flops': 1265603017216}, {'type': 'perplexity', 'in_batch_dim': [8, 112], 'batch_size': 8, 'flops': 2214805229440}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1582003754624}, {'type': 'perplexity', 'in_batch_dim': [8, 64], 'batch_size': 8, 'flops': 1265603017216}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1582003754624}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1582003754624}, {'type': 'perplexity', 'in_batch_dim': [8, 64], 'batch_size': 8, 'flops': 1265603017216}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1582003754624}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1582003754624}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1582003754624}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1898404492032}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1582003754624}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1582003754624}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1898404492032}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1898404492032}, {'type': 'perplexity', 'in_batch_dim': [8, 112], 'batch_size': 8, 'flops': 2214805229440}, {'type': 'perplexity', 'in_batch_dim': [8, 64], 'batch_size': 8, 'flops': 1265603017216}, {'type': 'perplexity', 'in_batch_dim': [8, 112], 'batch_size': 8, 'flops': 2214805229440}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1898404492032}, {'type': 'perplexity', 'in_batch_dim': [8, 112], 'batch_size': 8, 'flops': 2214805229440}, {'type': 'perplexity', 'in_batch_dim': [8, 128], 'batch_size': 8, 'flops': 2531205966848}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1582003754624}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1898404492032}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1898404492032}, {'type': 'perplexity', 'in_batch_dim': [8, 192], 'batch_size': 8, 'flops': 3796808916480}, {'type': 'perplexity', 'in_batch_dim': [8, 64], 'batch_size': 8, 'flops': 1265603017216}, {'type': 'perplexity', 'in_batch_dim': [8, 144], 'batch_size': 8, 'flops': 2847606704256}, {'type': 'perplexity', 'in_batch_dim': [8, 64], 'batch_size': 8, 'flops': 1265603017216}, {'type': 'perplexity', 'in_batch_dim': [8, 144], 'batch_size': 8, 'flops': 2847606704256}, {'type': 'perplexity', 'in_batch_dim': [8, 64], 'batch_size': 8, 'flops': 1265603017216}, {'type': 'perplexity', 'in_batch_dim': [8, 64], 'batch_size': 8, 'flops': 1265603017216}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1898404492032}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1898404492032}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1582003754624}, {'type': 'perplexity', 'in_batch_dim': [8, 128], 'batch_size': 8, 'flops': 2531205966848}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1898404492032}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1898404492032}, {'type': 'perplexity', 'in_batch_dim': [8, 112], 'batch_size': 8, 'flops': 2214805229440}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1582003754624}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1582003754624}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1898404492032}, {'type': 'perplexity', 'in_batch_dim': [8, 64], 'batch_size': 8, 'flops': 1265603017216}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1898404492032}, {'type': 'perplexity', 'in_batch_dim': [8, 112], 'batch_size': 8, 'flops': 2214805229440}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1898404492032}, {'type': 'perplexity', 'in_batch_dim': [8, 64], 'batch_size': 8, 'flops': 1265603017216}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1582003754624}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1898404492032}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1898404492032}, {'type': 'perplexity', 'in_batch_dim': [8, 64], 'batch_size': 8, 'flops': 1265603017216}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1898404492032}, {'type': 'perplexity', 'in_batch_dim': [8, 64], 'batch_size': 8, 'flops': 1265603017216}, {'type': 'perplexity', 'in_batch_dim': [8, 112], 'batch_size': 8, 'flops': 2214805229440}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1582003754624}, {'type': 'perplexity', 'in_batch_dim': [8, 128], 'batch_size': 8, 'flops': 2531205966848}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1582003754624}, {'type': 'perplexity', 'in_batch_dim': [8, 112], 'batch_size': 8, 'flops': 2214805229440}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1898404492032}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1582003754624}, {'type': 'perplexity', 'in_batch_dim': [8, 112], 'batch_size': 8, 'flops': 2214805229440}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1582003754624}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1898404492032}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1898404492032}, {'type': 'perplexity', 'in_batch_dim': [8, 128], 'batch_size': 8, 'flops': 2531205966848}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1898404492032}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1898404492032}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1582003754624}, {'type': 'perplexity', 'in_batch_dim': [8, 112], 'batch_size': 8, 'flops': 2214805229440}, {'type': 'perplexity', 'in_batch_dim': [8, 128], 'batch_size': 8, 'flops': 2531205966848}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1898404492032}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1582003754624}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1582003754624}, {'type': 'perplexity', 'in_batch_dim': [8, 128], 'batch_size': 8, 'flops': 2531205966848}, {'type': 'perplexity', 'in_batch_dim': [8, 112], 'batch_size': 8, 'flops': 2214805229440}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1582003754624}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1582003754624}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1898404492032}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1582003754624}, {'type': 'perplexity', 'in_batch_dim': [8, 64], 'batch_size': 8, 'flops': 1265603017216}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1582003754624}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1582003754624}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1582003754624}, {'type': 'perplexity', 'in_batch_dim': [8, 112], 'batch_size': 8, 'flops': 2214805229440}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1582003754624}, {'type': 'perplexity', 'in_batch_dim': [8, 64], 'batch_size': 8, 'flops': 1265603017216}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1582003754624}, {'type': 'perplexity', 'in_batch_dim': [8, 128], 'batch_size': 8, 'flops': 2531205966848}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1898404492032}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1898404492032}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1582003754624}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1898404492032}, {'type': 'perplexity', 'in_batch_dim': [8, 64], 'batch_size': 8, 'flops': 1265603017216}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1898404492032}, {'type': 'perplexity', 'in_batch_dim': [8, 112], 'batch_size': 8, 'flops': 2214805229440}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1898404492032}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1582003754624}, {'type': 'perplexity', 'in_batch_dim': [8, 112], 'batch_size': 8, 'flops': 2214805229440}, {'type': 'perplexity', 'in_batch_dim': [8, 112], 'batch_size': 8, 'flops': 2214805229440}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1898404492032}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1898404492032}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1898404492032}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1898404492032}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1582003754624}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1582003754624}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1582003754624}, {'type': 'perplexity', 'in_batch_dim': [8, 112], 'batch_size': 8, 'flops': 2214805229440}, {'type': 'perplexity', 'in_batch_dim': [8, 64], 'batch_size': 8, 'flops': 1265603017216}, {'type': 'perplexity', 'in_batch_dim': [8, 144], 'batch_size': 8, 'flops': 2847606704256}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1898404492032}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1898404492032}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1898404492032}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1898404492032}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1898404492032}, {'type': 'perplexity', 'in_batch_dim': [8, 112], 'batch_size': 8, 'flops': 2214805229440}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1582003754624}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1582003754624}, {'type': 'perplexity', 'in_batch_dim': [8, 64], 'batch_size': 8, 'flops': 1265603017216}, {'type': 'perplexity', 'in_batch_dim': [8, 112], 'batch_size': 8, 'flops': 2214805229440}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1582003754624}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1898404492032}, {'type': 'perplexity', 'in_batch_dim': [8, 64], 'batch_size': 8, 'flops': 1265603017216}, {'type': 'perplexity', 'in_batch_dim': [8, 112], 'batch_size': 8, 'flops': 2214805229440}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1898404492032}, {'type': 'perplexity', 'in_batch_dim': [8, 64], 'batch_size': 8, 'flops': 1265603017216}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1582003754624}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1582003754624}, {'type': 'perplexity', 'in_batch_dim': [8, 64], 'batch_size': 8, 'flops': 1265603017216}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1898404492032}, {'type': 'perplexity', 'in_batch_dim': [8, 64], 'batch_size': 8, 'flops': 1265603017216}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1582003754624}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1898404492032}, {'type': 'perplexity', 'in_batch_dim': [8, 112], 'batch_size': 8, 'flops': 2214805229440}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1582003754624}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1898404492032}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1582003754624}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1898404492032}, {'type': 'perplexity', 'in_batch_dim': [8, 112], 'batch_size': 8, 'flops': 2214805229440}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1898404492032}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1898404492032}, {'type': 'perplexity', 'in_batch_dim': [8, 64], 'batch_size': 8, 'flops': 1265603017216}, {'type': 'perplexity', 'in_batch_dim': [8, 64], 'batch_size': 8, 'flops': 1265603017216}, {'type': 'perplexity', 'in_batch_dim': [8, 128], 'batch_size': 8, 'flops': 2531205966848}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1582003754624}, {'type': 'perplexity', 'in_batch_dim': [8, 64], 'batch_size': 8, 'flops': 1265603017216}, {'type': 'perplexity', 'in_batch_dim': [8, 112], 'batch_size': 8, 'flops': 2214805229440}, {'type': 'perplexity', 'in_batch_dim': [8, 112], 'batch_size': 8, 'flops': 2214805229440}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1898404492032}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1582003754624}, {'type': 'perplexity', 'in_batch_dim': [8, 128], 'batch_size': 8, 'flops': 2531205966848}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1898404492032}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1582003754624}, {'type': 'perplexity', 'in_batch_dim': [8, 112], 'batch_size': 8, 'flops': 2214805229440}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1898404492032}, {'type': 'perplexity', 'in_batch_dim': [8, 64], 'batch_size': 8, 'flops': 1265603017216}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1582003754624}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1898404492032}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1898404492032}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1582003754624}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1582003754624}, {'type': 'perplexity', 'in_batch_dim': [8, 64], 'batch_size': 8, 'flops': 1265603017216}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1898404492032}, {'type': 'perplexity', 'in_batch_dim': [8, 112], 'batch_size': 8, 'flops': 2214805229440}, {'type': 'perplexity', 'in_batch_dim': [8, 64], 'batch_size': 8, 'flops': 1265603017216}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1898404492032}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1582003754624}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1582003754624}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1898404492032}, {'type': 'perplexity', 'in_batch_dim': [8, 112], 'batch_size': 8, 'flops': 2214805229440}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1898404492032}, {'type': 'perplexity', 'in_batch_dim': [8, 112], 'batch_size': 8, 'flops': 2214805229440}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1582003754624}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1582003754624}, {'type': 'perplexity', 'in_batch_dim': [8, 64], 'batch_size': 8, 'flops': 1265603017216}, {'type': 'perplexity', 'in_batch_dim': [8, 112], 'batch_size': 8, 'flops': 2214805229440}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1582003754624}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1898404492032}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1582003754624}, {'type': 'perplexity', 'in_batch_dim': [8, 112], 'batch_size': 8, 'flops': 2214805229440}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1898404492032}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1898404492032}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1582003754624}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1898404492032}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1582003754624}, {'type': 'perplexity', 'in_batch_dim': [8, 112], 'batch_size': 8, 'flops': 2214805229440}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1582003754624}, {'type': 'perplexity', 'in_batch_dim': [8, 64], 'batch_size': 8, 'flops': 1265603017216}, {'type': 'perplexity', 'in_batch_dim': [8, 64], 'batch_size': 8, 'flops': 1265603017216}, {'type': 'perplexity', 'in_batch_dim': [8, 48], 'batch_size': 8, 'flops': 949202279808}, {'type': 'perplexity', 'in_batch_dim': [8, 112], 'batch_size': 8, 'flops': 2214805229440}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1582003754624}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1898404492032}, {'type': 'perplexity', 'in_batch_dim': [8, 128], 'batch_size': 8, 'flops': 2531205966848}, {'type': 'perplexity', 'in_batch_dim': [8, 112], 'batch_size': 8, 'flops': 2214805229440}, {'type': 'perplexity', 'in_batch_dim': [8, 128], 'batch_size': 8, 'flops': 2531205966848}, {'type': 'perplexity', 'in_batch_dim': [8, 112], 'batch_size': 8, 'flops': 2214805229440}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1582003754624}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1898404492032}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1582003754624}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1582003754624}, {'type': 'perplexity', 'in_batch_dim': [8, 112], 'batch_size': 8, 'flops': 2214805229440}, {'type': 'perplexity', 'in_batch_dim': [8, 112], 'batch_size': 8, 'flops': 2214805229440}, {'type': 'perplexity', 'in_batch_dim': [8, 64], 'batch_size': 8, 'flops': 1265603017216}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1898404492032}, {'type': 'perplexity', 'in_batch_dim': [8, 112], 'batch_size': 8, 'flops': 2214805229440}, {'type': 'perplexity', 'in_batch_dim': [8, 64], 'batch_size': 8, 'flops': 1265603017216}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1898404492032}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1582003754624}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1898404492032}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1898404492032}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1898404492032}, {'type': 'perplexity', 'in_batch_dim': [8, 48], 'batch_size': 8, 'flops': 949202279808}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1898404492032}, {'type': 'perplexity', 'in_batch_dim': [8, 112], 'batch_size': 8, 'flops': 2214805229440}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1582003754624}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1898404492032}, {'type': 'perplexity', 'in_batch_dim': [8, 64], 'batch_size': 8, 'flops': 1265603017216}, {'type': 'perplexity', 'in_batch_dim': [8, 112], 'batch_size': 8, 'flops': 2214805229440}, {'type': 'perplexity', 'in_batch_dim': [8, 64], 'batch_size': 8, 'flops': 1265603017216}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1582003754624}, {'type': 'perplexity', 'in_batch_dim': [8, 64], 'batch_size': 8, 'flops': 1265603017216}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1582003754624}, {'type': 'perplexity', 'in_batch_dim': [8, 112], 'batch_size': 8, 'flops': 2214805229440}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1582003754624}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1582003754624}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1582003754624}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1582003754624}, {'type': 'perplexity', 'in_batch_dim': [8, 112], 'batch_size': 8, 'flops': 2214805229440}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1582003754624}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1898404492032}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1898404492032}, {'type': 'perplexity', 'in_batch_dim': [8, 112], 'batch_size': 8, 'flops': 2214805229440}, {'type': 'perplexity', 'in_batch_dim': [8, 144], 'batch_size': 8, 'flops': 2847606704256}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1582003754624}, {'type': 'perplexity', 'in_batch_dim': [8, 64], 'batch_size': 8, 'flops': 1265603017216}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1898404492032}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1898404492032}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1582003754624}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1898404492032}, {'type': 'perplexity', 'in_batch_dim': [8, 128], 'batch_size': 8, 'flops': 2531205966848}, {'type': 'perplexity', 'in_batch_dim': [8, 112], 'batch_size': 8, 'flops': 2214805229440}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1898404492032}, {'type': 'perplexity', 'in_batch_dim': [8, 64], 'batch_size': 8, 'flops': 1265603017216}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1898404492032}, {'type': 'perplexity', 'in_batch_dim': [8, 160], 'batch_size': 8, 'flops': 3164007441664}, {'type': 'perplexity', 'in_batch_dim': [8, 64], 'batch_size': 8, 'flops': 1265603017216}, {'type': 'perplexity', 'in_batch_dim': [8, 112], 'batch_size': 8, 'flops': 2214805229440}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1898404492032}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1898404492032}, {'type': 'perplexity', 'in_batch_dim': [8, 144], 'batch_size': 8, 'flops': 2847606704256}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1582003754624}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1898404492032}, {'type': 'perplexity', 'in_batch_dim': [8, 64], 'batch_size': 8, 'flops': 1265603017216}, {'type': 'perplexity', 'in_batch_dim': [8, 64], 'batch_size': 8, 'flops': 1265603017216}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1898404492032}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1898404492032}, {'type': 'perplexity', 'in_batch_dim': [8, 112], 'batch_size': 8, 'flops': 2214805229440}, {'type': 'perplexity', 'in_batch_dim': [8, 112], 'batch_size': 8, 'flops': 2214805229440}, {'type': 'perplexity', 'in_batch_dim': [8, 128], 'batch_size': 8, 'flops': 2531205966848}, {'type': 'perplexity', 'in_batch_dim': [8, 112], 'batch_size': 8, 'flops': 2214805229440}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1582003754624}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1582003754624}, {'type': 'perplexity', 'in_batch_dim': [8, 112], 'batch_size': 8, 'flops': 2214805229440}, {'type': 'perplexity', 'in_batch_dim': [8, 128], 'batch_size': 8, 'flops': 2531205966848}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1582003754624}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1582003754624}, {'type': 'perplexity', 'in_batch_dim': [8, 64], 'batch_size': 8, 'flops': 1265603017216}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1898404492032}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1582003754624}, {'type': 'perplexity', 'in_batch_dim': [8, 112], 'batch_size': 8, 'flops': 2214805229440}, {'type': 'perplexity', 'in_batch_dim': [8, 64], 'batch_size': 8, 'flops': 1265603017216}, {'type': 'perplexity', 'in_batch_dim': [8, 64], 'batch_size': 8, 'flops': 1265603017216}, {'type': 'perplexity', 'in_batch_dim': [8, 128], 'batch_size': 8, 'flops': 2531205966848}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1898404492032}, {'type': 'perplexity', 'in_batch_dim': [8, 112], 'batch_size': 8, 'flops': 2214805229440}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1582003754624}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1582003754624}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1582003754624}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1582003754624}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1582003754624}, {'type': 'perplexity', 'in_batch_dim': [8, 112], 'batch_size': 8, 'flops': 2214805229440}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1898404492032}, {'type': 'perplexity', 'in_batch_dim': [8, 112], 'batch_size': 8, 'flops': 2214805229440}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1898404492032}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1582003754624}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1582003754624}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1898404492032}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1898404492032}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1582003754624}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1582003754624}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1582003754624}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1582003754624}, {'type': 'perplexity', 'in_batch_dim': [8, 48], 'batch_size': 8, 'flops': 949202279808}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1582003754624}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1898404492032}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1582003754624}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1898404492032}, {'type': 'perplexity', 'in_batch_dim': [8, 112], 'batch_size': 8, 'flops': 2214805229440}, {'type': 'perplexity', 'in_batch_dim': [8, 128], 'batch_size': 8, 'flops': 2531205966848}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1898404492032}, {'type': 'perplexity', 'in_batch_dim': [8, 64], 'batch_size': 8, 'flops': 1265603017216}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1898404492032}, {'type': 'perplexity', 'in_batch_dim': [8, 64], 'batch_size': 8, 'flops': 1265603017216}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1582003754624}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1582003754624}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1582003754624}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1582003754624}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1898404492032}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1582003754624}, {'type': 'perplexity', 'in_batch_dim': [8, 112], 'batch_size': 8, 'flops': 2214805229440}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1898404492032}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1582003754624}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1898404492032}, {'type': 'perplexity', 'in_batch_dim': [8, 64], 'batch_size': 8, 'flops': 1265603017216}, {'type': 'perplexity', 'in_batch_dim': [8, 64], 'batch_size': 8, 'flops': 1265603017216}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1898404492032}, {'type': 'perplexity', 'in_batch_dim': [8, 64], 'batch_size': 8, 'flops': 1265603017216}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1582003754624}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1582003754624}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1582003754624}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1582003754624}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1582003754624}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1582003754624}, {'type': 'perplexity', 'in_batch_dim': [8, 64], 'batch_size': 8, 'flops': 1265603017216}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1582003754624}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1582003754624}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1898404492032}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1898404492032}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1898404492032}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1898404492032}, {'type': 'perplexity', 'in_batch_dim': [8, 128], 'batch_size': 8, 'flops': 2531205966848}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1582003754624}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1898404492032}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1898404492032}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1898404492032}, {'type': 'perplexity', 'in_batch_dim': [8, 64], 'batch_size': 8, 'flops': 1265603017216}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1898404492032}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1898404492032}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1582003754624}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1898404492032}, {'type': 'perplexity', 'in_batch_dim': [8, 112], 'batch_size': 8, 'flops': 2214805229440}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1898404492032}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1582003754624}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1898404492032}, {'type': 'perplexity', 'in_batch_dim': [8, 64], 'batch_size': 8, 'flops': 1265603017216}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1898404492032}, {'type': 'perplexity', 'in_batch_dim': [8, 112], 'batch_size': 8, 'flops': 2214805229440}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1898404492032}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1898404492032}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1582003754624}, {'type': 'perplexity', 'in_batch_dim': [8, 64], 'batch_size': 8, 'flops': 1265603017216}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1582003754624}, {'type': 'perplexity', 'in_batch_dim': [8, 128], 'batch_size': 8, 'flops': 2531205966848}, {'type': 'perplexity', 'in_batch_dim': [8, 64], 'batch_size': 8, 'flops': 1265603017216}, {'type': 'perplexity', 'in_batch_dim': [8, 128], 'batch_size': 8, 'flops': 2531205966848}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1898404492032}, {'type': 'perplexity', 'in_batch_dim': [8, 112], 'batch_size': 8, 'flops': 2214805229440}, {'type': 'perplexity', 'in_batch_dim': [8, 112], 'batch_size': 8, 'flops': 2214805229440}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1898404492032}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1898404492032}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1582003754624}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1582003754624}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1582003754624}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1898404492032}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1582003754624}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1582003754624}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1898404492032}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1582003754624}, {'type': 'perplexity', 'in_batch_dim': [8, 64], 'batch_size': 8, 'flops': 1265603017216}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1898404492032}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1898404492032}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1898404492032}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1898404492032}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1582003754624}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1898404492032}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1582003754624}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1582003754624}, {'type': 'perplexity', 'in_batch_dim': [8, 112], 'batch_size': 8, 'flops': 2214805229440}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1582003754624}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1898404492032}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1582003754624}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1582003754624}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1582003754624}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1898404492032}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1582003754624}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1898404492032}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1582003754624}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1582003754624}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1898404492032}, {'type': 'perplexity', 'in_batch_dim': [8, 112], 'batch_size': 8, 'flops': 2214805229440}, {'type': 'perplexity', 'in_batch_dim': [8, 128], 'batch_size': 8, 'flops': 2531205966848}, {'type': 'perplexity', 'in_batch_dim': [8, 112], 'batch_size': 8, 'flops': 2214805229440}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1582003754624}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1898404492032}, {'type': 'perplexity', 'in_batch_dim': [8, 64], 'batch_size': 8, 'flops': 1265603017216}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1898404492032}, {'type': 'perplexity', 'in_batch_dim': [8, 64], 'batch_size': 8, 'flops': 1265603017216}, {'type': 'perplexity', 'in_batch_dim': [8, 144], 'batch_size': 8, 'flops': 2847606704256}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1582003754624}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1898404492032}, {'type': 'perplexity', 'in_batch_dim': [8, 112], 'batch_size': 8, 'flops': 2214805229440}, {'type': 'perplexity', 'in_batch_dim': [8, 144], 'batch_size': 8, 'flops': 2847606704256}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1582003754624}, {'type': 'perplexity', 'in_batch_dim': [8, 112], 'batch_size': 8, 'flops': 2214805229440}, {'type': 'perplexity', 'in_batch_dim': [8, 64], 'batch_size': 8, 'flops': 1265603017216}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1582003754624}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1898404492032}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1898404492032}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1582003754624}, {'type': 'perplexity', 'in_batch_dim': [8, 112], 'batch_size': 8, 'flops': 2214805229440}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1582003754624}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1582003754624}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1582003754624}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1582003754624}, {'type': 'perplexity', 'in_batch_dim': [8, 112], 'batch_size': 8, 'flops': 2214805229440}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1898404492032}, {'type': 'perplexity', 'in_batch_dim': [8, 128], 'batch_size': 8, 'flops': 2531205966848}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1582003754624}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1582003754624}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1898404492032}, {'type': 'perplexity', 'in_batch_dim': [8, 64], 'batch_size': 8, 'flops': 1265603017216}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1898404492032}, {'type': 'perplexity', 'in_batch_dim': [8, 64], 'batch_size': 8, 'flops': 1265603017216}, {'type': 'perplexity', 'in_batch_dim': [8, 112], 'batch_size': 8, 'flops': 2214805229440}, {'type': 'perplexity', 'in_batch_dim': [8, 112], 'batch_size': 8, 'flops': 2214805229440}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1582003754624}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1898404492032}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1582003754624}, {'type': 'perplexity', 'in_batch_dim': [8, 64], 'batch_size': 8, 'flops': 1265603017216}, {'type': 'perplexity', 'in_batch_dim': [8, 64], 'batch_size': 8, 'flops': 1265603017216}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1898404492032}, {'type': 'perplexity', 'in_batch_dim': [8, 64], 'batch_size': 8, 'flops': 1265603017216}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1898404492032}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1898404492032}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1582003754624}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1898404492032}, {'type': 'perplexity', 'in_batch_dim': [8, 144], 'batch_size': 8, 'flops': 2847606704256}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1898404492032}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1582003754624}, {'type': 'perplexity', 'in_batch_dim': [8, 128], 'batch_size': 8, 'flops': 2531205966848}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1898404492032}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1898404492032}, {'type': 'perplexity', 'in_batch_dim': [8, 112], 'batch_size': 8, 'flops': 2214805229440}, {'type': 'perplexity', 'in_batch_dim': [8, 112], 'batch_size': 8, 'flops': 2214805229440}, {'type': 'perplexity', 'in_batch_dim': [8, 64], 'batch_size': 8, 'flops': 1265603017216}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1582003754624}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1898404492032}, {'type': 'perplexity', 'in_batch_dim': [8, 128], 'batch_size': 8, 'flops': 2531205966848}, {'type': 'perplexity', 'in_batch_dim': [8, 144], 'batch_size': 8, 'flops': 2847606704256}, {'type': 'perplexity', 'in_batch_dim': [8, 112], 'batch_size': 8, 'flops': 2214805229440}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1582003754624}, {'type': 'perplexity', 'in_batch_dim': [8, 64], 'batch_size': 8, 'flops': 1265603017216}, {'type': 'perplexity', 'in_batch_dim': [8, 64], 'batch_size': 8, 'flops': 1265603017216}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1898404492032}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1582003754624}, {'type': 'perplexity', 'in_batch_dim': [8, 64], 'batch_size': 8, 'flops': 1265603017216}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1898404492032}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1582003754624}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1898404492032}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1898404492032}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1582003754624}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1582003754624}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1582003754624}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1582003754624}, {'type': 'perplexity', 'in_batch_dim': [3, 48], 'batch_size': 8, 'flops': 949202279808}], 'timestamp': '2025-09-10 02:46:30.467472', 'step': 17242, 'epoch': 3} {'type': 'pplx', 'content': 9743.321256732905, 'timestamp': '2025-09-10 02:46:30.470427', 'step': 17242, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 3322488817984}, 'timestamp': '2025-09-10 02:46:30.500402', 'step': 17242, 'epoch': 3} {'type': 'loss', 'content': 0.07380206137895584, 'timestamp': '2025-09-10 02:46:30.502706', 'step': 17243, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 2847885091968}, 'timestamp': '2025-09-10 02:46:30.535134', 'step': 17243, 'epoch': 3} {'type': 'loss', 'content': 0.12606529891490936, 'timestamp': '2025-09-10 02:46:30.559332', 'step': 17244, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 144], 'flops': 4271696270016}, 'timestamp': '2025-09-10 02:46:30.590079', 'step': 17244, 'epoch': 3} {'type': 'loss', 'content': 0.11020119488239288, 'timestamp': '2025-09-10 02:46:30.592534', 'step': 17245, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 2847885091968}, 'timestamp': '2025-09-10 02:46:30.623226', 'step': 17245, 'epoch': 3} {'type': 'loss', 'content': 0.03290226310491562, 'timestamp': '2025-09-10 02:46:30.625660', 'step': 17246, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 3322488817984}, 'timestamp': '2025-09-10 02:46:30.657232', 'step': 17246, 'epoch': 3} {'type': 'loss', 'content': 0.06451895833015442, 'timestamp': '2025-09-10 02:46:30.659516', 'step': 17247, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 144], 'flops': 4271696270016}, 'timestamp': '2025-09-10 02:46:30.689586', 'step': 17247, 'epoch': 3} {'type': 'loss', 'content': 0.04402361810207367, 'timestamp': '2025-09-10 02:46:30.713749', 'step': 17248, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 3322488817984}, 'timestamp': '2025-09-10 02:46:30.745258', 'step': 17248, 'epoch': 3} {'type': 'loss', 'content': 0.04914621636271477, 'timestamp': '2025-09-10 02:46:30.747513', 'step': 17249, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 2847885091968}, 'timestamp': '2025-09-10 02:46:30.778029', 'step': 17249, 'epoch': 3} {'type': 'loss', 'content': 0.05102851241827011, 'timestamp': '2025-09-10 02:46:30.781815', 'step': 17250, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 2847885091968}, 'timestamp': '2025-09-10 02:46:30.811611', 'step': 17250, 'epoch': 3} {'type': 'loss', 'content': 0.047704242169857025, 'timestamp': '2025-09-10 02:46:30.814072', 'step': 17251, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 128], 'flops': 3797092544000}, 'timestamp': '2025-09-10 02:46:30.844237', 'step': 17251, 'epoch': 3} {'type': 'loss', 'content': 0.13172857463359833, 'timestamp': '2025-09-10 02:46:30.867692', 'step': 17252, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 128], 'flops': 3797092544000}, 'timestamp': '2025-09-10 02:46:30.899289', 'step': 17252, 'epoch': 3} {'type': 'loss', 'content': 0.06603819131851196, 'timestamp': '2025-09-10 02:46:30.901431', 'step': 17253, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 80], 'flops': 2373281365952}, 'timestamp': '2025-09-10 02:46:30.932255', 'step': 17253, 'epoch': 3} {'type': 'loss', 'content': 0.05033859238028526, 'timestamp': '2025-09-10 02:46:30.934524', 'step': 17254, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 3322488817984}, 'timestamp': '2025-09-10 02:46:30.964778', 'step': 17254, 'epoch': 3} {'type': 'loss', 'content': 0.0901239663362503, 'timestamp': '2025-09-10 02:46:30.966979', 'step': 17255, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 2847885091968}, 'timestamp': '2025-09-10 02:46:30.997312', 'step': 17255, 'epoch': 3} {'type': 'loss', 'content': 0.06813520938158035, 'timestamp': '2025-09-10 02:46:31.021281', 'step': 17256, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 128], 'flops': 3797092544000}, 'timestamp': '2025-09-10 02:46:31.062301', 'step': 17256, 'epoch': 3} {'type': 'loss', 'content': 0.03677293658256531, 'timestamp': '2025-09-10 02:46:31.064437', 'step': 17257, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 144], 'flops': 4271696270016}, 'timestamp': '2025-09-10 02:46:31.094873', 'step': 17257, 'epoch': 3} {'type': 'loss', 'content': 0.01565043069422245, 'timestamp': '2025-09-10 02:46:31.097400', 'step': 17258, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 128], 'flops': 3797092544000}, 'timestamp': '2025-09-10 02:46:31.128764', 'step': 17258, 'epoch': 3} {'type': 'loss', 'content': 0.04460427537560463, 'timestamp': '2025-09-10 02:46:31.131152', 'step': 17259, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 3322488817984}, 'timestamp': '2025-09-10 02:46:31.162277', 'step': 17259, 'epoch': 3} {'type': 'loss', 'content': 0.10847368091344833, 'timestamp': '2025-09-10 02:46:31.186142', 'step': 17260, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 128], 'flops': 3797092544000}, 'timestamp': '2025-09-10 02:46:31.218648', 'step': 17260, 'epoch': 3} {'type': 'loss', 'content': 0.047268908470869064, 'timestamp': '2025-09-10 02:46:31.221743', 'step': 17261, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 3322488817984}, 'timestamp': '2025-09-10 02:46:31.253401', 'step': 17261, 'epoch': 3} {'type': 'loss', 'content': 0.09185902774333954, 'timestamp': '2025-09-10 02:46:31.255931', 'step': 17262, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 3322488817984}, 'timestamp': '2025-09-10 02:46:31.287507', 'step': 17262, 'epoch': 3} {'type': 'loss', 'content': 0.08065886050462723, 'timestamp': '2025-09-10 02:46:31.290503', 'step': 17263, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 2847885091968}, 'timestamp': '2025-09-10 02:46:31.323547', 'step': 17263, 'epoch': 3} {'type': 'loss', 'content': 0.056297533214092255, 'timestamp': '2025-09-10 02:46:31.348938', 'step': 17264, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 3322488817984}, 'timestamp': '2025-09-10 02:46:31.390952', 'step': 17264, 'epoch': 3} {'type': 'loss', 'content': 0.08230236917734146, 'timestamp': '2025-09-10 02:46:31.395317', 'step': 17265, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 128], 'flops': 3797092544000}, 'timestamp': '2025-09-10 02:46:31.431903', 'step': 17265, 'epoch': 3} {'type': 'loss', 'content': 0.08411520719528198, 'timestamp': '2025-09-10 02:46:31.436530', 'step': 17266, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 3322488817984}, 'timestamp': '2025-09-10 02:46:31.475923', 'step': 17266, 'epoch': 3} {'type': 'loss', 'content': 0.009068993851542473, 'timestamp': '2025-09-10 02:46:31.480604', 'step': 17267, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 3322488817984}, 'timestamp': '2025-09-10 02:46:31.519045', 'step': 17267, 'epoch': 3} {'type': 'loss', 'content': 0.044557295739650726, 'timestamp': '2025-09-10 02:46:31.546022', 'step': 17268, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 2847885091968}, 'timestamp': '2025-09-10 02:46:31.584066', 'step': 17268, 'epoch': 3} {'type': 'loss', 'content': 0.11746729165315628, 'timestamp': '2025-09-10 02:46:31.590006', 'step': 17269, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 128], 'flops': 3797092544000}, 'timestamp': '2025-09-10 02:46:31.624070', 'step': 17269, 'epoch': 3} {'type': 'loss', 'content': 0.058193471282720566, 'timestamp': '2025-09-10 02:46:31.626572', 'step': 17270, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 2847885091968}, 'timestamp': '2025-09-10 02:46:31.657485', 'step': 17270, 'epoch': 3} {'type': 'loss', 'content': 0.08217912912368774, 'timestamp': '2025-09-10 02:46:31.662962', 'step': 17271, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 2847885091968}, 'timestamp': '2025-09-10 02:46:31.701659', 'step': 17271, 'epoch': 3} {'type': 'loss', 'content': 0.05166371539235115, 'timestamp': '2025-09-10 02:46:31.725398', 'step': 17272, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 2847885091968}, 'timestamp': '2025-09-10 02:46:31.758460', 'step': 17272, 'epoch': 3} {'type': 'loss', 'content': 0.02414720132946968, 'timestamp': '2025-09-10 02:46:31.760623', 'step': 17273, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 80], 'flops': 2373281365952}, 'timestamp': '2025-09-10 02:46:31.792789', 'step': 17273, 'epoch': 3} {'type': 'loss', 'content': 0.01290371548384428, 'timestamp': '2025-09-10 02:46:31.796447', 'step': 17274, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 3322488817984}, 'timestamp': '2025-09-10 02:46:31.827553', 'step': 17274, 'epoch': 3} {'type': 'loss', 'content': 0.03789536654949188, 'timestamp': '2025-09-10 02:46:31.830002', 'step': 17275, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 3322488817984}, 'timestamp': '2025-09-10 02:46:31.861814', 'step': 17275, 'epoch': 3} {'type': 'loss', 'content': 0.02816390059888363, 'timestamp': '2025-09-10 02:46:31.885411', 'step': 17276, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 3322488817984}, 'timestamp': '2025-09-10 02:46:31.915337', 'step': 17276, 'epoch': 3} {'type': 'loss', 'content': 0.07909718155860901, 'timestamp': '2025-09-10 02:46:31.917570', 'step': 17277, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 128], 'flops': 3797092544000}, 'timestamp': '2025-09-10 02:46:31.947840', 'step': 17277, 'epoch': 3} {'type': 'loss', 'content': 0.03621735796332359, 'timestamp': '2025-09-10 02:46:31.949904', 'step': 17278, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 80], 'flops': 2373281365952}, 'timestamp': '2025-09-10 02:46:31.980261', 'step': 17278, 'epoch': 3} {'type': 'loss', 'content': 0.061694152653217316, 'timestamp': '2025-09-10 02:46:31.982640', 'step': 17279, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 3322488817984}, 'timestamp': '2025-09-10 02:46:32.012551', 'step': 17279, 'epoch': 3} {'type': 'loss', 'content': 0.09128817170858383, 'timestamp': '2025-09-10 02:46:32.036035', 'step': 17280, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 2847885091968}, 'timestamp': '2025-09-10 02:46:32.066805', 'step': 17280, 'epoch': 3} {'type': 'loss', 'content': 0.05382043868303299, 'timestamp': '2025-09-10 02:46:32.069088', 'step': 17281, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 2847885091968}, 'timestamp': '2025-09-10 02:46:32.099539', 'step': 17281, 'epoch': 3} {'type': 'loss', 'content': 0.0463922880589962, 'timestamp': '2025-09-10 02:46:32.101826', 'step': 17282, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 128], 'flops': 3797092544000}, 'timestamp': '2025-09-10 02:46:32.131991', 'step': 17282, 'epoch': 3} {'type': 'loss', 'content': 0.10921552777290344, 'timestamp': '2025-09-10 02:46:32.134292', 'step': 17283, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 176], 'flops': 5220903722048}, 'timestamp': '2025-09-10 02:46:32.164513', 'step': 17283, 'epoch': 3} {'type': 'loss', 'content': 0.13454213738441467, 'timestamp': '2025-09-10 02:46:32.189509', 'step': 17284, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 3322488817984}, 'timestamp': '2025-09-10 02:46:32.220298', 'step': 17284, 'epoch': 3} {'type': 'loss', 'content': 0.08715704828500748, 'timestamp': '2025-09-10 02:46:32.222468', 'step': 17285, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 128], 'flops': 3797092544000}, 'timestamp': '2025-09-10 02:46:32.252467', 'step': 17285, 'epoch': 3} {'type': 'loss', 'content': 0.07764250040054321, 'timestamp': '2025-09-10 02:46:32.254860', 'step': 17286, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 128], 'flops': 3797092544000}, 'timestamp': '2025-09-10 02:46:32.285475', 'step': 17286, 'epoch': 3} {'type': 'loss', 'content': 0.10699822008609772, 'timestamp': '2025-09-10 02:46:32.288254', 'step': 17287, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 2847885091968}, 'timestamp': '2025-09-10 02:46:32.318386', 'step': 17287, 'epoch': 3} {'type': 'loss', 'content': 0.011873491108417511, 'timestamp': '2025-09-10 02:46:32.342111', 'step': 17288, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 3322488817984}, 'timestamp': '2025-09-10 02:46:32.372401', 'step': 17288, 'epoch': 3} {'type': 'loss', 'content': 0.0852227583527565, 'timestamp': '2025-09-10 02:46:32.375166', 'step': 17289, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 128], 'flops': 3797092544000}, 'timestamp': '2025-09-10 02:46:32.406581', 'step': 17289, 'epoch': 3} {'type': 'loss', 'content': 0.0380239337682724, 'timestamp': '2025-09-10 02:46:32.409188', 'step': 17290, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 3322488817984}, 'timestamp': '2025-09-10 02:46:32.440561', 'step': 17290, 'epoch': 3} {'type': 'loss', 'content': 0.11329299211502075, 'timestamp': '2025-09-10 02:46:32.443266', 'step': 17291, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 3322488817984}, 'timestamp': '2025-09-10 02:46:32.474730', 'step': 17291, 'epoch': 3} {'type': 'loss', 'content': 0.032191429287195206, 'timestamp': '2025-09-10 02:46:32.498930', 'step': 17292, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 2847885091968}, 'timestamp': '2025-09-10 02:46:32.534910', 'step': 17292, 'epoch': 3} {'type': 'loss', 'content': 0.08248275518417358, 'timestamp': '2025-09-10 02:46:32.538028', 'step': 17293, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 2847885091968}, 'timestamp': '2025-09-10 02:46:32.569632', 'step': 17293, 'epoch': 3} {'type': 'loss', 'content': 0.06290469318628311, 'timestamp': '2025-09-10 02:46:32.572247', 'step': 17294, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 3322488817984}, 'timestamp': '2025-09-10 02:46:32.602978', 'step': 17294, 'epoch': 3} {'type': 'loss', 'content': 0.036237683147192, 'timestamp': '2025-09-10 02:46:32.607572', 'step': 17295, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 128], 'flops': 3797092544000}, 'timestamp': '2025-09-10 02:46:32.641178', 'step': 17295, 'epoch': 3} {'type': 'loss', 'content': 0.10355756431818008, 'timestamp': '2025-09-10 02:46:32.664969', 'step': 17296, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 2847885091968}, 'timestamp': '2025-09-10 02:46:32.695130', 'step': 17296, 'epoch': 3} {'type': 'loss', 'content': 0.01932944729924202, 'timestamp': '2025-09-10 02:46:32.697606', 'step': 17297, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 2847885091968}, 'timestamp': '2025-09-10 02:46:32.728504', 'step': 17297, 'epoch': 3} {'type': 'loss', 'content': 0.05281240493059158, 'timestamp': '2025-09-10 02:46:32.730837', 'step': 17298, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 3322488817984}, 'timestamp': '2025-09-10 02:46:32.761531', 'step': 17298, 'epoch': 3} {'type': 'loss', 'content': 0.058101993054151535, 'timestamp': '2025-09-10 02:46:32.763925', 'step': 17299, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 2847885091968}, 'timestamp': '2025-09-10 02:46:32.794513', 'step': 17299, 'epoch': 3} {'type': 'loss', 'content': 0.042989738285541534, 'timestamp': '2025-09-10 02:46:32.818030', 'step': 17300, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 3322488817984}, 'timestamp': '2025-09-10 02:46:32.849072', 'step': 17300, 'epoch': 3} {'type': 'loss', 'content': 0.06735599786043167, 'timestamp': '2025-09-10 02:46:32.851357', 'step': 17301, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 80], 'flops': 2373281365952}, 'timestamp': '2025-09-10 02:46:32.881628', 'step': 17301, 'epoch': 3} {'type': 'loss', 'content': 0.06506625562906265, 'timestamp': '2025-09-10 02:46:32.884099', 'step': 17302, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 3322488817984}, 'timestamp': '2025-09-10 02:46:32.914280', 'step': 17302, 'epoch': 3} {'type': 'loss', 'content': 0.07742056250572205, 'timestamp': '2025-09-10 02:46:32.916736', 'step': 17303, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 80], 'flops': 2373281365952}, 'timestamp': '2025-09-10 02:46:32.947666', 'step': 17303, 'epoch': 3} {'type': 'loss', 'content': 0.0817350298166275, 'timestamp': '2025-09-10 02:46:32.971279', 'step': 17304, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 128], 'flops': 3797092544000}, 'timestamp': '2025-09-10 02:46:33.000649', 'step': 17304, 'epoch': 3} {'type': 'loss', 'content': 0.04099929705262184, 'timestamp': '2025-09-10 02:46:33.003131', 'step': 17305, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 144], 'flops': 4271696270016}, 'timestamp': '2025-09-10 02:46:33.033207', 'step': 17305, 'epoch': 3} {'type': 'loss', 'content': 0.045006535947322845, 'timestamp': '2025-09-10 02:46:33.038333', 'step': 17306, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 2847885091968}, 'timestamp': '2025-09-10 02:46:33.070156', 'step': 17306, 'epoch': 3} {'type': 'loss', 'content': 0.09197023510932922, 'timestamp': '2025-09-10 02:46:33.072348', 'step': 17307, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 128], 'flops': 3797092544000}, 'timestamp': '2025-09-10 02:46:33.102761', 'step': 17307, 'epoch': 3} {'type': 'loss', 'content': 0.019181376323103905, 'timestamp': '2025-09-10 02:46:33.126287', 'step': 17308, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 3322488817984}, 'timestamp': '2025-09-10 02:46:33.157672', 'step': 17308, 'epoch': 3} {'type': 'loss', 'content': 0.07118116319179535, 'timestamp': '2025-09-10 02:46:33.160199', 'step': 17309, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 3322488817984}, 'timestamp': '2025-09-10 02:46:33.190281', 'step': 17309, 'epoch': 3} {'type': 'loss', 'content': 0.09812445938587189, 'timestamp': '2025-09-10 02:46:33.192903', 'step': 17310, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 80], 'flops': 2373281365952}, 'timestamp': '2025-09-10 02:46:33.222695', 'step': 17310, 'epoch': 3} {'type': 'loss', 'content': 0.1117606833577156, 'timestamp': '2025-09-10 02:46:33.225102', 'step': 17311, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 2847885091968}, 'timestamp': '2025-09-10 02:46:33.255128', 'step': 17311, 'epoch': 3} {'type': 'loss', 'content': 0.09159349650144577, 'timestamp': '2025-09-10 02:46:33.278577', 'step': 17312, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 2847885091968}, 'timestamp': '2025-09-10 02:46:33.308889', 'step': 17312, 'epoch': 3} {'type': 'loss', 'content': 0.07170373201370239, 'timestamp': '2025-09-10 02:46:33.311584', 'step': 17313, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 3322488817984}, 'timestamp': '2025-09-10 02:46:33.346363', 'step': 17313, 'epoch': 3} {'type': 'loss', 'content': 0.03313129395246506, 'timestamp': '2025-09-10 02:46:33.350545', 'step': 17314, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 2847885091968}, 'timestamp': '2025-09-10 02:46:33.387949', 'step': 17314, 'epoch': 3} {'type': 'loss', 'content': 0.08822348713874817, 'timestamp': '2025-09-10 02:46:33.390775', 'step': 17315, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 2847885091968}, 'timestamp': '2025-09-10 02:46:33.428895', 'step': 17315, 'epoch': 3} {'type': 'loss', 'content': 0.08238518238067627, 'timestamp': '2025-09-10 02:46:33.454579', 'step': 17316, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 3322488817984}, 'timestamp': '2025-09-10 02:46:33.490377', 'step': 17316, 'epoch': 3} {'type': 'loss', 'content': 0.0237916000187397, 'timestamp': '2025-09-10 02:46:33.493801', 'step': 17317, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 128], 'flops': 3797092544000}, 'timestamp': '2025-09-10 02:46:33.532844', 'step': 17317, 'epoch': 3} {'type': 'loss', 'content': 0.022835416719317436, 'timestamp': '2025-09-10 02:46:33.536987', 'step': 17318, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 2847885091968}, 'timestamp': '2025-09-10 02:46:33.569686', 'step': 17318, 'epoch': 3} {'type': 'loss', 'content': 0.12129389494657516, 'timestamp': '2025-09-10 02:46:33.574154', 'step': 17319, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 80], 'flops': 2373281365952}, 'timestamp': '2025-09-10 02:46:33.605106', 'step': 17319, 'epoch': 3} {'type': 'loss', 'content': 0.04468606784939766, 'timestamp': '2025-09-10 02:46:33.628664', 'step': 17320, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 3322488817984}, 'timestamp': '2025-09-10 02:46:33.659416', 'step': 17320, 'epoch': 3} {'type': 'loss', 'content': 0.10213169455528259, 'timestamp': '2025-09-10 02:46:33.662417', 'step': 17321, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 2847885091968}, 'timestamp': '2025-09-10 02:46:33.696320', 'step': 17321, 'epoch': 3} {'type': 'loss', 'content': 0.06738495081663132, 'timestamp': '2025-09-10 02:46:33.698484', 'step': 17322, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 3322488817984}, 'timestamp': '2025-09-10 02:46:33.730206', 'step': 17322, 'epoch': 3} {'type': 'loss', 'content': 0.04265078157186508, 'timestamp': '2025-09-10 02:46:33.732718', 'step': 17323, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 3322488817984}, 'timestamp': '2025-09-10 02:46:33.764459', 'step': 17323, 'epoch': 3} {'type': 'loss', 'content': 0.042829278856515884, 'timestamp': '2025-09-10 02:46:33.788149', 'step': 17324, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 2847885091968}, 'timestamp': '2025-09-10 02:46:33.821068', 'step': 17324, 'epoch': 3} {'type': 'loss', 'content': 0.04680800065398216, 'timestamp': '2025-09-10 02:46:33.823478', 'step': 17325, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 3322488817984}, 'timestamp': '2025-09-10 02:46:33.854002', 'step': 17325, 'epoch': 3} {'type': 'loss', 'content': 0.026493912562727928, 'timestamp': '2025-09-10 02:46:33.856542', 'step': 17326, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 3322488817984}, 'timestamp': '2025-09-10 02:46:33.887860', 'step': 17326, 'epoch': 3} {'type': 'loss', 'content': 0.06700572371482849, 'timestamp': '2025-09-10 02:46:33.890455', 'step': 17327, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 144], 'flops': 4271696270016}, 'timestamp': '2025-09-10 02:46:33.923192', 'step': 17327, 'epoch': 3} {'type': 'loss', 'content': 0.017794881016016006, 'timestamp': '2025-09-10 02:46:33.947839', 'step': 17328, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 128], 'flops': 3797092544000}, 'timestamp': '2025-09-10 02:46:33.978996', 'step': 17328, 'epoch': 3} {'type': 'loss', 'content': 0.15528607368469238, 'timestamp': '2025-09-10 02:46:33.981163', 'step': 17329, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 144], 'flops': 4271696270016}, 'timestamp': '2025-09-10 02:46:34.011481', 'step': 17329, 'epoch': 3} {'type': 'loss', 'content': 0.10921957343816757, 'timestamp': '2025-09-10 02:46:34.014235', 'step': 17330, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 2847885091968}, 'timestamp': '2025-09-10 02:46:34.044490', 'step': 17330, 'epoch': 3} {'type': 'loss', 'content': 0.10130919516086578, 'timestamp': '2025-09-10 02:46:34.046843', 'step': 17331, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 128], 'flops': 3797092544000}, 'timestamp': '2025-09-10 02:46:34.078064', 'step': 17331, 'epoch': 3} {'type': 'loss', 'content': 0.12476816773414612, 'timestamp': '2025-09-10 02:46:34.101899', 'step': 17332, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 3322488817984}, 'timestamp': '2025-09-10 02:46:34.135532', 'step': 17332, 'epoch': 3} {'type': 'loss', 'content': 0.028866345062851906, 'timestamp': '2025-09-10 02:46:34.138041', 'step': 17333, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 128], 'flops': 3797092544000}, 'timestamp': '2025-09-10 02:46:34.168997', 'step': 17333, 'epoch': 3} {'type': 'loss', 'content': 0.10719115287065506, 'timestamp': '2025-09-10 02:46:34.171059', 'step': 17334, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 2847885091968}, 'timestamp': '2025-09-10 02:46:34.200917', 'step': 17334, 'epoch': 3} {'type': 'loss', 'content': 0.047082722187042236, 'timestamp': '2025-09-10 02:46:34.203311', 'step': 17335, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 80], 'flops': 2373281365952}, 'timestamp': '2025-09-10 02:46:34.234382', 'step': 17335, 'epoch': 3} {'type': 'loss', 'content': 0.11077993363142014, 'timestamp': '2025-09-10 02:46:34.257866', 'step': 17336, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 80], 'flops': 2373281365952}, 'timestamp': '2025-09-10 02:46:34.288458', 'step': 17336, 'epoch': 3} {'type': 'loss', 'content': 0.08402054756879807, 'timestamp': '2025-09-10 02:46:34.290784', 'step': 17337, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 2847885091968}, 'timestamp': '2025-09-10 02:46:34.321261', 'step': 17337, 'epoch': 3} {'type': 'loss', 'content': 0.08011532574892044, 'timestamp': '2025-09-10 02:46:34.323774', 'step': 17338, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 3322488817984}, 'timestamp': '2025-09-10 02:46:34.355671', 'step': 17338, 'epoch': 3} {'type': 'loss', 'content': 0.0721333846449852, 'timestamp': '2025-09-10 02:46:34.358516', 'step': 17339, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 3322488817984}, 'timestamp': '2025-09-10 02:46:34.389228', 'step': 17339, 'epoch': 3} {'type': 'loss', 'content': 0.08163421601057053, 'timestamp': '2025-09-10 02:46:34.412831', 'step': 17340, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 144], 'flops': 4271696270016}, 'timestamp': '2025-09-10 02:46:34.443517', 'step': 17340, 'epoch': 3} {'type': 'loss', 'content': 0.054565347731113434, 'timestamp': '2025-09-10 02:46:34.445951', 'step': 17341, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 2847885091968}, 'timestamp': '2025-09-10 02:46:34.476236', 'step': 17341, 'epoch': 3} {'type': 'loss', 'content': 0.07065650820732117, 'timestamp': '2025-09-10 02:46:34.478708', 'step': 17342, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 144], 'flops': 4271696270016}, 'timestamp': '2025-09-10 02:46:34.509283', 'step': 17342, 'epoch': 3} {'type': 'loss', 'content': 0.10224657505750656, 'timestamp': '2025-09-10 02:46:34.511576', 'step': 17343, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 2847885091968}, 'timestamp': '2025-09-10 02:46:34.541716', 'step': 17343, 'epoch': 3} {'type': 'loss', 'content': 0.06732282042503357, 'timestamp': '2025-09-10 02:46:34.565604', 'step': 17344, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 3322488817984}, 'timestamp': '2025-09-10 02:46:34.596373', 'step': 17344, 'epoch': 3} {'type': 'loss', 'content': 0.10925023257732391, 'timestamp': '2025-09-10 02:46:34.598706', 'step': 17345, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 3322488817984}, 'timestamp': '2025-09-10 02:46:34.629144', 'step': 17345, 'epoch': 3} {'type': 'loss', 'content': 0.06297248601913452, 'timestamp': '2025-09-10 02:46:34.632545', 'step': 17346, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 3322488817984}, 'timestamp': '2025-09-10 02:46:34.662918', 'step': 17346, 'epoch': 3} {'type': 'loss', 'content': 0.04797281697392464, 'timestamp': '2025-09-10 02:46:34.665520', 'step': 17347, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 128], 'flops': 3797092544000}, 'timestamp': '2025-09-10 02:46:34.696075', 'step': 17347, 'epoch': 3} {'type': 'loss', 'content': 0.0779467523097992, 'timestamp': '2025-09-10 02:46:34.719733', 'step': 17348, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 3322488817984}, 'timestamp': '2025-09-10 02:46:34.750903', 'step': 17348, 'epoch': 3} {'type': 'loss', 'content': 0.05381380766630173, 'timestamp': '2025-09-10 02:46:34.753230', 'step': 17349, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 80], 'flops': 2373281365952}, 'timestamp': '2025-09-10 02:46:34.783231', 'step': 17349, 'epoch': 3} {'type': 'loss', 'content': 0.042779020965099335, 'timestamp': '2025-09-10 02:46:34.786061', 'step': 17350, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 3322488817984}, 'timestamp': '2025-09-10 02:46:34.817060', 'step': 17350, 'epoch': 3} {'type': 'loss', 'content': 0.1301228106021881, 'timestamp': '2025-09-10 02:46:34.819134', 'step': 17351, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 3322488817984}, 'timestamp': '2025-09-10 02:46:34.849408', 'step': 17351, 'epoch': 3} {'type': 'loss', 'content': 0.054857734590768814, 'timestamp': '2025-09-10 02:46:34.872971', 'step': 17352, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 3322488817984}, 'timestamp': '2025-09-10 02:46:34.903002', 'step': 17352, 'epoch': 3} {'type': 'loss', 'content': 0.04904086887836456, 'timestamp': '2025-09-10 02:46:34.905603', 'step': 17353, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 128], 'flops': 3797092544000}, 'timestamp': '2025-09-10 02:46:34.936460', 'step': 17353, 'epoch': 3} {'type': 'loss', 'content': 0.07743817567825317, 'timestamp': '2025-09-10 02:46:34.938568', 'step': 17354, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 3322488817984}, 'timestamp': '2025-09-10 02:46:34.969456', 'step': 17354, 'epoch': 3} {'type': 'loss', 'content': 0.14085987210273743, 'timestamp': '2025-09-10 02:46:34.972005', 'step': 17355, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 3322488817984}, 'timestamp': '2025-09-10 02:46:35.001678', 'step': 17355, 'epoch': 3} {'type': 'loss', 'content': 0.07426612079143524, 'timestamp': '2025-09-10 02:46:35.025312', 'step': 17356, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 3322488817984}, 'timestamp': '2025-09-10 02:46:35.056131', 'step': 17356, 'epoch': 3} {'type': 'loss', 'content': 0.02687353827059269, 'timestamp': '2025-09-10 02:46:35.058237', 'step': 17357, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 3322488817984}, 'timestamp': '2025-09-10 02:46:35.088279', 'step': 17357, 'epoch': 3} {'type': 'loss', 'content': 0.07911917567253113, 'timestamp': '2025-09-10 02:46:35.090690', 'step': 17358, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 2847885091968}, 'timestamp': '2025-09-10 02:46:35.121764', 'step': 17358, 'epoch': 3} {'type': 'loss', 'content': 0.07454578578472137, 'timestamp': '2025-09-10 02:46:35.123927', 'step': 17359, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 2847885091968}, 'timestamp': '2025-09-10 02:46:35.155781', 'step': 17359, 'epoch': 3} {'type': 'loss', 'content': 0.12404826283454895, 'timestamp': '2025-09-10 02:46:35.179038', 'step': 17360, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 3322488817984}, 'timestamp': '2025-09-10 02:46:35.209739', 'step': 17360, 'epoch': 3} {'type': 'loss', 'content': 0.07017532736063004, 'timestamp': '2025-09-10 02:46:35.212494', 'step': 17361, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 3322488817984}, 'timestamp': '2025-09-10 02:46:35.244551', 'step': 17361, 'epoch': 3} {'type': 'loss', 'content': 0.1286509931087494, 'timestamp': '2025-09-10 02:46:35.246884', 'step': 17362, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 80], 'flops': 2373281365952}, 'timestamp': '2025-09-10 02:46:35.277460', 'step': 17362, 'epoch': 3} {'type': 'loss', 'content': 0.04913783073425293, 'timestamp': '2025-09-10 02:46:35.279778', 'step': 17363, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 2847885091968}, 'timestamp': '2025-09-10 02:46:35.310504', 'step': 17363, 'epoch': 3} {'type': 'loss', 'content': 0.11160603165626526, 'timestamp': '2025-09-10 02:46:35.333863', 'step': 17364, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 80], 'flops': 2373281365952}, 'timestamp': '2025-09-10 02:46:35.367637', 'step': 17364, 'epoch': 3} {'type': 'loss', 'content': 0.0023442956153303385, 'timestamp': '2025-09-10 02:46:35.370006', 'step': 17365, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 2847885091968}, 'timestamp': '2025-09-10 02:46:35.400398', 'step': 17365, 'epoch': 3} {'type': 'loss', 'content': 0.0967244803905487, 'timestamp': '2025-09-10 02:46:35.402700', 'step': 17366, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 128], 'flops': 3797092544000}, 'timestamp': '2025-09-10 02:46:35.435364', 'step': 17366, 'epoch': 3} {'type': 'loss', 'content': 0.040404241532087326, 'timestamp': '2025-09-10 02:46:35.438501', 'step': 17367, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 3322488817984}, 'timestamp': '2025-09-10 02:46:35.472453', 'step': 17367, 'epoch': 3} {'type': 'loss', 'content': 0.07514046877622604, 'timestamp': '2025-09-10 02:46:35.495791', 'step': 17368, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 2847885091968}, 'timestamp': '2025-09-10 02:46:35.525782', 'step': 17368, 'epoch': 3} {'type': 'loss', 'content': 0.0770254135131836, 'timestamp': '2025-09-10 02:46:35.528187', 'step': 17369, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 128], 'flops': 3797092544000}, 'timestamp': '2025-09-10 02:46:35.562355', 'step': 17369, 'epoch': 3} {'type': 'loss', 'content': 0.06993532180786133, 'timestamp': '2025-09-10 02:46:35.564882', 'step': 17370, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 2847885091968}, 'timestamp': '2025-09-10 02:46:35.597023', 'step': 17370, 'epoch': 3} {'type': 'loss', 'content': 0.041616570204496384, 'timestamp': '2025-09-10 02:46:35.600928', 'step': 17371, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 3322488817984}, 'timestamp': '2025-09-10 02:46:35.634126', 'step': 17371, 'epoch': 3} {'type': 'loss', 'content': 0.07124827802181244, 'timestamp': '2025-09-10 02:46:35.657563', 'step': 17372, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 3322488817984}, 'timestamp': '2025-09-10 02:46:35.689119', 'step': 17372, 'epoch': 3} {'type': 'loss', 'content': 0.13236404955387115, 'timestamp': '2025-09-10 02:46:35.691538', 'step': 17373, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 2847885091968}, 'timestamp': '2025-09-10 02:46:35.721736', 'step': 17373, 'epoch': 3} {'type': 'loss', 'content': 0.02763337455689907, 'timestamp': '2025-09-10 02:46:35.725536', 'step': 17374, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 3322488817984}, 'timestamp': '2025-09-10 02:46:35.758372', 'step': 17374, 'epoch': 3} {'type': 'loss', 'content': 0.0747680813074112, 'timestamp': '2025-09-10 02:46:35.760672', 'step': 17375, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 144], 'flops': 4271696270016}, 'timestamp': '2025-09-10 02:46:35.792861', 'step': 17375, 'epoch': 3} {'type': 'loss', 'content': 0.06158652901649475, 'timestamp': '2025-09-10 02:46:35.816326', 'step': 17376, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 3322488817984}, 'timestamp': '2025-09-10 02:46:35.847572', 'step': 17376, 'epoch': 3} {'type': 'loss', 'content': 0.0511479377746582, 'timestamp': '2025-09-10 02:46:35.853371', 'step': 17377, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 3322488817984}, 'timestamp': '2025-09-10 02:46:35.884047', 'step': 17377, 'epoch': 3} {'type': 'loss', 'content': 0.03389405086636543, 'timestamp': '2025-09-10 02:46:35.886563', 'step': 17378, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 3322488817984}, 'timestamp': '2025-09-10 02:46:35.917355', 'step': 17378, 'epoch': 3} {'type': 'loss', 'content': 0.04347670450806618, 'timestamp': '2025-09-10 02:46:35.919630', 'step': 17379, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 3322488817984}, 'timestamp': '2025-09-10 02:46:35.949862', 'step': 17379, 'epoch': 3} {'type': 'loss', 'content': 0.059333380311727524, 'timestamp': '2025-09-10 02:46:35.973570', 'step': 17380, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 3322488817984}, 'timestamp': '2025-09-10 02:46:36.008937', 'step': 17380, 'epoch': 3} {'type': 'loss', 'content': 0.08714324235916138, 'timestamp': '2025-09-10 02:46:36.011808', 'step': 17381, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 2847885091968}, 'timestamp': '2025-09-10 02:46:36.042788', 'step': 17381, 'epoch': 3} {'type': 'loss', 'content': 0.06567565351724625, 'timestamp': '2025-09-10 02:46:36.053082', 'step': 17382, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 2847885091968}, 'timestamp': '2025-09-10 02:46:36.087647', 'step': 17382, 'epoch': 3} {'type': 'loss', 'content': 0.08057525008916855, 'timestamp': '2025-09-10 02:46:36.090133', 'step': 17383, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 2847885091968}, 'timestamp': '2025-09-10 02:46:36.122092', 'step': 17383, 'epoch': 3} {'type': 'loss', 'content': 0.0477866567671299, 'timestamp': '2025-09-10 02:46:36.146028', 'step': 17384, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 80], 'flops': 2373281365952}, 'timestamp': '2025-09-10 02:46:36.178558', 'step': 17384, 'epoch': 3} {'type': 'loss', 'content': 0.10561282187700272, 'timestamp': '2025-09-10 02:46:36.184737', 'step': 17385, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 2847885091968}, 'timestamp': '2025-09-10 02:46:36.218701', 'step': 17385, 'epoch': 3} {'type': 'loss', 'content': 0.06422457844018936, 'timestamp': '2025-09-10 02:46:36.221617', 'step': 17386, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 3322488817984}, 'timestamp': '2025-09-10 02:46:36.258550', 'step': 17386, 'epoch': 3} {'type': 'loss', 'content': 0.036838095635175705, 'timestamp': '2025-09-10 02:46:36.261611', 'step': 17387, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 144], 'flops': 4271696270016}, 'timestamp': '2025-09-10 02:46:36.296203', 'step': 17387, 'epoch': 3} {'type': 'loss', 'content': 0.10857338458299637, 'timestamp': '2025-09-10 02:46:36.319640', 'step': 17388, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 128], 'flops': 3797092544000}, 'timestamp': '2025-09-10 02:46:36.352297', 'step': 17388, 'epoch': 3} {'type': 'loss', 'content': 0.033920906484127045, 'timestamp': '2025-09-10 02:46:36.355460', 'step': 17389, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 2847885091968}, 'timestamp': '2025-09-10 02:46:36.386659', 'step': 17389, 'epoch': 3} {'type': 'loss', 'content': 0.2066275179386139, 'timestamp': '2025-09-10 02:46:36.389041', 'step': 17390, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 3322488817984}, 'timestamp': '2025-09-10 02:46:36.423357', 'step': 17390, 'epoch': 3} {'type': 'loss', 'content': 0.11090582609176636, 'timestamp': '2025-09-10 02:46:36.425711', 'step': 17391, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 2847885091968}, 'timestamp': '2025-09-10 02:46:36.456475', 'step': 17391, 'epoch': 3} {'type': 'loss', 'content': 0.10170108079910278, 'timestamp': '2025-09-10 02:46:36.479984', 'step': 17392, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 3322488817984}, 'timestamp': '2025-09-10 02:46:36.517257', 'step': 17392, 'epoch': 3} {'type': 'loss', 'content': 0.0275790523737669, 'timestamp': '2025-09-10 02:46:36.521359', 'step': 17393, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 3322488817984}, 'timestamp': '2025-09-10 02:46:36.553405', 'step': 17393, 'epoch': 3} {'type': 'loss', 'content': 0.11179064214229584, 'timestamp': '2025-09-10 02:46:36.557388', 'step': 17394, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 80], 'flops': 2373281365952}, 'timestamp': '2025-09-10 02:46:36.591827', 'step': 17394, 'epoch': 3} {'type': 'loss', 'content': 0.030286362394690514, 'timestamp': '2025-09-10 02:46:36.595285', 'step': 17395, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 128], 'flops': 3797092544000}, 'timestamp': '2025-09-10 02:46:36.628547', 'step': 17395, 'epoch': 3} {'type': 'loss', 'content': 0.0884958952665329, 'timestamp': '2025-09-10 02:46:36.652639', 'step': 17396, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 128], 'flops': 3797092544000}, 'timestamp': '2025-09-10 02:46:36.685232', 'step': 17396, 'epoch': 3} {'type': 'loss', 'content': 0.0762595608830452, 'timestamp': '2025-09-10 02:46:36.687457', 'step': 17397, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 128], 'flops': 3797092544000}, 'timestamp': '2025-09-10 02:46:36.717535', 'step': 17397, 'epoch': 3} {'type': 'loss', 'content': 0.016919009387493134, 'timestamp': '2025-09-10 02:46:36.720101', 'step': 17398, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 3322488817984}, 'timestamp': '2025-09-10 02:46:36.751396', 'step': 17398, 'epoch': 3} {'type': 'loss', 'content': 0.03543618693947792, 'timestamp': '2025-09-10 02:46:36.753665', 'step': 17399, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 2847885091968}, 'timestamp': '2025-09-10 02:46:36.788029', 'step': 17399, 'epoch': 3} {'type': 'loss', 'content': 0.031182345002889633, 'timestamp': '2025-09-10 02:46:36.812793', 'step': 17400, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 144], 'flops': 4271696270016}, 'timestamp': '2025-09-10 02:46:36.844156', 'step': 17400, 'epoch': 3} {'type': 'loss', 'content': 0.10111315548419952, 'timestamp': '2025-09-10 02:46:36.846456', 'step': 17401, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 80], 'flops': 2373281365952}, 'timestamp': '2025-09-10 02:46:36.879027', 'step': 17401, 'epoch': 3} {'type': 'loss', 'content': 0.0491207130253315, 'timestamp': '2025-09-10 02:46:36.881480', 'step': 17402, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 2847885091968}, 'timestamp': '2025-09-10 02:46:36.913074', 'step': 17402, 'epoch': 3} {'type': 'loss', 'content': 0.13089194893836975, 'timestamp': '2025-09-10 02:46:36.916199', 'step': 17403, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 3322488817984}, 'timestamp': '2025-09-10 02:46:36.950127', 'step': 17403, 'epoch': 3} {'type': 'loss', 'content': 0.08979165554046631, 'timestamp': '2025-09-10 02:46:36.974151', 'step': 17404, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 128], 'flops': 3797092544000}, 'timestamp': '2025-09-10 02:46:37.007013', 'step': 17404, 'epoch': 3} {'type': 'loss', 'content': 0.07737863063812256, 'timestamp': '2025-09-10 02:46:37.009260', 'step': 17405, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 2847885091968}, 'timestamp': '2025-09-10 02:46:37.042427', 'step': 17405, 'epoch': 3} {'type': 'loss', 'content': 0.10206715762615204, 'timestamp': '2025-09-10 02:46:37.052506', 'step': 17406, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 2847885091968}, 'timestamp': '2025-09-10 02:46:37.095571', 'step': 17406, 'epoch': 3} {'type': 'loss', 'content': 0.05954177677631378, 'timestamp': '2025-09-10 02:46:37.098237', 'step': 17407, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 144], 'flops': 4271696270016}, 'timestamp': '2025-09-10 02:46:37.130313', 'step': 17407, 'epoch': 3} {'type': 'loss', 'content': 0.04615700617432594, 'timestamp': '2025-09-10 02:46:37.153798', 'step': 17408, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 3322488817984}, 'timestamp': '2025-09-10 02:46:37.185674', 'step': 17408, 'epoch': 3} {'type': 'loss', 'content': 0.034497249871492386, 'timestamp': '2025-09-10 02:46:37.188565', 'step': 17409, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 128], 'flops': 3797092544000}, 'timestamp': '2025-09-10 02:46:37.219328', 'step': 17409, 'epoch': 3} {'type': 'loss', 'content': 0.0367116741836071, 'timestamp': '2025-09-10 02:46:37.224536', 'step': 17410, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 128], 'flops': 3797092544000}, 'timestamp': '2025-09-10 02:46:37.258891', 'step': 17410, 'epoch': 3} {'type': 'loss', 'content': 0.10646557062864304, 'timestamp': '2025-09-10 02:46:37.261115', 'step': 17411, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 2847885091968}, 'timestamp': '2025-09-10 02:46:37.293713', 'step': 17411, 'epoch': 3} {'type': 'loss', 'content': 0.08897402137517929, 'timestamp': '2025-09-10 02:46:37.317079', 'step': 17412, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 128], 'flops': 3797092544000}, 'timestamp': '2025-09-10 02:46:37.348274', 'step': 17412, 'epoch': 3} {'type': 'loss', 'content': 0.019664766266942024, 'timestamp': '2025-09-10 02:46:37.350695', 'step': 17413, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 3322488817984}, 'timestamp': '2025-09-10 02:46:37.380265', 'step': 17413, 'epoch': 3} {'type': 'loss', 'content': 0.1519145965576172, 'timestamp': '2025-09-10 02:46:37.382298', 'step': 17414, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 3322488817984}, 'timestamp': '2025-09-10 02:46:37.413294', 'step': 17414, 'epoch': 3} {'type': 'loss', 'content': 0.09146073460578918, 'timestamp': '2025-09-10 02:46:37.415998', 'step': 17415, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 128], 'flops': 3797092544000}, 'timestamp': '2025-09-10 02:46:37.446721', 'step': 17415, 'epoch': 3} {'type': 'loss', 'content': 0.08403349667787552, 'timestamp': '2025-09-10 02:46:37.470316', 'step': 17416, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 3322488817984}, 'timestamp': '2025-09-10 02:46:37.500357', 'step': 17416, 'epoch': 3} {'type': 'loss', 'content': 0.04865070804953575, 'timestamp': '2025-09-10 02:46:37.503077', 'step': 17417, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 160], 'flops': 4746299996032}, 'timestamp': '2025-09-10 02:46:37.534753', 'step': 17417, 'epoch': 3} {'type': 'loss', 'content': 0.1221824362874031, 'timestamp': '2025-09-10 02:46:37.537482', 'step': 17418, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 3322488817984}, 'timestamp': '2025-09-10 02:46:37.567638', 'step': 17418, 'epoch': 3} {'type': 'loss', 'content': 0.02248349040746689, 'timestamp': '2025-09-10 02:46:37.571478', 'step': 17419, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 128], 'flops': 3797092544000}, 'timestamp': '2025-09-10 02:46:37.603052', 'step': 17419, 'epoch': 3} {'type': 'loss', 'content': 0.06191851943731308, 'timestamp': '2025-09-10 02:46:37.627762', 'step': 17420, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 3322488817984}, 'timestamp': '2025-09-10 02:46:37.661511', 'step': 17420, 'epoch': 3} {'type': 'loss', 'content': 0.0863160565495491, 'timestamp': '2025-09-10 02:46:37.663611', 'step': 17421, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 3322488817984}, 'timestamp': '2025-09-10 02:46:37.695937', 'step': 17421, 'epoch': 3} {'type': 'loss', 'content': 0.10031665861606598, 'timestamp': '2025-09-10 02:46:37.698490', 'step': 17422, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 3322488817984}, 'timestamp': '2025-09-10 02:46:37.729562', 'step': 17422, 'epoch': 3} {'type': 'loss', 'content': 0.03198985382914543, 'timestamp': '2025-09-10 02:46:37.732497', 'step': 17423, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 144], 'flops': 4271696270016}, 'timestamp': '2025-09-10 02:46:37.764751', 'step': 17423, 'epoch': 3} {'type': 'loss', 'content': 0.044492196291685104, 'timestamp': '2025-09-10 02:46:37.788353', 'step': 17424, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 128], 'flops': 3797092544000}, 'timestamp': '2025-09-10 02:46:37.819581', 'step': 17424, 'epoch': 3} {'type': 'loss', 'content': 0.07545455545186996, 'timestamp': '2025-09-10 02:46:37.821899', 'step': 17425, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 3322488817984}, 'timestamp': '2025-09-10 02:46:37.852994', 'step': 17425, 'epoch': 3} {'type': 'loss', 'content': 0.01995064690709114, 'timestamp': '2025-09-10 02:46:37.856541', 'step': 17426, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 2847885091968}, 'timestamp': '2025-09-10 02:46:37.888137', 'step': 17426, 'epoch': 3} {'type': 'loss', 'content': 0.07239839434623718, 'timestamp': '2025-09-10 02:46:37.890284', 'step': 17427, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 3322488817984}, 'timestamp': '2025-09-10 02:46:37.920248', 'step': 17427, 'epoch': 3} {'type': 'loss', 'content': 0.1610397845506668, 'timestamp': '2025-09-10 02:46:37.943730', 'step': 17428, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 128], 'flops': 3797092544000}, 'timestamp': '2025-09-10 02:46:37.974348', 'step': 17428, 'epoch': 3} {'type': 'loss', 'content': 0.03435725346207619, 'timestamp': '2025-09-10 02:46:37.977188', 'step': 17429, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 2847885091968}, 'timestamp': '2025-09-10 02:46:38.007464', 'step': 17429, 'epoch': 3} {'type': 'loss', 'content': 0.047951310873031616, 'timestamp': '2025-09-10 02:46:38.009381', 'step': 17430, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 3322488817984}, 'timestamp': '2025-09-10 02:46:38.039191', 'step': 17430, 'epoch': 3} {'type': 'loss', 'content': 0.08915047347545624, 'timestamp': '2025-09-10 02:46:38.041362', 'step': 17431, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 128], 'flops': 3797092544000}, 'timestamp': '2025-09-10 02:46:38.077134', 'step': 17431, 'epoch': 3} {'type': 'loss', 'content': 0.04905718192458153, 'timestamp': '2025-09-10 02:46:38.100566', 'step': 17432, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 2847885091968}, 'timestamp': '2025-09-10 02:46:38.131099', 'step': 17432, 'epoch': 3} {'type': 'loss', 'content': 0.10970547050237656, 'timestamp': '2025-09-10 02:46:38.135783', 'step': 17433, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 2847885091968}, 'timestamp': '2025-09-10 02:46:38.166389', 'step': 17433, 'epoch': 3} {'type': 'loss', 'content': 0.07260841876268387, 'timestamp': '2025-09-10 02:46:38.168700', 'step': 17434, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 3322488817984}, 'timestamp': '2025-09-10 02:46:38.199094', 'step': 17434, 'epoch': 3} {'type': 'loss', 'content': 0.09475527703762054, 'timestamp': '2025-09-10 02:46:38.201486', 'step': 17435, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 3322488817984}, 'timestamp': '2025-09-10 02:46:38.231512', 'step': 17435, 'epoch': 3} {'type': 'loss', 'content': 0.062281396239995956, 'timestamp': '2025-09-10 02:46:38.255163', 'step': 17436, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 2847885091968}, 'timestamp': '2025-09-10 02:46:38.285639', 'step': 17436, 'epoch': 3} {'type': 'loss', 'content': 0.0680481493473053, 'timestamp': '2025-09-10 02:46:38.289191', 'step': 17437, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 2847885091968}, 'timestamp': '2025-09-10 02:46:38.319554', 'step': 17437, 'epoch': 3} {'type': 'loss', 'content': 0.05543777719140053, 'timestamp': '2025-09-10 02:46:38.321761', 'step': 17438, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 144], 'flops': 4271696270016}, 'timestamp': '2025-09-10 02:46:38.352287', 'step': 17438, 'epoch': 3} {'type': 'loss', 'content': 0.06915523111820221, 'timestamp': '2025-09-10 02:46:38.354476', 'step': 17439, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 2847885091968}, 'timestamp': '2025-09-10 02:46:38.384555', 'step': 17439, 'epoch': 3} {'type': 'loss', 'content': 0.03910107538104057, 'timestamp': '2025-09-10 02:46:38.408318', 'step': 17440, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 128], 'flops': 3797092544000}, 'timestamp': '2025-09-10 02:46:38.440981', 'step': 17440, 'epoch': 3} {'type': 'loss', 'content': 0.09924081712961197, 'timestamp': '2025-09-10 02:46:38.443130', 'step': 17441, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 3322488817984}, 'timestamp': '2025-09-10 02:46:38.473050', 'step': 17441, 'epoch': 3} {'type': 'loss', 'content': 0.06629633903503418, 'timestamp': '2025-09-10 02:46:38.475184', 'step': 17442, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 128], 'flops': 3797092544000}, 'timestamp': '2025-09-10 02:46:38.506147', 'step': 17442, 'epoch': 3} {'type': 'loss', 'content': 0.04992521554231644, 'timestamp': '2025-09-10 02:46:38.508554', 'step': 17443, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 2847885091968}, 'timestamp': '2025-09-10 02:46:38.538635', 'step': 17443, 'epoch': 3} {'type': 'loss', 'content': 0.0518241710960865, 'timestamp': '2025-09-10 02:46:38.562082', 'step': 17444, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 3322488817984}, 'timestamp': '2025-09-10 02:46:38.592272', 'step': 17444, 'epoch': 3} {'type': 'loss', 'content': 0.04671657085418701, 'timestamp': '2025-09-10 02:46:38.595080', 'step': 17445, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 2847885091968}, 'timestamp': '2025-09-10 02:46:38.626464', 'step': 17445, 'epoch': 3} {'type': 'loss', 'content': 0.04812229052186012, 'timestamp': '2025-09-10 02:46:38.629156', 'step': 17446, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 3322488817984}, 'timestamp': '2025-09-10 02:46:38.659120', 'step': 17446, 'epoch': 3} {'type': 'loss', 'content': 0.043668679893016815, 'timestamp': '2025-09-10 02:46:38.661358', 'step': 17447, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 128], 'flops': 3797092544000}, 'timestamp': '2025-09-10 02:46:38.691574', 'step': 17447, 'epoch': 3} {'type': 'loss', 'content': 0.09036999940872192, 'timestamp': '2025-09-10 02:46:38.714855', 'step': 17448, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 144], 'flops': 4271696270016}, 'timestamp': '2025-09-10 02:46:38.747653', 'step': 17448, 'epoch': 3} {'type': 'loss', 'content': 0.052687957882881165, 'timestamp': '2025-09-10 02:46:38.749706', 'step': 17449, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 2847885091968}, 'timestamp': '2025-09-10 02:46:38.779619', 'step': 17449, 'epoch': 3} {'type': 'loss', 'content': 0.08520695567131042, 'timestamp': '2025-09-10 02:46:38.781770', 'step': 17450, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 144], 'flops': 4271696270016}, 'timestamp': '2025-09-10 02:46:38.813064', 'step': 17450, 'epoch': 3} {'type': 'loss', 'content': 0.1159830391407013, 'timestamp': '2025-09-10 02:46:38.815556', 'step': 17451, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 128], 'flops': 3797092544000}, 'timestamp': '2025-09-10 02:46:38.846105', 'step': 17451, 'epoch': 3} {'type': 'loss', 'content': 0.07179217040538788, 'timestamp': '2025-09-10 02:46:38.869552', 'step': 17452, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 128], 'flops': 3797092544000}, 'timestamp': '2025-09-10 02:46:38.899779', 'step': 17452, 'epoch': 3} {'type': 'loss', 'content': 0.061231207102537155, 'timestamp': '2025-09-10 02:46:38.901909', 'step': 17453, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 128], 'flops': 3797092544000}, 'timestamp': '2025-09-10 02:46:38.931594', 'step': 17453, 'epoch': 3} {'type': 'loss', 'content': 0.09302469342947006, 'timestamp': '2025-09-10 02:46:38.934340', 'step': 17454, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 2847885091968}, 'timestamp': '2025-09-10 02:46:38.965495', 'step': 17454, 'epoch': 3} {'type': 'loss', 'content': 0.1596713662147522, 'timestamp': '2025-09-10 02:46:38.967558', 'step': 17455, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 2847885091968}, 'timestamp': '2025-09-10 02:46:38.997544', 'step': 17455, 'epoch': 3} {'type': 'loss', 'content': 0.09607339650392532, 'timestamp': '2025-09-10 02:46:39.020901', 'step': 17456, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 2847885091968}, 'timestamp': '2025-09-10 02:46:39.052243', 'step': 17456, 'epoch': 3} {'type': 'loss', 'content': 0.026612337678670883, 'timestamp': '2025-09-10 02:46:39.058643', 'step': 17457, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 128], 'flops': 3797092544000}, 'timestamp': '2025-09-10 02:46:39.094610', 'step': 17457, 'epoch': 3} {'type': 'loss', 'content': 0.03830547258257866, 'timestamp': '2025-09-10 02:46:39.096951', 'step': 17458, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 2847885091968}, 'timestamp': '2025-09-10 02:46:39.126974', 'step': 17458, 'epoch': 3} {'type': 'loss', 'content': 0.040685057640075684, 'timestamp': '2025-09-10 02:46:39.129262', 'step': 17459, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 128], 'flops': 3797092544000}, 'timestamp': '2025-09-10 02:46:39.160978', 'step': 17459, 'epoch': 3} {'type': 'loss', 'content': 0.054273948073387146, 'timestamp': '2025-09-10 02:46:39.184517', 'step': 17460, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 144], 'flops': 4271696270016}, 'timestamp': '2025-09-10 02:46:39.216538', 'step': 17460, 'epoch': 3} {'type': 'loss', 'content': 0.07934865355491638, 'timestamp': '2025-09-10 02:46:39.219840', 'step': 17461, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 128], 'flops': 3797092544000}, 'timestamp': '2025-09-10 02:46:39.253261', 'step': 17461, 'epoch': 3} {'type': 'loss', 'content': 0.06364195048809052, 'timestamp': '2025-09-10 02:46:39.255595', 'step': 17462, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 2847885091968}, 'timestamp': '2025-09-10 02:46:39.285707', 'step': 17462, 'epoch': 3} {'type': 'loss', 'content': 0.026708651334047318, 'timestamp': '2025-09-10 02:46:39.288427', 'step': 17463, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 2847885091968}, 'timestamp': '2025-09-10 02:46:39.320596', 'step': 17463, 'epoch': 3} {'type': 'loss', 'content': 0.05214923992753029, 'timestamp': '2025-09-10 02:46:39.344117', 'step': 17464, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 2847885091968}, 'timestamp': '2025-09-10 02:46:39.374541', 'step': 17464, 'epoch': 3} {'type': 'loss', 'content': 0.09045235067605972, 'timestamp': '2025-09-10 02:46:39.376753', 'step': 17465, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 3322488817984}, 'timestamp': '2025-09-10 02:46:39.408155', 'step': 17465, 'epoch': 3} {'type': 'loss', 'content': 0.04729117080569267, 'timestamp': '2025-09-10 02:46:39.410634', 'step': 17466, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 128], 'flops': 3797092544000}, 'timestamp': '2025-09-10 02:46:39.441700', 'step': 17466, 'epoch': 3} {'type': 'loss', 'content': 0.08253705501556396, 'timestamp': '2025-09-10 02:46:39.443905', 'step': 17467, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 3322488817984}, 'timestamp': '2025-09-10 02:46:39.474374', 'step': 17467, 'epoch': 3} {'type': 'loss', 'content': 0.11714435368776321, 'timestamp': '2025-09-10 02:46:39.497801', 'step': 17468, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 3322488817984}, 'timestamp': '2025-09-10 02:46:39.528148', 'step': 17468, 'epoch': 3} {'type': 'loss', 'content': 0.0769258663058281, 'timestamp': '2025-09-10 02:46:39.530394', 'step': 17469, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 144], 'flops': 4271696270016}, 'timestamp': '2025-09-10 02:46:39.560498', 'step': 17469, 'epoch': 3} {'type': 'loss', 'content': 0.05968499928712845, 'timestamp': '2025-09-10 02:46:39.562980', 'step': 17470, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 2847885091968}, 'timestamp': '2025-09-10 02:46:39.592949', 'step': 17470, 'epoch': 3} {'type': 'loss', 'content': 0.11037874221801758, 'timestamp': '2025-09-10 02:46:39.596470', 'step': 17471, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 2847885091968}, 'timestamp': '2025-09-10 02:46:39.626407', 'step': 17471, 'epoch': 3} {'type': 'loss', 'content': 0.02384946309030056, 'timestamp': '2025-09-10 02:46:39.649786', 'step': 17472, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 128], 'flops': 3797092544000}, 'timestamp': '2025-09-10 02:46:39.680283', 'step': 17472, 'epoch': 3} {'type': 'loss', 'content': 0.08234085142612457, 'timestamp': '2025-09-10 02:46:39.682653', 'step': 17473, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 160], 'flops': 4746299996032}, 'timestamp': '2025-09-10 02:46:39.712534', 'step': 17473, 'epoch': 3} {'type': 'loss', 'content': 0.09290771186351776, 'timestamp': '2025-09-10 02:46:39.715289', 'step': 17474, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 3322488817984}, 'timestamp': '2025-09-10 02:46:39.746242', 'step': 17474, 'epoch': 3} {'type': 'loss', 'content': 0.06284362822771072, 'timestamp': '2025-09-10 02:46:39.748522', 'step': 17475, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 2847885091968}, 'timestamp': '2025-09-10 02:46:39.779072', 'step': 17475, 'epoch': 3} {'type': 'loss', 'content': 0.06556852161884308, 'timestamp': '2025-09-10 02:46:39.802346', 'step': 17476, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 128], 'flops': 3797092544000}, 'timestamp': '2025-09-10 02:46:39.833172', 'step': 17476, 'epoch': 3} {'type': 'loss', 'content': 0.04426179081201553, 'timestamp': '2025-09-10 02:46:39.835682', 'step': 17477, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 80], 'flops': 2373281365952}, 'timestamp': '2025-09-10 02:46:39.865621', 'step': 17477, 'epoch': 3} {'type': 'loss', 'content': 0.08211962878704071, 'timestamp': '2025-09-10 02:46:39.867908', 'step': 17478, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 3322488817984}, 'timestamp': '2025-09-10 02:46:39.898129', 'step': 17478, 'epoch': 3} {'type': 'loss', 'content': 0.054195649921894073, 'timestamp': '2025-09-10 02:46:39.900544', 'step': 17479, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 3322488817984}, 'timestamp': '2025-09-10 02:46:39.930540', 'step': 17479, 'epoch': 3} {'type': 'loss', 'content': 0.036262355744838715, 'timestamp': '2025-09-10 02:46:39.954017', 'step': 17480, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 128], 'flops': 3797092544000}, 'timestamp': '2025-09-10 02:46:39.984627', 'step': 17480, 'epoch': 3} {'type': 'loss', 'content': 0.02438557706773281, 'timestamp': '2025-09-10 02:46:39.986842', 'step': 17481, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 80], 'flops': 2373281365952}, 'timestamp': '2025-09-10 02:46:40.017407', 'step': 17481, 'epoch': 3} {'type': 'loss', 'content': 0.013720127753913403, 'timestamp': '2025-09-10 02:46:40.019982', 'step': 17482, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 3322488817984}, 'timestamp': '2025-09-10 02:46:40.049465', 'step': 17482, 'epoch': 3} {'type': 'loss', 'content': 0.07174079865217209, 'timestamp': '2025-09-10 02:46:40.051644', 'step': 17483, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 80], 'flops': 2373281365952}, 'timestamp': '2025-09-10 02:46:40.089130', 'step': 17483, 'epoch': 3} {'type': 'loss', 'content': 0.010114933364093304, 'timestamp': '2025-09-10 02:46:40.112611', 'step': 17484, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 128], 'flops': 3797092544000}, 'timestamp': '2025-09-10 02:46:40.143231', 'step': 17484, 'epoch': 3} {'type': 'loss', 'content': 0.07195989042520523, 'timestamp': '2025-09-10 02:46:40.145639', 'step': 17485, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 144], 'flops': 4271696270016}, 'timestamp': '2025-09-10 02:46:40.178631', 'step': 17485, 'epoch': 3} {'type': 'loss', 'content': 0.03956181928515434, 'timestamp': '2025-09-10 02:46:40.180811', 'step': 17486, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 2847885091968}, 'timestamp': '2025-09-10 02:46:40.210481', 'step': 17486, 'epoch': 3} {'type': 'loss', 'content': 0.07605036348104477, 'timestamp': '2025-09-10 02:46:40.213485', 'step': 17487, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 128], 'flops': 3797092544000}, 'timestamp': '2025-09-10 02:46:40.245859', 'step': 17487, 'epoch': 3} {'type': 'loss', 'content': 0.10274135321378708, 'timestamp': '2025-09-10 02:46:40.268985', 'step': 17488, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 128], 'flops': 3797092544000}, 'timestamp': '2025-09-10 02:46:40.299975', 'step': 17488, 'epoch': 3} {'type': 'loss', 'content': 0.04955194890499115, 'timestamp': '2025-09-10 02:46:40.302196', 'step': 17489, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 128], 'flops': 3797092544000}, 'timestamp': '2025-09-10 02:46:40.333643', 'step': 17489, 'epoch': 3} {'type': 'loss', 'content': 0.06256595999002457, 'timestamp': '2025-09-10 02:46:40.335906', 'step': 17490, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 144], 'flops': 4271696270016}, 'timestamp': '2025-09-10 02:46:40.366133', 'step': 17490, 'epoch': 3} {'type': 'loss', 'content': 0.08541937917470932, 'timestamp': '2025-09-10 02:46:40.368539', 'step': 17491, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 2847885091968}, 'timestamp': '2025-09-10 02:46:40.399973', 'step': 17491, 'epoch': 3} {'type': 'loss', 'content': 0.04002983123064041, 'timestamp': '2025-09-10 02:46:40.423538', 'step': 17492, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 3322488817984}, 'timestamp': '2025-09-10 02:46:40.453967', 'step': 17492, 'epoch': 3} {'type': 'loss', 'content': 0.0565180778503418, 'timestamp': '2025-09-10 02:46:40.456295', 'step': 17493, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 2847885091968}, 'timestamp': '2025-09-10 02:46:40.486574', 'step': 17493, 'epoch': 3} {'type': 'loss', 'content': 0.031874388456344604, 'timestamp': '2025-09-10 02:46:40.488679', 'step': 17494, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 176], 'flops': 5220903722048}, 'timestamp': '2025-09-10 02:46:40.519286', 'step': 17494, 'epoch': 3} {'type': 'loss', 'content': 0.05519385263323784, 'timestamp': '2025-09-10 02:46:40.523467', 'step': 17495, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 3322488817984}, 'timestamp': '2025-09-10 02:46:40.553041', 'step': 17495, 'epoch': 3} {'type': 'loss', 'content': 0.04457705467939377, 'timestamp': '2025-09-10 02:46:40.576506', 'step': 17496, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 3322488817984}, 'timestamp': '2025-09-10 02:46:40.608218', 'step': 17496, 'epoch': 3} {'type': 'loss', 'content': 0.0387987457215786, 'timestamp': '2025-09-10 02:46:40.610332', 'step': 17497, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 80], 'flops': 2373281365952}, 'timestamp': '2025-09-10 02:46:40.640513', 'step': 17497, 'epoch': 3} {'type': 'loss', 'content': 0.1135455071926117, 'timestamp': '2025-09-10 02:46:40.642461', 'step': 17498, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 2847885091968}, 'timestamp': '2025-09-10 02:46:40.673061', 'step': 17498, 'epoch': 3} {'type': 'loss', 'content': 0.07401206344366074, 'timestamp': '2025-09-10 02:46:40.675686', 'step': 17499, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 128], 'flops': 3797092544000}, 'timestamp': '2025-09-10 02:46:40.707404', 'step': 17499, 'epoch': 3} {'type': 'loss', 'content': 0.07265850156545639, 'timestamp': '2025-09-10 02:46:40.731781', 'step': 17500, 'epoch': 3} {'type': 'info', 'content': 'Checkpoint saved at step 17500', 'timestamp': '2025-09-10 02:46:45.550860', 'step': 17500, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 128], 'flops': 3797092544000}, 'timestamp': '2025-09-10 02:46:45.583273', 'step': 17500, 'epoch': 3} {'type': 'loss', 'content': 0.057925183326005936, 'timestamp': '2025-09-10 02:46:45.585779', 'step': 17501, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 128], 'flops': 3797092544000}, 'timestamp': '2025-09-10 02:46:45.617925', 'step': 17501, 'epoch': 3} {'type': 'loss', 'content': 0.057632897049188614, 'timestamp': '2025-09-10 02:46:45.619896', 'step': 17502, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 144], 'flops': 4271696270016}, 'timestamp': '2025-09-10 02:46:45.650982', 'step': 17502, 'epoch': 3} {'type': 'loss', 'content': 0.07996091991662979, 'timestamp': '2025-09-10 02:46:45.653237', 'step': 17503, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 3322488817984}, 'timestamp': '2025-09-10 02:46:45.683248', 'step': 17503, 'epoch': 3} {'type': 'loss', 'content': 0.11295326054096222, 'timestamp': '2025-09-10 02:46:45.706840', 'step': 17504, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 144], 'flops': 4271696270016}, 'timestamp': '2025-09-10 02:46:45.737019', 'step': 17504, 'epoch': 3} {'type': 'loss', 'content': 0.05883701518177986, 'timestamp': '2025-09-10 02:46:45.739485', 'step': 17505, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 2847885091968}, 'timestamp': '2025-09-10 02:46:45.770397', 'step': 17505, 'epoch': 3} {'type': 'loss', 'content': 0.0796433612704277, 'timestamp': '2025-09-10 02:46:45.773766', 'step': 17506, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 3322488817984}, 'timestamp': '2025-09-10 02:46:45.804629', 'step': 17506, 'epoch': 3} {'type': 'loss', 'content': 0.11830996721982956, 'timestamp': '2025-09-10 02:46:45.806843', 'step': 17507, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 3322488817984}, 'timestamp': '2025-09-10 02:46:45.836921', 'step': 17507, 'epoch': 3} {'type': 'loss', 'content': 0.05802242457866669, 'timestamp': '2025-09-10 02:46:45.860223', 'step': 17508, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 3322488817984}, 'timestamp': '2025-09-10 02:46:45.891570', 'step': 17508, 'epoch': 3} {'type': 'loss', 'content': 0.04089265689253807, 'timestamp': '2025-09-10 02:46:45.893501', 'step': 17509, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 128], 'flops': 3797092544000}, 'timestamp': '2025-09-10 02:46:45.923404', 'step': 17509, 'epoch': 3} {'type': 'loss', 'content': 0.04705580696463585, 'timestamp': '2025-09-10 02:46:45.925654', 'step': 17510, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 128], 'flops': 3797092544000}, 'timestamp': '2025-09-10 02:46:45.956644', 'step': 17510, 'epoch': 3} {'type': 'loss', 'content': 0.06447312235832214, 'timestamp': '2025-09-10 02:46:45.958941', 'step': 17511, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 3322488817984}, 'timestamp': '2025-09-10 02:46:45.989425', 'step': 17511, 'epoch': 3} {'type': 'loss', 'content': 0.0039360420778393745, 'timestamp': '2025-09-10 02:46:46.012764', 'step': 17512, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 128], 'flops': 3797092544000}, 'timestamp': '2025-09-10 02:46:46.042816', 'step': 17512, 'epoch': 3} {'type': 'loss', 'content': 0.05516111105680466, 'timestamp': '2025-09-10 02:46:46.045100', 'step': 17513, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 3322488817984}, 'timestamp': '2025-09-10 02:46:46.074982', 'step': 17513, 'epoch': 3} {'type': 'loss', 'content': 0.060166340321302414, 'timestamp': '2025-09-10 02:46:46.084234', 'step': 17514, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 3322488817984}, 'timestamp': '2025-09-10 02:46:46.120658', 'step': 17514, 'epoch': 3} {'type': 'loss', 'content': 0.02888387255370617, 'timestamp': '2025-09-10 02:46:46.122828', 'step': 17515, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 3322488817984}, 'timestamp': '2025-09-10 02:46:46.153300', 'step': 17515, 'epoch': 3} {'type': 'loss', 'content': 0.01881692372262478, 'timestamp': '2025-09-10 02:46:46.176921', 'step': 17516, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 3322488817984}, 'timestamp': '2025-09-10 02:46:46.209008', 'step': 17516, 'epoch': 3} {'type': 'loss', 'content': 0.020230989903211594, 'timestamp': '2025-09-10 02:46:46.211100', 'step': 17517, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 2847885091968}, 'timestamp': '2025-09-10 02:46:46.241022', 'step': 17517, 'epoch': 3} {'type': 'loss', 'content': 0.05649604648351669, 'timestamp': '2025-09-10 02:46:46.243196', 'step': 17518, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 3322488817984}, 'timestamp': '2025-09-10 02:46:46.273487', 'step': 17518, 'epoch': 3} {'type': 'loss', 'content': 0.05222757160663605, 'timestamp': '2025-09-10 02:46:46.276300', 'step': 17519, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 128], 'flops': 3797092544000}, 'timestamp': '2025-09-10 02:46:46.307928', 'step': 17519, 'epoch': 3} {'type': 'loss', 'content': 0.13850048184394836, 'timestamp': '2025-09-10 02:46:46.332087', 'step': 17520, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 3322488817984}, 'timestamp': '2025-09-10 02:46:46.365919', 'step': 17520, 'epoch': 3} {'type': 'loss', 'content': 0.05138265714049339, 'timestamp': '2025-09-10 02:46:46.368381', 'step': 17521, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 128], 'flops': 3797092544000}, 'timestamp': '2025-09-10 02:46:46.398461', 'step': 17521, 'epoch': 3} {'type': 'loss', 'content': 0.038687873631715775, 'timestamp': '2025-09-10 02:46:46.400555', 'step': 17522, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 2847885091968}, 'timestamp': '2025-09-10 02:46:46.430704', 'step': 17522, 'epoch': 3} {'type': 'loss', 'content': 0.03528590127825737, 'timestamp': '2025-09-10 02:46:46.433187', 'step': 17523, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 128], 'flops': 3797092544000}, 'timestamp': '2025-09-10 02:46:46.463681', 'step': 17523, 'epoch': 3} {'type': 'loss', 'content': 0.04992825537919998, 'timestamp': '2025-09-10 02:46:46.487091', 'step': 17524, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 160], 'flops': 4746299996032}, 'timestamp': '2025-09-10 02:46:46.518381', 'step': 17524, 'epoch': 3} {'type': 'loss', 'content': 0.09288524091243744, 'timestamp': '2025-09-10 02:46:46.520772', 'step': 17525, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 144], 'flops': 4271696270016}, 'timestamp': '2025-09-10 02:46:46.552224', 'step': 17525, 'epoch': 3} {'type': 'loss', 'content': 0.10123760253190994, 'timestamp': '2025-09-10 02:46:46.554621', 'step': 17526, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 3322488817984}, 'timestamp': '2025-09-10 02:46:46.584558', 'step': 17526, 'epoch': 3} {'type': 'loss', 'content': 0.05016297101974487, 'timestamp': '2025-09-10 02:46:46.586976', 'step': 17527, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 3322488817984}, 'timestamp': '2025-09-10 02:46:46.617784', 'step': 17527, 'epoch': 3} {'type': 'loss', 'content': 0.035413824021816254, 'timestamp': '2025-09-10 02:46:46.641304', 'step': 17528, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 128], 'flops': 3797092544000}, 'timestamp': '2025-09-10 02:46:46.672484', 'step': 17528, 'epoch': 3} {'type': 'loss', 'content': 0.1559268683195114, 'timestamp': '2025-09-10 02:46:46.675016', 'step': 17529, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 2847885091968}, 'timestamp': '2025-09-10 02:46:46.706239', 'step': 17529, 'epoch': 3} {'type': 'loss', 'content': 0.09210870414972305, 'timestamp': '2025-09-10 02:46:46.709179', 'step': 17530, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 128], 'flops': 3797092544000}, 'timestamp': '2025-09-10 02:46:46.740949', 'step': 17530, 'epoch': 3} {'type': 'loss', 'content': 0.03794045373797417, 'timestamp': '2025-09-10 02:46:46.743408', 'step': 17531, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 128], 'flops': 3797092544000}, 'timestamp': '2025-09-10 02:46:46.774591', 'step': 17531, 'epoch': 3} {'type': 'loss', 'content': 0.027575256302952766, 'timestamp': '2025-09-10 02:46:46.798173', 'step': 17532, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 144], 'flops': 4271696270016}, 'timestamp': '2025-09-10 02:46:46.830388', 'step': 17532, 'epoch': 3} {'type': 'loss', 'content': 0.024751834571361542, 'timestamp': '2025-09-10 02:46:46.832796', 'step': 17533, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 128], 'flops': 3797092544000}, 'timestamp': '2025-09-10 02:46:46.864876', 'step': 17533, 'epoch': 3} {'type': 'loss', 'content': 0.09429700672626495, 'timestamp': '2025-09-10 02:46:46.867242', 'step': 17534, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 2847885091968}, 'timestamp': '2025-09-10 02:46:46.897794', 'step': 17534, 'epoch': 3} {'type': 'loss', 'content': 0.05636335164308548, 'timestamp': '2025-09-10 02:46:46.900272', 'step': 17535, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 2847885091968}, 'timestamp': '2025-09-10 02:46:46.930927', 'step': 17535, 'epoch': 3} {'type': 'loss', 'content': 0.06281940639019012, 'timestamp': '2025-09-10 02:46:46.954495', 'step': 17536, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 144], 'flops': 4271696270016}, 'timestamp': '2025-09-10 02:46:46.985367', 'step': 17536, 'epoch': 3} {'type': 'loss', 'content': 0.12238571792840958, 'timestamp': '2025-09-10 02:46:46.989231', 'step': 17537, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 128], 'flops': 3797092544000}, 'timestamp': '2025-09-10 02:46:47.019645', 'step': 17537, 'epoch': 3} {'type': 'loss', 'content': 0.01590091735124588, 'timestamp': '2025-09-10 02:46:47.021812', 'step': 17538, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 2847885091968}, 'timestamp': '2025-09-10 02:46:47.053874', 'step': 17538, 'epoch': 3} {'type': 'loss', 'content': 0.10961807519197464, 'timestamp': '2025-09-10 02:46:47.056265', 'step': 17539, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 80], 'flops': 2373281365952}, 'timestamp': '2025-09-10 02:46:47.086473', 'step': 17539, 'epoch': 3} {'type': 'loss', 'content': 0.057946451008319855, 'timestamp': '2025-09-10 02:46:47.111897', 'step': 17540, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 2847885091968}, 'timestamp': '2025-09-10 02:46:47.143449', 'step': 17540, 'epoch': 3} {'type': 'loss', 'content': 0.07423096150159836, 'timestamp': '2025-09-10 02:46:47.145372', 'step': 17541, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 128], 'flops': 3797092544000}, 'timestamp': '2025-09-10 02:46:47.175603', 'step': 17541, 'epoch': 3} {'type': 'loss', 'content': 0.028300657868385315, 'timestamp': '2025-09-10 02:46:47.178102', 'step': 17542, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 2847885091968}, 'timestamp': '2025-09-10 02:46:47.208340', 'step': 17542, 'epoch': 3} {'type': 'loss', 'content': 0.0692208781838417, 'timestamp': '2025-09-10 02:46:47.210913', 'step': 17543, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 128], 'flops': 3797092544000}, 'timestamp': '2025-09-10 02:46:47.241868', 'step': 17543, 'epoch': 3} {'type': 'loss', 'content': 0.06535062938928604, 'timestamp': '2025-09-10 02:46:47.265371', 'step': 17544, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 2847885091968}, 'timestamp': '2025-09-10 02:46:47.295312', 'step': 17544, 'epoch': 3} {'type': 'loss', 'content': 0.043489787727594376, 'timestamp': '2025-09-10 02:46:47.297663', 'step': 17545, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 144], 'flops': 4271696270016}, 'timestamp': '2025-09-10 02:46:47.328740', 'step': 17545, 'epoch': 3} {'type': 'loss', 'content': 0.07311956584453583, 'timestamp': '2025-09-10 02:46:47.331229', 'step': 17546, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 128], 'flops': 3797092544000}, 'timestamp': '2025-09-10 02:46:47.361234', 'step': 17546, 'epoch': 3} {'type': 'loss', 'content': 0.04069390520453453, 'timestamp': '2025-09-10 02:46:47.363555', 'step': 17547, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 3322488817984}, 'timestamp': '2025-09-10 02:46:47.393544', 'step': 17547, 'epoch': 3} {'type': 'loss', 'content': 0.04138663411140442, 'timestamp': '2025-09-10 02:46:47.417958', 'step': 17548, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 80], 'flops': 2373281365952}, 'timestamp': '2025-09-10 02:46:47.448312', 'step': 17548, 'epoch': 3} {'type': 'loss', 'content': 0.05398678779602051, 'timestamp': '2025-09-10 02:46:47.450923', 'step': 17549, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 128], 'flops': 3797092544000}, 'timestamp': '2025-09-10 02:46:47.481139', 'step': 17549, 'epoch': 3} {'type': 'loss', 'content': 0.04217547923326492, 'timestamp': '2025-09-10 02:46:47.483680', 'step': 17550, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 2847885091968}, 'timestamp': '2025-09-10 02:46:47.514579', 'step': 17550, 'epoch': 3} {'type': 'loss', 'content': 0.07126139104366302, 'timestamp': '2025-09-10 02:46:47.517305', 'step': 17551, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 128], 'flops': 3797092544000}, 'timestamp': '2025-09-10 02:46:47.547618', 'step': 17551, 'epoch': 3} {'type': 'loss', 'content': 0.05749216303229332, 'timestamp': '2025-09-10 02:46:47.571198', 'step': 17552, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 3322488817984}, 'timestamp': '2025-09-10 02:46:47.602043', 'step': 17552, 'epoch': 3} {'type': 'loss', 'content': 0.03937966749072075, 'timestamp': '2025-09-10 02:46:47.604765', 'step': 17553, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 128], 'flops': 3797092544000}, 'timestamp': '2025-09-10 02:46:47.636602', 'step': 17553, 'epoch': 3} {'type': 'loss', 'content': 0.09338937699794769, 'timestamp': '2025-09-10 02:46:47.639911', 'step': 17554, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 3322488817984}, 'timestamp': '2025-09-10 02:46:47.670691', 'step': 17554, 'epoch': 3} {'type': 'loss', 'content': 0.04479542374610901, 'timestamp': '2025-09-10 02:46:47.672769', 'step': 17555, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 3322488817984}, 'timestamp': '2025-09-10 02:46:47.702528', 'step': 17555, 'epoch': 3} {'type': 'loss', 'content': 0.040110550820827484, 'timestamp': '2025-09-10 02:46:47.725944', 'step': 17556, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 2847885091968}, 'timestamp': '2025-09-10 02:46:47.756585', 'step': 17556, 'epoch': 3} {'type': 'loss', 'content': 0.09436871111392975, 'timestamp': '2025-09-10 02:46:47.758957', 'step': 17557, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 128], 'flops': 3797092544000}, 'timestamp': '2025-09-10 02:46:47.789206', 'step': 17557, 'epoch': 3} {'type': 'loss', 'content': 0.023762868717312813, 'timestamp': '2025-09-10 02:46:47.791330', 'step': 17558, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 3322488817984}, 'timestamp': '2025-09-10 02:46:47.823172', 'step': 17558, 'epoch': 3} {'type': 'loss', 'content': 0.09822610020637512, 'timestamp': '2025-09-10 02:46:47.825842', 'step': 17559, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 128], 'flops': 3797092544000}, 'timestamp': '2025-09-10 02:46:47.857356', 'step': 17559, 'epoch': 3} {'type': 'loss', 'content': 0.0932626947760582, 'timestamp': '2025-09-10 02:46:47.880719', 'step': 17560, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 2847885091968}, 'timestamp': '2025-09-10 02:46:47.928011', 'step': 17560, 'epoch': 3} {'type': 'loss', 'content': 0.06351775676012039, 'timestamp': '2025-09-10 02:46:47.930051', 'step': 17561, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 2847885091968}, 'timestamp': '2025-09-10 02:46:47.961306', 'step': 17561, 'epoch': 3} {'type': 'loss', 'content': 0.07846558094024658, 'timestamp': '2025-09-10 02:46:47.963500', 'step': 17562, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 144], 'flops': 4271696270016}, 'timestamp': '2025-09-10 02:46:47.993071', 'step': 17562, 'epoch': 3} {'type': 'loss', 'content': 0.041144050657749176, 'timestamp': '2025-09-10 02:46:47.995428', 'step': 17563, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 3322488817984}, 'timestamp': '2025-09-10 02:46:48.027394', 'step': 17563, 'epoch': 3} {'type': 'loss', 'content': 0.036402925848960876, 'timestamp': '2025-09-10 02:46:48.051006', 'step': 17564, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 128], 'flops': 3797092544000}, 'timestamp': '2025-09-10 02:46:48.082735', 'step': 17564, 'epoch': 3} {'type': 'loss', 'content': 0.05601005256175995, 'timestamp': '2025-09-10 02:46:48.088689', 'step': 17565, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 128], 'flops': 3797092544000}, 'timestamp': '2025-09-10 02:46:48.120235', 'step': 17565, 'epoch': 3} {'type': 'loss', 'content': 0.07198778539896011, 'timestamp': '2025-09-10 02:46:48.122380', 'step': 17566, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 3322488817984}, 'timestamp': '2025-09-10 02:46:48.152792', 'step': 17566, 'epoch': 3} {'type': 'loss', 'content': 0.06807225197553635, 'timestamp': '2025-09-10 02:46:48.155657', 'step': 17567, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 128], 'flops': 3797092544000}, 'timestamp': '2025-09-10 02:46:48.186359', 'step': 17567, 'epoch': 3} {'type': 'loss', 'content': 0.011074941605329514, 'timestamp': '2025-09-10 02:46:48.209957', 'step': 17568, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 2847885091968}, 'timestamp': '2025-09-10 02:46:48.240145', 'step': 17568, 'epoch': 3} {'type': 'loss', 'content': 0.040529441088438034, 'timestamp': '2025-09-10 02:46:48.243914', 'step': 17569, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 2847885091968}, 'timestamp': '2025-09-10 02:46:48.276297', 'step': 17569, 'epoch': 3} {'type': 'loss', 'content': 0.05186218023300171, 'timestamp': '2025-09-10 02:46:48.278699', 'step': 17570, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 2847885091968}, 'timestamp': '2025-09-10 02:46:48.313129', 'step': 17570, 'epoch': 3} {'type': 'loss', 'content': 0.07523943483829498, 'timestamp': '2025-09-10 02:46:48.315193', 'step': 17571, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 128], 'flops': 3797092544000}, 'timestamp': '2025-09-10 02:46:48.344913', 'step': 17571, 'epoch': 3} {'type': 'loss', 'content': 0.007688689511269331, 'timestamp': '2025-09-10 02:46:48.368241', 'step': 17572, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 3322488817984}, 'timestamp': '2025-09-10 02:46:48.400357', 'step': 17572, 'epoch': 3} {'type': 'loss', 'content': 0.11614692211151123, 'timestamp': '2025-09-10 02:46:48.402769', 'step': 17573, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 2847885091968}, 'timestamp': '2025-09-10 02:46:48.434001', 'step': 17573, 'epoch': 3} {'type': 'loss', 'content': 0.006865672767162323, 'timestamp': '2025-09-10 02:46:48.436282', 'step': 17574, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 144], 'flops': 4271696270016}, 'timestamp': '2025-09-10 02:46:48.466132', 'step': 17574, 'epoch': 3} {'type': 'loss', 'content': 0.05432538315653801, 'timestamp': '2025-09-10 02:46:48.468648', 'step': 17575, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 2847885091968}, 'timestamp': '2025-09-10 02:46:48.498710', 'step': 17575, 'epoch': 3} {'type': 'loss', 'content': 0.036613401025533676, 'timestamp': '2025-09-10 02:46:48.522086', 'step': 17576, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 176], 'flops': 5220903722048}, 'timestamp': '2025-09-10 02:46:48.552379', 'step': 17576, 'epoch': 3} {'type': 'loss', 'content': 0.05612054467201233, 'timestamp': '2025-09-10 02:46:48.554703', 'step': 17577, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 3322488817984}, 'timestamp': '2025-09-10 02:46:48.584733', 'step': 17577, 'epoch': 3} {'type': 'loss', 'content': 0.025456154718995094, 'timestamp': '2025-09-10 02:46:48.587427', 'step': 17578, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 2847885091968}, 'timestamp': '2025-09-10 02:46:48.618766', 'step': 17578, 'epoch': 3} {'type': 'loss', 'content': 0.01059744507074356, 'timestamp': '2025-09-10 02:46:48.620703', 'step': 17579, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 128], 'flops': 3797092544000}, 'timestamp': '2025-09-10 02:46:48.650291', 'step': 17579, 'epoch': 3} {'type': 'loss', 'content': 0.07512108236551285, 'timestamp': '2025-09-10 02:46:48.673816', 'step': 17580, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 3322488817984}, 'timestamp': '2025-09-10 02:46:48.705846', 'step': 17580, 'epoch': 3} {'type': 'loss', 'content': 0.08046606928110123, 'timestamp': '2025-09-10 02:46:48.708284', 'step': 17581, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 2847885091968}, 'timestamp': '2025-09-10 02:46:48.741378', 'step': 17581, 'epoch': 3} {'type': 'loss', 'content': 0.02658013440668583, 'timestamp': '2025-09-10 02:46:48.743854', 'step': 17582, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 144], 'flops': 4271696270016}, 'timestamp': '2025-09-10 02:46:48.774967', 'step': 17582, 'epoch': 3} {'type': 'loss', 'content': 0.06246855482459068, 'timestamp': '2025-09-10 02:46:48.777555', 'step': 17583, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 2847885091968}, 'timestamp': '2025-09-10 02:46:48.809899', 'step': 17583, 'epoch': 3} {'type': 'loss', 'content': 0.11539115756750107, 'timestamp': '2025-09-10 02:46:48.835099', 'step': 17584, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 2847885091968}, 'timestamp': '2025-09-10 02:46:48.865528', 'step': 17584, 'epoch': 3} {'type': 'loss', 'content': 0.07304711639881134, 'timestamp': '2025-09-10 02:46:48.867890', 'step': 17585, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 2847885091968}, 'timestamp': '2025-09-10 02:46:48.897380', 'step': 17585, 'epoch': 3} {'type': 'loss', 'content': 0.0475345179438591, 'timestamp': '2025-09-10 02:46:48.899721', 'step': 17586, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 144], 'flops': 4271696270016}, 'timestamp': '2025-09-10 02:46:48.930505', 'step': 17586, 'epoch': 3} {'type': 'loss', 'content': 0.11682820320129395, 'timestamp': '2025-09-10 02:46:48.932762', 'step': 17587, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 128], 'flops': 3797092544000}, 'timestamp': '2025-09-10 02:46:48.963756', 'step': 17587, 'epoch': 3} {'type': 'loss', 'content': 0.05283816531300545, 'timestamp': '2025-09-10 02:46:48.986971', 'step': 17588, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 144], 'flops': 4271696270016}, 'timestamp': '2025-09-10 02:46:49.017290', 'step': 17588, 'epoch': 3} {'type': 'loss', 'content': 0.06987789273262024, 'timestamp': '2025-09-10 02:46:49.019916', 'step': 17589, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 128], 'flops': 3797092544000}, 'timestamp': '2025-09-10 02:46:49.050629', 'step': 17589, 'epoch': 3} {'type': 'loss', 'content': 0.04581300541758537, 'timestamp': '2025-09-10 02:46:49.053372', 'step': 17590, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 3322488817984}, 'timestamp': '2025-09-10 02:46:49.083812', 'step': 17590, 'epoch': 3} {'type': 'loss', 'content': 0.06732381880283356, 'timestamp': '2025-09-10 02:46:49.090896', 'step': 17591, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 2847885091968}, 'timestamp': '2025-09-10 02:46:49.128401', 'step': 17591, 'epoch': 3} {'type': 'loss', 'content': 0.07337969541549683, 'timestamp': '2025-09-10 02:46:49.151917', 'step': 17592, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 3322488817984}, 'timestamp': '2025-09-10 02:46:49.183339', 'step': 17592, 'epoch': 3} {'type': 'loss', 'content': 0.08970251679420471, 'timestamp': '2025-09-10 02:46:49.185502', 'step': 17593, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 3322488817984}, 'timestamp': '2025-09-10 02:46:49.215654', 'step': 17593, 'epoch': 3} {'type': 'loss', 'content': 0.10693036764860153, 'timestamp': '2025-09-10 02:46:49.220340', 'step': 17594, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 2847885091968}, 'timestamp': '2025-09-10 02:46:49.250513', 'step': 17594, 'epoch': 3} {'type': 'loss', 'content': 0.07746095210313797, 'timestamp': '2025-09-10 02:46:49.252870', 'step': 17595, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 192], 'flops': 5695507448064}, 'timestamp': '2025-09-10 02:46:49.283437', 'step': 17595, 'epoch': 3} {'type': 'loss', 'content': 0.017136450856924057, 'timestamp': '2025-09-10 02:46:49.308791', 'step': 17596, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 128], 'flops': 3797092544000}, 'timestamp': '2025-09-10 02:46:49.339172', 'step': 17596, 'epoch': 3} {'type': 'loss', 'content': 0.05500015616416931, 'timestamp': '2025-09-10 02:46:49.342087', 'step': 17597, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 2847885091968}, 'timestamp': '2025-09-10 02:46:49.373122', 'step': 17597, 'epoch': 3} {'type': 'loss', 'content': 0.06511294841766357, 'timestamp': '2025-09-10 02:46:49.379341', 'step': 17598, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 128], 'flops': 3797092544000}, 'timestamp': '2025-09-10 02:46:49.415110', 'step': 17598, 'epoch': 3} {'type': 'loss', 'content': 0.03633466735482216, 'timestamp': '2025-09-10 02:46:49.418129', 'step': 17599, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 3322488817984}, 'timestamp': '2025-09-10 02:46:49.454306', 'step': 17599, 'epoch': 3} {'type': 'loss', 'content': 0.11494825780391693, 'timestamp': '2025-09-10 02:46:49.480437', 'step': 17600, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 3322488817984}, 'timestamp': '2025-09-10 02:46:49.516343', 'step': 17600, 'epoch': 3} {'type': 'loss', 'content': 0.012700753286480904, 'timestamp': '2025-09-10 02:46:49.523144', 'step': 17601, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 144], 'flops': 4271696270016}, 'timestamp': '2025-09-10 02:46:49.561215', 'step': 17601, 'epoch': 3} {'type': 'loss', 'content': 0.06705176085233688, 'timestamp': '2025-09-10 02:46:49.569712', 'step': 17602, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 2847885091968}, 'timestamp': '2025-09-10 02:46:49.618579', 'step': 17602, 'epoch': 3} {'type': 'loss', 'content': 0.07874506711959839, 'timestamp': '2025-09-10 02:46:49.622823', 'step': 17603, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 160], 'flops': 4746299996032}, 'timestamp': '2025-09-10 02:46:49.665996', 'step': 17603, 'epoch': 3} {'type': 'loss', 'content': 0.0897190198302269, 'timestamp': '2025-09-10 02:46:49.694600', 'step': 17604, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 3322488817984}, 'timestamp': '2025-09-10 02:46:49.728585', 'step': 17604, 'epoch': 3} {'type': 'loss', 'content': 0.04702751338481903, 'timestamp': '2025-09-10 02:46:49.730712', 'step': 17605, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 2847885091968}, 'timestamp': '2025-09-10 02:46:49.761243', 'step': 17605, 'epoch': 3} {'type': 'loss', 'content': 0.06008216738700867, 'timestamp': '2025-09-10 02:46:49.763327', 'step': 17606, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 2847885091968}, 'timestamp': '2025-09-10 02:46:49.793193', 'step': 17606, 'epoch': 3} {'type': 'loss', 'content': 0.04607468843460083, 'timestamp': '2025-09-10 02:46:49.795723', 'step': 17607, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 3322488817984}, 'timestamp': '2025-09-10 02:46:49.826120', 'step': 17607, 'epoch': 3} {'type': 'loss', 'content': 0.04105371609330177, 'timestamp': '2025-09-10 02:46:49.849246', 'step': 17608, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 3322488817984}, 'timestamp': '2025-09-10 02:46:49.880591', 'step': 17608, 'epoch': 3} {'type': 'loss', 'content': 0.041539911180734634, 'timestamp': '2025-09-10 02:46:49.882659', 'step': 17609, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 3322488817984}, 'timestamp': '2025-09-10 02:46:49.913298', 'step': 17609, 'epoch': 3} {'type': 'loss', 'content': 0.09332246333360672, 'timestamp': '2025-09-10 02:46:49.915719', 'step': 17610, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 2847885091968}, 'timestamp': '2025-09-10 02:46:49.945734', 'step': 17610, 'epoch': 3} {'type': 'loss', 'content': 0.08661425113677979, 'timestamp': '2025-09-10 02:46:49.948024', 'step': 17611, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 2847885091968}, 'timestamp': '2025-09-10 02:46:49.978316', 'step': 17611, 'epoch': 3} {'type': 'loss', 'content': 0.08724065124988556, 'timestamp': '2025-09-10 02:46:50.001828', 'step': 17612, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 3322488817984}, 'timestamp': '2025-09-10 02:46:50.032122', 'step': 17612, 'epoch': 3} {'type': 'loss', 'content': 0.021867116913199425, 'timestamp': '2025-09-10 02:46:50.034461', 'step': 17613, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 144], 'flops': 4271696270016}, 'timestamp': '2025-09-10 02:46:50.064879', 'step': 17613, 'epoch': 3} {'type': 'loss', 'content': 0.10029742866754532, 'timestamp': '2025-09-10 02:46:50.067256', 'step': 17614, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 3322488817984}, 'timestamp': '2025-09-10 02:46:50.097222', 'step': 17614, 'epoch': 3} {'type': 'loss', 'content': 0.0754210576415062, 'timestamp': '2025-09-10 02:46:50.099449', 'step': 17615, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 2847885091968}, 'timestamp': '2025-09-10 02:46:50.128845', 'step': 17615, 'epoch': 3} {'type': 'loss', 'content': 0.05121992900967598, 'timestamp': '2025-09-10 02:46:50.152456', 'step': 17616, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 2847885091968}, 'timestamp': '2025-09-10 02:46:50.182693', 'step': 17616, 'epoch': 3} {'type': 'loss', 'content': 0.07740160822868347, 'timestamp': '2025-09-10 02:46:50.184838', 'step': 17617, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 144], 'flops': 4271696270016}, 'timestamp': '2025-09-10 02:46:50.214595', 'step': 17617, 'epoch': 3} {'type': 'loss', 'content': 0.0597284734249115, 'timestamp': '2025-09-10 02:46:50.217900', 'step': 17618, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 160], 'flops': 4746299996032}, 'timestamp': '2025-09-10 02:46:50.247696', 'step': 17618, 'epoch': 3} {'type': 'loss', 'content': 0.1537194699048996, 'timestamp': '2025-09-10 02:46:50.250405', 'step': 17619, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 3322488817984}, 'timestamp': '2025-09-10 02:46:50.280490', 'step': 17619, 'epoch': 3} {'type': 'loss', 'content': 0.0532396137714386, 'timestamp': '2025-09-10 02:46:50.303751', 'step': 17620, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 128], 'flops': 3797092544000}, 'timestamp': '2025-09-10 02:46:50.334863', 'step': 17620, 'epoch': 3} {'type': 'loss', 'content': 0.015401030890643597, 'timestamp': '2025-09-10 02:46:50.336965', 'step': 17621, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 2847885091968}, 'timestamp': '2025-09-10 02:46:50.368782', 'step': 17621, 'epoch': 3} {'type': 'loss', 'content': 0.042486753314733505, 'timestamp': '2025-09-10 02:46:50.371100', 'step': 17622, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 3322488817984}, 'timestamp': '2025-09-10 02:46:50.402354', 'step': 17622, 'epoch': 3} {'type': 'loss', 'content': 0.07965505868196487, 'timestamp': '2025-09-10 02:46:50.404786', 'step': 17623, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 128], 'flops': 3797092544000}, 'timestamp': '2025-09-10 02:46:50.434821', 'step': 17623, 'epoch': 3} {'type': 'loss', 'content': 0.07863122224807739, 'timestamp': '2025-09-10 02:46:50.458210', 'step': 17624, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 128], 'flops': 3797092544000}, 'timestamp': '2025-09-10 02:46:50.488866', 'step': 17624, 'epoch': 3} {'type': 'loss', 'content': 0.11589118838310242, 'timestamp': '2025-09-10 02:46:50.491287', 'step': 17625, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 3322488817984}, 'timestamp': '2025-09-10 02:46:50.521731', 'step': 17625, 'epoch': 3} {'type': 'loss', 'content': 0.05783263221383095, 'timestamp': '2025-09-10 02:46:50.524255', 'step': 17626, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 3322488817984}, 'timestamp': '2025-09-10 02:46:50.553302', 'step': 17626, 'epoch': 3} {'type': 'loss', 'content': 0.08079562336206436, 'timestamp': '2025-09-10 02:46:50.555608', 'step': 17627, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 80], 'flops': 2373281365952}, 'timestamp': '2025-09-10 02:46:50.585997', 'step': 17627, 'epoch': 3} {'type': 'loss', 'content': 0.04543689265847206, 'timestamp': '2025-09-10 02:46:50.609685', 'step': 17628, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 144], 'flops': 4271696270016}, 'timestamp': '2025-09-10 02:46:50.640143', 'step': 17628, 'epoch': 3} {'type': 'loss', 'content': 0.06126448139548302, 'timestamp': '2025-09-10 02:46:50.642375', 'step': 17629, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 2847885091968}, 'timestamp': '2025-09-10 02:46:50.673132', 'step': 17629, 'epoch': 3} {'type': 'loss', 'content': 0.15106622874736786, 'timestamp': '2025-09-10 02:46:50.675366', 'step': 17630, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 3322488817984}, 'timestamp': '2025-09-10 02:46:50.705789', 'step': 17630, 'epoch': 3} {'type': 'loss', 'content': 0.07426440715789795, 'timestamp': '2025-09-10 02:46:50.708191', 'step': 17631, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 2847885091968}, 'timestamp': '2025-09-10 02:46:50.738364', 'step': 17631, 'epoch': 3} {'type': 'loss', 'content': 0.14307735860347748, 'timestamp': '2025-09-10 02:46:50.762032', 'step': 17632, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 144], 'flops': 4271696270016}, 'timestamp': '2025-09-10 02:46:50.792536', 'step': 17632, 'epoch': 3} {'type': 'loss', 'content': 0.12382261455059052, 'timestamp': '2025-09-10 02:46:50.794912', 'step': 17633, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 128], 'flops': 3797092544000}, 'timestamp': '2025-09-10 02:46:50.826425', 'step': 17633, 'epoch': 3} {'type': 'loss', 'content': 0.02892841026186943, 'timestamp': '2025-09-10 02:46:50.828575', 'step': 17634, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 128], 'flops': 3797092544000}, 'timestamp': '2025-09-10 02:46:50.858299', 'step': 17634, 'epoch': 3} {'type': 'loss', 'content': 0.07115355879068375, 'timestamp': '2025-09-10 02:46:50.860694', 'step': 17635, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 144], 'flops': 4271696270016}, 'timestamp': '2025-09-10 02:46:50.891516', 'step': 17635, 'epoch': 3} {'type': 'loss', 'content': 0.05265716463327408, 'timestamp': '2025-09-10 02:46:50.915313', 'step': 17636, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 3322488817984}, 'timestamp': '2025-09-10 02:46:50.947335', 'step': 17636, 'epoch': 3} {'type': 'loss', 'content': 0.07029438018798828, 'timestamp': '2025-09-10 02:46:50.949426', 'step': 17637, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 2847885091968}, 'timestamp': '2025-09-10 02:46:50.979745', 'step': 17637, 'epoch': 3} {'type': 'loss', 'content': 0.08262300491333008, 'timestamp': '2025-09-10 02:46:50.982683', 'step': 17638, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 128], 'flops': 3797092544000}, 'timestamp': '2025-09-10 02:46:51.015234', 'step': 17638, 'epoch': 3} {'type': 'loss', 'content': 0.0373457707464695, 'timestamp': '2025-09-10 02:46:51.017809', 'step': 17639, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 3322488817984}, 'timestamp': '2025-09-10 02:46:51.049135', 'step': 17639, 'epoch': 3} {'type': 'loss', 'content': 0.18070852756500244, 'timestamp': '2025-09-10 02:46:51.072934', 'step': 17640, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 160], 'flops': 4746299996032}, 'timestamp': '2025-09-10 02:46:51.104364', 'step': 17640, 'epoch': 3} {'type': 'loss', 'content': 0.0743684247136116, 'timestamp': '2025-09-10 02:46:51.107621', 'step': 17641, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 3322488817984}, 'timestamp': '2025-09-10 02:46:51.138885', 'step': 17641, 'epoch': 3} {'type': 'loss', 'content': 0.07445884495973587, 'timestamp': '2025-09-10 02:46:51.145269', 'step': 17642, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 128], 'flops': 3797092544000}, 'timestamp': '2025-09-10 02:46:51.189817', 'step': 17642, 'epoch': 3} {'type': 'loss', 'content': 0.07278256863355637, 'timestamp': '2025-09-10 02:46:51.191881', 'step': 17643, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 2847885091968}, 'timestamp': '2025-09-10 02:46:51.222073', 'step': 17643, 'epoch': 3} {'type': 'loss', 'content': 0.05759798735380173, 'timestamp': '2025-09-10 02:46:51.246098', 'step': 17644, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 2847885091968}, 'timestamp': '2025-09-10 02:46:51.279958', 'step': 17644, 'epoch': 3} {'type': 'loss', 'content': 0.09563447535037994, 'timestamp': '2025-09-10 02:46:51.282188', 'step': 17645, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 3322488817984}, 'timestamp': '2025-09-10 02:46:51.315883', 'step': 17645, 'epoch': 3} {'type': 'loss', 'content': 0.04560157656669617, 'timestamp': '2025-09-10 02:46:51.318152', 'step': 17646, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 128], 'flops': 3797092544000}, 'timestamp': '2025-09-10 02:46:51.357278', 'step': 17646, 'epoch': 3} {'type': 'loss', 'content': 0.08867691457271576, 'timestamp': '2025-09-10 02:46:51.359435', 'step': 17647, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 3322488817984}, 'timestamp': '2025-09-10 02:46:51.390160', 'step': 17647, 'epoch': 3} {'type': 'loss', 'content': 0.07381130009889603, 'timestamp': '2025-09-10 02:46:51.414762', 'step': 17648, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 2847885091968}, 'timestamp': '2025-09-10 02:46:51.447296', 'step': 17648, 'epoch': 3} {'type': 'loss', 'content': 0.05259963497519493, 'timestamp': '2025-09-10 02:46:51.451492', 'step': 17649, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 3322488817984}, 'timestamp': '2025-09-10 02:46:51.483036', 'step': 17649, 'epoch': 3} {'type': 'loss', 'content': 0.06125083565711975, 'timestamp': '2025-09-10 02:46:51.485035', 'step': 17650, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 3322488817984}, 'timestamp': '2025-09-10 02:46:51.516611', 'step': 17650, 'epoch': 3} {'type': 'loss', 'content': 0.15495526790618896, 'timestamp': '2025-09-10 02:46:51.519346', 'step': 17651, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 3322488817984}, 'timestamp': '2025-09-10 02:46:51.553730', 'step': 17651, 'epoch': 3} {'type': 'loss', 'content': 0.10218508541584015, 'timestamp': '2025-09-10 02:46:51.580278', 'step': 17652, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 144], 'flops': 4271696270016}, 'timestamp': '2025-09-10 02:46:51.614210', 'step': 17652, 'epoch': 3} {'type': 'loss', 'content': 0.10398980975151062, 'timestamp': '2025-09-10 02:46:51.619823', 'step': 17653, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 128], 'flops': 3797092544000}, 'timestamp': '2025-09-10 02:46:51.652961', 'step': 17653, 'epoch': 3} {'type': 'loss', 'content': 0.06369177997112274, 'timestamp': '2025-09-10 02:46:51.657873', 'step': 17654, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 3322488817984}, 'timestamp': '2025-09-10 02:46:51.698365', 'step': 17654, 'epoch': 3} {'type': 'loss', 'content': 0.1838761419057846, 'timestamp': '2025-09-10 02:46:51.700436', 'step': 17655, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 128], 'flops': 3797092544000}, 'timestamp': '2025-09-10 02:46:51.734130', 'step': 17655, 'epoch': 3} {'type': 'loss', 'content': 0.08642838895320892, 'timestamp': '2025-09-10 02:46:51.757745', 'step': 17656, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 160], 'flops': 4746299996032}, 'timestamp': '2025-09-10 02:46:51.789025', 'step': 17656, 'epoch': 3} {'type': 'loss', 'content': 0.02199595607817173, 'timestamp': '2025-09-10 02:46:51.792582', 'step': 17657, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 128], 'flops': 3797092544000}, 'timestamp': '2025-09-10 02:46:51.825126', 'step': 17657, 'epoch': 3} {'type': 'loss', 'content': 0.08224121481180191, 'timestamp': '2025-09-10 02:46:51.827230', 'step': 17658, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 128], 'flops': 3797092544000}, 'timestamp': '2025-09-10 02:46:51.857526', 'step': 17658, 'epoch': 3} {'type': 'loss', 'content': 0.05060015991330147, 'timestamp': '2025-09-10 02:46:51.861118', 'step': 17659, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 3322488817984}, 'timestamp': '2025-09-10 02:46:51.900946', 'step': 17659, 'epoch': 3} {'type': 'loss', 'content': 0.1020246297121048, 'timestamp': '2025-09-10 02:46:51.926129', 'step': 17660, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 144], 'flops': 4271696270016}, 'timestamp': '2025-09-10 02:46:51.962399', 'step': 17660, 'epoch': 3} {'type': 'loss', 'content': 0.09944777190685272, 'timestamp': '2025-09-10 02:46:51.968682', 'step': 17661, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 128], 'flops': 3797092544000}, 'timestamp': '2025-09-10 02:46:52.007517', 'step': 17661, 'epoch': 3} {'type': 'loss', 'content': 0.024493666365742683, 'timestamp': '2025-09-10 02:46:52.009844', 'step': 17662, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 3322488817984}, 'timestamp': '2025-09-10 02:46:52.046813', 'step': 17662, 'epoch': 3} {'type': 'loss', 'content': 0.04740191623568535, 'timestamp': '2025-09-10 02:46:52.052078', 'step': 17663, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 80], 'flops': 2373281365952}, 'timestamp': '2025-09-10 02:46:52.088053', 'step': 17663, 'epoch': 3} {'type': 'loss', 'content': 0.05796688422560692, 'timestamp': '2025-09-10 02:46:52.112208', 'step': 17664, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 128], 'flops': 3797092544000}, 'timestamp': '2025-09-10 02:46:52.149524', 'step': 17664, 'epoch': 3} {'type': 'loss', 'content': 0.0800221711397171, 'timestamp': '2025-09-10 02:46:52.152114', 'step': 17665, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 2847885091968}, 'timestamp': '2025-09-10 02:46:52.192774', 'step': 17665, 'epoch': 3} {'type': 'loss', 'content': 0.05626649037003517, 'timestamp': '2025-09-10 02:46:52.195925', 'step': 17666, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 144], 'flops': 4271696270016}, 'timestamp': '2025-09-10 02:46:52.237695', 'step': 17666, 'epoch': 3} {'type': 'loss', 'content': 0.07114994525909424, 'timestamp': '2025-09-10 02:46:52.240590', 'step': 17667, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 2847885091968}, 'timestamp': '2025-09-10 02:46:52.271117', 'step': 17667, 'epoch': 3} {'type': 'loss', 'content': 0.06123211979866028, 'timestamp': '2025-09-10 02:46:52.294626', 'step': 17668, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 208], 'flops': 6170111174080}, 'timestamp': '2025-09-10 02:46:52.327555', 'step': 17668, 'epoch': 3} {'type': 'loss', 'content': 0.09732334315776825, 'timestamp': '2025-09-10 02:46:52.332220', 'step': 17669, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 2847885091968}, 'timestamp': '2025-09-10 02:46:52.369521', 'step': 17669, 'epoch': 3} {'type': 'loss', 'content': 0.06870349496603012, 'timestamp': '2025-09-10 02:46:52.371917', 'step': 17670, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 3322488817984}, 'timestamp': '2025-09-10 02:46:52.404864', 'step': 17670, 'epoch': 3} {'type': 'loss', 'content': 0.047430865466594696, 'timestamp': '2025-09-10 02:46:52.409239', 'step': 17671, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 144], 'flops': 4271696270016}, 'timestamp': '2025-09-10 02:46:52.446005', 'step': 17671, 'epoch': 3} {'type': 'loss', 'content': 0.04774254560470581, 'timestamp': '2025-09-10 02:46:52.472744', 'step': 17672, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 80], 'flops': 2373281365952}, 'timestamp': '2025-09-10 02:46:52.506064', 'step': 17672, 'epoch': 3} {'type': 'loss', 'content': 0.011415095068514347, 'timestamp': '2025-09-10 02:46:52.510199', 'step': 17673, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 3322488817984}, 'timestamp': '2025-09-10 02:46:52.552490', 'step': 17673, 'epoch': 3} {'type': 'loss', 'content': 0.14684543013572693, 'timestamp': '2025-09-10 02:46:52.555345', 'step': 17674, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 128], 'flops': 3797092544000}, 'timestamp': '2025-09-10 02:46:52.588212', 'step': 17674, 'epoch': 3} {'type': 'loss', 'content': 0.10947691649198532, 'timestamp': '2025-09-10 02:46:52.592828', 'step': 17675, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 2847885091968}, 'timestamp': '2025-09-10 02:46:52.635312', 'step': 17675, 'epoch': 3} {'type': 'loss', 'content': 0.056077949702739716, 'timestamp': '2025-09-10 02:46:52.663704', 'step': 17676, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 128], 'flops': 3797092544000}, 'timestamp': '2025-09-10 02:46:52.705930', 'step': 17676, 'epoch': 3} {'type': 'loss', 'content': 0.037287402898073196, 'timestamp': '2025-09-10 02:46:52.709666', 'step': 17677, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 3322488817984}, 'timestamp': '2025-09-10 02:46:52.741891', 'step': 17677, 'epoch': 3} {'type': 'loss', 'content': 0.08537642657756805, 'timestamp': '2025-09-10 02:46:52.745165', 'step': 17678, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 144], 'flops': 4271696270016}, 'timestamp': '2025-09-10 02:46:52.781963', 'step': 17678, 'epoch': 3} {'type': 'loss', 'content': 0.06252066045999527, 'timestamp': '2025-09-10 02:46:52.784999', 'step': 17679, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 144], 'flops': 4271696270016}, 'timestamp': '2025-09-10 02:46:52.828478', 'step': 17679, 'epoch': 3} {'type': 'loss', 'content': 0.04162093624472618, 'timestamp': '2025-09-10 02:46:52.860404', 'step': 17680, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 2847885091968}, 'timestamp': '2025-09-10 02:46:52.894358', 'step': 17680, 'epoch': 3} {'type': 'loss', 'content': 0.13599209487438202, 'timestamp': '2025-09-10 02:46:52.896893', 'step': 17681, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 3322488817984}, 'timestamp': '2025-09-10 02:46:52.927982', 'step': 17681, 'epoch': 3} {'type': 'loss', 'content': 0.06659314781427383, 'timestamp': '2025-09-10 02:46:52.930288', 'step': 17682, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 80], 'flops': 2373281365952}, 'timestamp': '2025-09-10 02:46:52.964269', 'step': 17682, 'epoch': 3} {'type': 'loss', 'content': 0.03656893968582153, 'timestamp': '2025-09-10 02:46:52.968247', 'step': 17683, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 128], 'flops': 3797092544000}, 'timestamp': '2025-09-10 02:46:52.998080', 'step': 17683, 'epoch': 3} {'type': 'loss', 'content': 0.10675860196352005, 'timestamp': '2025-09-10 02:46:53.021457', 'step': 17684, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 3322488817984}, 'timestamp': '2025-09-10 02:46:53.054244', 'step': 17684, 'epoch': 3} {'type': 'loss', 'content': 0.09852565079927444, 'timestamp': '2025-09-10 02:46:53.056485', 'step': 17685, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 160], 'flops': 4746299996032}, 'timestamp': '2025-09-10 02:46:53.087486', 'step': 17685, 'epoch': 3} {'type': 'loss', 'content': 0.11778727173805237, 'timestamp': '2025-09-10 02:46:53.090246', 'step': 17686, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 160], 'flops': 4746299996032}, 'timestamp': '2025-09-10 02:46:53.120942', 'step': 17686, 'epoch': 3} {'type': 'loss', 'content': 0.07588957250118256, 'timestamp': '2025-09-10 02:46:53.123771', 'step': 17687, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 3322488817984}, 'timestamp': '2025-09-10 02:46:53.154900', 'step': 17687, 'epoch': 3} {'type': 'loss', 'content': 0.018928075209259987, 'timestamp': '2025-09-10 02:46:53.178343', 'step': 17688, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 3322488817984}, 'timestamp': '2025-09-10 02:46:53.211902', 'step': 17688, 'epoch': 3} {'type': 'loss', 'content': 0.11244912445545197, 'timestamp': '2025-09-10 02:46:53.214785', 'step': 17689, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 2847885091968}, 'timestamp': '2025-09-10 02:46:53.247570', 'step': 17689, 'epoch': 3} {'type': 'loss', 'content': 0.1162385642528534, 'timestamp': '2025-09-10 02:46:53.252145', 'step': 17690, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 3322488817984}, 'timestamp': '2025-09-10 02:46:53.286517', 'step': 17690, 'epoch': 3} {'type': 'loss', 'content': 0.0445459708571434, 'timestamp': '2025-09-10 02:46:53.288838', 'step': 17691, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 3322488817984}, 'timestamp': '2025-09-10 02:46:53.318611', 'step': 17691, 'epoch': 3} {'type': 'loss', 'content': 0.15656495094299316, 'timestamp': '2025-09-10 02:46:53.341961', 'step': 17692, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 128], 'flops': 3797092544000}, 'timestamp': '2025-09-10 02:46:53.381860', 'step': 17692, 'epoch': 3} {'type': 'loss', 'content': 0.023245995864272118, 'timestamp': '2025-09-10 02:46:53.384321', 'step': 17693, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 2847885091968}, 'timestamp': '2025-09-10 02:46:53.414201', 'step': 17693, 'epoch': 3} {'type': 'loss', 'content': 0.1285761594772339, 'timestamp': '2025-09-10 02:46:53.417465', 'step': 17694, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 160], 'flops': 4746299996032}, 'timestamp': '2025-09-10 02:46:53.450036', 'step': 17694, 'epoch': 3} {'type': 'loss', 'content': 0.10621236264705658, 'timestamp': '2025-09-10 02:46:53.452708', 'step': 17695, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 2847885091968}, 'timestamp': '2025-09-10 02:46:53.482646', 'step': 17695, 'epoch': 3} {'type': 'loss', 'content': 0.04601110517978668, 'timestamp': '2025-09-10 02:46:53.509046', 'step': 17696, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 144], 'flops': 4271696270016}, 'timestamp': '2025-09-10 02:46:53.539703', 'step': 17696, 'epoch': 3} {'type': 'loss', 'content': 0.12078876793384552, 'timestamp': '2025-09-10 02:46:53.542526', 'step': 17697, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 3322488817984}, 'timestamp': '2025-09-10 02:46:53.574291', 'step': 17697, 'epoch': 3} {'type': 'loss', 'content': 0.08028930425643921, 'timestamp': '2025-09-10 02:46:53.576776', 'step': 17698, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 3322488817984}, 'timestamp': '2025-09-10 02:46:53.607394', 'step': 17698, 'epoch': 3} {'type': 'loss', 'content': 0.08052760362625122, 'timestamp': '2025-09-10 02:46:53.610842', 'step': 17699, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 3322488817984}, 'timestamp': '2025-09-10 02:46:53.642418', 'step': 17699, 'epoch': 3} {'type': 'loss', 'content': 0.083113893866539, 'timestamp': '2025-09-10 02:46:53.666081', 'step': 17700, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 2847885091968}, 'timestamp': '2025-09-10 02:46:53.697015', 'step': 17700, 'epoch': 3} {'type': 'loss', 'content': 0.05879173055291176, 'timestamp': '2025-09-10 02:46:53.699825', 'step': 17701, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 2847885091968}, 'timestamp': '2025-09-10 02:46:53.729861', 'step': 17701, 'epoch': 3} {'type': 'loss', 'content': 0.11332180351018906, 'timestamp': '2025-09-10 02:46:53.731741', 'step': 17702, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 2847885091968}, 'timestamp': '2025-09-10 02:46:53.762062', 'step': 17702, 'epoch': 3} {'type': 'loss', 'content': 0.10963782668113708, 'timestamp': '2025-09-10 02:46:53.765617', 'step': 17703, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 2847885091968}, 'timestamp': '2025-09-10 02:46:53.799255', 'step': 17703, 'epoch': 3} {'type': 'loss', 'content': 0.09988167136907578, 'timestamp': '2025-09-10 02:46:53.822623', 'step': 17704, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 3322488817984}, 'timestamp': '2025-09-10 02:46:53.853671', 'step': 17704, 'epoch': 3} {'type': 'loss', 'content': 0.029948608949780464, 'timestamp': '2025-09-10 02:46:53.856000', 'step': 17705, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 3322488817984}, 'timestamp': '2025-09-10 02:46:53.886613', 'step': 17705, 'epoch': 3} {'type': 'loss', 'content': 0.12410096824169159, 'timestamp': '2025-09-10 02:46:53.889971', 'step': 17706, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 3322488817984}, 'timestamp': '2025-09-10 02:46:53.921054', 'step': 17706, 'epoch': 3} {'type': 'loss', 'content': 0.09582854062318802, 'timestamp': '2025-09-10 02:46:53.923130', 'step': 17707, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 3322488817984}, 'timestamp': '2025-09-10 02:46:53.953597', 'step': 17707, 'epoch': 3} {'type': 'loss', 'content': 0.061073627322912216, 'timestamp': '2025-09-10 02:46:53.977626', 'step': 17708, 'epoch': 3} {'type': 'flops', 'content': [{'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1582003754624}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1898404492032}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1898404492032}, {'type': 'perplexity', 'in_batch_dim': [8, 144], 'batch_size': 8, 'flops': 2847606704256}, {'type': 'perplexity', 'in_batch_dim': [8, 64], 'batch_size': 8, 'flops': 1265603017216}, {'type': 'perplexity', 'in_batch_dim': [8, 112], 'batch_size': 8, 'flops': 2214805229440}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1898404492032}, {'type': 'perplexity', 'in_batch_dim': [8, 112], 'batch_size': 8, 'flops': 2214805229440}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1898404492032}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1898404492032}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1898404492032}, {'type': 'perplexity', 'in_batch_dim': [8, 112], 'batch_size': 8, 'flops': 2214805229440}, {'type': 'perplexity', 'in_batch_dim': [8, 112], 'batch_size': 8, 'flops': 2214805229440}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1582003754624}, {'type': 'perplexity', 'in_batch_dim': [8, 144], 'batch_size': 8, 'flops': 2847606704256}, {'type': 'perplexity', 'in_batch_dim': [8, 112], 'batch_size': 8, 'flops': 2214805229440}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1582003754624}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1898404492032}, {'type': 'perplexity', 'in_batch_dim': [8, 64], 'batch_size': 8, 'flops': 1265603017216}, {'type': 'perplexity', 'in_batch_dim': [8, 64], 'batch_size': 8, 'flops': 1265603017216}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1898404492032}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1582003754624}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1582003754624}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1582003754624}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1582003754624}, {'type': 'perplexity', 'in_batch_dim': [8, 64], 'batch_size': 8, 'flops': 1265603017216}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1898404492032}, {'type': 'perplexity', 'in_batch_dim': [8, 64], 'batch_size': 8, 'flops': 1265603017216}, {'type': 'perplexity', 'in_batch_dim': [8, 64], 'batch_size': 8, 'flops': 1265603017216}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1582003754624}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1582003754624}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1582003754624}, {'type': 'perplexity', 'in_batch_dim': [8, 112], 'batch_size': 8, 'flops': 2214805229440}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1582003754624}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1582003754624}, {'type': 'perplexity', 'in_batch_dim': [8, 160], 'batch_size': 8, 'flops': 3164007441664}, {'type': 'perplexity', 'in_batch_dim': [8, 64], 'batch_size': 8, 'flops': 1265603017216}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1898404492032}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1898404492032}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1898404492032}, {'type': 'perplexity', 'in_batch_dim': [8, 64], 'batch_size': 8, 'flops': 1265603017216}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1582003754624}, {'type': 'perplexity', 'in_batch_dim': [8, 64], 'batch_size': 8, 'flops': 1265603017216}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1582003754624}, {'type': 'perplexity', 'in_batch_dim': [8, 64], 'batch_size': 8, 'flops': 1265603017216}, {'type': 'perplexity', 'in_batch_dim': [8, 112], 'batch_size': 8, 'flops': 2214805229440}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1582003754624}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1898404492032}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1898404492032}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1898404492032}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1898404492032}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1898404492032}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1898404492032}, {'type': 'perplexity', 'in_batch_dim': [8, 64], 'batch_size': 8, 'flops': 1265603017216}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1898404492032}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1582003754624}, {'type': 'perplexity', 'in_batch_dim': [8, 64], 'batch_size': 8, 'flops': 1265603017216}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1898404492032}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1898404492032}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1898404492032}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1898404492032}, {'type': 'perplexity', 'in_batch_dim': [8, 128], 'batch_size': 8, 'flops': 2531205966848}, {'type': 'perplexity', 'in_batch_dim': [8, 112], 'batch_size': 8, 'flops': 2214805229440}, {'type': 'perplexity', 'in_batch_dim': [8, 64], 'batch_size': 8, 'flops': 1265603017216}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1582003754624}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1898404492032}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1582003754624}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1898404492032}, {'type': 'perplexity', 'in_batch_dim': [8, 64], 'batch_size': 8, 'flops': 1265603017216}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1898404492032}, {'type': 'perplexity', 'in_batch_dim': [8, 112], 'batch_size': 8, 'flops': 2214805229440}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1898404492032}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1582003754624}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1582003754624}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1582003754624}, {'type': 'perplexity', 'in_batch_dim': [8, 64], 'batch_size': 8, 'flops': 1265603017216}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1582003754624}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1898404492032}, {'type': 'perplexity', 'in_batch_dim': [8, 64], 'batch_size': 8, 'flops': 1265603017216}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1582003754624}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1898404492032}, {'type': 'perplexity', 'in_batch_dim': [8, 64], 'batch_size': 8, 'flops': 1265603017216}, {'type': 'perplexity', 'in_batch_dim': [8, 112], 'batch_size': 8, 'flops': 2214805229440}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1582003754624}, {'type': 'perplexity', 'in_batch_dim': [8, 144], 'batch_size': 8, 'flops': 2847606704256}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1582003754624}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1582003754624}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1582003754624}, {'type': 'perplexity', 'in_batch_dim': [8, 64], 'batch_size': 8, 'flops': 1265603017216}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1898404492032}, {'type': 'perplexity', 'in_batch_dim': [8, 112], 'batch_size': 8, 'flops': 2214805229440}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1898404492032}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1582003754624}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1582003754624}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1582003754624}, {'type': 'perplexity', 'in_batch_dim': [8, 112], 'batch_size': 8, 'flops': 2214805229440}, {'type': 'perplexity', 'in_batch_dim': [8, 64], 'batch_size': 8, 'flops': 1265603017216}, {'type': 'perplexity', 'in_batch_dim': [8, 112], 'batch_size': 8, 'flops': 2214805229440}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1582003754624}, {'type': 'perplexity', 'in_batch_dim': [8, 64], 'batch_size': 8, 'flops': 1265603017216}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1582003754624}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1582003754624}, {'type': 'perplexity', 'in_batch_dim': [8, 64], 'batch_size': 8, 'flops': 1265603017216}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1582003754624}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1582003754624}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1582003754624}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1898404492032}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1582003754624}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1582003754624}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1898404492032}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1898404492032}, {'type': 'perplexity', 'in_batch_dim': [8, 112], 'batch_size': 8, 'flops': 2214805229440}, {'type': 'perplexity', 'in_batch_dim': [8, 64], 'batch_size': 8, 'flops': 1265603017216}, {'type': 'perplexity', 'in_batch_dim': [8, 112], 'batch_size': 8, 'flops': 2214805229440}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1898404492032}, {'type': 'perplexity', 'in_batch_dim': [8, 112], 'batch_size': 8, 'flops': 2214805229440}, {'type': 'perplexity', 'in_batch_dim': [8, 128], 'batch_size': 8, 'flops': 2531205966848}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1582003754624}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1898404492032}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1898404492032}, {'type': 'perplexity', 'in_batch_dim': [8, 192], 'batch_size': 8, 'flops': 3796808916480}, {'type': 'perplexity', 'in_batch_dim': [8, 64], 'batch_size': 8, 'flops': 1265603017216}, {'type': 'perplexity', 'in_batch_dim': [8, 144], 'batch_size': 8, 'flops': 2847606704256}, {'type': 'perplexity', 'in_batch_dim': [8, 64], 'batch_size': 8, 'flops': 1265603017216}, {'type': 'perplexity', 'in_batch_dim': [8, 144], 'batch_size': 8, 'flops': 2847606704256}, {'type': 'perplexity', 'in_batch_dim': [8, 64], 'batch_size': 8, 'flops': 1265603017216}, {'type': 'perplexity', 'in_batch_dim': [8, 64], 'batch_size': 8, 'flops': 1265603017216}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1898404492032}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1898404492032}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1582003754624}, {'type': 'perplexity', 'in_batch_dim': [8, 128], 'batch_size': 8, 'flops': 2531205966848}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1898404492032}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1898404492032}, {'type': 'perplexity', 'in_batch_dim': [8, 112], 'batch_size': 8, 'flops': 2214805229440}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1582003754624}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1582003754624}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1898404492032}, {'type': 'perplexity', 'in_batch_dim': [8, 64], 'batch_size': 8, 'flops': 1265603017216}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1898404492032}, {'type': 'perplexity', 'in_batch_dim': [8, 112], 'batch_size': 8, 'flops': 2214805229440}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1898404492032}, {'type': 'perplexity', 'in_batch_dim': [8, 64], 'batch_size': 8, 'flops': 1265603017216}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1582003754624}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1898404492032}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1898404492032}, {'type': 'perplexity', 'in_batch_dim': [8, 64], 'batch_size': 8, 'flops': 1265603017216}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1898404492032}, {'type': 'perplexity', 'in_batch_dim': [8, 64], 'batch_size': 8, 'flops': 1265603017216}, {'type': 'perplexity', 'in_batch_dim': [8, 112], 'batch_size': 8, 'flops': 2214805229440}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1582003754624}, {'type': 'perplexity', 'in_batch_dim': [8, 128], 'batch_size': 8, 'flops': 2531205966848}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1582003754624}, {'type': 'perplexity', 'in_batch_dim': [8, 112], 'batch_size': 8, 'flops': 2214805229440}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1898404492032}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1582003754624}, {'type': 'perplexity', 'in_batch_dim': [8, 112], 'batch_size': 8, 'flops': 2214805229440}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1582003754624}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1898404492032}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1898404492032}, {'type': 'perplexity', 'in_batch_dim': [8, 128], 'batch_size': 8, 'flops': 2531205966848}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1898404492032}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1898404492032}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1582003754624}, {'type': 'perplexity', 'in_batch_dim': [8, 112], 'batch_size': 8, 'flops': 2214805229440}, {'type': 'perplexity', 'in_batch_dim': [8, 128], 'batch_size': 8, 'flops': 2531205966848}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1898404492032}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1582003754624}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1582003754624}, {'type': 'perplexity', 'in_batch_dim': [8, 128], 'batch_size': 8, 'flops': 2531205966848}, {'type': 'perplexity', 'in_batch_dim': [8, 112], 'batch_size': 8, 'flops': 2214805229440}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1582003754624}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1582003754624}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1898404492032}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1582003754624}, {'type': 'perplexity', 'in_batch_dim': [8, 64], 'batch_size': 8, 'flops': 1265603017216}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1582003754624}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1582003754624}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1582003754624}, {'type': 'perplexity', 'in_batch_dim': [8, 112], 'batch_size': 8, 'flops': 2214805229440}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1582003754624}, {'type': 'perplexity', 'in_batch_dim': [8, 64], 'batch_size': 8, 'flops': 1265603017216}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1582003754624}, {'type': 'perplexity', 'in_batch_dim': [8, 128], 'batch_size': 8, 'flops': 2531205966848}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1898404492032}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1898404492032}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1582003754624}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1898404492032}, {'type': 'perplexity', 'in_batch_dim': [8, 64], 'batch_size': 8, 'flops': 1265603017216}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1898404492032}, {'type': 'perplexity', 'in_batch_dim': [8, 112], 'batch_size': 8, 'flops': 2214805229440}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1898404492032}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1582003754624}, {'type': 'perplexity', 'in_batch_dim': [8, 112], 'batch_size': 8, 'flops': 2214805229440}, {'type': 'perplexity', 'in_batch_dim': [8, 112], 'batch_size': 8, 'flops': 2214805229440}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1898404492032}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1898404492032}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1898404492032}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1898404492032}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1582003754624}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1582003754624}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1582003754624}, {'type': 'perplexity', 'in_batch_dim': [8, 112], 'batch_size': 8, 'flops': 2214805229440}, {'type': 'perplexity', 'in_batch_dim': [8, 64], 'batch_size': 8, 'flops': 1265603017216}, {'type': 'perplexity', 'in_batch_dim': [8, 144], 'batch_size': 8, 'flops': 2847606704256}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1898404492032}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1898404492032}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1898404492032}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1898404492032}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1898404492032}, {'type': 'perplexity', 'in_batch_dim': [8, 112], 'batch_size': 8, 'flops': 2214805229440}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1582003754624}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1582003754624}, {'type': 'perplexity', 'in_batch_dim': [8, 64], 'batch_size': 8, 'flops': 1265603017216}, {'type': 'perplexity', 'in_batch_dim': [8, 112], 'batch_size': 8, 'flops': 2214805229440}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1582003754624}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1898404492032}, {'type': 'perplexity', 'in_batch_dim': [8, 64], 'batch_size': 8, 'flops': 1265603017216}, {'type': 'perplexity', 'in_batch_dim': [8, 112], 'batch_size': 8, 'flops': 2214805229440}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1898404492032}, {'type': 'perplexity', 'in_batch_dim': [8, 64], 'batch_size': 8, 'flops': 1265603017216}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1582003754624}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1582003754624}, {'type': 'perplexity', 'in_batch_dim': [8, 64], 'batch_size': 8, 'flops': 1265603017216}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1898404492032}, {'type': 'perplexity', 'in_batch_dim': [8, 64], 'batch_size': 8, 'flops': 1265603017216}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1582003754624}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1898404492032}, {'type': 'perplexity', 'in_batch_dim': [8, 112], 'batch_size': 8, 'flops': 2214805229440}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1582003754624}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1898404492032}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1582003754624}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1898404492032}, {'type': 'perplexity', 'in_batch_dim': [8, 112], 'batch_size': 8, 'flops': 2214805229440}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1898404492032}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1898404492032}, {'type': 'perplexity', 'in_batch_dim': [8, 64], 'batch_size': 8, 'flops': 1265603017216}, {'type': 'perplexity', 'in_batch_dim': [8, 64], 'batch_size': 8, 'flops': 1265603017216}, {'type': 'perplexity', 'in_batch_dim': [8, 128], 'batch_size': 8, 'flops': 2531205966848}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1582003754624}, {'type': 'perplexity', 'in_batch_dim': [8, 64], 'batch_size': 8, 'flops': 1265603017216}, {'type': 'perplexity', 'in_batch_dim': [8, 112], 'batch_size': 8, 'flops': 2214805229440}, {'type': 'perplexity', 'in_batch_dim': [8, 112], 'batch_size': 8, 'flops': 2214805229440}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1898404492032}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1582003754624}, {'type': 'perplexity', 'in_batch_dim': [8, 128], 'batch_size': 8, 'flops': 2531205966848}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1898404492032}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1582003754624}, {'type': 'perplexity', 'in_batch_dim': [8, 112], 'batch_size': 8, 'flops': 2214805229440}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1898404492032}, {'type': 'perplexity', 'in_batch_dim': [8, 64], 'batch_size': 8, 'flops': 1265603017216}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1582003754624}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1898404492032}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1898404492032}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1582003754624}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1582003754624}, {'type': 'perplexity', 'in_batch_dim': [8, 64], 'batch_size': 8, 'flops': 1265603017216}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1898404492032}, {'type': 'perplexity', 'in_batch_dim': [8, 112], 'batch_size': 8, 'flops': 2214805229440}, {'type': 'perplexity', 'in_batch_dim': [8, 64], 'batch_size': 8, 'flops': 1265603017216}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1898404492032}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1582003754624}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1582003754624}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1898404492032}, {'type': 'perplexity', 'in_batch_dim': [8, 112], 'batch_size': 8, 'flops': 2214805229440}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1898404492032}, {'type': 'perplexity', 'in_batch_dim': [8, 112], 'batch_size': 8, 'flops': 2214805229440}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1582003754624}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1582003754624}, {'type': 'perplexity', 'in_batch_dim': [8, 64], 'batch_size': 8, 'flops': 1265603017216}, {'type': 'perplexity', 'in_batch_dim': [8, 112], 'batch_size': 8, 'flops': 2214805229440}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1582003754624}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1898404492032}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1582003754624}, {'type': 'perplexity', 'in_batch_dim': [8, 112], 'batch_size': 8, 'flops': 2214805229440}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1898404492032}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1898404492032}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1582003754624}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1898404492032}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1582003754624}, {'type': 'perplexity', 'in_batch_dim': [8, 112], 'batch_size': 8, 'flops': 2214805229440}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1582003754624}, {'type': 'perplexity', 'in_batch_dim': [8, 64], 'batch_size': 8, 'flops': 1265603017216}, {'type': 'perplexity', 'in_batch_dim': [8, 64], 'batch_size': 8, 'flops': 1265603017216}, {'type': 'perplexity', 'in_batch_dim': [8, 48], 'batch_size': 8, 'flops': 949202279808}, {'type': 'perplexity', 'in_batch_dim': [8, 112], 'batch_size': 8, 'flops': 2214805229440}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1582003754624}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1898404492032}, {'type': 'perplexity', 'in_batch_dim': [8, 128], 'batch_size': 8, 'flops': 2531205966848}, {'type': 'perplexity', 'in_batch_dim': [8, 112], 'batch_size': 8, 'flops': 2214805229440}, {'type': 'perplexity', 'in_batch_dim': [8, 128], 'batch_size': 8, 'flops': 2531205966848}, {'type': 'perplexity', 'in_batch_dim': [8, 112], 'batch_size': 8, 'flops': 2214805229440}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1582003754624}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1898404492032}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1582003754624}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1582003754624}, {'type': 'perplexity', 'in_batch_dim': [8, 112], 'batch_size': 8, 'flops': 2214805229440}, {'type': 'perplexity', 'in_batch_dim': [8, 112], 'batch_size': 8, 'flops': 2214805229440}, {'type': 'perplexity', 'in_batch_dim': [8, 64], 'batch_size': 8, 'flops': 1265603017216}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1898404492032}, {'type': 'perplexity', 'in_batch_dim': [8, 112], 'batch_size': 8, 'flops': 2214805229440}, {'type': 'perplexity', 'in_batch_dim': [8, 64], 'batch_size': 8, 'flops': 1265603017216}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1898404492032}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1582003754624}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1898404492032}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1898404492032}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1898404492032}, {'type': 'perplexity', 'in_batch_dim': [8, 48], 'batch_size': 8, 'flops': 949202279808}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1898404492032}, {'type': 'perplexity', 'in_batch_dim': [8, 112], 'batch_size': 8, 'flops': 2214805229440}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1582003754624}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1898404492032}, {'type': 'perplexity', 'in_batch_dim': [8, 64], 'batch_size': 8, 'flops': 1265603017216}, {'type': 'perplexity', 'in_batch_dim': [8, 112], 'batch_size': 8, 'flops': 2214805229440}, {'type': 'perplexity', 'in_batch_dim': [8, 64], 'batch_size': 8, 'flops': 1265603017216}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1582003754624}, {'type': 'perplexity', 'in_batch_dim': [8, 64], 'batch_size': 8, 'flops': 1265603017216}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1582003754624}, {'type': 'perplexity', 'in_batch_dim': [8, 112], 'batch_size': 8, 'flops': 2214805229440}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1582003754624}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1582003754624}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1582003754624}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1582003754624}, {'type': 'perplexity', 'in_batch_dim': [8, 112], 'batch_size': 8, 'flops': 2214805229440}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1582003754624}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1898404492032}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1898404492032}, {'type': 'perplexity', 'in_batch_dim': [8, 112], 'batch_size': 8, 'flops': 2214805229440}, {'type': 'perplexity', 'in_batch_dim': [8, 144], 'batch_size': 8, 'flops': 2847606704256}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1582003754624}, {'type': 'perplexity', 'in_batch_dim': [8, 64], 'batch_size': 8, 'flops': 1265603017216}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1898404492032}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1898404492032}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1582003754624}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1898404492032}, {'type': 'perplexity', 'in_batch_dim': [8, 128], 'batch_size': 8, 'flops': 2531205966848}, {'type': 'perplexity', 'in_batch_dim': [8, 112], 'batch_size': 8, 'flops': 2214805229440}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1898404492032}, {'type': 'perplexity', 'in_batch_dim': [8, 64], 'batch_size': 8, 'flops': 1265603017216}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1898404492032}, {'type': 'perplexity', 'in_batch_dim': [8, 160], 'batch_size': 8, 'flops': 3164007441664}, {'type': 'perplexity', 'in_batch_dim': [8, 64], 'batch_size': 8, 'flops': 1265603017216}, {'type': 'perplexity', 'in_batch_dim': [8, 112], 'batch_size': 8, 'flops': 2214805229440}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1898404492032}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1898404492032}, {'type': 'perplexity', 'in_batch_dim': [8, 144], 'batch_size': 8, 'flops': 2847606704256}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1582003754624}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1898404492032}, {'type': 'perplexity', 'in_batch_dim': [8, 64], 'batch_size': 8, 'flops': 1265603017216}, {'type': 'perplexity', 'in_batch_dim': [8, 64], 'batch_size': 8, 'flops': 1265603017216}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1898404492032}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1898404492032}, {'type': 'perplexity', 'in_batch_dim': [8, 112], 'batch_size': 8, 'flops': 2214805229440}, {'type': 'perplexity', 'in_batch_dim': [8, 112], 'batch_size': 8, 'flops': 2214805229440}, {'type': 'perplexity', 'in_batch_dim': [8, 128], 'batch_size': 8, 'flops': 2531205966848}, {'type': 'perplexity', 'in_batch_dim': [8, 112], 'batch_size': 8, 'flops': 2214805229440}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1582003754624}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1582003754624}, {'type': 'perplexity', 'in_batch_dim': [8, 112], 'batch_size': 8, 'flops': 2214805229440}, {'type': 'perplexity', 'in_batch_dim': [8, 128], 'batch_size': 8, 'flops': 2531205966848}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1582003754624}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1582003754624}, {'type': 'perplexity', 'in_batch_dim': [8, 64], 'batch_size': 8, 'flops': 1265603017216}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1898404492032}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1582003754624}, {'type': 'perplexity', 'in_batch_dim': [8, 112], 'batch_size': 8, 'flops': 2214805229440}, {'type': 'perplexity', 'in_batch_dim': [8, 64], 'batch_size': 8, 'flops': 1265603017216}, {'type': 'perplexity', 'in_batch_dim': [8, 64], 'batch_size': 8, 'flops': 1265603017216}, {'type': 'perplexity', 'in_batch_dim': [8, 128], 'batch_size': 8, 'flops': 2531205966848}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1898404492032}, {'type': 'perplexity', 'in_batch_dim': [8, 112], 'batch_size': 8, 'flops': 2214805229440}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1582003754624}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1582003754624}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1582003754624}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1582003754624}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1582003754624}, {'type': 'perplexity', 'in_batch_dim': [8, 112], 'batch_size': 8, 'flops': 2214805229440}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1898404492032}, {'type': 'perplexity', 'in_batch_dim': [8, 112], 'batch_size': 8, 'flops': 2214805229440}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1898404492032}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1582003754624}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1582003754624}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1898404492032}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1898404492032}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1582003754624}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1582003754624}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1582003754624}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1582003754624}, {'type': 'perplexity', 'in_batch_dim': [8, 48], 'batch_size': 8, 'flops': 949202279808}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1582003754624}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1898404492032}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1582003754624}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1898404492032}, {'type': 'perplexity', 'in_batch_dim': [8, 112], 'batch_size': 8, 'flops': 2214805229440}, {'type': 'perplexity', 'in_batch_dim': [8, 128], 'batch_size': 8, 'flops': 2531205966848}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1898404492032}, {'type': 'perplexity', 'in_batch_dim': [8, 64], 'batch_size': 8, 'flops': 1265603017216}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1898404492032}, {'type': 'perplexity', 'in_batch_dim': [8, 64], 'batch_size': 8, 'flops': 1265603017216}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1582003754624}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1582003754624}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1582003754624}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1582003754624}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1898404492032}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1582003754624}, {'type': 'perplexity', 'in_batch_dim': [8, 112], 'batch_size': 8, 'flops': 2214805229440}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1898404492032}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1582003754624}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1898404492032}, {'type': 'perplexity', 'in_batch_dim': [8, 64], 'batch_size': 8, 'flops': 1265603017216}, {'type': 'perplexity', 'in_batch_dim': [8, 64], 'batch_size': 8, 'flops': 1265603017216}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1898404492032}, {'type': 'perplexity', 'in_batch_dim': [8, 64], 'batch_size': 8, 'flops': 1265603017216}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1582003754624}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1582003754624}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1582003754624}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1582003754624}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1582003754624}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1582003754624}, {'type': 'perplexity', 'in_batch_dim': [8, 64], 'batch_size': 8, 'flops': 1265603017216}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1582003754624}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1582003754624}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1898404492032}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1898404492032}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1898404492032}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1898404492032}, {'type': 'perplexity', 'in_batch_dim': [8, 128], 'batch_size': 8, 'flops': 2531205966848}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1582003754624}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1898404492032}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1898404492032}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1898404492032}, {'type': 'perplexity', 'in_batch_dim': [8, 64], 'batch_size': 8, 'flops': 1265603017216}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1898404492032}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1898404492032}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1582003754624}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1898404492032}, {'type': 'perplexity', 'in_batch_dim': [8, 112], 'batch_size': 8, 'flops': 2214805229440}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1898404492032}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1582003754624}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1898404492032}, {'type': 'perplexity', 'in_batch_dim': [8, 64], 'batch_size': 8, 'flops': 1265603017216}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1898404492032}, {'type': 'perplexity', 'in_batch_dim': [8, 112], 'batch_size': 8, 'flops': 2214805229440}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1898404492032}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1898404492032}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1582003754624}, {'type': 'perplexity', 'in_batch_dim': [8, 64], 'batch_size': 8, 'flops': 1265603017216}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1582003754624}, {'type': 'perplexity', 'in_batch_dim': [8, 128], 'batch_size': 8, 'flops': 2531205966848}, {'type': 'perplexity', 'in_batch_dim': [8, 64], 'batch_size': 8, 'flops': 1265603017216}, {'type': 'perplexity', 'in_batch_dim': [8, 128], 'batch_size': 8, 'flops': 2531205966848}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1898404492032}, {'type': 'perplexity', 'in_batch_dim': [8, 112], 'batch_size': 8, 'flops': 2214805229440}, {'type': 'perplexity', 'in_batch_dim': [8, 112], 'batch_size': 8, 'flops': 2214805229440}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1898404492032}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1898404492032}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1582003754624}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1582003754624}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1582003754624}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1898404492032}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1582003754624}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1582003754624}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1898404492032}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1582003754624}, {'type': 'perplexity', 'in_batch_dim': [8, 64], 'batch_size': 8, 'flops': 1265603017216}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1898404492032}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1898404492032}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1898404492032}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1898404492032}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1582003754624}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1898404492032}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1582003754624}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1582003754624}, {'type': 'perplexity', 'in_batch_dim': [8, 112], 'batch_size': 8, 'flops': 2214805229440}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1582003754624}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1898404492032}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1582003754624}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1582003754624}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1582003754624}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1898404492032}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1582003754624}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1898404492032}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1582003754624}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1582003754624}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1898404492032}, {'type': 'perplexity', 'in_batch_dim': [8, 112], 'batch_size': 8, 'flops': 2214805229440}, {'type': 'perplexity', 'in_batch_dim': [8, 128], 'batch_size': 8, 'flops': 2531205966848}, {'type': 'perplexity', 'in_batch_dim': [8, 112], 'batch_size': 8, 'flops': 2214805229440}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1582003754624}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1898404492032}, {'type': 'perplexity', 'in_batch_dim': [8, 64], 'batch_size': 8, 'flops': 1265603017216}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1898404492032}, {'type': 'perplexity', 'in_batch_dim': [8, 64], 'batch_size': 8, 'flops': 1265603017216}, {'type': 'perplexity', 'in_batch_dim': [8, 144], 'batch_size': 8, 'flops': 2847606704256}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1582003754624}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1898404492032}, {'type': 'perplexity', 'in_batch_dim': [8, 112], 'batch_size': 8, 'flops': 2214805229440}, {'type': 'perplexity', 'in_batch_dim': [8, 144], 'batch_size': 8, 'flops': 2847606704256}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1582003754624}, {'type': 'perplexity', 'in_batch_dim': [8, 112], 'batch_size': 8, 'flops': 2214805229440}, {'type': 'perplexity', 'in_batch_dim': [8, 64], 'batch_size': 8, 'flops': 1265603017216}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1582003754624}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1898404492032}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1898404492032}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1582003754624}, {'type': 'perplexity', 'in_batch_dim': [8, 112], 'batch_size': 8, 'flops': 2214805229440}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1582003754624}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1582003754624}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1582003754624}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1582003754624}, {'type': 'perplexity', 'in_batch_dim': [8, 112], 'batch_size': 8, 'flops': 2214805229440}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1898404492032}, {'type': 'perplexity', 'in_batch_dim': [8, 128], 'batch_size': 8, 'flops': 2531205966848}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1582003754624}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1582003754624}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1898404492032}, {'type': 'perplexity', 'in_batch_dim': [8, 64], 'batch_size': 8, 'flops': 1265603017216}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1898404492032}, {'type': 'perplexity', 'in_batch_dim': [8, 64], 'batch_size': 8, 'flops': 1265603017216}, {'type': 'perplexity', 'in_batch_dim': [8, 112], 'batch_size': 8, 'flops': 2214805229440}, {'type': 'perplexity', 'in_batch_dim': [8, 112], 'batch_size': 8, 'flops': 2214805229440}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1582003754624}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1898404492032}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1582003754624}, {'type': 'perplexity', 'in_batch_dim': [8, 64], 'batch_size': 8, 'flops': 1265603017216}, {'type': 'perplexity', 'in_batch_dim': [8, 64], 'batch_size': 8, 'flops': 1265603017216}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1898404492032}, {'type': 'perplexity', 'in_batch_dim': [8, 64], 'batch_size': 8, 'flops': 1265603017216}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1898404492032}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1898404492032}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1582003754624}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1898404492032}, {'type': 'perplexity', 'in_batch_dim': [8, 144], 'batch_size': 8, 'flops': 2847606704256}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1898404492032}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1582003754624}, {'type': 'perplexity', 'in_batch_dim': [8, 128], 'batch_size': 8, 'flops': 2531205966848}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1898404492032}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1898404492032}, {'type': 'perplexity', 'in_batch_dim': [8, 112], 'batch_size': 8, 'flops': 2214805229440}, {'type': 'perplexity', 'in_batch_dim': [8, 112], 'batch_size': 8, 'flops': 2214805229440}, {'type': 'perplexity', 'in_batch_dim': [8, 64], 'batch_size': 8, 'flops': 1265603017216}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1582003754624}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1898404492032}, {'type': 'perplexity', 'in_batch_dim': [8, 128], 'batch_size': 8, 'flops': 2531205966848}, {'type': 'perplexity', 'in_batch_dim': [8, 144], 'batch_size': 8, 'flops': 2847606704256}, {'type': 'perplexity', 'in_batch_dim': [8, 112], 'batch_size': 8, 'flops': 2214805229440}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1582003754624}, {'type': 'perplexity', 'in_batch_dim': [8, 64], 'batch_size': 8, 'flops': 1265603017216}, {'type': 'perplexity', 'in_batch_dim': [8, 64], 'batch_size': 8, 'flops': 1265603017216}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1898404492032}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1582003754624}, {'type': 'perplexity', 'in_batch_dim': [8, 64], 'batch_size': 8, 'flops': 1265603017216}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1898404492032}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1582003754624}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1898404492032}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1898404492032}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1582003754624}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1582003754624}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1582003754624}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1582003754624}, {'type': 'perplexity', 'in_batch_dim': [3, 48], 'batch_size': 8, 'flops': 949202279808}], 'timestamp': '2025-09-10 02:47:01.777884', 'step': 17708, 'epoch': 3} {'type': 'pplx', 'content': 11313.83659078076, 'timestamp': '2025-09-10 02:47:01.781585', 'step': 17708, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 144], 'flops': 4271696270016}, 'timestamp': '2025-09-10 02:47:01.811374', 'step': 17708, 'epoch': 3} {'type': 'loss', 'content': 0.056778836995363235, 'timestamp': '2025-09-10 02:47:01.813440', 'step': 17709, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 3322488817984}, 'timestamp': '2025-09-10 02:47:01.843113', 'step': 17709, 'epoch': 3} {'type': 'loss', 'content': 0.09930401295423508, 'timestamp': '2025-09-10 02:47:01.845450', 'step': 17710, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 3322488817984}, 'timestamp': '2025-09-10 02:47:01.875117', 'step': 17710, 'epoch': 3} {'type': 'loss', 'content': 0.05317428708076477, 'timestamp': '2025-09-10 02:47:01.890576', 'step': 17711, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 144], 'flops': 4271696270016}, 'timestamp': '2025-09-10 02:47:01.921291', 'step': 17711, 'epoch': 3} {'type': 'loss', 'content': 0.16157180070877075, 'timestamp': '2025-09-10 02:47:01.944987', 'step': 17712, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 128], 'flops': 3797092544000}, 'timestamp': '2025-09-10 02:47:01.974884', 'step': 17712, 'epoch': 3} {'type': 'loss', 'content': 0.06799879670143127, 'timestamp': '2025-09-10 02:47:01.977530', 'step': 17713, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 144], 'flops': 4271696270016}, 'timestamp': '2025-09-10 02:47:02.008619', 'step': 17713, 'epoch': 3} {'type': 'loss', 'content': 0.09642001241445541, 'timestamp': '2025-09-10 02:47:02.011344', 'step': 17714, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 2847885091968}, 'timestamp': '2025-09-10 02:47:02.042027', 'step': 17714, 'epoch': 3} {'type': 'loss', 'content': 0.05856803432106972, 'timestamp': '2025-09-10 02:47:02.044294', 'step': 17715, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 2847885091968}, 'timestamp': '2025-09-10 02:47:02.074224', 'step': 17715, 'epoch': 3} {'type': 'loss', 'content': 0.05282066762447357, 'timestamp': '2025-09-10 02:47:02.097948', 'step': 17716, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 128], 'flops': 3797092544000}, 'timestamp': '2025-09-10 02:47:02.128364', 'step': 17716, 'epoch': 3} {'type': 'loss', 'content': 0.0560159869492054, 'timestamp': '2025-09-10 02:47:02.133032', 'step': 17717, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 176], 'flops': 5220903722048}, 'timestamp': '2025-09-10 02:47:02.164755', 'step': 17717, 'epoch': 3} {'type': 'loss', 'content': 0.08036964386701584, 'timestamp': '2025-09-10 02:47:02.169184', 'step': 17718, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 128], 'flops': 3797092544000}, 'timestamp': '2025-09-10 02:47:02.200639', 'step': 17718, 'epoch': 3} {'type': 'loss', 'content': 0.00913961511105299, 'timestamp': '2025-09-10 02:47:02.202959', 'step': 17719, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 3322488817984}, 'timestamp': '2025-09-10 02:47:02.233193', 'step': 17719, 'epoch': 3} {'type': 'loss', 'content': 0.0879078358411789, 'timestamp': '2025-09-10 02:47:02.256951', 'step': 17720, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 80], 'flops': 2373281365952}, 'timestamp': '2025-09-10 02:47:02.286300', 'step': 17720, 'epoch': 3} {'type': 'loss', 'content': 0.018638793379068375, 'timestamp': '2025-09-10 02:47:02.288575', 'step': 17721, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 3322488817984}, 'timestamp': '2025-09-10 02:47:02.319882', 'step': 17721, 'epoch': 3} {'type': 'loss', 'content': 0.12000717967748642, 'timestamp': '2025-09-10 02:47:02.322276', 'step': 17722, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 80], 'flops': 2373281365952}, 'timestamp': '2025-09-10 02:47:02.352323', 'step': 17722, 'epoch': 3} {'type': 'loss', 'content': 0.04316330701112747, 'timestamp': '2025-09-10 02:47:02.354395', 'step': 17723, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 3322488817984}, 'timestamp': '2025-09-10 02:47:02.384535', 'step': 17723, 'epoch': 3} {'type': 'loss', 'content': 0.10536880791187286, 'timestamp': '2025-09-10 02:47:02.409211', 'step': 17724, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 2847885091968}, 'timestamp': '2025-09-10 02:47:02.440654', 'step': 17724, 'epoch': 3} {'type': 'loss', 'content': 0.0989697277545929, 'timestamp': '2025-09-10 02:47:02.444772', 'step': 17725, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 3322488817984}, 'timestamp': '2025-09-10 02:47:02.477639', 'step': 17725, 'epoch': 3} {'type': 'loss', 'content': 0.047678884118795395, 'timestamp': '2025-09-10 02:47:02.480383', 'step': 17726, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 144], 'flops': 4271696270016}, 'timestamp': '2025-09-10 02:47:02.511726', 'step': 17726, 'epoch': 3} {'type': 'loss', 'content': 0.14289115369319916, 'timestamp': '2025-09-10 02:47:02.514034', 'step': 17727, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 3322488817984}, 'timestamp': '2025-09-10 02:47:02.548424', 'step': 17727, 'epoch': 3} {'type': 'loss', 'content': 0.09011179208755493, 'timestamp': '2025-09-10 02:47:02.572770', 'step': 17728, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 128], 'flops': 3797092544000}, 'timestamp': '2025-09-10 02:47:02.604530', 'step': 17728, 'epoch': 3} {'type': 'loss', 'content': 0.0926368236541748, 'timestamp': '2025-09-10 02:47:02.609706', 'step': 17729, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 2847885091968}, 'timestamp': '2025-09-10 02:47:02.641130', 'step': 17729, 'epoch': 3} {'type': 'loss', 'content': 0.10345843434333801, 'timestamp': '2025-09-10 02:47:02.651244', 'step': 17730, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 2847885091968}, 'timestamp': '2025-09-10 02:47:02.696326', 'step': 17730, 'epoch': 3} {'type': 'loss', 'content': 0.04453254118561745, 'timestamp': '2025-09-10 02:47:02.702031', 'step': 17731, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 3322488817984}, 'timestamp': '2025-09-10 02:47:02.736700', 'step': 17731, 'epoch': 3} {'type': 'loss', 'content': 0.00914017204195261, 'timestamp': '2025-09-10 02:47:02.760395', 'step': 17732, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 160], 'flops': 4746299996032}, 'timestamp': '2025-09-10 02:47:02.790345', 'step': 17732, 'epoch': 3} {'type': 'loss', 'content': 0.05853530019521713, 'timestamp': '2025-09-10 02:47:02.793818', 'step': 17733, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 2847885091968}, 'timestamp': '2025-09-10 02:47:02.824183', 'step': 17733, 'epoch': 3} {'type': 'loss', 'content': 0.042681459337472916, 'timestamp': '2025-09-10 02:47:02.827086', 'step': 17734, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 2847885091968}, 'timestamp': '2025-09-10 02:47:02.859325', 'step': 17734, 'epoch': 3} {'type': 'loss', 'content': 0.04707025736570358, 'timestamp': '2025-09-10 02:47:02.862195', 'step': 17735, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 3322488817984}, 'timestamp': '2025-09-10 02:47:02.897952', 'step': 17735, 'epoch': 3} {'type': 'loss', 'content': 0.0697048082947731, 'timestamp': '2025-09-10 02:47:02.924216', 'step': 17736, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 128], 'flops': 3797092544000}, 'timestamp': '2025-09-10 02:47:02.955429', 'step': 17736, 'epoch': 3} {'type': 'loss', 'content': 0.03123711794614792, 'timestamp': '2025-09-10 02:47:02.958029', 'step': 17737, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 2847885091968}, 'timestamp': '2025-09-10 02:47:02.988338', 'step': 17737, 'epoch': 3} {'type': 'loss', 'content': 0.031803734600543976, 'timestamp': '2025-09-10 02:47:02.990280', 'step': 17738, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 144], 'flops': 4271696270016}, 'timestamp': '2025-09-10 02:47:03.021450', 'step': 17738, 'epoch': 3} {'type': 'loss', 'content': 0.11650549620389938, 'timestamp': '2025-09-10 02:47:03.023821', 'step': 17739, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 3322488817984}, 'timestamp': '2025-09-10 02:47:03.059734', 'step': 17739, 'epoch': 3} {'type': 'loss', 'content': 0.11872857809066772, 'timestamp': '2025-09-10 02:47:03.091736', 'step': 17740, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 2847885091968}, 'timestamp': '2025-09-10 02:47:03.125300', 'step': 17740, 'epoch': 3} {'type': 'loss', 'content': 0.09123437851667404, 'timestamp': '2025-09-10 02:47:03.127228', 'step': 17741, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 2847885091968}, 'timestamp': '2025-09-10 02:47:03.157695', 'step': 17741, 'epoch': 3} {'type': 'loss', 'content': 0.10506874322891235, 'timestamp': '2025-09-10 02:47:03.160155', 'step': 17742, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 128], 'flops': 3797092544000}, 'timestamp': '2025-09-10 02:47:03.208789', 'step': 17742, 'epoch': 3} {'type': 'loss', 'content': 0.026405084878206253, 'timestamp': '2025-09-10 02:47:03.210905', 'step': 17743, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 2847885091968}, 'timestamp': '2025-09-10 02:47:03.242610', 'step': 17743, 'epoch': 3} {'type': 'loss', 'content': 0.04009125381708145, 'timestamp': '2025-09-10 02:47:03.267548', 'step': 17744, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 3322488817984}, 'timestamp': '2025-09-10 02:47:03.301901', 'step': 17744, 'epoch': 3} {'type': 'loss', 'content': 0.09703348577022552, 'timestamp': '2025-09-10 02:47:03.304128', 'step': 17745, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 144], 'flops': 4271696270016}, 'timestamp': '2025-09-10 02:47:03.337012', 'step': 17745, 'epoch': 3} {'type': 'loss', 'content': 0.07578922063112259, 'timestamp': '2025-09-10 02:47:03.342697', 'step': 17746, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 2847885091968}, 'timestamp': '2025-09-10 02:47:03.380808', 'step': 17746, 'epoch': 3} {'type': 'loss', 'content': 0.05703379958868027, 'timestamp': '2025-09-10 02:47:03.383166', 'step': 17747, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 144], 'flops': 4271696270016}, 'timestamp': '2025-09-10 02:47:03.413450', 'step': 17747, 'epoch': 3} {'type': 'loss', 'content': 0.03356221318244934, 'timestamp': '2025-09-10 02:47:03.437358', 'step': 17748, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 2847885091968}, 'timestamp': '2025-09-10 02:47:03.469130', 'step': 17748, 'epoch': 3} {'type': 'loss', 'content': 0.011035481467843056, 'timestamp': '2025-09-10 02:47:03.471614', 'step': 17749, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 2847885091968}, 'timestamp': '2025-09-10 02:47:03.503853', 'step': 17749, 'epoch': 3} {'type': 'loss', 'content': 0.08484884351491928, 'timestamp': '2025-09-10 02:47:03.508040', 'step': 17750, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 2847885091968}, 'timestamp': '2025-09-10 02:47:03.540021', 'step': 17750, 'epoch': 3} {'type': 'loss', 'content': 0.07029163092374802, 'timestamp': '2025-09-10 02:47:03.542370', 'step': 17751, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 2847885091968}, 'timestamp': '2025-09-10 02:47:03.574722', 'step': 17751, 'epoch': 3} {'type': 'loss', 'content': 0.058815695345401764, 'timestamp': '2025-09-10 02:47:03.598139', 'step': 17752, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 2847885091968}, 'timestamp': '2025-09-10 02:47:03.633786', 'step': 17752, 'epoch': 3} {'type': 'loss', 'content': 0.035339515656232834, 'timestamp': '2025-09-10 02:47:03.636905', 'step': 17753, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 3322488817984}, 'timestamp': '2025-09-10 02:47:03.668655', 'step': 17753, 'epoch': 3} {'type': 'loss', 'content': 0.03952324017882347, 'timestamp': '2025-09-10 02:47:03.672563', 'step': 17754, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 144], 'flops': 4271696270016}, 'timestamp': '2025-09-10 02:47:03.703809', 'step': 17754, 'epoch': 3} {'type': 'loss', 'content': 0.030521467328071594, 'timestamp': '2025-09-10 02:47:03.708358', 'step': 17755, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 3322488817984}, 'timestamp': '2025-09-10 02:47:03.739821', 'step': 17755, 'epoch': 3} {'type': 'loss', 'content': 0.10306069999933243, 'timestamp': '2025-09-10 02:47:03.763455', 'step': 17756, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 3322488817984}, 'timestamp': '2025-09-10 02:47:03.794130', 'step': 17756, 'epoch': 3} {'type': 'loss', 'content': 0.09228627383708954, 'timestamp': '2025-09-10 02:47:03.802295', 'step': 17757, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 3322488817984}, 'timestamp': '2025-09-10 02:47:03.842303', 'step': 17757, 'epoch': 3} {'type': 'loss', 'content': 0.10071960091590881, 'timestamp': '2025-09-10 02:47:03.844980', 'step': 17758, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 2847885091968}, 'timestamp': '2025-09-10 02:47:03.877825', 'step': 17758, 'epoch': 3} {'type': 'loss', 'content': 0.05345337465405464, 'timestamp': '2025-09-10 02:47:03.881549', 'step': 17759, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 2847885091968}, 'timestamp': '2025-09-10 02:47:03.917564', 'step': 17759, 'epoch': 3} {'type': 'loss', 'content': 0.07170955091714859, 'timestamp': '2025-09-10 02:47:03.941594', 'step': 17760, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 128], 'flops': 3797092544000}, 'timestamp': '2025-09-10 02:47:03.979142', 'step': 17760, 'epoch': 3} {'type': 'loss', 'content': 0.08247312903404236, 'timestamp': '2025-09-10 02:47:03.983257', 'step': 17761, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 2847885091968}, 'timestamp': '2025-09-10 02:47:04.014209', 'step': 17761, 'epoch': 3} {'type': 'loss', 'content': 0.06748947501182556, 'timestamp': '2025-09-10 02:47:04.019543', 'step': 17762, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 144], 'flops': 4271696270016}, 'timestamp': '2025-09-10 02:47:04.055597', 'step': 17762, 'epoch': 3} {'type': 'loss', 'content': 0.04296661168336868, 'timestamp': '2025-09-10 02:47:04.060466', 'step': 17763, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 128], 'flops': 3797092544000}, 'timestamp': '2025-09-10 02:47:04.105471', 'step': 17763, 'epoch': 3} {'type': 'loss', 'content': 0.07405845820903778, 'timestamp': '2025-09-10 02:47:04.132702', 'step': 17764, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 3322488817984}, 'timestamp': '2025-09-10 02:47:04.176352', 'step': 17764, 'epoch': 3} {'type': 'loss', 'content': 0.02136840671300888, 'timestamp': '2025-09-10 02:47:04.182776', 'step': 17765, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 3322488817984}, 'timestamp': '2025-09-10 02:47:04.224075', 'step': 17765, 'epoch': 3} {'type': 'loss', 'content': 0.07671273499727249, 'timestamp': '2025-09-10 02:47:04.232201', 'step': 17766, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 3322488817984}, 'timestamp': '2025-09-10 02:47:04.275030', 'step': 17766, 'epoch': 3} {'type': 'loss', 'content': 0.05281459540128708, 'timestamp': '2025-09-10 02:47:04.279879', 'step': 17767, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 2847885091968}, 'timestamp': '2025-09-10 02:47:04.316468', 'step': 17767, 'epoch': 3} {'type': 'loss', 'content': 0.04016551747918129, 'timestamp': '2025-09-10 02:47:04.342552', 'step': 17768, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 3322488817984}, 'timestamp': '2025-09-10 02:47:04.380647', 'step': 17768, 'epoch': 3} {'type': 'loss', 'content': 0.03765677660703659, 'timestamp': '2025-09-10 02:47:04.383485', 'step': 17769, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 3322488817984}, 'timestamp': '2025-09-10 02:47:04.417264', 'step': 17769, 'epoch': 3} {'type': 'loss', 'content': 0.029370371252298355, 'timestamp': '2025-09-10 02:47:04.420276', 'step': 17770, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 3322488817984}, 'timestamp': '2025-09-10 02:47:04.453192', 'step': 17770, 'epoch': 3} {'type': 'loss', 'content': 0.04440973326563835, 'timestamp': '2025-09-10 02:47:04.455837', 'step': 17771, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 128], 'flops': 3797092544000}, 'timestamp': '2025-09-10 02:47:04.488736', 'step': 17771, 'epoch': 3} {'type': 'loss', 'content': 0.06317049264907837, 'timestamp': '2025-09-10 02:47:04.513602', 'step': 17772, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 128], 'flops': 3797092544000}, 'timestamp': '2025-09-10 02:47:04.547667', 'step': 17772, 'epoch': 3} {'type': 'loss', 'content': 0.040950436145067215, 'timestamp': '2025-09-10 02:47:04.550194', 'step': 17773, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 2847885091968}, 'timestamp': '2025-09-10 02:47:04.581621', 'step': 17773, 'epoch': 3} {'type': 'loss', 'content': 0.06678249686956406, 'timestamp': '2025-09-10 02:47:04.584863', 'step': 17774, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 2847885091968}, 'timestamp': '2025-09-10 02:47:04.618450', 'step': 17774, 'epoch': 3} {'type': 'loss', 'content': 0.0584135428071022, 'timestamp': '2025-09-10 02:47:04.624435', 'step': 17775, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 3322488817984}, 'timestamp': '2025-09-10 02:47:04.677450', 'step': 17775, 'epoch': 3} {'type': 'loss', 'content': 0.04621949791908264, 'timestamp': '2025-09-10 02:47:04.705782', 'step': 17776, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 128], 'flops': 3797092544000}, 'timestamp': '2025-09-10 02:47:04.754877', 'step': 17776, 'epoch': 3} {'type': 'loss', 'content': 0.06283275783061981, 'timestamp': '2025-09-10 02:47:04.766683', 'step': 17777, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 128], 'flops': 3797092544000}, 'timestamp': '2025-09-10 02:47:04.825027', 'step': 17777, 'epoch': 3} {'type': 'loss', 'content': 0.04930472373962402, 'timestamp': '2025-09-10 02:47:04.829411', 'step': 17778, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 128], 'flops': 3797092544000}, 'timestamp': '2025-09-10 02:47:04.871145', 'step': 17778, 'epoch': 3} {'type': 'loss', 'content': 0.08411260694265366, 'timestamp': '2025-09-10 02:47:04.874425', 'step': 17779, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 3322488817984}, 'timestamp': '2025-09-10 02:47:04.910166', 'step': 17779, 'epoch': 3} {'type': 'loss', 'content': 0.10516008734703064, 'timestamp': '2025-09-10 02:47:04.935639', 'step': 17780, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 3322488817984}, 'timestamp': '2025-09-10 02:47:04.984032', 'step': 17780, 'epoch': 3} {'type': 'loss', 'content': 0.06224549934267998, 'timestamp': '2025-09-10 02:47:04.989698', 'step': 17781, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 3322488817984}, 'timestamp': '2025-09-10 02:47:05.027545', 'step': 17781, 'epoch': 3} {'type': 'loss', 'content': 0.0953531339764595, 'timestamp': '2025-09-10 02:47:05.029738', 'step': 17782, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 2847885091968}, 'timestamp': '2025-09-10 02:47:05.060127', 'step': 17782, 'epoch': 3} {'type': 'loss', 'content': 0.012110831215977669, 'timestamp': '2025-09-10 02:47:05.072684', 'step': 17783, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 2847885091968}, 'timestamp': '2025-09-10 02:47:05.121345', 'step': 17783, 'epoch': 3} {'type': 'loss', 'content': 0.031160274520516396, 'timestamp': '2025-09-10 02:47:05.154401', 'step': 17784, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 3322488817984}, 'timestamp': '2025-09-10 02:47:05.197280', 'step': 17784, 'epoch': 3} {'type': 'loss', 'content': 0.06948110461235046, 'timestamp': '2025-09-10 02:47:05.199959', 'step': 17785, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 128], 'flops': 3797092544000}, 'timestamp': '2025-09-10 02:47:05.231641', 'step': 17785, 'epoch': 3} {'type': 'loss', 'content': 0.02907796949148178, 'timestamp': '2025-09-10 02:47:05.235165', 'step': 17786, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 2847885091968}, 'timestamp': '2025-09-10 02:47:05.269069', 'step': 17786, 'epoch': 3} {'type': 'loss', 'content': 0.03081987239420414, 'timestamp': '2025-09-10 02:47:05.272287', 'step': 17787, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 2847885091968}, 'timestamp': '2025-09-10 02:47:05.305446', 'step': 17787, 'epoch': 3} {'type': 'loss', 'content': 0.05191914364695549, 'timestamp': '2025-09-10 02:47:05.331084', 'step': 17788, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 176], 'flops': 5220903722048}, 'timestamp': '2025-09-10 02:47:05.369798', 'step': 17788, 'epoch': 3} {'type': 'loss', 'content': 0.09230565279722214, 'timestamp': '2025-09-10 02:47:05.375669', 'step': 17789, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 3322488817984}, 'timestamp': '2025-09-10 02:47:05.420901', 'step': 17789, 'epoch': 3} {'type': 'loss', 'content': 0.06367089599370956, 'timestamp': '2025-09-10 02:47:05.424682', 'step': 17790, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 2847885091968}, 'timestamp': '2025-09-10 02:47:05.460648', 'step': 17790, 'epoch': 3} {'type': 'loss', 'content': 0.0454070083796978, 'timestamp': '2025-09-10 02:47:05.470367', 'step': 17791, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 2847885091968}, 'timestamp': '2025-09-10 02:47:05.524991', 'step': 17791, 'epoch': 3} {'type': 'loss', 'content': 0.02282303012907505, 'timestamp': '2025-09-10 02:47:05.549640', 'step': 17792, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 2847885091968}, 'timestamp': '2025-09-10 02:47:05.584448', 'step': 17792, 'epoch': 3} {'type': 'loss', 'content': 0.09510662406682968, 'timestamp': '2025-09-10 02:47:05.588324', 'step': 17793, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 3322488817984}, 'timestamp': '2025-09-10 02:47:05.623415', 'step': 17793, 'epoch': 3} {'type': 'loss', 'content': 0.06556540727615356, 'timestamp': '2025-09-10 02:47:05.627316', 'step': 17794, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 80], 'flops': 2373281365952}, 'timestamp': '2025-09-10 02:47:05.661717', 'step': 17794, 'epoch': 3} {'type': 'loss', 'content': 0.018582472577691078, 'timestamp': '2025-09-10 02:47:05.666807', 'step': 17795, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 128], 'flops': 3797092544000}, 'timestamp': '2025-09-10 02:47:05.709309', 'step': 17795, 'epoch': 3} {'type': 'loss', 'content': 0.06714329868555069, 'timestamp': '2025-09-10 02:47:05.739623', 'step': 17796, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 3322488817984}, 'timestamp': '2025-09-10 02:47:05.790463', 'step': 17796, 'epoch': 3} {'type': 'loss', 'content': 0.14490565657615662, 'timestamp': '2025-09-10 02:47:05.792866', 'step': 17797, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 128], 'flops': 3797092544000}, 'timestamp': '2025-09-10 02:47:05.823187', 'step': 17797, 'epoch': 3} {'type': 'loss', 'content': 0.09987764805555344, 'timestamp': '2025-09-10 02:47:05.825667', 'step': 17798, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 144], 'flops': 4271696270016}, 'timestamp': '2025-09-10 02:47:05.857284', 'step': 17798, 'epoch': 3} {'type': 'loss', 'content': 0.03971662372350693, 'timestamp': '2025-09-10 02:47:05.861136', 'step': 17799, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 128], 'flops': 3797092544000}, 'timestamp': '2025-09-10 02:47:05.903475', 'step': 17799, 'epoch': 3} {'type': 'loss', 'content': 0.12600725889205933, 'timestamp': '2025-09-10 02:47:05.940135', 'step': 17800, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 80], 'flops': 2373281365952}, 'timestamp': '2025-09-10 02:47:06.023601', 'step': 17800, 'epoch': 3} {'type': 'loss', 'content': 0.06076284870505333, 'timestamp': '2025-09-10 02:47:06.039907', 'step': 17801, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 3322488817984}, 'timestamp': '2025-09-10 02:47:06.073577', 'step': 17801, 'epoch': 3} {'type': 'loss', 'content': 0.16030696034431458, 'timestamp': '2025-09-10 02:47:06.088986', 'step': 17802, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 128], 'flops': 3797092544000}, 'timestamp': '2025-09-10 02:47:06.145413', 'step': 17802, 'epoch': 3} {'type': 'loss', 'content': 0.08147218078374863, 'timestamp': '2025-09-10 02:47:06.153191', 'step': 17803, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 144], 'flops': 4271696270016}, 'timestamp': '2025-09-10 02:47:06.207166', 'step': 17803, 'epoch': 3} {'type': 'loss', 'content': 0.08262521773576736, 'timestamp': '2025-09-10 02:47:06.232325', 'step': 17804, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 144], 'flops': 4271696270016}, 'timestamp': '2025-09-10 02:47:06.269368', 'step': 17804, 'epoch': 3} {'type': 'loss', 'content': 0.015769515186548233, 'timestamp': '2025-09-10 02:47:06.271859', 'step': 17805, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 3322488817984}, 'timestamp': '2025-09-10 02:47:06.304981', 'step': 17805, 'epoch': 3} {'type': 'loss', 'content': 0.025747906416654587, 'timestamp': '2025-09-10 02:47:06.310593', 'step': 17806, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 3322488817984}, 'timestamp': '2025-09-10 02:47:06.345139', 'step': 17806, 'epoch': 3} {'type': 'loss', 'content': 0.04944327846169472, 'timestamp': '2025-09-10 02:47:06.351478', 'step': 17807, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 2847885091968}, 'timestamp': '2025-09-10 02:47:06.387755', 'step': 17807, 'epoch': 3} {'type': 'loss', 'content': 0.05059197172522545, 'timestamp': '2025-09-10 02:47:06.412744', 'step': 17808, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 128], 'flops': 3797092544000}, 'timestamp': '2025-09-10 02:47:06.452881', 'step': 17808, 'epoch': 3} {'type': 'loss', 'content': 0.07688966393470764, 'timestamp': '2025-09-10 02:47:06.457008', 'step': 17809, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 2847885091968}, 'timestamp': '2025-09-10 02:47:06.507452', 'step': 17809, 'epoch': 3} {'type': 'loss', 'content': 0.08361939340829849, 'timestamp': '2025-09-10 02:47:06.514289', 'step': 17810, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 128], 'flops': 3797092544000}, 'timestamp': '2025-09-10 02:47:06.563906', 'step': 17810, 'epoch': 3} {'type': 'loss', 'content': 0.03221077844500542, 'timestamp': '2025-09-10 02:47:06.566787', 'step': 17811, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 80], 'flops': 2373281365952}, 'timestamp': '2025-09-10 02:47:06.606740', 'step': 17811, 'epoch': 3} {'type': 'loss', 'content': 0.04024910926818848, 'timestamp': '2025-09-10 02:47:06.632613', 'step': 17812, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 2847885091968}, 'timestamp': '2025-09-10 02:47:06.685722', 'step': 17812, 'epoch': 3} {'type': 'loss', 'content': 0.046668391674757004, 'timestamp': '2025-09-10 02:47:06.694922', 'step': 17813, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 3322488817984}, 'timestamp': '2025-09-10 02:47:06.735579', 'step': 17813, 'epoch': 3} {'type': 'loss', 'content': 0.08495248109102249, 'timestamp': '2025-09-10 02:47:06.740124', 'step': 17814, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 3322488817984}, 'timestamp': '2025-09-10 02:47:06.804861', 'step': 17814, 'epoch': 3} {'type': 'loss', 'content': 0.05252402275800705, 'timestamp': '2025-09-10 02:47:06.809797', 'step': 17815, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 192], 'flops': 5695507448064}, 'timestamp': '2025-09-10 02:47:06.852642', 'step': 17815, 'epoch': 3} {'type': 'loss', 'content': 0.08392907679080963, 'timestamp': '2025-09-10 02:47:06.879042', 'step': 17816, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 3322488817984}, 'timestamp': '2025-09-10 02:47:06.918322', 'step': 17816, 'epoch': 3} {'type': 'loss', 'content': 0.03756435215473175, 'timestamp': '2025-09-10 02:47:06.925385', 'step': 17817, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 3322488817984}, 'timestamp': '2025-09-10 02:47:06.980293', 'step': 17817, 'epoch': 3} {'type': 'loss', 'content': 0.07218310981988907, 'timestamp': '2025-09-10 02:47:06.984887', 'step': 17818, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 3322488817984}, 'timestamp': '2025-09-10 02:47:07.024369', 'step': 17818, 'epoch': 3} {'type': 'loss', 'content': 0.1506938487291336, 'timestamp': '2025-09-10 02:47:07.030625', 'step': 17819, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 128], 'flops': 3797092544000}, 'timestamp': '2025-09-10 02:47:07.083140', 'step': 17819, 'epoch': 3} {'type': 'loss', 'content': 0.039544153958559036, 'timestamp': '2025-09-10 02:47:07.108693', 'step': 17820, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 3322488817984}, 'timestamp': '2025-09-10 02:47:07.149023', 'step': 17820, 'epoch': 3} {'type': 'loss', 'content': 0.10582523047924042, 'timestamp': '2025-09-10 02:47:07.165645', 'step': 17821, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 3322488817984}, 'timestamp': '2025-09-10 02:47:07.217272', 'step': 17821, 'epoch': 3} {'type': 'loss', 'content': 0.03809450939297676, 'timestamp': '2025-09-10 02:47:07.238098', 'step': 17822, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 3322488817984}, 'timestamp': '2025-09-10 02:47:07.275497', 'step': 17822, 'epoch': 3} {'type': 'loss', 'content': 0.10234208405017853, 'timestamp': '2025-09-10 02:47:07.279685', 'step': 17823, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 128], 'flops': 3797092544000}, 'timestamp': '2025-09-10 02:47:07.356723', 'step': 17823, 'epoch': 3} {'type': 'loss', 'content': 0.04070564731955528, 'timestamp': '2025-09-10 02:47:07.383543', 'step': 17824, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 144], 'flops': 4271696270016}, 'timestamp': '2025-09-10 02:47:07.422243', 'step': 17824, 'epoch': 3} {'type': 'loss', 'content': 0.05548928305506706, 'timestamp': '2025-09-10 02:47:07.444704', 'step': 17825, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 3322488817984}, 'timestamp': '2025-09-10 02:47:07.523695', 'step': 17825, 'epoch': 3} {'type': 'loss', 'content': 0.17829130589962006, 'timestamp': '2025-09-10 02:47:07.527789', 'step': 17826, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 128], 'flops': 3797092544000}, 'timestamp': '2025-09-10 02:47:07.575625', 'step': 17826, 'epoch': 3} {'type': 'loss', 'content': 0.059705596417188644, 'timestamp': '2025-09-10 02:47:07.580848', 'step': 17827, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 128], 'flops': 3797092544000}, 'timestamp': '2025-09-10 02:47:07.622139', 'step': 17827, 'epoch': 3} {'type': 'loss', 'content': 0.07953819632530212, 'timestamp': '2025-09-10 02:47:07.664689', 'step': 17828, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 3322488817984}, 'timestamp': '2025-09-10 02:47:07.704225', 'step': 17828, 'epoch': 3} {'type': 'loss', 'content': 0.07370277494192123, 'timestamp': '2025-09-10 02:47:07.708755', 'step': 17829, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 128], 'flops': 3797092544000}, 'timestamp': '2025-09-10 02:47:07.746279', 'step': 17829, 'epoch': 3} {'type': 'loss', 'content': 0.04309624806046486, 'timestamp': '2025-09-10 02:47:07.751193', 'step': 17830, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 128], 'flops': 3797092544000}, 'timestamp': '2025-09-10 02:47:07.822416', 'step': 17830, 'epoch': 3} {'type': 'loss', 'content': 0.047685656696558, 'timestamp': '2025-09-10 02:47:07.827530', 'step': 17831, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 2847885091968}, 'timestamp': '2025-09-10 02:47:07.895318', 'step': 17831, 'epoch': 3} {'type': 'loss', 'content': 0.09967544674873352, 'timestamp': '2025-09-10 02:47:07.924152', 'step': 17832, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 3322488817984}, 'timestamp': '2025-09-10 02:47:07.975808', 'step': 17832, 'epoch': 3} {'type': 'loss', 'content': 0.08321326971054077, 'timestamp': '2025-09-10 02:47:07.982603', 'step': 17833, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 3322488817984}, 'timestamp': '2025-09-10 02:47:08.037904', 'step': 17833, 'epoch': 3} {'type': 'loss', 'content': 0.04607447236776352, 'timestamp': '2025-09-10 02:47:08.043585', 'step': 17834, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 128], 'flops': 3797092544000}, 'timestamp': '2025-09-10 02:47:08.088181', 'step': 17834, 'epoch': 3} {'type': 'loss', 'content': 0.06490985304117203, 'timestamp': '2025-09-10 02:47:08.096936', 'step': 17835, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 2847885091968}, 'timestamp': '2025-09-10 02:47:08.155191', 'step': 17835, 'epoch': 3} {'type': 'loss', 'content': 0.0712272897362709, 'timestamp': '2025-09-10 02:47:08.181359', 'step': 17836, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 128], 'flops': 3797092544000}, 'timestamp': '2025-09-10 02:47:08.221882', 'step': 17836, 'epoch': 3} {'type': 'loss', 'content': 0.05711978301405907, 'timestamp': '2025-09-10 02:47:08.225198', 'step': 17837, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 2847885091968}, 'timestamp': '2025-09-10 02:47:08.262258', 'step': 17837, 'epoch': 3} {'type': 'loss', 'content': 0.048890121281147, 'timestamp': '2025-09-10 02:47:08.266222', 'step': 17838, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 3322488817984}, 'timestamp': '2025-09-10 02:47:08.322300', 'step': 17838, 'epoch': 3} {'type': 'loss', 'content': 0.09573134779930115, 'timestamp': '2025-09-10 02:47:08.325743', 'step': 17839, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 128], 'flops': 3797092544000}, 'timestamp': '2025-09-10 02:47:08.366023', 'step': 17839, 'epoch': 3} {'type': 'loss', 'content': 0.12833426892757416, 'timestamp': '2025-09-10 02:47:08.390848', 'step': 17840, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 2847885091968}, 'timestamp': '2025-09-10 02:47:08.426185', 'step': 17840, 'epoch': 3} {'type': 'loss', 'content': 0.09416957944631577, 'timestamp': '2025-09-10 02:47:08.428721', 'step': 17841, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 2847885091968}, 'timestamp': '2025-09-10 02:47:08.462822', 'step': 17841, 'epoch': 3} {'type': 'loss', 'content': 0.043044500052928925, 'timestamp': '2025-09-10 02:47:08.467244', 'step': 17842, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 128], 'flops': 3797092544000}, 'timestamp': '2025-09-10 02:47:08.503430', 'step': 17842, 'epoch': 3} {'type': 'loss', 'content': 0.05631071701645851, 'timestamp': '2025-09-10 02:47:08.508242', 'step': 17843, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 80], 'flops': 2373281365952}, 'timestamp': '2025-09-10 02:47:08.540363', 'step': 17843, 'epoch': 3} {'type': 'loss', 'content': 0.09123076498508453, 'timestamp': '2025-09-10 02:47:08.564584', 'step': 17844, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 144], 'flops': 4271696270016}, 'timestamp': '2025-09-10 02:47:08.597129', 'step': 17844, 'epoch': 3} {'type': 'loss', 'content': 0.10882675647735596, 'timestamp': '2025-09-10 02:47:08.606357', 'step': 17845, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 160], 'flops': 4746299996032}, 'timestamp': '2025-09-10 02:47:08.639664', 'step': 17845, 'epoch': 3} {'type': 'loss', 'content': 0.05737831071019173, 'timestamp': '2025-09-10 02:47:08.645674', 'step': 17846, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 2847885091968}, 'timestamp': '2025-09-10 02:47:08.680640', 'step': 17846, 'epoch': 3} {'type': 'loss', 'content': 0.028834521770477295, 'timestamp': '2025-09-10 02:47:08.683961', 'step': 17847, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 3322488817984}, 'timestamp': '2025-09-10 02:47:08.714295', 'step': 17847, 'epoch': 3} {'type': 'loss', 'content': 0.04296223074197769, 'timestamp': '2025-09-10 02:47:08.738650', 'step': 17848, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 2847885091968}, 'timestamp': '2025-09-10 02:47:08.771297', 'step': 17848, 'epoch': 3} {'type': 'loss', 'content': 0.16197331249713898, 'timestamp': '2025-09-10 02:47:08.775577', 'step': 17849, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 2847885091968}, 'timestamp': '2025-09-10 02:47:08.814508', 'step': 17849, 'epoch': 3} {'type': 'loss', 'content': 0.10429000854492188, 'timestamp': '2025-09-10 02:47:08.818334', 'step': 17850, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 3322488817984}, 'timestamp': '2025-09-10 02:47:08.861379', 'step': 17850, 'epoch': 3} {'type': 'loss', 'content': 0.05919488146901131, 'timestamp': '2025-09-10 02:47:08.864252', 'step': 17851, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 2847885091968}, 'timestamp': '2025-09-10 02:47:08.901457', 'step': 17851, 'epoch': 3} {'type': 'loss', 'content': 0.07529735565185547, 'timestamp': '2025-09-10 02:47:08.926158', 'step': 17852, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 3322488817984}, 'timestamp': '2025-09-10 02:47:08.959203', 'step': 17852, 'epoch': 3} {'type': 'loss', 'content': 0.12703339755535126, 'timestamp': '2025-09-10 02:47:08.967770', 'step': 17853, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 3322488817984}, 'timestamp': '2025-09-10 02:47:09.012705', 'step': 17853, 'epoch': 3} {'type': 'loss', 'content': 0.12260374426841736, 'timestamp': '2025-09-10 02:47:09.023380', 'step': 17854, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 3322488817984}, 'timestamp': '2025-09-10 02:47:09.085063', 'step': 17854, 'epoch': 3} {'type': 'loss', 'content': 0.06730198860168457, 'timestamp': '2025-09-10 02:47:09.089553', 'step': 17855, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 192], 'flops': 5695507448064}, 'timestamp': '2025-09-10 02:47:09.150748', 'step': 17855, 'epoch': 3} {'type': 'loss', 'content': 0.11921114474534988, 'timestamp': '2025-09-10 02:47:09.176093', 'step': 17856, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 128], 'flops': 3797092544000}, 'timestamp': '2025-09-10 02:47:09.239770', 'step': 17856, 'epoch': 3} {'type': 'loss', 'content': 0.15382738411426544, 'timestamp': '2025-09-10 02:47:09.244123', 'step': 17857, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 144], 'flops': 4271696270016}, 'timestamp': '2025-09-10 02:47:09.294374', 'step': 17857, 'epoch': 3} {'type': 'loss', 'content': 0.1348661631345749, 'timestamp': '2025-09-10 02:47:09.312422', 'step': 17858, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 3322488817984}, 'timestamp': '2025-09-10 02:47:09.392548', 'step': 17858, 'epoch': 3} {'type': 'loss', 'content': 0.09416847676038742, 'timestamp': '2025-09-10 02:47:09.409417', 'step': 17859, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 2847885091968}, 'timestamp': '2025-09-10 02:47:09.484331', 'step': 17859, 'epoch': 3} {'type': 'loss', 'content': 0.11428707093000412, 'timestamp': '2025-09-10 02:47:09.523762', 'step': 17860, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 3322488817984}, 'timestamp': '2025-09-10 02:47:09.583760', 'step': 17860, 'epoch': 3} {'type': 'loss', 'content': 0.061381250619888306, 'timestamp': '2025-09-10 02:47:09.597913', 'step': 17861, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 3322488817984}, 'timestamp': '2025-09-10 02:47:09.651638', 'step': 17861, 'epoch': 3} {'type': 'loss', 'content': 0.05973943695425987, 'timestamp': '2025-09-10 02:47:09.660681', 'step': 17862, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 3322488817984}, 'timestamp': '2025-09-10 02:47:09.719074', 'step': 17862, 'epoch': 3} {'type': 'loss', 'content': 0.03636760637164116, 'timestamp': '2025-09-10 02:47:09.721987', 'step': 17863, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 2847885091968}, 'timestamp': '2025-09-10 02:47:09.780486', 'step': 17863, 'epoch': 3} {'type': 'loss', 'content': 0.04215604439377785, 'timestamp': '2025-09-10 02:47:09.805512', 'step': 17864, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 3322488817984}, 'timestamp': '2025-09-10 02:47:09.856653', 'step': 17864, 'epoch': 3} {'type': 'loss', 'content': 0.09287753701210022, 'timestamp': '2025-09-10 02:47:09.859095', 'step': 17865, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 128], 'flops': 3797092544000}, 'timestamp': '2025-09-10 02:47:09.907838', 'step': 17865, 'epoch': 3} {'type': 'loss', 'content': 0.08659811317920685, 'timestamp': '2025-09-10 02:47:09.923400', 'step': 17866, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 144], 'flops': 4271696270016}, 'timestamp': '2025-09-10 02:47:09.999684', 'step': 17866, 'epoch': 3} {'type': 'loss', 'content': 0.10649660229682922, 'timestamp': '2025-09-10 02:47:10.014630', 'step': 17867, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 3322488817984}, 'timestamp': '2025-09-10 02:47:10.084178', 'step': 17867, 'epoch': 3} {'type': 'loss', 'content': 0.07528931647539139, 'timestamp': '2025-09-10 02:47:10.122242', 'step': 17868, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 2847885091968}, 'timestamp': '2025-09-10 02:47:10.217958', 'step': 17868, 'epoch': 3} {'type': 'loss', 'content': 0.04514406621456146, 'timestamp': '2025-09-10 02:47:10.237675', 'step': 17869, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 3322488817984}, 'timestamp': '2025-09-10 02:47:10.274902', 'step': 17869, 'epoch': 3} {'type': 'loss', 'content': 0.0422455295920372, 'timestamp': '2025-09-10 02:47:10.278747', 'step': 17870, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 3322488817984}, 'timestamp': '2025-09-10 02:47:10.345481', 'step': 17870, 'epoch': 3} {'type': 'loss', 'content': 0.044806066900491714, 'timestamp': '2025-09-10 02:47:10.366487', 'step': 17871, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 144], 'flops': 4271696270016}, 'timestamp': '2025-09-10 02:47:10.405022', 'step': 17871, 'epoch': 3} {'type': 'loss', 'content': 0.176282599568367, 'timestamp': '2025-09-10 02:47:10.445973', 'step': 17872, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 3322488817984}, 'timestamp': '2025-09-10 02:47:10.497082', 'step': 17872, 'epoch': 3} {'type': 'loss', 'content': 0.07985783368349075, 'timestamp': '2025-09-10 02:47:10.517320', 'step': 17873, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 3322488817984}, 'timestamp': '2025-09-10 02:47:10.556181', 'step': 17873, 'epoch': 3} {'type': 'loss', 'content': 0.11332911998033524, 'timestamp': '2025-09-10 02:47:10.573952', 'step': 17874, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 128], 'flops': 3797092544000}, 'timestamp': '2025-09-10 02:47:10.635771', 'step': 17874, 'epoch': 3} {'type': 'loss', 'content': 0.0663641020655632, 'timestamp': '2025-09-10 02:47:10.641954', 'step': 17875, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 2847885091968}, 'timestamp': '2025-09-10 02:47:10.684192', 'step': 17875, 'epoch': 3} {'type': 'loss', 'content': 0.07900223881006241, 'timestamp': '2025-09-10 02:47:10.707823', 'step': 17876, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 3322488817984}, 'timestamp': '2025-09-10 02:47:10.740792', 'step': 17876, 'epoch': 3} {'type': 'loss', 'content': 0.0789913758635521, 'timestamp': '2025-09-10 02:47:10.743686', 'step': 17877, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 128], 'flops': 3797092544000}, 'timestamp': '2025-09-10 02:47:10.774843', 'step': 17877, 'epoch': 3} {'type': 'loss', 'content': 0.13957147300243378, 'timestamp': '2025-09-10 02:47:10.778101', 'step': 17878, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 3322488817984}, 'timestamp': '2025-09-10 02:47:10.809395', 'step': 17878, 'epoch': 3} {'type': 'loss', 'content': 0.07637283951044083, 'timestamp': '2025-09-10 02:47:10.816628', 'step': 17879, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 2847885091968}, 'timestamp': '2025-09-10 02:47:10.850536', 'step': 17879, 'epoch': 3} {'type': 'loss', 'content': 0.1571274995803833, 'timestamp': '2025-09-10 02:47:10.874236', 'step': 17880, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 2847885091968}, 'timestamp': '2025-09-10 02:47:10.904423', 'step': 17880, 'epoch': 3} {'type': 'loss', 'content': 0.08337316662073135, 'timestamp': '2025-09-10 02:47:10.906456', 'step': 17881, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 2847885091968}, 'timestamp': '2025-09-10 02:47:10.936351', 'step': 17881, 'epoch': 3} {'type': 'loss', 'content': 0.1033276692032814, 'timestamp': '2025-09-10 02:47:10.938715', 'step': 17882, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 128], 'flops': 3797092544000}, 'timestamp': '2025-09-10 02:47:10.969301', 'step': 17882, 'epoch': 3} {'type': 'loss', 'content': 0.10182834416627884, 'timestamp': '2025-09-10 02:47:10.971628', 'step': 17883, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 3322488817984}, 'timestamp': '2025-09-10 02:47:11.003582', 'step': 17883, 'epoch': 3} {'type': 'loss', 'content': 0.12653659284114838, 'timestamp': '2025-09-10 02:47:11.028636', 'step': 17884, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 3322488817984}, 'timestamp': '2025-09-10 02:47:11.058666', 'step': 17884, 'epoch': 3} {'type': 'loss', 'content': 0.102165587246418, 'timestamp': '2025-09-10 02:47:11.061774', 'step': 17885, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 128], 'flops': 3797092544000}, 'timestamp': '2025-09-10 02:47:11.092946', 'step': 17885, 'epoch': 3} {'type': 'loss', 'content': 0.06811806559562683, 'timestamp': '2025-09-10 02:47:11.095373', 'step': 17886, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 3322488817984}, 'timestamp': '2025-09-10 02:47:11.125273', 'step': 17886, 'epoch': 3} {'type': 'loss', 'content': 0.07597602158784866, 'timestamp': '2025-09-10 02:47:11.127663', 'step': 17887, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 2847885091968}, 'timestamp': '2025-09-10 02:47:11.157628', 'step': 17887, 'epoch': 3} {'type': 'loss', 'content': 0.05208616703748703, 'timestamp': '2025-09-10 02:47:11.181575', 'step': 17888, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 3322488817984}, 'timestamp': '2025-09-10 02:47:11.211871', 'step': 17888, 'epoch': 3} {'type': 'loss', 'content': 0.08273807168006897, 'timestamp': '2025-09-10 02:47:11.214173', 'step': 17889, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 2847885091968}, 'timestamp': '2025-09-10 02:47:11.244813', 'step': 17889, 'epoch': 3} {'type': 'loss', 'content': 0.07329411059617996, 'timestamp': '2025-09-10 02:47:11.247436', 'step': 17890, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 2847885091968}, 'timestamp': '2025-09-10 02:47:11.277281', 'step': 17890, 'epoch': 3} {'type': 'loss', 'content': 0.08349601179361343, 'timestamp': '2025-09-10 02:47:11.279544', 'step': 17891, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 3322488817984}, 'timestamp': '2025-09-10 02:47:11.309952', 'step': 17891, 'epoch': 3} {'type': 'loss', 'content': 0.12420374900102615, 'timestamp': '2025-09-10 02:47:11.333497', 'step': 17892, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 128], 'flops': 3797092544000}, 'timestamp': '2025-09-10 02:47:11.363839', 'step': 17892, 'epoch': 3} {'type': 'loss', 'content': 0.13661295175552368, 'timestamp': '2025-09-10 02:47:11.366110', 'step': 17893, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 160], 'flops': 4746299996032}, 'timestamp': '2025-09-10 02:47:11.396893', 'step': 17893, 'epoch': 3} {'type': 'loss', 'content': 0.0407099574804306, 'timestamp': '2025-09-10 02:47:11.399733', 'step': 17894, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 3322488817984}, 'timestamp': '2025-09-10 02:47:11.430132', 'step': 17894, 'epoch': 3} {'type': 'loss', 'content': 0.044596944004297256, 'timestamp': '2025-09-10 02:47:11.432394', 'step': 17895, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 144], 'flops': 4271696270016}, 'timestamp': '2025-09-10 02:47:11.461803', 'step': 17895, 'epoch': 3} {'type': 'loss', 'content': 0.06059182062745094, 'timestamp': '2025-09-10 02:47:11.485304', 'step': 17896, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 208], 'flops': 6170111174080}, 'timestamp': '2025-09-10 02:47:11.515556', 'step': 17896, 'epoch': 3} {'type': 'loss', 'content': 0.06982164084911346, 'timestamp': '2025-09-10 02:47:11.520383', 'step': 17897, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 2847885091968}, 'timestamp': '2025-09-10 02:47:11.550330', 'step': 17897, 'epoch': 3} {'type': 'loss', 'content': 0.050095245242118835, 'timestamp': '2025-09-10 02:47:11.552577', 'step': 17898, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 3322488817984}, 'timestamp': '2025-09-10 02:47:11.582623', 'step': 17898, 'epoch': 3} {'type': 'loss', 'content': 0.0758989080786705, 'timestamp': '2025-09-10 02:47:11.584799', 'step': 17899, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 128], 'flops': 3797092544000}, 'timestamp': '2025-09-10 02:47:11.614294', 'step': 17899, 'epoch': 3} {'type': 'loss', 'content': 0.09706945717334747, 'timestamp': '2025-09-10 02:47:11.639221', 'step': 17900, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 144], 'flops': 4271696270016}, 'timestamp': '2025-09-10 02:47:11.669789', 'step': 17900, 'epoch': 3} {'type': 'loss', 'content': 0.0677618607878685, 'timestamp': '2025-09-10 02:47:11.671985', 'step': 17901, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 128], 'flops': 3797092544000}, 'timestamp': '2025-09-10 02:47:11.701531', 'step': 17901, 'epoch': 3} {'type': 'loss', 'content': 0.06424008309841156, 'timestamp': '2025-09-10 02:47:11.703347', 'step': 17902, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 3322488817984}, 'timestamp': '2025-09-10 02:47:11.732900', 'step': 17902, 'epoch': 3} {'type': 'loss', 'content': 0.06839480996131897, 'timestamp': '2025-09-10 02:47:11.735160', 'step': 17903, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 128], 'flops': 3797092544000}, 'timestamp': '2025-09-10 02:47:11.765526', 'step': 17903, 'epoch': 3} {'type': 'loss', 'content': 0.08275880664587021, 'timestamp': '2025-09-10 02:47:11.789190', 'step': 17904, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 3322488817984}, 'timestamp': '2025-09-10 02:47:11.820350', 'step': 17904, 'epoch': 3} {'type': 'loss', 'content': 0.05042710900306702, 'timestamp': '2025-09-10 02:47:11.822656', 'step': 17905, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 160], 'flops': 4746299996032}, 'timestamp': '2025-09-10 02:47:11.853667', 'step': 17905, 'epoch': 3} {'type': 'loss', 'content': 0.05904723331332207, 'timestamp': '2025-09-10 02:47:11.856434', 'step': 17906, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 144], 'flops': 4271696270016}, 'timestamp': '2025-09-10 02:47:11.886916', 'step': 17906, 'epoch': 3} {'type': 'loss', 'content': 0.04318684712052345, 'timestamp': '2025-09-10 02:47:11.889281', 'step': 17907, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 176], 'flops': 5220903722048}, 'timestamp': '2025-09-10 02:47:11.919226', 'step': 17907, 'epoch': 3} {'type': 'loss', 'content': 0.09539752453565598, 'timestamp': '2025-09-10 02:47:11.944277', 'step': 17908, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 2847885091968}, 'timestamp': '2025-09-10 02:47:11.974485', 'step': 17908, 'epoch': 3} {'type': 'loss', 'content': 0.14234691858291626, 'timestamp': '2025-09-10 02:47:11.977991', 'step': 17909, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 144], 'flops': 4271696270016}, 'timestamp': '2025-09-10 02:47:12.010631', 'step': 17909, 'epoch': 3} {'type': 'loss', 'content': 0.0743950754404068, 'timestamp': '2025-09-10 02:47:12.013072', 'step': 17910, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 128], 'flops': 3797092544000}, 'timestamp': '2025-09-10 02:47:12.047828', 'step': 17910, 'epoch': 3} {'type': 'loss', 'content': 0.08752214908599854, 'timestamp': '2025-09-10 02:47:12.050127', 'step': 17911, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 3322488817984}, 'timestamp': '2025-09-10 02:47:12.087525', 'step': 17911, 'epoch': 3} {'type': 'loss', 'content': 0.09187311679124832, 'timestamp': '2025-09-10 02:47:12.111488', 'step': 17912, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 2847885091968}, 'timestamp': '2025-09-10 02:47:12.147014', 'step': 17912, 'epoch': 3} {'type': 'loss', 'content': 0.09074509888887405, 'timestamp': '2025-09-10 02:47:12.151571', 'step': 17913, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 3322488817984}, 'timestamp': '2025-09-10 02:47:12.183190', 'step': 17913, 'epoch': 3} {'type': 'loss', 'content': 0.0817684605717659, 'timestamp': '2025-09-10 02:47:12.193493', 'step': 17914, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 3322488817984}, 'timestamp': '2025-09-10 02:47:12.228026', 'step': 17914, 'epoch': 3} {'type': 'loss', 'content': 0.037638429552316666, 'timestamp': '2025-09-10 02:47:12.233235', 'step': 17915, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 2847885091968}, 'timestamp': '2025-09-10 02:47:12.265564', 'step': 17915, 'epoch': 3} {'type': 'loss', 'content': 0.03927873075008392, 'timestamp': '2025-09-10 02:47:12.289191', 'step': 17916, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 3322488817984}, 'timestamp': '2025-09-10 02:47:12.325247', 'step': 17916, 'epoch': 3} {'type': 'loss', 'content': 0.05568213388323784, 'timestamp': '2025-09-10 02:47:12.329832', 'step': 17917, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 128], 'flops': 3797092544000}, 'timestamp': '2025-09-10 02:47:12.364846', 'step': 17917, 'epoch': 3} {'type': 'loss', 'content': 0.07413221895694733, 'timestamp': '2025-09-10 02:47:12.367454', 'step': 17918, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 176], 'flops': 5220903722048}, 'timestamp': '2025-09-10 02:47:12.398559', 'step': 17918, 'epoch': 3} {'type': 'loss', 'content': 0.057610154151916504, 'timestamp': '2025-09-10 02:47:12.408917', 'step': 17919, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 2847885091968}, 'timestamp': '2025-09-10 02:47:12.440091', 'step': 17919, 'epoch': 3} {'type': 'loss', 'content': 0.04923326149582863, 'timestamp': '2025-09-10 02:47:12.463879', 'step': 17920, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 3322488817984}, 'timestamp': '2025-09-10 02:47:12.496444', 'step': 17920, 'epoch': 3} {'type': 'loss', 'content': 0.05192533880472183, 'timestamp': '2025-09-10 02:47:12.504812', 'step': 17921, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 3322488817984}, 'timestamp': '2025-09-10 02:47:12.547183', 'step': 17921, 'epoch': 3} {'type': 'loss', 'content': 0.0951417088508606, 'timestamp': '2025-09-10 02:47:12.550109', 'step': 17922, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 3322488817984}, 'timestamp': '2025-09-10 02:47:12.599594', 'step': 17922, 'epoch': 3} {'type': 'loss', 'content': 0.0910319983959198, 'timestamp': '2025-09-10 02:47:12.602987', 'step': 17923, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 3322488817984}, 'timestamp': '2025-09-10 02:47:12.636466', 'step': 17923, 'epoch': 3} {'type': 'loss', 'content': 0.04059016704559326, 'timestamp': '2025-09-10 02:47:12.659593', 'step': 17924, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 2847885091968}, 'timestamp': '2025-09-10 02:47:12.690814', 'step': 17924, 'epoch': 3} {'type': 'loss', 'content': 0.04136141762137413, 'timestamp': '2025-09-10 02:47:12.692976', 'step': 17925, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 3322488817984}, 'timestamp': '2025-09-10 02:47:12.723923', 'step': 17925, 'epoch': 3} {'type': 'loss', 'content': 0.11105869710445404, 'timestamp': '2025-09-10 02:47:12.726400', 'step': 17926, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 144], 'flops': 4271696270016}, 'timestamp': '2025-09-10 02:47:12.770670', 'step': 17926, 'epoch': 3} {'type': 'loss', 'content': 0.07143954187631607, 'timestamp': '2025-09-10 02:47:12.777392', 'step': 17927, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 3322488817984}, 'timestamp': '2025-09-10 02:47:12.818775', 'step': 17927, 'epoch': 3} {'type': 'loss', 'content': 0.02451927587389946, 'timestamp': '2025-09-10 02:47:12.844989', 'step': 17928, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 2847885091968}, 'timestamp': '2025-09-10 02:47:12.879151', 'step': 17928, 'epoch': 3} {'type': 'loss', 'content': 0.0727405846118927, 'timestamp': '2025-09-10 02:47:12.881415', 'step': 17929, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 160], 'flops': 4746299996032}, 'timestamp': '2025-09-10 02:47:12.915000', 'step': 17929, 'epoch': 3} {'type': 'loss', 'content': 0.14922267198562622, 'timestamp': '2025-09-10 02:47:12.918763', 'step': 17930, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 128], 'flops': 3797092544000}, 'timestamp': '2025-09-10 02:47:12.950654', 'step': 17930, 'epoch': 3} {'type': 'loss', 'content': 0.03529723361134529, 'timestamp': '2025-09-10 02:47:12.953681', 'step': 17931, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 3322488817984}, 'timestamp': '2025-09-10 02:47:12.991415', 'step': 17931, 'epoch': 3} {'type': 'loss', 'content': 0.004506344441324472, 'timestamp': '2025-09-10 02:47:13.014940', 'step': 17932, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 3322488817984}, 'timestamp': '2025-09-10 02:47:13.049135', 'step': 17932, 'epoch': 3} {'type': 'loss', 'content': 0.06180814653635025, 'timestamp': '2025-09-10 02:47:13.052434', 'step': 17933, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 2847885091968}, 'timestamp': '2025-09-10 02:47:13.083599', 'step': 17933, 'epoch': 3} {'type': 'loss', 'content': 0.04445863142609596, 'timestamp': '2025-09-10 02:47:13.086029', 'step': 17934, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 3322488817984}, 'timestamp': '2025-09-10 02:47:13.125629', 'step': 17934, 'epoch': 3} {'type': 'loss', 'content': 0.05277666822075844, 'timestamp': '2025-09-10 02:47:13.127929', 'step': 17935, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 3322488817984}, 'timestamp': '2025-09-10 02:47:13.158101', 'step': 17935, 'epoch': 3} {'type': 'loss', 'content': 0.07726283371448517, 'timestamp': '2025-09-10 02:47:13.182438', 'step': 17936, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 128], 'flops': 3797092544000}, 'timestamp': '2025-09-10 02:47:13.213855', 'step': 17936, 'epoch': 3} {'type': 'loss', 'content': 0.04962541162967682, 'timestamp': '2025-09-10 02:47:13.216967', 'step': 17937, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 144], 'flops': 4271696270016}, 'timestamp': '2025-09-10 02:47:13.247209', 'step': 17937, 'epoch': 3} {'type': 'loss', 'content': 0.1045733243227005, 'timestamp': '2025-09-10 02:47:13.253758', 'step': 17938, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 3322488817984}, 'timestamp': '2025-09-10 02:47:13.289559', 'step': 17938, 'epoch': 3} {'type': 'loss', 'content': 0.03572334349155426, 'timestamp': '2025-09-10 02:47:13.293822', 'step': 17939, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 144], 'flops': 4271696270016}, 'timestamp': '2025-09-10 02:47:13.324137', 'step': 17939, 'epoch': 3} {'type': 'loss', 'content': 0.07895597070455551, 'timestamp': '2025-09-10 02:47:13.347505', 'step': 17940, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 3322488817984}, 'timestamp': '2025-09-10 02:47:13.379254', 'step': 17940, 'epoch': 3} {'type': 'loss', 'content': 0.07049831002950668, 'timestamp': '2025-09-10 02:47:13.381480', 'step': 17941, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 144], 'flops': 4271696270016}, 'timestamp': '2025-09-10 02:47:13.419295', 'step': 17941, 'epoch': 3} {'type': 'loss', 'content': 0.06762823462486267, 'timestamp': '2025-09-10 02:47:13.422345', 'step': 17942, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 3322488817984}, 'timestamp': '2025-09-10 02:47:13.454608', 'step': 17942, 'epoch': 3} {'type': 'loss', 'content': 0.08899204432964325, 'timestamp': '2025-09-10 02:47:13.456984', 'step': 17943, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 2847885091968}, 'timestamp': '2025-09-10 02:47:13.511741', 'step': 17943, 'epoch': 3} {'type': 'loss', 'content': 0.05295240506529808, 'timestamp': '2025-09-10 02:47:13.535374', 'step': 17944, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 144], 'flops': 4271696270016}, 'timestamp': '2025-09-10 02:47:13.566302', 'step': 17944, 'epoch': 3} {'type': 'loss', 'content': 0.06594420969486237, 'timestamp': '2025-09-10 02:47:13.569553', 'step': 17945, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 3322488817984}, 'timestamp': '2025-09-10 02:47:13.607370', 'step': 17945, 'epoch': 3} {'type': 'loss', 'content': 0.014426286332309246, 'timestamp': '2025-09-10 02:47:13.610970', 'step': 17946, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 128], 'flops': 3797092544000}, 'timestamp': '2025-09-10 02:47:13.650805', 'step': 17946, 'epoch': 3} {'type': 'loss', 'content': 0.07562415301799774, 'timestamp': '2025-09-10 02:47:13.653606', 'step': 17947, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 128], 'flops': 3797092544000}, 'timestamp': '2025-09-10 02:47:13.684492', 'step': 17947, 'epoch': 3} {'type': 'loss', 'content': 0.0991351529955864, 'timestamp': '2025-09-10 02:47:13.708066', 'step': 17948, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 3322488817984}, 'timestamp': '2025-09-10 02:47:13.741032', 'step': 17948, 'epoch': 3} {'type': 'loss', 'content': 0.09298629313707352, 'timestamp': '2025-09-10 02:47:13.743634', 'step': 17949, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 3322488817984}, 'timestamp': '2025-09-10 02:47:13.793497', 'step': 17949, 'epoch': 3} {'type': 'loss', 'content': 0.09628623723983765, 'timestamp': '2025-09-10 02:47:13.795980', 'step': 17950, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 3322488817984}, 'timestamp': '2025-09-10 02:47:13.827122', 'step': 17950, 'epoch': 3} {'type': 'loss', 'content': 0.0727405697107315, 'timestamp': '2025-09-10 02:47:13.829574', 'step': 17951, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 3322488817984}, 'timestamp': '2025-09-10 02:47:13.861127', 'step': 17951, 'epoch': 3} {'type': 'loss', 'content': 0.05998990312218666, 'timestamp': '2025-09-10 02:47:13.886467', 'step': 17952, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 128], 'flops': 3797092544000}, 'timestamp': '2025-09-10 02:47:13.917114', 'step': 17952, 'epoch': 3} {'type': 'loss', 'content': 0.11707068979740143, 'timestamp': '2025-09-10 02:47:13.919904', 'step': 17953, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 3322488817984}, 'timestamp': '2025-09-10 02:47:13.950681', 'step': 17953, 'epoch': 3} {'type': 'loss', 'content': 0.062412653118371964, 'timestamp': '2025-09-10 02:47:13.952768', 'step': 17954, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 144], 'flops': 4271696270016}, 'timestamp': '2025-09-10 02:47:13.982788', 'step': 17954, 'epoch': 3} {'type': 'loss', 'content': 0.06279995292425156, 'timestamp': '2025-09-10 02:47:13.986329', 'step': 17955, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 2847885091968}, 'timestamp': '2025-09-10 02:47:14.016291', 'step': 17955, 'epoch': 3} {'type': 'loss', 'content': 0.14294731616973877, 'timestamp': '2025-09-10 02:47:14.039957', 'step': 17956, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 2847885091968}, 'timestamp': '2025-09-10 02:47:14.073242', 'step': 17956, 'epoch': 3} {'type': 'loss', 'content': 0.04256461188197136, 'timestamp': '2025-09-10 02:47:14.075417', 'step': 17957, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 2847885091968}, 'timestamp': '2025-09-10 02:47:14.105914', 'step': 17957, 'epoch': 3} {'type': 'loss', 'content': 0.14956097304821014, 'timestamp': '2025-09-10 02:47:14.108243', 'step': 17958, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 3322488817984}, 'timestamp': '2025-09-10 02:47:14.138702', 'step': 17958, 'epoch': 3} {'type': 'loss', 'content': 0.022030634805560112, 'timestamp': '2025-09-10 02:47:14.141096', 'step': 17959, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 80], 'flops': 2373281365952}, 'timestamp': '2025-09-10 02:47:14.172664', 'step': 17959, 'epoch': 3} {'type': 'loss', 'content': 0.09909740835428238, 'timestamp': '2025-09-10 02:47:14.198594', 'step': 17960, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 128], 'flops': 3797092544000}, 'timestamp': '2025-09-10 02:47:14.230943', 'step': 17960, 'epoch': 3} {'type': 'loss', 'content': 0.06103717163205147, 'timestamp': '2025-09-10 02:47:14.233381', 'step': 17961, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 3322488817984}, 'timestamp': '2025-09-10 02:47:14.264264', 'step': 17961, 'epoch': 3} {'type': 'loss', 'content': 0.05286356806755066, 'timestamp': '2025-09-10 02:47:14.266955', 'step': 17962, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 3322488817984}, 'timestamp': '2025-09-10 02:47:14.298409', 'step': 17962, 'epoch': 3} {'type': 'loss', 'content': 0.07704069465398788, 'timestamp': '2025-09-10 02:47:14.300855', 'step': 17963, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 2847885091968}, 'timestamp': '2025-09-10 02:47:14.330266', 'step': 17963, 'epoch': 3} {'type': 'loss', 'content': 0.05640651285648346, 'timestamp': '2025-09-10 02:47:14.353673', 'step': 17964, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 128], 'flops': 3797092544000}, 'timestamp': '2025-09-10 02:47:14.384478', 'step': 17964, 'epoch': 3} {'type': 'loss', 'content': 0.11900384724140167, 'timestamp': '2025-09-10 02:47:14.386676', 'step': 17965, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 3322488817984}, 'timestamp': '2025-09-10 02:47:14.417288', 'step': 17965, 'epoch': 3} {'type': 'loss', 'content': 0.05263648182153702, 'timestamp': '2025-09-10 02:47:14.419227', 'step': 17966, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 128], 'flops': 3797092544000}, 'timestamp': '2025-09-10 02:47:14.449681', 'step': 17966, 'epoch': 3} {'type': 'loss', 'content': 0.07206565141677856, 'timestamp': '2025-09-10 02:47:14.452263', 'step': 17967, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 80], 'flops': 2373281365952}, 'timestamp': '2025-09-10 02:47:14.485043', 'step': 17967, 'epoch': 3} {'type': 'loss', 'content': 0.06750697642564774, 'timestamp': '2025-09-10 02:47:14.508639', 'step': 17968, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 128], 'flops': 3797092544000}, 'timestamp': '2025-09-10 02:47:14.539816', 'step': 17968, 'epoch': 3} {'type': 'loss', 'content': 0.059355366975069046, 'timestamp': '2025-09-10 02:47:14.542106', 'step': 17969, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 2847885091968}, 'timestamp': '2025-09-10 02:47:14.574965', 'step': 17969, 'epoch': 3} {'type': 'loss', 'content': 0.020683562383055687, 'timestamp': '2025-09-10 02:47:14.577318', 'step': 17970, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 3322488817984}, 'timestamp': '2025-09-10 02:47:14.607120', 'step': 17970, 'epoch': 3} {'type': 'loss', 'content': 0.09390875697135925, 'timestamp': '2025-09-10 02:47:14.609333', 'step': 17971, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 3322488817984}, 'timestamp': '2025-09-10 02:47:14.639179', 'step': 17971, 'epoch': 3} {'type': 'loss', 'content': 0.08011502772569656, 'timestamp': '2025-09-10 02:47:14.662399', 'step': 17972, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 3322488817984}, 'timestamp': '2025-09-10 02:47:14.693321', 'step': 17972, 'epoch': 3} {'type': 'loss', 'content': 0.10067455470561981, 'timestamp': '2025-09-10 02:47:14.695139', 'step': 17973, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 128], 'flops': 3797092544000}, 'timestamp': '2025-09-10 02:47:14.725628', 'step': 17973, 'epoch': 3} {'type': 'loss', 'content': 0.04674986004829407, 'timestamp': '2025-09-10 02:47:14.727861', 'step': 17974, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 3322488817984}, 'timestamp': '2025-09-10 02:47:14.758126', 'step': 17974, 'epoch': 3} {'type': 'loss', 'content': 0.013809219002723694, 'timestamp': '2025-09-10 02:47:14.760344', 'step': 17975, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 2847885091968}, 'timestamp': '2025-09-10 02:47:14.790319', 'step': 17975, 'epoch': 3} {'type': 'loss', 'content': 0.0879790186882019, 'timestamp': '2025-09-10 02:47:14.814110', 'step': 17976, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 2847885091968}, 'timestamp': '2025-09-10 02:47:14.843937', 'step': 17976, 'epoch': 3} {'type': 'loss', 'content': 0.022534318268299103, 'timestamp': '2025-09-10 02:47:14.846249', 'step': 17977, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 2847885091968}, 'timestamp': '2025-09-10 02:47:14.876595', 'step': 17977, 'epoch': 3} {'type': 'loss', 'content': 0.10193464159965515, 'timestamp': '2025-09-10 02:47:14.879732', 'step': 17978, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 128], 'flops': 3797092544000}, 'timestamp': '2025-09-10 02:47:14.912503', 'step': 17978, 'epoch': 3} {'type': 'loss', 'content': 0.10377374291419983, 'timestamp': '2025-09-10 02:47:14.915870', 'step': 17979, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 2847885091968}, 'timestamp': '2025-09-10 02:47:14.952036', 'step': 17979, 'epoch': 3} {'type': 'loss', 'content': 0.1271783858537674, 'timestamp': '2025-09-10 02:47:14.975891', 'step': 17980, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 144], 'flops': 4271696270016}, 'timestamp': '2025-09-10 02:47:15.020184', 'step': 17980, 'epoch': 3} {'type': 'loss', 'content': 0.0069327098317444324, 'timestamp': '2025-09-10 02:47:15.024381', 'step': 17981, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 2847885091968}, 'timestamp': '2025-09-10 02:47:15.064328', 'step': 17981, 'epoch': 3} {'type': 'loss', 'content': 0.04145583510398865, 'timestamp': '2025-09-10 02:47:15.067764', 'step': 17982, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 3322488817984}, 'timestamp': '2025-09-10 02:47:15.103041', 'step': 17982, 'epoch': 3} {'type': 'loss', 'content': 0.055072441697120667, 'timestamp': '2025-09-10 02:47:15.105411', 'step': 17983, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 2847885091968}, 'timestamp': '2025-09-10 02:47:15.135847', 'step': 17983, 'epoch': 3} {'type': 'loss', 'content': 0.09293602406978607, 'timestamp': '2025-09-10 02:47:15.159844', 'step': 17984, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 3322488817984}, 'timestamp': '2025-09-10 02:47:15.189768', 'step': 17984, 'epoch': 3} {'type': 'loss', 'content': 0.04306955263018608, 'timestamp': '2025-09-10 02:47:15.191984', 'step': 17985, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 3322488817984}, 'timestamp': '2025-09-10 02:47:15.222051', 'step': 17985, 'epoch': 3} {'type': 'loss', 'content': 0.06850674003362656, 'timestamp': '2025-09-10 02:47:15.224164', 'step': 17986, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 128], 'flops': 3797092544000}, 'timestamp': '2025-09-10 02:47:15.254385', 'step': 17986, 'epoch': 3} {'type': 'loss', 'content': 0.027063746005296707, 'timestamp': '2025-09-10 02:47:15.256553', 'step': 17987, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 3322488817984}, 'timestamp': '2025-09-10 02:47:15.288258', 'step': 17987, 'epoch': 3} {'type': 'loss', 'content': 0.06066759675741196, 'timestamp': '2025-09-10 02:47:15.311564', 'step': 17988, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 3322488817984}, 'timestamp': '2025-09-10 02:47:15.342371', 'step': 17988, 'epoch': 3} {'type': 'loss', 'content': 0.13615648448467255, 'timestamp': '2025-09-10 02:47:15.344629', 'step': 17989, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 2847885091968}, 'timestamp': '2025-09-10 02:47:15.374948', 'step': 17989, 'epoch': 3} {'type': 'loss', 'content': 0.11602085083723068, 'timestamp': '2025-09-10 02:47:15.377271', 'step': 17990, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 2847885091968}, 'timestamp': '2025-09-10 02:47:15.407614', 'step': 17990, 'epoch': 3} {'type': 'loss', 'content': 0.055154718458652496, 'timestamp': '2025-09-10 02:47:15.409912', 'step': 17991, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 2847885091968}, 'timestamp': '2025-09-10 02:47:15.439856', 'step': 17991, 'epoch': 3} {'type': 'loss', 'content': 0.021690523251891136, 'timestamp': '2025-09-10 02:47:15.463917', 'step': 17992, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 3322488817984}, 'timestamp': '2025-09-10 02:47:15.495098', 'step': 17992, 'epoch': 3} {'type': 'loss', 'content': 0.08403529971837997, 'timestamp': '2025-09-10 02:47:15.497315', 'step': 17993, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 2847885091968}, 'timestamp': '2025-09-10 02:47:15.528348', 'step': 17993, 'epoch': 3} {'type': 'loss', 'content': 0.07879830151796341, 'timestamp': '2025-09-10 02:47:15.530519', 'step': 17994, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 144], 'flops': 4271696270016}, 'timestamp': '2025-09-10 02:47:15.560856', 'step': 17994, 'epoch': 3} {'type': 'loss', 'content': 0.04992159456014633, 'timestamp': '2025-09-10 02:47:15.564458', 'step': 17995, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 2847885091968}, 'timestamp': '2025-09-10 02:47:15.594193', 'step': 17995, 'epoch': 3} {'type': 'loss', 'content': 0.01630285009741783, 'timestamp': '2025-09-10 02:47:15.617752', 'step': 17996, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 128], 'flops': 3797092544000}, 'timestamp': '2025-09-10 02:47:15.648675', 'step': 17996, 'epoch': 3} {'type': 'loss', 'content': 0.10523613542318344, 'timestamp': '2025-09-10 02:47:15.651229', 'step': 17997, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 2847885091968}, 'timestamp': '2025-09-10 02:47:15.681246', 'step': 17997, 'epoch': 3} {'type': 'loss', 'content': 0.042825572192668915, 'timestamp': '2025-09-10 02:47:15.684321', 'step': 17998, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 3322488817984}, 'timestamp': '2025-09-10 02:47:15.714514', 'step': 17998, 'epoch': 3} {'type': 'loss', 'content': 0.04486600309610367, 'timestamp': '2025-09-10 02:47:15.716675', 'step': 17999, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 3322488817984}, 'timestamp': '2025-09-10 02:47:15.746906', 'step': 17999, 'epoch': 3} {'type': 'loss', 'content': 0.16285841166973114, 'timestamp': '2025-09-10 02:47:15.770462', 'step': 18000, 'epoch': 3} {'type': 'info', 'content': 'Checkpoint saved at step 18000', 'timestamp': '2025-09-10 02:47:20.658932', 'step': 18000, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 2847885091968}, 'timestamp': '2025-09-10 02:47:20.690076', 'step': 18000, 'epoch': 3} {'type': 'loss', 'content': 0.08974718302488327, 'timestamp': '2025-09-10 02:47:20.692461', 'step': 18001, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 3322488817984}, 'timestamp': '2025-09-10 02:47:20.723765', 'step': 18001, 'epoch': 3} {'type': 'loss', 'content': 0.04953523352742195, 'timestamp': '2025-09-10 02:47:20.725825', 'step': 18002, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 2847885091968}, 'timestamp': '2025-09-10 02:47:20.756871', 'step': 18002, 'epoch': 3} {'type': 'loss', 'content': 0.0648285299539566, 'timestamp': '2025-09-10 02:47:20.759103', 'step': 18003, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 3322488817984}, 'timestamp': '2025-09-10 02:47:20.789345', 'step': 18003, 'epoch': 3} {'type': 'loss', 'content': 0.04291800409555435, 'timestamp': '2025-09-10 02:47:20.813143', 'step': 18004, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 160], 'flops': 4746299996032}, 'timestamp': '2025-09-10 02:47:20.844030', 'step': 18004, 'epoch': 3} {'type': 'loss', 'content': 0.03492148220539093, 'timestamp': '2025-09-10 02:47:20.846320', 'step': 18005, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 144], 'flops': 4271696270016}, 'timestamp': '2025-09-10 02:47:20.877288', 'step': 18005, 'epoch': 3} {'type': 'loss', 'content': 0.10003340989351273, 'timestamp': '2025-09-10 02:47:20.879671', 'step': 18006, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 3322488817984}, 'timestamp': '2025-09-10 02:47:20.910812', 'step': 18006, 'epoch': 3} {'type': 'loss', 'content': 0.0406770221889019, 'timestamp': '2025-09-10 02:47:20.913447', 'step': 18007, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 144], 'flops': 4271696270016}, 'timestamp': '2025-09-10 02:47:20.945737', 'step': 18007, 'epoch': 3} {'type': 'loss', 'content': 0.04802043363451958, 'timestamp': '2025-09-10 02:47:20.969248', 'step': 18008, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 2847885091968}, 'timestamp': '2025-09-10 02:47:21.000166', 'step': 18008, 'epoch': 3} {'type': 'loss', 'content': 0.05797818675637245, 'timestamp': '2025-09-10 02:47:21.002425', 'step': 18009, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 128], 'flops': 3797092544000}, 'timestamp': '2025-09-10 02:47:21.033610', 'step': 18009, 'epoch': 3} {'type': 'loss', 'content': 0.03498046472668648, 'timestamp': '2025-09-10 02:47:21.037074', 'step': 18010, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 3322488817984}, 'timestamp': '2025-09-10 02:47:21.067965', 'step': 18010, 'epoch': 3} {'type': 'loss', 'content': 0.14044125378131866, 'timestamp': '2025-09-10 02:47:21.070200', 'step': 18011, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 128], 'flops': 3797092544000}, 'timestamp': '2025-09-10 02:47:21.100431', 'step': 18011, 'epoch': 3} {'type': 'loss', 'content': 0.101439468562603, 'timestamp': '2025-09-10 02:47:21.124694', 'step': 18012, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 2847885091968}, 'timestamp': '2025-09-10 02:47:21.155279', 'step': 18012, 'epoch': 3} {'type': 'loss', 'content': 0.07408753037452698, 'timestamp': '2025-09-10 02:47:21.157825', 'step': 18013, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 2847885091968}, 'timestamp': '2025-09-10 02:47:21.188344', 'step': 18013, 'epoch': 3} {'type': 'loss', 'content': 0.051920294761657715, 'timestamp': '2025-09-10 02:47:21.190551', 'step': 18014, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 3322488817984}, 'timestamp': '2025-09-10 02:47:21.220656', 'step': 18014, 'epoch': 3} {'type': 'loss', 'content': 0.06258181482553482, 'timestamp': '2025-09-10 02:47:21.223683', 'step': 18015, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 3322488817984}, 'timestamp': '2025-09-10 02:47:21.253659', 'step': 18015, 'epoch': 3} {'type': 'loss', 'content': 0.07775737345218658, 'timestamp': '2025-09-10 02:47:21.277256', 'step': 18016, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 3322488817984}, 'timestamp': '2025-09-10 02:47:21.307076', 'step': 18016, 'epoch': 3} {'type': 'loss', 'content': 0.041457440704107285, 'timestamp': '2025-09-10 02:47:21.309384', 'step': 18017, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 2847885091968}, 'timestamp': '2025-09-10 02:47:21.339575', 'step': 18017, 'epoch': 3} {'type': 'loss', 'content': 0.029910633340477943, 'timestamp': '2025-09-10 02:47:21.341691', 'step': 18018, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 2847885091968}, 'timestamp': '2025-09-10 02:47:21.372277', 'step': 18018, 'epoch': 3} {'type': 'loss', 'content': 0.037115756422281265, 'timestamp': '2025-09-10 02:47:21.374631', 'step': 18019, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 128], 'flops': 3797092544000}, 'timestamp': '2025-09-10 02:47:21.405382', 'step': 18019, 'epoch': 3} {'type': 'loss', 'content': 0.035901982337236404, 'timestamp': '2025-09-10 02:47:21.429308', 'step': 18020, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 144], 'flops': 4271696270016}, 'timestamp': '2025-09-10 02:47:21.459485', 'step': 18020, 'epoch': 3} {'type': 'loss', 'content': 0.10823716968297958, 'timestamp': '2025-09-10 02:47:21.462145', 'step': 18021, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 2847885091968}, 'timestamp': '2025-09-10 02:47:21.493562', 'step': 18021, 'epoch': 3} {'type': 'loss', 'content': 0.026043323799967766, 'timestamp': '2025-09-10 02:47:21.495976', 'step': 18022, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 3322488817984}, 'timestamp': '2025-09-10 02:47:21.525865', 'step': 18022, 'epoch': 3} {'type': 'loss', 'content': 0.1115490198135376, 'timestamp': '2025-09-10 02:47:21.528219', 'step': 18023, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 2847885091968}, 'timestamp': '2025-09-10 02:47:21.557911', 'step': 18023, 'epoch': 3} {'type': 'loss', 'content': 0.040116045624017715, 'timestamp': '2025-09-10 02:47:21.581383', 'step': 18024, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 128], 'flops': 3797092544000}, 'timestamp': '2025-09-10 02:47:21.614444', 'step': 18024, 'epoch': 3} {'type': 'loss', 'content': 0.13310861587524414, 'timestamp': '2025-09-10 02:47:21.617401', 'step': 18025, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 144], 'flops': 4271696270016}, 'timestamp': '2025-09-10 02:47:21.647680', 'step': 18025, 'epoch': 3} {'type': 'loss', 'content': 0.05407218635082245, 'timestamp': '2025-09-10 02:47:21.650169', 'step': 18026, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 128], 'flops': 3797092544000}, 'timestamp': '2025-09-10 02:47:21.680395', 'step': 18026, 'epoch': 3} {'type': 'loss', 'content': 0.07114974409341812, 'timestamp': '2025-09-10 02:47:21.682712', 'step': 18027, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 3322488817984}, 'timestamp': '2025-09-10 02:47:21.713151', 'step': 18027, 'epoch': 3} {'type': 'loss', 'content': 0.03998951241374016, 'timestamp': '2025-09-10 02:47:21.736899', 'step': 18028, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 2847885091968}, 'timestamp': '2025-09-10 02:47:21.768244', 'step': 18028, 'epoch': 3} {'type': 'loss', 'content': 0.029370417818427086, 'timestamp': '2025-09-10 02:47:21.770406', 'step': 18029, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 3322488817984}, 'timestamp': '2025-09-10 02:47:21.802629', 'step': 18029, 'epoch': 3} {'type': 'loss', 'content': 0.06966513395309448, 'timestamp': '2025-09-10 02:47:21.804844', 'step': 18030, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 3322488817984}, 'timestamp': '2025-09-10 02:47:21.834861', 'step': 18030, 'epoch': 3} {'type': 'loss', 'content': 0.06811654567718506, 'timestamp': '2025-09-10 02:47:21.837099', 'step': 18031, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 2847885091968}, 'timestamp': '2025-09-10 02:47:21.869695', 'step': 18031, 'epoch': 3} {'type': 'loss', 'content': 0.10095151513814926, 'timestamp': '2025-09-10 02:47:21.892811', 'step': 18032, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 2847885091968}, 'timestamp': '2025-09-10 02:47:21.924151', 'step': 18032, 'epoch': 3} {'type': 'loss', 'content': 0.07116426527500153, 'timestamp': '2025-09-10 02:47:21.926438', 'step': 18033, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 128], 'flops': 3797092544000}, 'timestamp': '2025-09-10 02:47:21.966377', 'step': 18033, 'epoch': 3} {'type': 'loss', 'content': 0.05627644434571266, 'timestamp': '2025-09-10 02:47:21.969106', 'step': 18034, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 128], 'flops': 3797092544000}, 'timestamp': '2025-09-10 02:47:22.005860', 'step': 18034, 'epoch': 3} {'type': 'loss', 'content': 0.05346568301320076, 'timestamp': '2025-09-10 02:47:22.008431', 'step': 18035, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 160], 'flops': 4746299996032}, 'timestamp': '2025-09-10 02:47:22.049986', 'step': 18035, 'epoch': 3} {'type': 'loss', 'content': 0.14028070867061615, 'timestamp': '2025-09-10 02:47:22.082778', 'step': 18036, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 128], 'flops': 3797092544000}, 'timestamp': '2025-09-10 02:47:22.164726', 'step': 18036, 'epoch': 3} {'type': 'loss', 'content': 0.12574394047260284, 'timestamp': '2025-09-10 02:47:22.177479', 'step': 18037, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 2847885091968}, 'timestamp': '2025-09-10 02:47:22.218216', 'step': 18037, 'epoch': 3} {'type': 'loss', 'content': 0.048469047993421555, 'timestamp': '2025-09-10 02:47:22.220790', 'step': 18038, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 144], 'flops': 4271696270016}, 'timestamp': '2025-09-10 02:47:22.258534', 'step': 18038, 'epoch': 3} {'type': 'loss', 'content': 0.105885349214077, 'timestamp': '2025-09-10 02:47:22.261818', 'step': 18039, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 3322488817984}, 'timestamp': '2025-09-10 02:47:22.298532', 'step': 18039, 'epoch': 3} {'type': 'loss', 'content': 0.002067242981866002, 'timestamp': '2025-09-10 02:47:22.323174', 'step': 18040, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 2847885091968}, 'timestamp': '2025-09-10 02:47:22.358644', 'step': 18040, 'epoch': 3} {'type': 'loss', 'content': 0.026992790400981903, 'timestamp': '2025-09-10 02:47:22.362182', 'step': 18041, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 144], 'flops': 4271696270016}, 'timestamp': '2025-09-10 02:47:22.402345', 'step': 18041, 'epoch': 3} {'type': 'loss', 'content': 0.045229703187942505, 'timestamp': '2025-09-10 02:47:22.408637', 'step': 18042, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 128], 'flops': 3797092544000}, 'timestamp': '2025-09-10 02:47:22.450561', 'step': 18042, 'epoch': 3} {'type': 'loss', 'content': 0.02971074916422367, 'timestamp': '2025-09-10 02:47:22.456993', 'step': 18043, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 128], 'flops': 3797092544000}, 'timestamp': '2025-09-10 02:47:22.505107', 'step': 18043, 'epoch': 3} {'type': 'loss', 'content': 0.06924532353878021, 'timestamp': '2025-09-10 02:47:22.533376', 'step': 18044, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 3322488817984}, 'timestamp': '2025-09-10 02:47:22.572120', 'step': 18044, 'epoch': 3} {'type': 'loss', 'content': 0.07434330135583878, 'timestamp': '2025-09-10 02:47:22.580996', 'step': 18045, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 2847885091968}, 'timestamp': '2025-09-10 02:47:22.630128', 'step': 18045, 'epoch': 3} {'type': 'loss', 'content': 0.07726061344146729, 'timestamp': '2025-09-10 02:47:22.642417', 'step': 18046, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 160], 'flops': 4746299996032}, 'timestamp': '2025-09-10 02:47:22.701462', 'step': 18046, 'epoch': 3} {'type': 'loss', 'content': 0.010470791719853878, 'timestamp': '2025-09-10 02:47:22.711837', 'step': 18047, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 176], 'flops': 5220903722048}, 'timestamp': '2025-09-10 02:47:22.745725', 'step': 18047, 'epoch': 3} {'type': 'loss', 'content': 0.03393734246492386, 'timestamp': '2025-09-10 02:47:22.777397', 'step': 18048, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 144], 'flops': 4271696270016}, 'timestamp': '2025-09-10 02:47:22.818157', 'step': 18048, 'epoch': 3} {'type': 'loss', 'content': 0.02480882965028286, 'timestamp': '2025-09-10 02:47:22.826186', 'step': 18049, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 128], 'flops': 3797092544000}, 'timestamp': '2025-09-10 02:47:22.868216', 'step': 18049, 'epoch': 3} {'type': 'loss', 'content': 0.07403095811605453, 'timestamp': '2025-09-10 02:47:22.870751', 'step': 18050, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 128], 'flops': 3797092544000}, 'timestamp': '2025-09-10 02:47:22.911190', 'step': 18050, 'epoch': 3} {'type': 'loss', 'content': 0.08006251603364944, 'timestamp': '2025-09-10 02:47:22.913684', 'step': 18051, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 3322488817984}, 'timestamp': '2025-09-10 02:47:22.951194', 'step': 18051, 'epoch': 3} {'type': 'loss', 'content': 0.09740623086690903, 'timestamp': '2025-09-10 02:47:22.977905', 'step': 18052, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 128], 'flops': 3797092544000}, 'timestamp': '2025-09-10 02:47:23.015335', 'step': 18052, 'epoch': 3} {'type': 'loss', 'content': 0.13709914684295654, 'timestamp': '2025-09-10 02:47:23.024440', 'step': 18053, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 2847885091968}, 'timestamp': '2025-09-10 02:47:23.085358', 'step': 18053, 'epoch': 3} {'type': 'loss', 'content': 0.06778305023908615, 'timestamp': '2025-09-10 02:47:23.089068', 'step': 18054, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 128], 'flops': 3797092544000}, 'timestamp': '2025-09-10 02:47:23.136503', 'step': 18054, 'epoch': 3} {'type': 'loss', 'content': 0.053805019706487656, 'timestamp': '2025-09-10 02:47:23.140986', 'step': 18055, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 128], 'flops': 3797092544000}, 'timestamp': '2025-09-10 02:47:23.183183', 'step': 18055, 'epoch': 3} {'type': 'loss', 'content': 0.12376990914344788, 'timestamp': '2025-09-10 02:47:23.209632', 'step': 18056, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 144], 'flops': 4271696270016}, 'timestamp': '2025-09-10 02:47:23.256431', 'step': 18056, 'epoch': 3} {'type': 'loss', 'content': 0.08550355583429337, 'timestamp': '2025-09-10 02:47:23.260411', 'step': 18057, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 2847885091968}, 'timestamp': '2025-09-10 02:47:23.303319', 'step': 18057, 'epoch': 3} {'type': 'loss', 'content': 0.083567775785923, 'timestamp': '2025-09-10 02:47:23.314751', 'step': 18058, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 128], 'flops': 3797092544000}, 'timestamp': '2025-09-10 02:47:23.357061', 'step': 18058, 'epoch': 3} {'type': 'loss', 'content': 0.04573599249124527, 'timestamp': '2025-09-10 02:47:23.359913', 'step': 18059, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 3322488817984}, 'timestamp': '2025-09-10 02:47:23.393147', 'step': 18059, 'epoch': 3} {'type': 'loss', 'content': 0.08973554521799088, 'timestamp': '2025-09-10 02:47:23.420514', 'step': 18060, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 192], 'flops': 5695507448064}, 'timestamp': '2025-09-10 02:47:23.488501', 'step': 18060, 'epoch': 3} {'type': 'loss', 'content': 0.057689372450113297, 'timestamp': '2025-09-10 02:47:23.493432', 'step': 18061, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 2847885091968}, 'timestamp': '2025-09-10 02:47:23.530970', 'step': 18061, 'epoch': 3} {'type': 'loss', 'content': 0.028577595949172974, 'timestamp': '2025-09-10 02:47:23.535464', 'step': 18062, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 128], 'flops': 3797092544000}, 'timestamp': '2025-09-10 02:47:23.620799', 'step': 18062, 'epoch': 3} {'type': 'loss', 'content': 0.13282670080661774, 'timestamp': '2025-09-10 02:47:23.627514', 'step': 18063, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 3322488817984}, 'timestamp': '2025-09-10 02:47:23.697820', 'step': 18063, 'epoch': 3} {'type': 'loss', 'content': 0.043656300753355026, 'timestamp': '2025-09-10 02:47:23.723094', 'step': 18064, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 128], 'flops': 3797092544000}, 'timestamp': '2025-09-10 02:47:23.803712', 'step': 18064, 'epoch': 3} {'type': 'loss', 'content': 0.061679355800151825, 'timestamp': '2025-09-10 02:47:23.827372', 'step': 18065, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 2847885091968}, 'timestamp': '2025-09-10 02:47:23.902316', 'step': 18065, 'epoch': 3} {'type': 'loss', 'content': 0.09221680462360382, 'timestamp': '2025-09-10 02:47:23.919985', 'step': 18066, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 2847885091968}, 'timestamp': '2025-09-10 02:47:23.978527', 'step': 18066, 'epoch': 3} {'type': 'loss', 'content': 0.06226009875535965, 'timestamp': '2025-09-10 02:47:23.981254', 'step': 18067, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 3322488817984}, 'timestamp': '2025-09-10 02:47:24.022360', 'step': 18067, 'epoch': 3} {'type': 'loss', 'content': 0.07067157328128815, 'timestamp': '2025-09-10 02:47:24.048709', 'step': 18068, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 3322488817984}, 'timestamp': '2025-09-10 02:47:24.086276', 'step': 18068, 'epoch': 3} {'type': 'loss', 'content': 0.07126781344413757, 'timestamp': '2025-09-10 02:47:24.090291', 'step': 18069, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 2847885091968}, 'timestamp': '2025-09-10 02:47:24.125130', 'step': 18069, 'epoch': 3} {'type': 'loss', 'content': 0.06236681714653969, 'timestamp': '2025-09-10 02:47:24.130573', 'step': 18070, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 2847885091968}, 'timestamp': '2025-09-10 02:47:24.184027', 'step': 18070, 'epoch': 3} {'type': 'loss', 'content': 0.06870795041322708, 'timestamp': '2025-09-10 02:47:24.193102', 'step': 18071, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 3322488817984}, 'timestamp': '2025-09-10 02:47:24.242156', 'step': 18071, 'epoch': 3} {'type': 'loss', 'content': 0.05932571366429329, 'timestamp': '2025-09-10 02:47:24.267304', 'step': 18072, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 3322488817984}, 'timestamp': '2025-09-10 02:47:24.304467', 'step': 18072, 'epoch': 3} {'type': 'loss', 'content': 0.06752899289131165, 'timestamp': '2025-09-10 02:47:24.306768', 'step': 18073, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 2847885091968}, 'timestamp': '2025-09-10 02:47:24.336954', 'step': 18073, 'epoch': 3} {'type': 'loss', 'content': 0.07565628737211227, 'timestamp': '2025-09-10 02:47:24.341251', 'step': 18074, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 3322488817984}, 'timestamp': '2025-09-10 02:47:24.382531', 'step': 18074, 'epoch': 3} {'type': 'loss', 'content': 0.09760519117116928, 'timestamp': '2025-09-10 02:47:24.393902', 'step': 18075, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 2847885091968}, 'timestamp': '2025-09-10 02:47:24.429775', 'step': 18075, 'epoch': 3} {'type': 'loss', 'content': 0.06030360981822014, 'timestamp': '2025-09-10 02:47:24.456305', 'step': 18076, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 144], 'flops': 4271696270016}, 'timestamp': '2025-09-10 02:47:24.493514', 'step': 18076, 'epoch': 3} {'type': 'loss', 'content': 0.06819836795330048, 'timestamp': '2025-09-10 02:47:24.498609', 'step': 18077, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 2847885091968}, 'timestamp': '2025-09-10 02:47:24.536016', 'step': 18077, 'epoch': 3} {'type': 'loss', 'content': 0.08330859243869781, 'timestamp': '2025-09-10 02:47:24.559736', 'step': 18078, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 3322488817984}, 'timestamp': '2025-09-10 02:47:24.616542', 'step': 18078, 'epoch': 3} {'type': 'loss', 'content': 0.032311178743839264, 'timestamp': '2025-09-10 02:47:24.623105', 'step': 18079, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 2847885091968}, 'timestamp': '2025-09-10 02:47:24.676580', 'step': 18079, 'epoch': 3} {'type': 'loss', 'content': 0.12198403477668762, 'timestamp': '2025-09-10 02:47:24.713145', 'step': 18080, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 160], 'flops': 4746299996032}, 'timestamp': '2025-09-10 02:47:24.751632', 'step': 18080, 'epoch': 3} {'type': 'loss', 'content': 0.09031201153993607, 'timestamp': '2025-09-10 02:47:24.756460', 'step': 18081, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 2847885091968}, 'timestamp': '2025-09-10 02:47:24.792640', 'step': 18081, 'epoch': 3} {'type': 'loss', 'content': 0.04543188586831093, 'timestamp': '2025-09-10 02:47:24.796515', 'step': 18082, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 3322488817984}, 'timestamp': '2025-09-10 02:47:24.829223', 'step': 18082, 'epoch': 3} {'type': 'loss', 'content': 0.13829977810382843, 'timestamp': '2025-09-10 02:47:24.832223', 'step': 18083, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 128], 'flops': 3797092544000}, 'timestamp': '2025-09-10 02:47:24.866824', 'step': 18083, 'epoch': 3} {'type': 'loss', 'content': 0.07980957627296448, 'timestamp': '2025-09-10 02:47:24.892319', 'step': 18084, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 144], 'flops': 4271696270016}, 'timestamp': '2025-09-10 02:47:24.923026', 'step': 18084, 'epoch': 3} {'type': 'loss', 'content': 0.06118584796786308, 'timestamp': '2025-09-10 02:47:24.926328', 'step': 18085, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 3322488817984}, 'timestamp': '2025-09-10 02:47:24.960725', 'step': 18085, 'epoch': 3} {'type': 'loss', 'content': 0.060569457709789276, 'timestamp': '2025-09-10 02:47:24.962952', 'step': 18086, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 2847885091968}, 'timestamp': '2025-09-10 02:47:24.993135', 'step': 18086, 'epoch': 3} {'type': 'loss', 'content': 0.1573195457458496, 'timestamp': '2025-09-10 02:47:24.995372', 'step': 18087, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 144], 'flops': 4271696270016}, 'timestamp': '2025-09-10 02:47:25.025459', 'step': 18087, 'epoch': 3} {'type': 'loss', 'content': 0.10905672609806061, 'timestamp': '2025-09-10 02:47:25.049700', 'step': 18088, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 128], 'flops': 3797092544000}, 'timestamp': '2025-09-10 02:47:25.079533', 'step': 18088, 'epoch': 3} {'type': 'loss', 'content': 0.05633382871747017, 'timestamp': '2025-09-10 02:47:25.082472', 'step': 18089, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 2847885091968}, 'timestamp': '2025-09-10 02:47:25.114297', 'step': 18089, 'epoch': 3} {'type': 'loss', 'content': 0.048841435462236404, 'timestamp': '2025-09-10 02:47:25.117410', 'step': 18090, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 2847885091968}, 'timestamp': '2025-09-10 02:47:25.148636', 'step': 18090, 'epoch': 3} {'type': 'loss', 'content': 0.05423804745078087, 'timestamp': '2025-09-10 02:47:25.151875', 'step': 18091, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 128], 'flops': 3797092544000}, 'timestamp': '2025-09-10 02:47:25.182556', 'step': 18091, 'epoch': 3} {'type': 'loss', 'content': 0.09412222355604172, 'timestamp': '2025-09-10 02:47:25.207222', 'step': 18092, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 2847885091968}, 'timestamp': '2025-09-10 02:47:25.238809', 'step': 18092, 'epoch': 3} {'type': 'loss', 'content': 0.09201579540967941, 'timestamp': '2025-09-10 02:47:25.241077', 'step': 18093, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 3322488817984}, 'timestamp': '2025-09-10 02:47:25.271606', 'step': 18093, 'epoch': 3} {'type': 'loss', 'content': 0.10546478629112244, 'timestamp': '2025-09-10 02:47:25.274058', 'step': 18094, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 160], 'flops': 4746299996032}, 'timestamp': '2025-09-10 02:47:25.306202', 'step': 18094, 'epoch': 3} {'type': 'loss', 'content': 0.11484675854444504, 'timestamp': '2025-09-10 02:47:25.309695', 'step': 18095, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 2847885091968}, 'timestamp': '2025-09-10 02:47:25.341969', 'step': 18095, 'epoch': 3} {'type': 'loss', 'content': 0.05204153433442116, 'timestamp': '2025-09-10 02:47:25.366030', 'step': 18096, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 2847885091968}, 'timestamp': '2025-09-10 02:47:25.396656', 'step': 18096, 'epoch': 3} {'type': 'loss', 'content': 0.05186967924237251, 'timestamp': '2025-09-10 02:47:25.399180', 'step': 18097, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 3322488817984}, 'timestamp': '2025-09-10 02:47:25.430873', 'step': 18097, 'epoch': 3} {'type': 'loss', 'content': 0.10234341025352478, 'timestamp': '2025-09-10 02:47:25.433205', 'step': 18098, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 3322488817984}, 'timestamp': '2025-09-10 02:47:25.463302', 'step': 18098, 'epoch': 3} {'type': 'loss', 'content': 0.07433458417654037, 'timestamp': '2025-09-10 02:47:25.465619', 'step': 18099, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 3322488817984}, 'timestamp': '2025-09-10 02:47:25.496365', 'step': 18099, 'epoch': 3} {'type': 'loss', 'content': 0.05042069032788277, 'timestamp': '2025-09-10 02:47:25.520758', 'step': 18100, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 128], 'flops': 3797092544000}, 'timestamp': '2025-09-10 02:47:25.552034', 'step': 18100, 'epoch': 3} {'type': 'loss', 'content': 0.082307830452919, 'timestamp': '2025-09-10 02:47:25.555234', 'step': 18101, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 3322488817984}, 'timestamp': '2025-09-10 02:47:25.587463', 'step': 18101, 'epoch': 3} {'type': 'loss', 'content': 0.02338767796754837, 'timestamp': '2025-09-10 02:47:25.590834', 'step': 18102, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 2847885091968}, 'timestamp': '2025-09-10 02:47:25.622939', 'step': 18102, 'epoch': 3} {'type': 'loss', 'content': 0.03813553974032402, 'timestamp': '2025-09-10 02:47:25.625532', 'step': 18103, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 3322488817984}, 'timestamp': '2025-09-10 02:47:25.660839', 'step': 18103, 'epoch': 3} {'type': 'loss', 'content': 0.059850431978702545, 'timestamp': '2025-09-10 02:47:25.685416', 'step': 18104, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 128], 'flops': 3797092544000}, 'timestamp': '2025-09-10 02:47:25.717221', 'step': 18104, 'epoch': 3} {'type': 'loss', 'content': 0.0767650157213211, 'timestamp': '2025-09-10 02:47:25.720680', 'step': 18105, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 2847885091968}, 'timestamp': '2025-09-10 02:47:25.753545', 'step': 18105, 'epoch': 3} {'type': 'loss', 'content': 0.032476287335157394, 'timestamp': '2025-09-10 02:47:25.757477', 'step': 18106, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 3322488817984}, 'timestamp': '2025-09-10 02:47:25.791693', 'step': 18106, 'epoch': 3} {'type': 'loss', 'content': 0.06439302861690521, 'timestamp': '2025-09-10 02:47:25.797563', 'step': 18107, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 128], 'flops': 3797092544000}, 'timestamp': '2025-09-10 02:47:25.835567', 'step': 18107, 'epoch': 3} {'type': 'loss', 'content': 0.05270836874842644, 'timestamp': '2025-09-10 02:47:25.860616', 'step': 18108, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 2847885091968}, 'timestamp': '2025-09-10 02:47:25.905806', 'step': 18108, 'epoch': 3} {'type': 'loss', 'content': 0.02508453093469143, 'timestamp': '2025-09-10 02:47:25.909108', 'step': 18109, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 2847885091968}, 'timestamp': '2025-09-10 02:47:25.979581', 'step': 18109, 'epoch': 3} {'type': 'loss', 'content': 0.024785928428173065, 'timestamp': '2025-09-10 02:47:25.986260', 'step': 18110, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 2847885091968}, 'timestamp': '2025-09-10 02:47:26.027882', 'step': 18110, 'epoch': 3} {'type': 'loss', 'content': 0.041531190276145935, 'timestamp': '2025-09-10 02:47:26.035195', 'step': 18111, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 2847885091968}, 'timestamp': '2025-09-10 02:47:26.075885', 'step': 18111, 'epoch': 3} {'type': 'loss', 'content': 0.014919252134859562, 'timestamp': '2025-09-10 02:47:26.103585', 'step': 18112, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 128], 'flops': 3797092544000}, 'timestamp': '2025-09-10 02:47:26.144984', 'step': 18112, 'epoch': 3} {'type': 'loss', 'content': 0.0373995304107666, 'timestamp': '2025-09-10 02:47:26.157715', 'step': 18113, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 80], 'flops': 2373281365952}, 'timestamp': '2025-09-10 02:47:26.192354', 'step': 18113, 'epoch': 3} {'type': 'loss', 'content': 0.08671668916940689, 'timestamp': '2025-09-10 02:47:26.196314', 'step': 18114, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 3322488817984}, 'timestamp': '2025-09-10 02:47:26.236192', 'step': 18114, 'epoch': 3} {'type': 'loss', 'content': 0.0725138857960701, 'timestamp': '2025-09-10 02:47:26.244686', 'step': 18115, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 3322488817984}, 'timestamp': '2025-09-10 02:47:26.292345', 'step': 18115, 'epoch': 3} {'type': 'loss', 'content': 0.07800157368183136, 'timestamp': '2025-09-10 02:47:26.315655', 'step': 18116, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 3322488817984}, 'timestamp': '2025-09-10 02:47:26.347876', 'step': 18116, 'epoch': 3} {'type': 'loss', 'content': 0.04999954625964165, 'timestamp': '2025-09-10 02:47:26.350002', 'step': 18117, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 80], 'flops': 2373281365952}, 'timestamp': '2025-09-10 02:47:26.379256', 'step': 18117, 'epoch': 3} {'type': 'loss', 'content': 0.0341811366379261, 'timestamp': '2025-09-10 02:47:26.381712', 'step': 18118, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 3322488817984}, 'timestamp': '2025-09-10 02:47:26.411333', 'step': 18118, 'epoch': 3} {'type': 'loss', 'content': 0.08141027390956879, 'timestamp': '2025-09-10 02:47:26.413979', 'step': 18119, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 2847885091968}, 'timestamp': '2025-09-10 02:47:26.445369', 'step': 18119, 'epoch': 3} {'type': 'loss', 'content': 0.05022589489817619, 'timestamp': '2025-09-10 02:47:26.469348', 'step': 18120, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 3322488817984}, 'timestamp': '2025-09-10 02:47:26.500343', 'step': 18120, 'epoch': 3} {'type': 'loss', 'content': 0.045141398906707764, 'timestamp': '2025-09-10 02:47:26.503348', 'step': 18121, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 3322488817984}, 'timestamp': '2025-09-10 02:47:26.533550', 'step': 18121, 'epoch': 3} {'type': 'loss', 'content': 0.07519545406103134, 'timestamp': '2025-09-10 02:47:26.536202', 'step': 18122, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 2847885091968}, 'timestamp': '2025-09-10 02:47:26.566383', 'step': 18122, 'epoch': 3} {'type': 'loss', 'content': 0.09992430359125137, 'timestamp': '2025-09-10 02:47:26.568869', 'step': 18123, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 2847885091968}, 'timestamp': '2025-09-10 02:47:26.599970', 'step': 18123, 'epoch': 3} {'type': 'loss', 'content': 0.10268735140562057, 'timestamp': '2025-09-10 02:47:26.623715', 'step': 18124, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 2847885091968}, 'timestamp': '2025-09-10 02:47:26.655044', 'step': 18124, 'epoch': 3} {'type': 'loss', 'content': 0.055063970386981964, 'timestamp': '2025-09-10 02:47:26.657463', 'step': 18125, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 3322488817984}, 'timestamp': '2025-09-10 02:47:26.687790', 'step': 18125, 'epoch': 3} {'type': 'loss', 'content': 0.0922371968626976, 'timestamp': '2025-09-10 02:47:26.689993', 'step': 18126, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 160], 'flops': 4746299996032}, 'timestamp': '2025-09-10 02:47:26.723026', 'step': 18126, 'epoch': 3} {'type': 'loss', 'content': 0.12984880805015564, 'timestamp': '2025-09-10 02:47:26.725665', 'step': 18127, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 144], 'flops': 4271696270016}, 'timestamp': '2025-09-10 02:47:26.757049', 'step': 18127, 'epoch': 3} {'type': 'loss', 'content': 0.05937861278653145, 'timestamp': '2025-09-10 02:47:26.782467', 'step': 18128, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 2847885091968}, 'timestamp': '2025-09-10 02:47:26.813731', 'step': 18128, 'epoch': 3} {'type': 'loss', 'content': 0.03755386546254158, 'timestamp': '2025-09-10 02:47:26.816265', 'step': 18129, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 3322488817984}, 'timestamp': '2025-09-10 02:47:26.846943', 'step': 18129, 'epoch': 3} {'type': 'loss', 'content': 0.09786739200353622, 'timestamp': '2025-09-10 02:47:26.849214', 'step': 18130, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 2847885091968}, 'timestamp': '2025-09-10 02:47:26.879393', 'step': 18130, 'epoch': 3} {'type': 'loss', 'content': 0.07294981926679611, 'timestamp': '2025-09-10 02:47:26.881641', 'step': 18131, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 128], 'flops': 3797092544000}, 'timestamp': '2025-09-10 02:47:26.911986', 'step': 18131, 'epoch': 3} {'type': 'loss', 'content': 0.027904195711016655, 'timestamp': '2025-09-10 02:47:26.935379', 'step': 18132, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 3322488817984}, 'timestamp': '2025-09-10 02:47:26.966325', 'step': 18132, 'epoch': 3} {'type': 'loss', 'content': 0.07902271300554276, 'timestamp': '2025-09-10 02:47:26.968736', 'step': 18133, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 3322488817984}, 'timestamp': '2025-09-10 02:47:26.999418', 'step': 18133, 'epoch': 3} {'type': 'loss', 'content': 0.05377369001507759, 'timestamp': '2025-09-10 02:47:27.001818', 'step': 18134, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 2847885091968}, 'timestamp': '2025-09-10 02:47:27.031763', 'step': 18134, 'epoch': 3} {'type': 'loss', 'content': 0.09504037350416183, 'timestamp': '2025-09-10 02:47:27.034272', 'step': 18135, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 3322488817984}, 'timestamp': '2025-09-10 02:47:27.064631', 'step': 18135, 'epoch': 3} {'type': 'loss', 'content': 0.08539628237485886, 'timestamp': '2025-09-10 02:47:27.088276', 'step': 18136, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 3322488817984}, 'timestamp': '2025-09-10 02:47:27.118192', 'step': 18136, 'epoch': 3} {'type': 'loss', 'content': 0.1538258194923401, 'timestamp': '2025-09-10 02:47:27.120840', 'step': 18137, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 144], 'flops': 4271696270016}, 'timestamp': '2025-09-10 02:47:27.153368', 'step': 18137, 'epoch': 3} {'type': 'loss', 'content': 0.05337684229016304, 'timestamp': '2025-09-10 02:47:27.156221', 'step': 18138, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 2847885091968}, 'timestamp': '2025-09-10 02:47:27.186692', 'step': 18138, 'epoch': 3} {'type': 'loss', 'content': 0.10664349794387817, 'timestamp': '2025-09-10 02:47:27.189582', 'step': 18139, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 128], 'flops': 3797092544000}, 'timestamp': '2025-09-10 02:47:27.221400', 'step': 18139, 'epoch': 3} {'type': 'loss', 'content': 0.05902940034866333, 'timestamp': '2025-09-10 02:47:27.245483', 'step': 18140, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 128], 'flops': 3797092544000}, 'timestamp': '2025-09-10 02:47:27.276654', 'step': 18140, 'epoch': 3} {'type': 'loss', 'content': 0.10936423391103745, 'timestamp': '2025-09-10 02:47:27.278863', 'step': 18141, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 3322488817984}, 'timestamp': '2025-09-10 02:47:27.308934', 'step': 18141, 'epoch': 3} {'type': 'loss', 'content': 0.09237298369407654, 'timestamp': '2025-09-10 02:47:27.311252', 'step': 18142, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 128], 'flops': 3797092544000}, 'timestamp': '2025-09-10 02:47:27.341517', 'step': 18142, 'epoch': 3} {'type': 'loss', 'content': 0.09266773611307144, 'timestamp': '2025-09-10 02:47:27.343669', 'step': 18143, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 192], 'flops': 5695507448064}, 'timestamp': '2025-09-10 02:47:27.374459', 'step': 18143, 'epoch': 3} {'type': 'loss', 'content': 0.12144677340984344, 'timestamp': '2025-09-10 02:47:27.399697', 'step': 18144, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 144], 'flops': 4271696270016}, 'timestamp': '2025-09-10 02:47:27.430161', 'step': 18144, 'epoch': 3} {'type': 'loss', 'content': 0.03252045437693596, 'timestamp': '2025-09-10 02:47:27.433634', 'step': 18145, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 3322488817984}, 'timestamp': '2025-09-10 02:47:27.463282', 'step': 18145, 'epoch': 3} {'type': 'loss', 'content': 0.06849466264247894, 'timestamp': '2025-09-10 02:47:27.465484', 'step': 18146, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 2847885091968}, 'timestamp': '2025-09-10 02:47:27.495452', 'step': 18146, 'epoch': 3} {'type': 'loss', 'content': 0.1152305155992508, 'timestamp': '2025-09-10 02:47:27.497760', 'step': 18147, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 3322488817984}, 'timestamp': '2025-09-10 02:47:27.529130', 'step': 18147, 'epoch': 3} {'type': 'loss', 'content': 0.12757983803749084, 'timestamp': '2025-09-10 02:47:27.553950', 'step': 18148, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 128], 'flops': 3797092544000}, 'timestamp': '2025-09-10 02:47:27.586988', 'step': 18148, 'epoch': 3} {'type': 'loss', 'content': 0.03767738863825798, 'timestamp': '2025-09-10 02:47:27.589635', 'step': 18149, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 3322488817984}, 'timestamp': '2025-09-10 02:47:27.620494', 'step': 18149, 'epoch': 3} {'type': 'loss', 'content': 0.05652123689651489, 'timestamp': '2025-09-10 02:47:27.623515', 'step': 18150, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 176], 'flops': 5220903722048}, 'timestamp': '2025-09-10 02:47:27.653892', 'step': 18150, 'epoch': 3} {'type': 'loss', 'content': 0.05619962513446808, 'timestamp': '2025-09-10 02:47:27.658260', 'step': 18151, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 128], 'flops': 3797092544000}, 'timestamp': '2025-09-10 02:47:27.688032', 'step': 18151, 'epoch': 3} {'type': 'loss', 'content': 0.047516025602817535, 'timestamp': '2025-09-10 02:47:27.711789', 'step': 18152, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 160], 'flops': 4746299996032}, 'timestamp': '2025-09-10 02:47:27.741877', 'step': 18152, 'epoch': 3} {'type': 'loss', 'content': 0.03002154640853405, 'timestamp': '2025-09-10 02:47:27.744119', 'step': 18153, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 2847885091968}, 'timestamp': '2025-09-10 02:47:27.776042', 'step': 18153, 'epoch': 3} {'type': 'loss', 'content': 0.059602491557598114, 'timestamp': '2025-09-10 02:47:27.778235', 'step': 18154, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 80], 'flops': 2373281365952}, 'timestamp': '2025-09-10 02:47:27.808328', 'step': 18154, 'epoch': 3} {'type': 'loss', 'content': 0.20298771560192108, 'timestamp': '2025-09-10 02:47:27.811091', 'step': 18155, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 2847885091968}, 'timestamp': '2025-09-10 02:47:27.841230', 'step': 18155, 'epoch': 3} {'type': 'loss', 'content': 0.028985783457756042, 'timestamp': '2025-09-10 02:47:27.864854', 'step': 18156, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 128], 'flops': 3797092544000}, 'timestamp': '2025-09-10 02:47:27.895966', 'step': 18156, 'epoch': 3} {'type': 'loss', 'content': 0.09013809263706207, 'timestamp': '2025-09-10 02:47:27.898243', 'step': 18157, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 2847885091968}, 'timestamp': '2025-09-10 02:47:27.928384', 'step': 18157, 'epoch': 3} {'type': 'loss', 'content': 0.08341643214225769, 'timestamp': '2025-09-10 02:47:27.930978', 'step': 18158, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 128], 'flops': 3797092544000}, 'timestamp': '2025-09-10 02:47:27.963227', 'step': 18158, 'epoch': 3} {'type': 'loss', 'content': 0.06525660306215286, 'timestamp': '2025-09-10 02:47:27.965821', 'step': 18159, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 3322488817984}, 'timestamp': '2025-09-10 02:47:27.996311', 'step': 18159, 'epoch': 3} {'type': 'loss', 'content': 0.058965880423784256, 'timestamp': '2025-09-10 02:47:28.019788', 'step': 18160, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 2847885091968}, 'timestamp': '2025-09-10 02:47:28.050660', 'step': 18160, 'epoch': 3} {'type': 'loss', 'content': 0.02541506662964821, 'timestamp': '2025-09-10 02:47:28.052729', 'step': 18161, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 3322488817984}, 'timestamp': '2025-09-10 02:47:28.082585', 'step': 18161, 'epoch': 3} {'type': 'loss', 'content': 0.17475132644176483, 'timestamp': '2025-09-10 02:47:28.084982', 'step': 18162, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 3322488817984}, 'timestamp': '2025-09-10 02:47:28.114895', 'step': 18162, 'epoch': 3} {'type': 'loss', 'content': 0.008984020911157131, 'timestamp': '2025-09-10 02:47:28.117265', 'step': 18163, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 2847885091968}, 'timestamp': '2025-09-10 02:47:28.146944', 'step': 18163, 'epoch': 3} {'type': 'loss', 'content': 0.08774279803037643, 'timestamp': '2025-09-10 02:47:28.170436', 'step': 18164, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 3322488817984}, 'timestamp': '2025-09-10 02:47:28.201041', 'step': 18164, 'epoch': 3} {'type': 'loss', 'content': 0.04004117473959923, 'timestamp': '2025-09-10 02:47:28.203645', 'step': 18165, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 2847885091968}, 'timestamp': '2025-09-10 02:47:28.234966', 'step': 18165, 'epoch': 3} {'type': 'loss', 'content': 0.05016176775097847, 'timestamp': '2025-09-10 02:47:28.239078', 'step': 18166, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 2847885091968}, 'timestamp': '2025-09-10 02:47:28.271684', 'step': 18166, 'epoch': 3} {'type': 'loss', 'content': 0.03266121819615364, 'timestamp': '2025-09-10 02:47:28.274217', 'step': 18167, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 128], 'flops': 3797092544000}, 'timestamp': '2025-09-10 02:47:28.304531', 'step': 18167, 'epoch': 3} {'type': 'loss', 'content': 0.16199162602424622, 'timestamp': '2025-09-10 02:47:28.328096', 'step': 18168, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 2847885091968}, 'timestamp': '2025-09-10 02:47:28.359575', 'step': 18168, 'epoch': 3} {'type': 'loss', 'content': 0.10525096207857132, 'timestamp': '2025-09-10 02:47:28.361854', 'step': 18169, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 3322488817984}, 'timestamp': '2025-09-10 02:47:28.392032', 'step': 18169, 'epoch': 3} {'type': 'loss', 'content': 0.025966906920075417, 'timestamp': '2025-09-10 02:47:28.394596', 'step': 18170, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 2847885091968}, 'timestamp': '2025-09-10 02:47:28.425522', 'step': 18170, 'epoch': 3} {'type': 'loss', 'content': 0.04851952940225601, 'timestamp': '2025-09-10 02:47:28.427907', 'step': 18171, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 128], 'flops': 3797092544000}, 'timestamp': '2025-09-10 02:47:28.457808', 'step': 18171, 'epoch': 3} {'type': 'loss', 'content': 0.08320896327495575, 'timestamp': '2025-09-10 02:47:28.481485', 'step': 18172, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 144], 'flops': 4271696270016}, 'timestamp': '2025-09-10 02:47:28.511848', 'step': 18172, 'epoch': 3} {'type': 'loss', 'content': 0.04496944695711136, 'timestamp': '2025-09-10 02:47:28.514259', 'step': 18173, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 3322488817984}, 'timestamp': '2025-09-10 02:47:28.544392', 'step': 18173, 'epoch': 3} {'type': 'loss', 'content': 0.07601309567689896, 'timestamp': '2025-09-10 02:47:28.546769', 'step': 18174, 'epoch': 3} {'type': 'flops', 'content': [{'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1582003754624}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1898404492032}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1898404492032}, {'type': 'perplexity', 'in_batch_dim': [8, 144], 'batch_size': 8, 'flops': 2847606704256}, {'type': 'perplexity', 'in_batch_dim': [8, 64], 'batch_size': 8, 'flops': 1265603017216}, {'type': 'perplexity', 'in_batch_dim': [8, 112], 'batch_size': 8, 'flops': 2214805229440}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1898404492032}, {'type': 'perplexity', 'in_batch_dim': [8, 112], 'batch_size': 8, 'flops': 2214805229440}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1898404492032}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1898404492032}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1898404492032}, {'type': 'perplexity', 'in_batch_dim': [8, 112], 'batch_size': 8, 'flops': 2214805229440}, {'type': 'perplexity', 'in_batch_dim': [8, 112], 'batch_size': 8, 'flops': 2214805229440}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1582003754624}, {'type': 'perplexity', 'in_batch_dim': [8, 144], 'batch_size': 8, 'flops': 2847606704256}, {'type': 'perplexity', 'in_batch_dim': [8, 112], 'batch_size': 8, 'flops': 2214805229440}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1582003754624}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1898404492032}, {'type': 'perplexity', 'in_batch_dim': [8, 64], 'batch_size': 8, 'flops': 1265603017216}, {'type': 'perplexity', 'in_batch_dim': [8, 64], 'batch_size': 8, 'flops': 1265603017216}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1898404492032}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1582003754624}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1582003754624}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1582003754624}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1582003754624}, {'type': 'perplexity', 'in_batch_dim': [8, 64], 'batch_size': 8, 'flops': 1265603017216}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1898404492032}, {'type': 'perplexity', 'in_batch_dim': [8, 64], 'batch_size': 8, 'flops': 1265603017216}, {'type': 'perplexity', 'in_batch_dim': [8, 64], 'batch_size': 8, 'flops': 1265603017216}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1582003754624}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1582003754624}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1582003754624}, {'type': 'perplexity', 'in_batch_dim': [8, 112], 'batch_size': 8, 'flops': 2214805229440}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1582003754624}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1582003754624}, {'type': 'perplexity', 'in_batch_dim': [8, 160], 'batch_size': 8, 'flops': 3164007441664}, {'type': 'perplexity', 'in_batch_dim': [8, 64], 'batch_size': 8, 'flops': 1265603017216}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1898404492032}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1898404492032}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1898404492032}, {'type': 'perplexity', 'in_batch_dim': [8, 64], 'batch_size': 8, 'flops': 1265603017216}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1582003754624}, {'type': 'perplexity', 'in_batch_dim': [8, 64], 'batch_size': 8, 'flops': 1265603017216}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1582003754624}, {'type': 'perplexity', 'in_batch_dim': [8, 64], 'batch_size': 8, 'flops': 1265603017216}, {'type': 'perplexity', 'in_batch_dim': [8, 112], 'batch_size': 8, 'flops': 2214805229440}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1582003754624}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1898404492032}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1898404492032}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1898404492032}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1898404492032}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1898404492032}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1898404492032}, {'type': 'perplexity', 'in_batch_dim': [8, 64], 'batch_size': 8, 'flops': 1265603017216}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1898404492032}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1582003754624}, {'type': 'perplexity', 'in_batch_dim': [8, 64], 'batch_size': 8, 'flops': 1265603017216}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1898404492032}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1898404492032}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1898404492032}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1898404492032}, {'type': 'perplexity', 'in_batch_dim': [8, 128], 'batch_size': 8, 'flops': 2531205966848}, {'type': 'perplexity', 'in_batch_dim': [8, 112], 'batch_size': 8, 'flops': 2214805229440}, {'type': 'perplexity', 'in_batch_dim': [8, 64], 'batch_size': 8, 'flops': 1265603017216}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1582003754624}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1898404492032}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1582003754624}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1898404492032}, {'type': 'perplexity', 'in_batch_dim': [8, 64], 'batch_size': 8, 'flops': 1265603017216}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1898404492032}, {'type': 'perplexity', 'in_batch_dim': [8, 112], 'batch_size': 8, 'flops': 2214805229440}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1898404492032}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1582003754624}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1582003754624}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1582003754624}, {'type': 'perplexity', 'in_batch_dim': [8, 64], 'batch_size': 8, 'flops': 1265603017216}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1582003754624}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1898404492032}, {'type': 'perplexity', 'in_batch_dim': [8, 64], 'batch_size': 8, 'flops': 1265603017216}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1582003754624}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1898404492032}, {'type': 'perplexity', 'in_batch_dim': [8, 64], 'batch_size': 8, 'flops': 1265603017216}, {'type': 'perplexity', 'in_batch_dim': [8, 112], 'batch_size': 8, 'flops': 2214805229440}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1582003754624}, {'type': 'perplexity', 'in_batch_dim': [8, 144], 'batch_size': 8, 'flops': 2847606704256}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1582003754624}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1582003754624}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1582003754624}, {'type': 'perplexity', 'in_batch_dim': [8, 64], 'batch_size': 8, 'flops': 1265603017216}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1898404492032}, {'type': 'perplexity', 'in_batch_dim': [8, 112], 'batch_size': 8, 'flops': 2214805229440}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1898404492032}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1582003754624}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1582003754624}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1582003754624}, {'type': 'perplexity', 'in_batch_dim': [8, 112], 'batch_size': 8, 'flops': 2214805229440}, {'type': 'perplexity', 'in_batch_dim': [8, 64], 'batch_size': 8, 'flops': 1265603017216}, {'type': 'perplexity', 'in_batch_dim': [8, 112], 'batch_size': 8, 'flops': 2214805229440}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1582003754624}, {'type': 'perplexity', 'in_batch_dim': [8, 64], 'batch_size': 8, 'flops': 1265603017216}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1582003754624}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1582003754624}, {'type': 'perplexity', 'in_batch_dim': [8, 64], 'batch_size': 8, 'flops': 1265603017216}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1582003754624}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1582003754624}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1582003754624}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1898404492032}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1582003754624}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1582003754624}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1898404492032}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1898404492032}, {'type': 'perplexity', 'in_batch_dim': [8, 112], 'batch_size': 8, 'flops': 2214805229440}, {'type': 'perplexity', 'in_batch_dim': [8, 64], 'batch_size': 8, 'flops': 1265603017216}, {'type': 'perplexity', 'in_batch_dim': [8, 112], 'batch_size': 8, 'flops': 2214805229440}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1898404492032}, {'type': 'perplexity', 'in_batch_dim': [8, 112], 'batch_size': 8, 'flops': 2214805229440}, {'type': 'perplexity', 'in_batch_dim': [8, 128], 'batch_size': 8, 'flops': 2531205966848}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1582003754624}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1898404492032}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1898404492032}, {'type': 'perplexity', 'in_batch_dim': [8, 192], 'batch_size': 8, 'flops': 3796808916480}, {'type': 'perplexity', 'in_batch_dim': [8, 64], 'batch_size': 8, 'flops': 1265603017216}, {'type': 'perplexity', 'in_batch_dim': [8, 144], 'batch_size': 8, 'flops': 2847606704256}, {'type': 'perplexity', 'in_batch_dim': [8, 64], 'batch_size': 8, 'flops': 1265603017216}, {'type': 'perplexity', 'in_batch_dim': [8, 144], 'batch_size': 8, 'flops': 2847606704256}, {'type': 'perplexity', 'in_batch_dim': [8, 64], 'batch_size': 8, 'flops': 1265603017216}, {'type': 'perplexity', 'in_batch_dim': [8, 64], 'batch_size': 8, 'flops': 1265603017216}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1898404492032}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1898404492032}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1582003754624}, {'type': 'perplexity', 'in_batch_dim': [8, 128], 'batch_size': 8, 'flops': 2531205966848}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1898404492032}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1898404492032}, {'type': 'perplexity', 'in_batch_dim': [8, 112], 'batch_size': 8, 'flops': 2214805229440}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1582003754624}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1582003754624}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1898404492032}, {'type': 'perplexity', 'in_batch_dim': [8, 64], 'batch_size': 8, 'flops': 1265603017216}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1898404492032}, {'type': 'perplexity', 'in_batch_dim': [8, 112], 'batch_size': 8, 'flops': 2214805229440}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1898404492032}, {'type': 'perplexity', 'in_batch_dim': [8, 64], 'batch_size': 8, 'flops': 1265603017216}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1582003754624}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1898404492032}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1898404492032}, {'type': 'perplexity', 'in_batch_dim': [8, 64], 'batch_size': 8, 'flops': 1265603017216}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1898404492032}, {'type': 'perplexity', 'in_batch_dim': [8, 64], 'batch_size': 8, 'flops': 1265603017216}, {'type': 'perplexity', 'in_batch_dim': [8, 112], 'batch_size': 8, 'flops': 2214805229440}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1582003754624}, {'type': 'perplexity', 'in_batch_dim': [8, 128], 'batch_size': 8, 'flops': 2531205966848}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1582003754624}, {'type': 'perplexity', 'in_batch_dim': [8, 112], 'batch_size': 8, 'flops': 2214805229440}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1898404492032}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1582003754624}, {'type': 'perplexity', 'in_batch_dim': [8, 112], 'batch_size': 8, 'flops': 2214805229440}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1582003754624}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1898404492032}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1898404492032}, {'type': 'perplexity', 'in_batch_dim': [8, 128], 'batch_size': 8, 'flops': 2531205966848}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1898404492032}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1898404492032}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1582003754624}, {'type': 'perplexity', 'in_batch_dim': [8, 112], 'batch_size': 8, 'flops': 2214805229440}, {'type': 'perplexity', 'in_batch_dim': [8, 128], 'batch_size': 8, 'flops': 2531205966848}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1898404492032}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1582003754624}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1582003754624}, {'type': 'perplexity', 'in_batch_dim': [8, 128], 'batch_size': 8, 'flops': 2531205966848}, {'type': 'perplexity', 'in_batch_dim': [8, 112], 'batch_size': 8, 'flops': 2214805229440}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1582003754624}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1582003754624}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1898404492032}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1582003754624}, {'type': 'perplexity', 'in_batch_dim': [8, 64], 'batch_size': 8, 'flops': 1265603017216}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1582003754624}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1582003754624}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1582003754624}, {'type': 'perplexity', 'in_batch_dim': [8, 112], 'batch_size': 8, 'flops': 2214805229440}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1582003754624}, {'type': 'perplexity', 'in_batch_dim': [8, 64], 'batch_size': 8, 'flops': 1265603017216}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1582003754624}, {'type': 'perplexity', 'in_batch_dim': [8, 128], 'batch_size': 8, 'flops': 2531205966848}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1898404492032}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1898404492032}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1582003754624}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1898404492032}, {'type': 'perplexity', 'in_batch_dim': [8, 64], 'batch_size': 8, 'flops': 1265603017216}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1898404492032}, {'type': 'perplexity', 'in_batch_dim': [8, 112], 'batch_size': 8, 'flops': 2214805229440}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1898404492032}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1582003754624}, {'type': 'perplexity', 'in_batch_dim': [8, 112], 'batch_size': 8, 'flops': 2214805229440}, {'type': 'perplexity', 'in_batch_dim': [8, 112], 'batch_size': 8, 'flops': 2214805229440}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1898404492032}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1898404492032}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1898404492032}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1898404492032}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1582003754624}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1582003754624}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1582003754624}, {'type': 'perplexity', 'in_batch_dim': [8, 112], 'batch_size': 8, 'flops': 2214805229440}, {'type': 'perplexity', 'in_batch_dim': [8, 64], 'batch_size': 8, 'flops': 1265603017216}, {'type': 'perplexity', 'in_batch_dim': [8, 144], 'batch_size': 8, 'flops': 2847606704256}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1898404492032}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1898404492032}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1898404492032}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1898404492032}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1898404492032}, {'type': 'perplexity', 'in_batch_dim': [8, 112], 'batch_size': 8, 'flops': 2214805229440}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1582003754624}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1582003754624}, {'type': 'perplexity', 'in_batch_dim': [8, 64], 'batch_size': 8, 'flops': 1265603017216}, {'type': 'perplexity', 'in_batch_dim': [8, 112], 'batch_size': 8, 'flops': 2214805229440}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1582003754624}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1898404492032}, {'type': 'perplexity', 'in_batch_dim': [8, 64], 'batch_size': 8, 'flops': 1265603017216}, {'type': 'perplexity', 'in_batch_dim': [8, 112], 'batch_size': 8, 'flops': 2214805229440}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1898404492032}, {'type': 'perplexity', 'in_batch_dim': [8, 64], 'batch_size': 8, 'flops': 1265603017216}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1582003754624}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1582003754624}, {'type': 'perplexity', 'in_batch_dim': [8, 64], 'batch_size': 8, 'flops': 1265603017216}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1898404492032}, {'type': 'perplexity', 'in_batch_dim': [8, 64], 'batch_size': 8, 'flops': 1265603017216}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1582003754624}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1898404492032}, {'type': 'perplexity', 'in_batch_dim': [8, 112], 'batch_size': 8, 'flops': 2214805229440}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1582003754624}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1898404492032}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1582003754624}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1898404492032}, {'type': 'perplexity', 'in_batch_dim': [8, 112], 'batch_size': 8, 'flops': 2214805229440}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1898404492032}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1898404492032}, {'type': 'perplexity', 'in_batch_dim': [8, 64], 'batch_size': 8, 'flops': 1265603017216}, {'type': 'perplexity', 'in_batch_dim': [8, 64], 'batch_size': 8, 'flops': 1265603017216}, {'type': 'perplexity', 'in_batch_dim': [8, 128], 'batch_size': 8, 'flops': 2531205966848}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1582003754624}, {'type': 'perplexity', 'in_batch_dim': [8, 64], 'batch_size': 8, 'flops': 1265603017216}, {'type': 'perplexity', 'in_batch_dim': [8, 112], 'batch_size': 8, 'flops': 2214805229440}, {'type': 'perplexity', 'in_batch_dim': [8, 112], 'batch_size': 8, 'flops': 2214805229440}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1898404492032}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1582003754624}, {'type': 'perplexity', 'in_batch_dim': [8, 128], 'batch_size': 8, 'flops': 2531205966848}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1898404492032}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1582003754624}, {'type': 'perplexity', 'in_batch_dim': [8, 112], 'batch_size': 8, 'flops': 2214805229440}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1898404492032}, {'type': 'perplexity', 'in_batch_dim': [8, 64], 'batch_size': 8, 'flops': 1265603017216}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1582003754624}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1898404492032}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1898404492032}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1582003754624}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1582003754624}, {'type': 'perplexity', 'in_batch_dim': [8, 64], 'batch_size': 8, 'flops': 1265603017216}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1898404492032}, {'type': 'perplexity', 'in_batch_dim': [8, 112], 'batch_size': 8, 'flops': 2214805229440}, {'type': 'perplexity', 'in_batch_dim': [8, 64], 'batch_size': 8, 'flops': 1265603017216}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1898404492032}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1582003754624}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1582003754624}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1898404492032}, {'type': 'perplexity', 'in_batch_dim': [8, 112], 'batch_size': 8, 'flops': 2214805229440}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1898404492032}, {'type': 'perplexity', 'in_batch_dim': [8, 112], 'batch_size': 8, 'flops': 2214805229440}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1582003754624}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1582003754624}, {'type': 'perplexity', 'in_batch_dim': [8, 64], 'batch_size': 8, 'flops': 1265603017216}, {'type': 'perplexity', 'in_batch_dim': [8, 112], 'batch_size': 8, 'flops': 2214805229440}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1582003754624}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1898404492032}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1582003754624}, {'type': 'perplexity', 'in_batch_dim': [8, 112], 'batch_size': 8, 'flops': 2214805229440}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1898404492032}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1898404492032}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1582003754624}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1898404492032}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1582003754624}, {'type': 'perplexity', 'in_batch_dim': [8, 112], 'batch_size': 8, 'flops': 2214805229440}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1582003754624}, {'type': 'perplexity', 'in_batch_dim': [8, 64], 'batch_size': 8, 'flops': 1265603017216}, {'type': 'perplexity', 'in_batch_dim': [8, 64], 'batch_size': 8, 'flops': 1265603017216}, {'type': 'perplexity', 'in_batch_dim': [8, 48], 'batch_size': 8, 'flops': 949202279808}, {'type': 'perplexity', 'in_batch_dim': [8, 112], 'batch_size': 8, 'flops': 2214805229440}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1582003754624}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1898404492032}, {'type': 'perplexity', 'in_batch_dim': [8, 128], 'batch_size': 8, 'flops': 2531205966848}, {'type': 'perplexity', 'in_batch_dim': [8, 112], 'batch_size': 8, 'flops': 2214805229440}, {'type': 'perplexity', 'in_batch_dim': [8, 128], 'batch_size': 8, 'flops': 2531205966848}, {'type': 'perplexity', 'in_batch_dim': [8, 112], 'batch_size': 8, 'flops': 2214805229440}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1582003754624}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1898404492032}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1582003754624}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1582003754624}, {'type': 'perplexity', 'in_batch_dim': [8, 112], 'batch_size': 8, 'flops': 2214805229440}, {'type': 'perplexity', 'in_batch_dim': [8, 112], 'batch_size': 8, 'flops': 2214805229440}, {'type': 'perplexity', 'in_batch_dim': [8, 64], 'batch_size': 8, 'flops': 1265603017216}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1898404492032}, {'type': 'perplexity', 'in_batch_dim': [8, 112], 'batch_size': 8, 'flops': 2214805229440}, {'type': 'perplexity', 'in_batch_dim': [8, 64], 'batch_size': 8, 'flops': 1265603017216}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1898404492032}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1582003754624}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1898404492032}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1898404492032}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1898404492032}, {'type': 'perplexity', 'in_batch_dim': [8, 48], 'batch_size': 8, 'flops': 949202279808}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1898404492032}, {'type': 'perplexity', 'in_batch_dim': [8, 112], 'batch_size': 8, 'flops': 2214805229440}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1582003754624}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1898404492032}, {'type': 'perplexity', 'in_batch_dim': [8, 64], 'batch_size': 8, 'flops': 1265603017216}, {'type': 'perplexity', 'in_batch_dim': [8, 112], 'batch_size': 8, 'flops': 2214805229440}, {'type': 'perplexity', 'in_batch_dim': [8, 64], 'batch_size': 8, 'flops': 1265603017216}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1582003754624}, {'type': 'perplexity', 'in_batch_dim': [8, 64], 'batch_size': 8, 'flops': 1265603017216}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1582003754624}, {'type': 'perplexity', 'in_batch_dim': [8, 112], 'batch_size': 8, 'flops': 2214805229440}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1582003754624}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1582003754624}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1582003754624}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1582003754624}, {'type': 'perplexity', 'in_batch_dim': [8, 112], 'batch_size': 8, 'flops': 2214805229440}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1582003754624}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1898404492032}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1898404492032}, {'type': 'perplexity', 'in_batch_dim': [8, 112], 'batch_size': 8, 'flops': 2214805229440}, {'type': 'perplexity', 'in_batch_dim': [8, 144], 'batch_size': 8, 'flops': 2847606704256}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1582003754624}, {'type': 'perplexity', 'in_batch_dim': [8, 64], 'batch_size': 8, 'flops': 1265603017216}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1898404492032}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1898404492032}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1582003754624}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1898404492032}, {'type': 'perplexity', 'in_batch_dim': [8, 128], 'batch_size': 8, 'flops': 2531205966848}, {'type': 'perplexity', 'in_batch_dim': [8, 112], 'batch_size': 8, 'flops': 2214805229440}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1898404492032}, {'type': 'perplexity', 'in_batch_dim': [8, 64], 'batch_size': 8, 'flops': 1265603017216}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1898404492032}, {'type': 'perplexity', 'in_batch_dim': [8, 160], 'batch_size': 8, 'flops': 3164007441664}, {'type': 'perplexity', 'in_batch_dim': [8, 64], 'batch_size': 8, 'flops': 1265603017216}, {'type': 'perplexity', 'in_batch_dim': [8, 112], 'batch_size': 8, 'flops': 2214805229440}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1898404492032}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1898404492032}, {'type': 'perplexity', 'in_batch_dim': [8, 144], 'batch_size': 8, 'flops': 2847606704256}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1582003754624}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1898404492032}, {'type': 'perplexity', 'in_batch_dim': [8, 64], 'batch_size': 8, 'flops': 1265603017216}, {'type': 'perplexity', 'in_batch_dim': [8, 64], 'batch_size': 8, 'flops': 1265603017216}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1898404492032}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1898404492032}, {'type': 'perplexity', 'in_batch_dim': [8, 112], 'batch_size': 8, 'flops': 2214805229440}, {'type': 'perplexity', 'in_batch_dim': [8, 112], 'batch_size': 8, 'flops': 2214805229440}, {'type': 'perplexity', 'in_batch_dim': [8, 128], 'batch_size': 8, 'flops': 2531205966848}, {'type': 'perplexity', 'in_batch_dim': [8, 112], 'batch_size': 8, 'flops': 2214805229440}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1582003754624}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1582003754624}, {'type': 'perplexity', 'in_batch_dim': [8, 112], 'batch_size': 8, 'flops': 2214805229440}, {'type': 'perplexity', 'in_batch_dim': [8, 128], 'batch_size': 8, 'flops': 2531205966848}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1582003754624}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1582003754624}, {'type': 'perplexity', 'in_batch_dim': [8, 64], 'batch_size': 8, 'flops': 1265603017216}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1898404492032}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1582003754624}, {'type': 'perplexity', 'in_batch_dim': [8, 112], 'batch_size': 8, 'flops': 2214805229440}, {'type': 'perplexity', 'in_batch_dim': [8, 64], 'batch_size': 8, 'flops': 1265603017216}, {'type': 'perplexity', 'in_batch_dim': [8, 64], 'batch_size': 8, 'flops': 1265603017216}, {'type': 'perplexity', 'in_batch_dim': [8, 128], 'batch_size': 8, 'flops': 2531205966848}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1898404492032}, {'type': 'perplexity', 'in_batch_dim': [8, 112], 'batch_size': 8, 'flops': 2214805229440}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1582003754624}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1582003754624}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1582003754624}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1582003754624}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1582003754624}, {'type': 'perplexity', 'in_batch_dim': [8, 112], 'batch_size': 8, 'flops': 2214805229440}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1898404492032}, {'type': 'perplexity', 'in_batch_dim': [8, 112], 'batch_size': 8, 'flops': 2214805229440}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1898404492032}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1582003754624}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1582003754624}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1898404492032}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1898404492032}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1582003754624}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1582003754624}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1582003754624}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1582003754624}, {'type': 'perplexity', 'in_batch_dim': [8, 48], 'batch_size': 8, 'flops': 949202279808}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1582003754624}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1898404492032}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1582003754624}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1898404492032}, {'type': 'perplexity', 'in_batch_dim': [8, 112], 'batch_size': 8, 'flops': 2214805229440}, {'type': 'perplexity', 'in_batch_dim': [8, 128], 'batch_size': 8, 'flops': 2531205966848}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1898404492032}, {'type': 'perplexity', 'in_batch_dim': [8, 64], 'batch_size': 8, 'flops': 1265603017216}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1898404492032}, {'type': 'perplexity', 'in_batch_dim': [8, 64], 'batch_size': 8, 'flops': 1265603017216}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1582003754624}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1582003754624}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1582003754624}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1582003754624}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1898404492032}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1582003754624}, {'type': 'perplexity', 'in_batch_dim': [8, 112], 'batch_size': 8, 'flops': 2214805229440}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1898404492032}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1582003754624}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1898404492032}, {'type': 'perplexity', 'in_batch_dim': [8, 64], 'batch_size': 8, 'flops': 1265603017216}, {'type': 'perplexity', 'in_batch_dim': [8, 64], 'batch_size': 8, 'flops': 1265603017216}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1898404492032}, {'type': 'perplexity', 'in_batch_dim': [8, 64], 'batch_size': 8, 'flops': 1265603017216}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1582003754624}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1582003754624}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1582003754624}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1582003754624}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1582003754624}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1582003754624}, {'type': 'perplexity', 'in_batch_dim': [8, 64], 'batch_size': 8, 'flops': 1265603017216}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1582003754624}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1582003754624}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1898404492032}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1898404492032}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1898404492032}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1898404492032}, {'type': 'perplexity', 'in_batch_dim': [8, 128], 'batch_size': 8, 'flops': 2531205966848}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1582003754624}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1898404492032}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1898404492032}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1898404492032}, {'type': 'perplexity', 'in_batch_dim': [8, 64], 'batch_size': 8, 'flops': 1265603017216}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1898404492032}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1898404492032}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1582003754624}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1898404492032}, {'type': 'perplexity', 'in_batch_dim': [8, 112], 'batch_size': 8, 'flops': 2214805229440}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1898404492032}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1582003754624}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1898404492032}, {'type': 'perplexity', 'in_batch_dim': [8, 64], 'batch_size': 8, 'flops': 1265603017216}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1898404492032}, {'type': 'perplexity', 'in_batch_dim': [8, 112], 'batch_size': 8, 'flops': 2214805229440}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1898404492032}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1898404492032}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1582003754624}, {'type': 'perplexity', 'in_batch_dim': [8, 64], 'batch_size': 8, 'flops': 1265603017216}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1582003754624}, {'type': 'perplexity', 'in_batch_dim': [8, 128], 'batch_size': 8, 'flops': 2531205966848}, {'type': 'perplexity', 'in_batch_dim': [8, 64], 'batch_size': 8, 'flops': 1265603017216}, {'type': 'perplexity', 'in_batch_dim': [8, 128], 'batch_size': 8, 'flops': 2531205966848}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1898404492032}, {'type': 'perplexity', 'in_batch_dim': [8, 112], 'batch_size': 8, 'flops': 2214805229440}, {'type': 'perplexity', 'in_batch_dim': [8, 112], 'batch_size': 8, 'flops': 2214805229440}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1898404492032}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1898404492032}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1582003754624}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1582003754624}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1582003754624}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1898404492032}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1582003754624}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1582003754624}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1898404492032}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1582003754624}, {'type': 'perplexity', 'in_batch_dim': [8, 64], 'batch_size': 8, 'flops': 1265603017216}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1898404492032}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1898404492032}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1898404492032}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1898404492032}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1582003754624}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1898404492032}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1582003754624}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1582003754624}, {'type': 'perplexity', 'in_batch_dim': [8, 112], 'batch_size': 8, 'flops': 2214805229440}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1582003754624}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1898404492032}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1582003754624}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1582003754624}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1582003754624}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1898404492032}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1582003754624}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1898404492032}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1582003754624}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1582003754624}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1898404492032}, {'type': 'perplexity', 'in_batch_dim': [8, 112], 'batch_size': 8, 'flops': 2214805229440}, {'type': 'perplexity', 'in_batch_dim': [8, 128], 'batch_size': 8, 'flops': 2531205966848}, {'type': 'perplexity', 'in_batch_dim': [8, 112], 'batch_size': 8, 'flops': 2214805229440}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1582003754624}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1898404492032}, {'type': 'perplexity', 'in_batch_dim': [8, 64], 'batch_size': 8, 'flops': 1265603017216}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1898404492032}, {'type': 'perplexity', 'in_batch_dim': [8, 64], 'batch_size': 8, 'flops': 1265603017216}, {'type': 'perplexity', 'in_batch_dim': [8, 144], 'batch_size': 8, 'flops': 2847606704256}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1582003754624}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1898404492032}, {'type': 'perplexity', 'in_batch_dim': [8, 112], 'batch_size': 8, 'flops': 2214805229440}, {'type': 'perplexity', 'in_batch_dim': [8, 144], 'batch_size': 8, 'flops': 2847606704256}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1582003754624}, {'type': 'perplexity', 'in_batch_dim': [8, 112], 'batch_size': 8, 'flops': 2214805229440}, {'type': 'perplexity', 'in_batch_dim': [8, 64], 'batch_size': 8, 'flops': 1265603017216}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1582003754624}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1898404492032}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1898404492032}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1582003754624}, {'type': 'perplexity', 'in_batch_dim': [8, 112], 'batch_size': 8, 'flops': 2214805229440}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1582003754624}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1582003754624}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1582003754624}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1582003754624}, {'type': 'perplexity', 'in_batch_dim': [8, 112], 'batch_size': 8, 'flops': 2214805229440}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1898404492032}, {'type': 'perplexity', 'in_batch_dim': [8, 128], 'batch_size': 8, 'flops': 2531205966848}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1582003754624}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1582003754624}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1898404492032}, {'type': 'perplexity', 'in_batch_dim': [8, 64], 'batch_size': 8, 'flops': 1265603017216}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1898404492032}, {'type': 'perplexity', 'in_batch_dim': [8, 64], 'batch_size': 8, 'flops': 1265603017216}, {'type': 'perplexity', 'in_batch_dim': [8, 112], 'batch_size': 8, 'flops': 2214805229440}, {'type': 'perplexity', 'in_batch_dim': [8, 112], 'batch_size': 8, 'flops': 2214805229440}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1582003754624}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1898404492032}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1582003754624}, {'type': 'perplexity', 'in_batch_dim': [8, 64], 'batch_size': 8, 'flops': 1265603017216}, {'type': 'perplexity', 'in_batch_dim': [8, 64], 'batch_size': 8, 'flops': 1265603017216}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1898404492032}, {'type': 'perplexity', 'in_batch_dim': [8, 64], 'batch_size': 8, 'flops': 1265603017216}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1898404492032}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1898404492032}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1582003754624}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1898404492032}, {'type': 'perplexity', 'in_batch_dim': [8, 144], 'batch_size': 8, 'flops': 2847606704256}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1898404492032}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1582003754624}, {'type': 'perplexity', 'in_batch_dim': [8, 128], 'batch_size': 8, 'flops': 2531205966848}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1898404492032}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1898404492032}, {'type': 'perplexity', 'in_batch_dim': [8, 112], 'batch_size': 8, 'flops': 2214805229440}, {'type': 'perplexity', 'in_batch_dim': [8, 112], 'batch_size': 8, 'flops': 2214805229440}, {'type': 'perplexity', 'in_batch_dim': [8, 64], 'batch_size': 8, 'flops': 1265603017216}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1582003754624}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1898404492032}, {'type': 'perplexity', 'in_batch_dim': [8, 128], 'batch_size': 8, 'flops': 2531205966848}, {'type': 'perplexity', 'in_batch_dim': [8, 144], 'batch_size': 8, 'flops': 2847606704256}, {'type': 'perplexity', 'in_batch_dim': [8, 112], 'batch_size': 8, 'flops': 2214805229440}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1582003754624}, {'type': 'perplexity', 'in_batch_dim': [8, 64], 'batch_size': 8, 'flops': 1265603017216}, {'type': 'perplexity', 'in_batch_dim': [8, 64], 'batch_size': 8, 'flops': 1265603017216}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1898404492032}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1582003754624}, {'type': 'perplexity', 'in_batch_dim': [8, 64], 'batch_size': 8, 'flops': 1265603017216}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1898404492032}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1582003754624}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1898404492032}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1898404492032}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1582003754624}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1582003754624}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1582003754624}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1582003754624}, {'type': 'perplexity', 'in_batch_dim': [3, 48], 'batch_size': 8, 'flops': 949202279808}], 'timestamp': '2025-09-10 02:47:36.340850', 'step': 18174, 'epoch': 3} {'type': 'pplx', 'content': 12522.31137778616, 'timestamp': '2025-09-10 02:47:36.345399', 'step': 18174, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 3322488817984}, 'timestamp': '2025-09-10 02:47:36.374744', 'step': 18174, 'epoch': 3} {'type': 'loss', 'content': 0.0596102699637413, 'timestamp': '2025-09-10 02:47:36.376907', 'step': 18175, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 128], 'flops': 3797092544000}, 'timestamp': '2025-09-10 02:47:36.407588', 'step': 18175, 'epoch': 3} {'type': 'loss', 'content': 0.051594819873571396, 'timestamp': '2025-09-10 02:47:36.431672', 'step': 18176, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 128], 'flops': 3797092544000}, 'timestamp': '2025-09-10 02:47:36.462338', 'step': 18176, 'epoch': 3} {'type': 'loss', 'content': 0.05472274124622345, 'timestamp': '2025-09-10 02:47:36.464560', 'step': 18177, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 128], 'flops': 3797092544000}, 'timestamp': '2025-09-10 02:47:36.494944', 'step': 18177, 'epoch': 3} {'type': 'loss', 'content': 0.08356435596942902, 'timestamp': '2025-09-10 02:47:36.497276', 'step': 18178, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 3322488817984}, 'timestamp': '2025-09-10 02:47:36.528554', 'step': 18178, 'epoch': 3} {'type': 'loss', 'content': 0.09760205447673798, 'timestamp': '2025-09-10 02:47:36.531182', 'step': 18179, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 2847885091968}, 'timestamp': '2025-09-10 02:47:36.562579', 'step': 18179, 'epoch': 3} {'type': 'loss', 'content': 0.03718139976263046, 'timestamp': '2025-09-10 02:47:36.586428', 'step': 18180, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 2847885091968}, 'timestamp': '2025-09-10 02:47:36.616751', 'step': 18180, 'epoch': 3} {'type': 'loss', 'content': 0.11871436983346939, 'timestamp': '2025-09-10 02:47:36.619251', 'step': 18181, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 3322488817984}, 'timestamp': '2025-09-10 02:47:36.649786', 'step': 18181, 'epoch': 3} {'type': 'loss', 'content': 0.04107893630862236, 'timestamp': '2025-09-10 02:47:36.652089', 'step': 18182, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 128], 'flops': 3797092544000}, 'timestamp': '2025-09-10 02:47:36.681941', 'step': 18182, 'epoch': 3} {'type': 'loss', 'content': 0.054582979530096054, 'timestamp': '2025-09-10 02:47:36.684638', 'step': 18183, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 176], 'flops': 5220903722048}, 'timestamp': '2025-09-10 02:47:36.715343', 'step': 18183, 'epoch': 3} {'type': 'loss', 'content': 0.03831583261489868, 'timestamp': '2025-09-10 02:47:36.740339', 'step': 18184, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 160], 'flops': 4746299996032}, 'timestamp': '2025-09-10 02:47:36.771302', 'step': 18184, 'epoch': 3} {'type': 'loss', 'content': 0.05234096944332123, 'timestamp': '2025-09-10 02:47:36.773922', 'step': 18185, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 3322488817984}, 'timestamp': '2025-09-10 02:47:36.804446', 'step': 18185, 'epoch': 3} {'type': 'loss', 'content': 0.07527470588684082, 'timestamp': '2025-09-10 02:47:36.806901', 'step': 18186, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 128], 'flops': 3797092544000}, 'timestamp': '2025-09-10 02:47:36.837914', 'step': 18186, 'epoch': 3} {'type': 'loss', 'content': 0.1411803513765335, 'timestamp': '2025-09-10 02:47:36.840863', 'step': 18187, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 128], 'flops': 3797092544000}, 'timestamp': '2025-09-10 02:47:36.871900', 'step': 18187, 'epoch': 3} {'type': 'loss', 'content': 0.07980705797672272, 'timestamp': '2025-09-10 02:47:36.895562', 'step': 18188, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 3322488817984}, 'timestamp': '2025-09-10 02:47:36.926015', 'step': 18188, 'epoch': 3} {'type': 'loss', 'content': 0.06163228675723076, 'timestamp': '2025-09-10 02:47:36.928568', 'step': 18189, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 128], 'flops': 3797092544000}, 'timestamp': '2025-09-10 02:47:36.959289', 'step': 18189, 'epoch': 3} {'type': 'loss', 'content': 0.059178676456213, 'timestamp': '2025-09-10 02:47:36.961977', 'step': 18190, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 3322488817984}, 'timestamp': '2025-09-10 02:47:36.993024', 'step': 18190, 'epoch': 3} {'type': 'loss', 'content': 0.05128701031208038, 'timestamp': '2025-09-10 02:47:36.995601', 'step': 18191, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 128], 'flops': 3797092544000}, 'timestamp': '2025-09-10 02:47:37.025380', 'step': 18191, 'epoch': 3} {'type': 'loss', 'content': 0.01179565116763115, 'timestamp': '2025-09-10 02:47:37.049423', 'step': 18192, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 3322488817984}, 'timestamp': '2025-09-10 02:47:37.081806', 'step': 18192, 'epoch': 3} {'type': 'loss', 'content': 0.08963028341531754, 'timestamp': '2025-09-10 02:47:37.084160', 'step': 18193, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 2847885091968}, 'timestamp': '2025-09-10 02:47:37.114390', 'step': 18193, 'epoch': 3} {'type': 'loss', 'content': 0.05799867957830429, 'timestamp': '2025-09-10 02:47:37.117409', 'step': 18194, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 2847885091968}, 'timestamp': '2025-09-10 02:47:37.147602', 'step': 18194, 'epoch': 3} {'type': 'loss', 'content': 0.03705061972141266, 'timestamp': '2025-09-10 02:47:37.150219', 'step': 18195, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 3322488817984}, 'timestamp': '2025-09-10 02:47:37.182999', 'step': 18195, 'epoch': 3} {'type': 'loss', 'content': 0.11865826696157455, 'timestamp': '2025-09-10 02:47:37.206728', 'step': 18196, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 2847885091968}, 'timestamp': '2025-09-10 02:47:37.236784', 'step': 18196, 'epoch': 3} {'type': 'loss', 'content': 0.05823930725455284, 'timestamp': '2025-09-10 02:47:37.239040', 'step': 18197, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 3322488817984}, 'timestamp': '2025-09-10 02:47:37.269507', 'step': 18197, 'epoch': 3} {'type': 'loss', 'content': 0.055626995861530304, 'timestamp': '2025-09-10 02:47:37.271998', 'step': 18198, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 128], 'flops': 3797092544000}, 'timestamp': '2025-09-10 02:47:37.304022', 'step': 18198, 'epoch': 3} {'type': 'loss', 'content': 0.13697655498981476, 'timestamp': '2025-09-10 02:47:37.306362', 'step': 18199, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 2847885091968}, 'timestamp': '2025-09-10 02:47:37.336031', 'step': 18199, 'epoch': 3} {'type': 'loss', 'content': 0.06945819407701492, 'timestamp': '2025-09-10 02:47:37.359688', 'step': 18200, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 160], 'flops': 4746299996032}, 'timestamp': '2025-09-10 02:47:37.390348', 'step': 18200, 'epoch': 3} {'type': 'loss', 'content': 0.035270463675260544, 'timestamp': '2025-09-10 02:47:37.393034', 'step': 18201, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 2847885091968}, 'timestamp': '2025-09-10 02:47:37.423136', 'step': 18201, 'epoch': 3} {'type': 'loss', 'content': 0.03778375685214996, 'timestamp': '2025-09-10 02:47:37.425591', 'step': 18202, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 3322488817984}, 'timestamp': '2025-09-10 02:47:37.456092', 'step': 18202, 'epoch': 3} {'type': 'loss', 'content': 0.1647324562072754, 'timestamp': '2025-09-10 02:47:37.459394', 'step': 18203, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 128], 'flops': 3797092544000}, 'timestamp': '2025-09-10 02:47:37.489650', 'step': 18203, 'epoch': 3} {'type': 'loss', 'content': 0.0808999314904213, 'timestamp': '2025-09-10 02:47:37.513179', 'step': 18204, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 128], 'flops': 3797092544000}, 'timestamp': '2025-09-10 02:47:37.544230', 'step': 18204, 'epoch': 3} {'type': 'loss', 'content': 0.10160326957702637, 'timestamp': '2025-09-10 02:47:37.546619', 'step': 18205, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 3322488817984}, 'timestamp': '2025-09-10 02:47:37.576951', 'step': 18205, 'epoch': 3} {'type': 'loss', 'content': 0.03344085440039635, 'timestamp': '2025-09-10 02:47:37.579582', 'step': 18206, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 2847885091968}, 'timestamp': '2025-09-10 02:47:37.610767', 'step': 18206, 'epoch': 3} {'type': 'loss', 'content': 0.08323626220226288, 'timestamp': '2025-09-10 02:47:37.613329', 'step': 18207, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 3322488817984}, 'timestamp': '2025-09-10 02:47:37.643470', 'step': 18207, 'epoch': 3} {'type': 'loss', 'content': 0.09965354204177856, 'timestamp': '2025-09-10 02:47:37.667109', 'step': 18208, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 80], 'flops': 2373281365952}, 'timestamp': '2025-09-10 02:47:37.697268', 'step': 18208, 'epoch': 3} {'type': 'loss', 'content': 0.07738591730594635, 'timestamp': '2025-09-10 02:47:37.700349', 'step': 18209, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 3322488817984}, 'timestamp': '2025-09-10 02:47:37.730248', 'step': 18209, 'epoch': 3} {'type': 'loss', 'content': 0.10535579174757004, 'timestamp': '2025-09-10 02:47:37.732904', 'step': 18210, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 3322488817984}, 'timestamp': '2025-09-10 02:47:37.763361', 'step': 18210, 'epoch': 3} {'type': 'loss', 'content': 0.07433068007230759, 'timestamp': '2025-09-10 02:47:37.765974', 'step': 18211, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 2847885091968}, 'timestamp': '2025-09-10 02:47:37.797443', 'step': 18211, 'epoch': 3} {'type': 'loss', 'content': 0.0037083174102008343, 'timestamp': '2025-09-10 02:47:37.821081', 'step': 18212, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 128], 'flops': 3797092544000}, 'timestamp': '2025-09-10 02:47:37.852348', 'step': 18212, 'epoch': 3} {'type': 'loss', 'content': 0.12570594251155853, 'timestamp': '2025-09-10 02:47:37.854786', 'step': 18213, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 2847885091968}, 'timestamp': '2025-09-10 02:47:37.885034', 'step': 18213, 'epoch': 3} {'type': 'loss', 'content': 0.0929391160607338, 'timestamp': '2025-09-10 02:47:37.887428', 'step': 18214, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 128], 'flops': 3797092544000}, 'timestamp': '2025-09-10 02:47:37.917317', 'step': 18214, 'epoch': 3} {'type': 'loss', 'content': 0.04444468021392822, 'timestamp': '2025-09-10 02:47:37.919904', 'step': 18215, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 160], 'flops': 4746299996032}, 'timestamp': '2025-09-10 02:47:37.950847', 'step': 18215, 'epoch': 3} {'type': 'loss', 'content': 0.055698078125715256, 'timestamp': '2025-09-10 02:47:37.974383', 'step': 18216, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 128], 'flops': 3797092544000}, 'timestamp': '2025-09-10 02:47:38.005701', 'step': 18216, 'epoch': 3} {'type': 'loss', 'content': 0.07674548774957657, 'timestamp': '2025-09-10 02:47:38.008178', 'step': 18217, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 128], 'flops': 3797092544000}, 'timestamp': '2025-09-10 02:47:38.037970', 'step': 18217, 'epoch': 3} {'type': 'loss', 'content': 0.06498991698026657, 'timestamp': '2025-09-10 02:47:38.040697', 'step': 18218, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 3322488817984}, 'timestamp': '2025-09-10 02:47:38.071201', 'step': 18218, 'epoch': 3} {'type': 'loss', 'content': 0.01984741911292076, 'timestamp': '2025-09-10 02:47:38.073316', 'step': 18219, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 2847885091968}, 'timestamp': '2025-09-10 02:47:38.103641', 'step': 18219, 'epoch': 3} {'type': 'loss', 'content': 0.1319427192211151, 'timestamp': '2025-09-10 02:47:38.127421', 'step': 18220, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 3322488817984}, 'timestamp': '2025-09-10 02:47:38.158757', 'step': 18220, 'epoch': 3} {'type': 'loss', 'content': 0.04688183590769768, 'timestamp': '2025-09-10 02:47:38.161267', 'step': 18221, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 3322488817984}, 'timestamp': '2025-09-10 02:47:38.192699', 'step': 18221, 'epoch': 3} {'type': 'loss', 'content': 0.13606593012809753, 'timestamp': '2025-09-10 02:47:38.194932', 'step': 18222, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 3322488817984}, 'timestamp': '2025-09-10 02:47:38.225307', 'step': 18222, 'epoch': 3} {'type': 'loss', 'content': 0.0487859882414341, 'timestamp': '2025-09-10 02:47:38.227626', 'step': 18223, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 3322488817984}, 'timestamp': '2025-09-10 02:47:38.257889', 'step': 18223, 'epoch': 3} {'type': 'loss', 'content': 0.05034097656607628, 'timestamp': '2025-09-10 02:47:38.281480', 'step': 18224, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 144], 'flops': 4271696270016}, 'timestamp': '2025-09-10 02:47:38.313162', 'step': 18224, 'epoch': 3} {'type': 'loss', 'content': 0.06768167018890381, 'timestamp': '2025-09-10 02:47:38.315497', 'step': 18225, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 2847885091968}, 'timestamp': '2025-09-10 02:47:38.345180', 'step': 18225, 'epoch': 3} {'type': 'loss', 'content': 0.029473837465047836, 'timestamp': '2025-09-10 02:47:38.347812', 'step': 18226, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 2847885091968}, 'timestamp': '2025-09-10 02:47:38.378902', 'step': 18226, 'epoch': 3} {'type': 'loss', 'content': 0.036354877054691315, 'timestamp': '2025-09-10 02:47:38.381406', 'step': 18227, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 2847885091968}, 'timestamp': '2025-09-10 02:47:38.412502', 'step': 18227, 'epoch': 3} {'type': 'loss', 'content': 0.09296219795942307, 'timestamp': '2025-09-10 02:47:38.436217', 'step': 18228, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 3322488817984}, 'timestamp': '2025-09-10 02:47:38.466625', 'step': 18228, 'epoch': 3} {'type': 'loss', 'content': 0.09133358299732208, 'timestamp': '2025-09-10 02:47:38.469106', 'step': 18229, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 3322488817984}, 'timestamp': '2025-09-10 02:47:38.501142', 'step': 18229, 'epoch': 3} {'type': 'loss', 'content': 0.036903295665979385, 'timestamp': '2025-09-10 02:47:38.503610', 'step': 18230, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 3322488817984}, 'timestamp': '2025-09-10 02:47:38.534490', 'step': 18230, 'epoch': 3} {'type': 'loss', 'content': 0.12122843414545059, 'timestamp': '2025-09-10 02:47:38.538619', 'step': 18231, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 2847885091968}, 'timestamp': '2025-09-10 02:47:38.570165', 'step': 18231, 'epoch': 3} {'type': 'loss', 'content': 0.062354184687137604, 'timestamp': '2025-09-10 02:47:38.594938', 'step': 18232, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 2847885091968}, 'timestamp': '2025-09-10 02:47:38.625412', 'step': 18232, 'epoch': 3} {'type': 'loss', 'content': 0.060188502073287964, 'timestamp': '2025-09-10 02:47:38.627520', 'step': 18233, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 144], 'flops': 4271696270016}, 'timestamp': '2025-09-10 02:47:38.658353', 'step': 18233, 'epoch': 3} {'type': 'loss', 'content': 0.05809438228607178, 'timestamp': '2025-09-10 02:47:38.660822', 'step': 18234, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 144], 'flops': 4271696270016}, 'timestamp': '2025-09-10 02:47:38.691128', 'step': 18234, 'epoch': 3} {'type': 'loss', 'content': 0.08147875219583511, 'timestamp': '2025-09-10 02:47:38.693622', 'step': 18235, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 3322488817984}, 'timestamp': '2025-09-10 02:47:38.724468', 'step': 18235, 'epoch': 3} {'type': 'loss', 'content': 0.04458307847380638, 'timestamp': '2025-09-10 02:47:38.748239', 'step': 18236, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 144], 'flops': 4271696270016}, 'timestamp': '2025-09-10 02:47:38.779908', 'step': 18236, 'epoch': 3} {'type': 'loss', 'content': 0.02998635545372963, 'timestamp': '2025-09-10 02:47:38.782380', 'step': 18237, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 128], 'flops': 3797092544000}, 'timestamp': '2025-09-10 02:47:38.814592', 'step': 18237, 'epoch': 3} {'type': 'loss', 'content': 0.040711864829063416, 'timestamp': '2025-09-10 02:47:38.816978', 'step': 18238, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 2847885091968}, 'timestamp': '2025-09-10 02:47:38.847809', 'step': 18238, 'epoch': 3} {'type': 'loss', 'content': 0.06897307932376862, 'timestamp': '2025-09-10 02:47:38.849883', 'step': 18239, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 3322488817984}, 'timestamp': '2025-09-10 02:47:38.880134', 'step': 18239, 'epoch': 3} {'type': 'loss', 'content': 0.0356312058866024, 'timestamp': '2025-09-10 02:47:38.903611', 'step': 18240, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 128], 'flops': 3797092544000}, 'timestamp': '2025-09-10 02:47:38.934043', 'step': 18240, 'epoch': 3} {'type': 'loss', 'content': 0.07013517618179321, 'timestamp': '2025-09-10 02:47:38.936383', 'step': 18241, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 3322488817984}, 'timestamp': '2025-09-10 02:47:38.966540', 'step': 18241, 'epoch': 3} {'type': 'loss', 'content': 0.022772084921598434, 'timestamp': '2025-09-10 02:47:38.968806', 'step': 18242, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 176], 'flops': 5220903722048}, 'timestamp': '2025-09-10 02:47:38.999051', 'step': 18242, 'epoch': 3} {'type': 'loss', 'content': 0.015231572091579437, 'timestamp': '2025-09-10 02:47:39.003389', 'step': 18243, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 128], 'flops': 3797092544000}, 'timestamp': '2025-09-10 02:47:39.033310', 'step': 18243, 'epoch': 3} {'type': 'loss', 'content': 0.05077122896909714, 'timestamp': '2025-09-10 02:47:39.057082', 'step': 18244, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 128], 'flops': 3797092544000}, 'timestamp': '2025-09-10 02:47:39.088794', 'step': 18244, 'epoch': 3} {'type': 'loss', 'content': 0.12337055802345276, 'timestamp': '2025-09-10 02:47:39.091572', 'step': 18245, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 2847885091968}, 'timestamp': '2025-09-10 02:47:39.122668', 'step': 18245, 'epoch': 3} {'type': 'loss', 'content': 0.09609872102737427, 'timestamp': '2025-09-10 02:47:39.124994', 'step': 18246, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 3322488817984}, 'timestamp': '2025-09-10 02:47:39.156984', 'step': 18246, 'epoch': 3} {'type': 'loss', 'content': 0.06867191195487976, 'timestamp': '2025-09-10 02:47:39.160514', 'step': 18247, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 192], 'flops': 5695507448064}, 'timestamp': '2025-09-10 02:47:39.194127', 'step': 18247, 'epoch': 3} {'type': 'loss', 'content': 0.09813672304153442, 'timestamp': '2025-09-10 02:47:39.219223', 'step': 18248, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 3322488817984}, 'timestamp': '2025-09-10 02:47:39.251997', 'step': 18248, 'epoch': 3} {'type': 'loss', 'content': 0.09541834890842438, 'timestamp': '2025-09-10 02:47:39.254507', 'step': 18249, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 144], 'flops': 4271696270016}, 'timestamp': '2025-09-10 02:47:39.287754', 'step': 18249, 'epoch': 3} {'type': 'loss', 'content': 0.053034111857414246, 'timestamp': '2025-09-10 02:47:39.291113', 'step': 18250, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 144], 'flops': 4271696270016}, 'timestamp': '2025-09-10 02:47:39.332384', 'step': 18250, 'epoch': 3} {'type': 'loss', 'content': 0.016322560608386993, 'timestamp': '2025-09-10 02:47:39.337650', 'step': 18251, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 2847885091968}, 'timestamp': '2025-09-10 02:47:39.377795', 'step': 18251, 'epoch': 3} {'type': 'loss', 'content': 0.03373953700065613, 'timestamp': '2025-09-10 02:47:39.405465', 'step': 18252, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 128], 'flops': 3797092544000}, 'timestamp': '2025-09-10 02:47:39.438688', 'step': 18252, 'epoch': 3} {'type': 'loss', 'content': 0.03244366496801376, 'timestamp': '2025-09-10 02:47:39.441128', 'step': 18253, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 2847885091968}, 'timestamp': '2025-09-10 02:47:39.471197', 'step': 18253, 'epoch': 3} {'type': 'loss', 'content': 0.08373264968395233, 'timestamp': '2025-09-10 02:47:39.473818', 'step': 18254, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 80], 'flops': 2373281365952}, 'timestamp': '2025-09-10 02:47:39.504513', 'step': 18254, 'epoch': 3} {'type': 'loss', 'content': 0.010058374144136906, 'timestamp': '2025-09-10 02:47:39.506878', 'step': 18255, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 128], 'flops': 3797092544000}, 'timestamp': '2025-09-10 02:47:39.539868', 'step': 18255, 'epoch': 3} {'type': 'loss', 'content': 0.10447477549314499, 'timestamp': '2025-09-10 02:47:39.564948', 'step': 18256, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 3322488817984}, 'timestamp': '2025-09-10 02:47:39.597193', 'step': 18256, 'epoch': 3} {'type': 'loss', 'content': 0.03624126315116882, 'timestamp': '2025-09-10 02:47:39.601172', 'step': 18257, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 3322488817984}, 'timestamp': '2025-09-10 02:47:39.631584', 'step': 18257, 'epoch': 3} {'type': 'loss', 'content': 0.04940047860145569, 'timestamp': '2025-09-10 02:47:39.634245', 'step': 18258, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 2847885091968}, 'timestamp': '2025-09-10 02:47:39.664787', 'step': 18258, 'epoch': 3} {'type': 'loss', 'content': 0.1106262356042862, 'timestamp': '2025-09-10 02:47:39.669376', 'step': 18259, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 432], 'flops': 12814563338304}, 'timestamp': '2025-09-10 02:47:39.711317', 'step': 18259, 'epoch': 3} {'type': 'loss', 'content': 0.036873750388622284, 'timestamp': '2025-09-10 02:47:39.748201', 'step': 18260, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 3322488817984}, 'timestamp': '2025-09-10 02:47:39.778822', 'step': 18260, 'epoch': 3} {'type': 'loss', 'content': 0.050868477672338486, 'timestamp': '2025-09-10 02:47:39.781220', 'step': 18261, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 128], 'flops': 3797092544000}, 'timestamp': '2025-09-10 02:47:39.812876', 'step': 18261, 'epoch': 3} {'type': 'loss', 'content': 0.11012984067201614, 'timestamp': '2025-09-10 02:47:39.816224', 'step': 18262, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 3322488817984}, 'timestamp': '2025-09-10 02:47:39.846184', 'step': 18262, 'epoch': 3} {'type': 'loss', 'content': 0.09204863756895065, 'timestamp': '2025-09-10 02:47:39.854129', 'step': 18263, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 3322488817984}, 'timestamp': '2025-09-10 02:47:39.885513', 'step': 18263, 'epoch': 3} {'type': 'loss', 'content': 0.11227492988109589, 'timestamp': '2025-09-10 02:47:39.910514', 'step': 18264, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 128], 'flops': 3797092544000}, 'timestamp': '2025-09-10 02:47:39.944130', 'step': 18264, 'epoch': 3} {'type': 'loss', 'content': 0.05412490293383598, 'timestamp': '2025-09-10 02:47:39.946825', 'step': 18265, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 2847885091968}, 'timestamp': '2025-09-10 02:47:39.978341', 'step': 18265, 'epoch': 3} {'type': 'loss', 'content': 0.08753345161676407, 'timestamp': '2025-09-10 02:47:39.981454', 'step': 18266, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 2847885091968}, 'timestamp': '2025-09-10 02:47:40.012335', 'step': 18266, 'epoch': 3} {'type': 'loss', 'content': 0.026249093934893608, 'timestamp': '2025-09-10 02:47:40.020530', 'step': 18267, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 3322488817984}, 'timestamp': '2025-09-10 02:47:40.058534', 'step': 18267, 'epoch': 3} {'type': 'loss', 'content': 0.053679611533880234, 'timestamp': '2025-09-10 02:47:40.082710', 'step': 18268, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 3322488817984}, 'timestamp': '2025-09-10 02:47:40.115656', 'step': 18268, 'epoch': 3} {'type': 'loss', 'content': 0.10824794322252274, 'timestamp': '2025-09-10 02:47:40.118272', 'step': 18269, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 128], 'flops': 3797092544000}, 'timestamp': '2025-09-10 02:47:40.149934', 'step': 18269, 'epoch': 3} {'type': 'loss', 'content': 0.055805716663599014, 'timestamp': '2025-09-10 02:47:40.152706', 'step': 18270, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 3322488817984}, 'timestamp': '2025-09-10 02:47:40.182755', 'step': 18270, 'epoch': 3} {'type': 'loss', 'content': 0.04232315719127655, 'timestamp': '2025-09-10 02:47:40.185020', 'step': 18271, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 3322488817984}, 'timestamp': '2025-09-10 02:47:40.215235', 'step': 18271, 'epoch': 3} {'type': 'loss', 'content': 0.06077050790190697, 'timestamp': '2025-09-10 02:47:40.239019', 'step': 18272, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 80], 'flops': 2373281365952}, 'timestamp': '2025-09-10 02:47:40.269581', 'step': 18272, 'epoch': 3} {'type': 'loss', 'content': 0.04581739380955696, 'timestamp': '2025-09-10 02:47:40.271857', 'step': 18273, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 2847885091968}, 'timestamp': '2025-09-10 02:47:40.301746', 'step': 18273, 'epoch': 3} {'type': 'loss', 'content': 0.04434160515666008, 'timestamp': '2025-09-10 02:47:40.304685', 'step': 18274, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 128], 'flops': 3797092544000}, 'timestamp': '2025-09-10 02:47:40.338237', 'step': 18274, 'epoch': 3} {'type': 'loss', 'content': 0.15732134878635406, 'timestamp': '2025-09-10 02:47:40.340778', 'step': 18275, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 144], 'flops': 4271696270016}, 'timestamp': '2025-09-10 02:47:40.371311', 'step': 18275, 'epoch': 3} {'type': 'loss', 'content': 0.09348196536302567, 'timestamp': '2025-09-10 02:47:40.394894', 'step': 18276, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 3322488817984}, 'timestamp': '2025-09-10 02:47:40.425187', 'step': 18276, 'epoch': 3} {'type': 'loss', 'content': 0.06300709396600723, 'timestamp': '2025-09-10 02:47:40.428043', 'step': 18277, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 144], 'flops': 4271696270016}, 'timestamp': '2025-09-10 02:47:40.458188', 'step': 18277, 'epoch': 3} {'type': 'loss', 'content': 0.03148968890309334, 'timestamp': '2025-09-10 02:47:40.460689', 'step': 18278, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 144], 'flops': 4271696270016}, 'timestamp': '2025-09-10 02:47:40.491351', 'step': 18278, 'epoch': 3} {'type': 'loss', 'content': 0.08768563717603683, 'timestamp': '2025-09-10 02:47:40.493794', 'step': 18279, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 2847885091968}, 'timestamp': '2025-09-10 02:47:40.525165', 'step': 18279, 'epoch': 3} {'type': 'loss', 'content': 0.11115937680006027, 'timestamp': '2025-09-10 02:47:40.548844', 'step': 18280, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 3322488817984}, 'timestamp': '2025-09-10 02:47:40.578964', 'step': 18280, 'epoch': 3} {'type': 'loss', 'content': 0.12397255003452301, 'timestamp': '2025-09-10 02:47:40.581559', 'step': 18281, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 3322488817984}, 'timestamp': '2025-09-10 02:47:40.611948', 'step': 18281, 'epoch': 3} {'type': 'loss', 'content': 0.0597047284245491, 'timestamp': '2025-09-10 02:47:40.614417', 'step': 18282, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 128], 'flops': 3797092544000}, 'timestamp': '2025-09-10 02:47:40.644223', 'step': 18282, 'epoch': 3} {'type': 'loss', 'content': 0.007571827154606581, 'timestamp': '2025-09-10 02:47:40.646541', 'step': 18283, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 3322488817984}, 'timestamp': '2025-09-10 02:47:40.676943', 'step': 18283, 'epoch': 3} {'type': 'loss', 'content': 0.10363894701004028, 'timestamp': '2025-09-10 02:47:40.700536', 'step': 18284, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 3322488817984}, 'timestamp': '2025-09-10 02:47:40.730569', 'step': 18284, 'epoch': 3} {'type': 'loss', 'content': 0.12097849696874619, 'timestamp': '2025-09-10 02:47:40.733078', 'step': 18285, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 128], 'flops': 3797092544000}, 'timestamp': '2025-09-10 02:47:40.764520', 'step': 18285, 'epoch': 3} {'type': 'loss', 'content': 0.02095606178045273, 'timestamp': '2025-09-10 02:47:40.767123', 'step': 18286, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 2847885091968}, 'timestamp': '2025-09-10 02:47:40.797236', 'step': 18286, 'epoch': 3} {'type': 'loss', 'content': 0.08913161605596542, 'timestamp': '2025-09-10 02:47:40.802380', 'step': 18287, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 176], 'flops': 5220903722048}, 'timestamp': '2025-09-10 02:47:40.833031', 'step': 18287, 'epoch': 3} {'type': 'loss', 'content': 0.0440523587167263, 'timestamp': '2025-09-10 02:47:40.858146', 'step': 18288, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 3322488817984}, 'timestamp': '2025-09-10 02:47:40.889428', 'step': 18288, 'epoch': 3} {'type': 'loss', 'content': 0.07573877274990082, 'timestamp': '2025-09-10 02:47:40.892187', 'step': 18289, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 128], 'flops': 3797092544000}, 'timestamp': '2025-09-10 02:47:40.922918', 'step': 18289, 'epoch': 3} {'type': 'loss', 'content': 0.06400950253009796, 'timestamp': '2025-09-10 02:47:40.925341', 'step': 18290, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 2847885091968}, 'timestamp': '2025-09-10 02:47:40.955203', 'step': 18290, 'epoch': 3} {'type': 'loss', 'content': 0.09791145473718643, 'timestamp': '2025-09-10 02:47:40.957996', 'step': 18291, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 2847885091968}, 'timestamp': '2025-09-10 02:47:40.988401', 'step': 18291, 'epoch': 3} {'type': 'loss', 'content': 0.04417549818754196, 'timestamp': '2025-09-10 02:47:41.011937', 'step': 18292, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 2847885091968}, 'timestamp': '2025-09-10 02:47:41.042596', 'step': 18292, 'epoch': 3} {'type': 'loss', 'content': 0.0719209536910057, 'timestamp': '2025-09-10 02:47:41.044976', 'step': 18293, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 160], 'flops': 4746299996032}, 'timestamp': '2025-09-10 02:47:41.075231', 'step': 18293, 'epoch': 3} {'type': 'loss', 'content': 0.10043801367282867, 'timestamp': '2025-09-10 02:47:41.079273', 'step': 18294, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 3322488817984}, 'timestamp': '2025-09-10 02:47:41.109346', 'step': 18294, 'epoch': 3} {'type': 'loss', 'content': 0.013615928590297699, 'timestamp': '2025-09-10 02:47:41.111674', 'step': 18295, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 3322488817984}, 'timestamp': '2025-09-10 02:47:41.142279', 'step': 18295, 'epoch': 3} {'type': 'loss', 'content': 0.12131454050540924, 'timestamp': '2025-09-10 02:47:41.165599', 'step': 18296, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 128], 'flops': 3797092544000}, 'timestamp': '2025-09-10 02:47:41.196387', 'step': 18296, 'epoch': 3} {'type': 'loss', 'content': 0.05317116901278496, 'timestamp': '2025-09-10 02:47:41.198642', 'step': 18297, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 2847885091968}, 'timestamp': '2025-09-10 02:47:41.230527', 'step': 18297, 'epoch': 3} {'type': 'loss', 'content': 0.06753409653902054, 'timestamp': '2025-09-10 02:47:41.233141', 'step': 18298, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 3322488817984}, 'timestamp': '2025-09-10 02:47:41.263956', 'step': 18298, 'epoch': 3} {'type': 'loss', 'content': 0.049392953515052795, 'timestamp': '2025-09-10 02:47:41.266372', 'step': 18299, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 128], 'flops': 3797092544000}, 'timestamp': '2025-09-10 02:47:41.296866', 'step': 18299, 'epoch': 3} {'type': 'loss', 'content': 0.08996305614709854, 'timestamp': '2025-09-10 02:47:41.320774', 'step': 18300, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 128], 'flops': 3797092544000}, 'timestamp': '2025-09-10 02:47:41.351213', 'step': 18300, 'epoch': 3} {'type': 'loss', 'content': 0.10241203755140305, 'timestamp': '2025-09-10 02:47:41.353552', 'step': 18301, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 2847885091968}, 'timestamp': '2025-09-10 02:47:41.383620', 'step': 18301, 'epoch': 3} {'type': 'loss', 'content': 0.10190290957689285, 'timestamp': '2025-09-10 02:47:41.387256', 'step': 18302, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 3322488817984}, 'timestamp': '2025-09-10 02:47:41.419027', 'step': 18302, 'epoch': 3} {'type': 'loss', 'content': 0.06198512390255928, 'timestamp': '2025-09-10 02:47:41.421379', 'step': 18303, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 3322488817984}, 'timestamp': '2025-09-10 02:47:41.450971', 'step': 18303, 'epoch': 3} {'type': 'loss', 'content': 0.02797255478799343, 'timestamp': '2025-09-10 02:47:41.474772', 'step': 18304, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 3322488817984}, 'timestamp': '2025-09-10 02:47:41.505005', 'step': 18304, 'epoch': 3} {'type': 'loss', 'content': 0.05640319362282753, 'timestamp': '2025-09-10 02:47:41.507928', 'step': 18305, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 2847885091968}, 'timestamp': '2025-09-10 02:47:41.539462', 'step': 18305, 'epoch': 3} {'type': 'loss', 'content': 0.09219761937856674, 'timestamp': '2025-09-10 02:47:41.541981', 'step': 18306, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 2847885091968}, 'timestamp': '2025-09-10 02:47:41.572134', 'step': 18306, 'epoch': 3} {'type': 'loss', 'content': 0.07568345963954926, 'timestamp': '2025-09-10 02:47:41.574785', 'step': 18307, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 3322488817984}, 'timestamp': '2025-09-10 02:47:41.604818', 'step': 18307, 'epoch': 3} {'type': 'loss', 'content': 0.01841467246413231, 'timestamp': '2025-09-10 02:47:41.628606', 'step': 18308, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 3322488817984}, 'timestamp': '2025-09-10 02:47:41.659226', 'step': 18308, 'epoch': 3} {'type': 'loss', 'content': 0.05815439671278, 'timestamp': '2025-09-10 02:47:41.661552', 'step': 18309, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 3322488817984}, 'timestamp': '2025-09-10 02:47:41.691969', 'step': 18309, 'epoch': 3} {'type': 'loss', 'content': 0.056817684322595596, 'timestamp': '2025-09-10 02:47:41.694445', 'step': 18310, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 3322488817984}, 'timestamp': '2025-09-10 02:47:41.724715', 'step': 18310, 'epoch': 3} {'type': 'loss', 'content': 0.09468553215265274, 'timestamp': '2025-09-10 02:47:41.727096', 'step': 18311, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 128], 'flops': 3797092544000}, 'timestamp': '2025-09-10 02:47:41.757515', 'step': 18311, 'epoch': 3} {'type': 'loss', 'content': 0.07981876283884048, 'timestamp': '2025-09-10 02:47:41.781008', 'step': 18312, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 2847885091968}, 'timestamp': '2025-09-10 02:47:41.811524', 'step': 18312, 'epoch': 3} {'type': 'loss', 'content': 0.051463108509778976, 'timestamp': '2025-09-10 02:47:41.814802', 'step': 18313, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 3322488817984}, 'timestamp': '2025-09-10 02:47:41.848865', 'step': 18313, 'epoch': 3} {'type': 'loss', 'content': 0.045065753161907196, 'timestamp': '2025-09-10 02:47:41.851625', 'step': 18314, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 2847885091968}, 'timestamp': '2025-09-10 02:47:41.882185', 'step': 18314, 'epoch': 3} {'type': 'loss', 'content': 0.08225264400243759, 'timestamp': '2025-09-10 02:47:41.884723', 'step': 18315, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 2847885091968}, 'timestamp': '2025-09-10 02:47:41.915948', 'step': 18315, 'epoch': 3} {'type': 'loss', 'content': 0.07131599634885788, 'timestamp': '2025-09-10 02:47:41.939579', 'step': 18316, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 3322488817984}, 'timestamp': '2025-09-10 02:47:41.970231', 'step': 18316, 'epoch': 3} {'type': 'loss', 'content': 0.09107273072004318, 'timestamp': '2025-09-10 02:47:41.972568', 'step': 18317, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 2847885091968}, 'timestamp': '2025-09-10 02:47:42.003285', 'step': 18317, 'epoch': 3} {'type': 'loss', 'content': 0.08195912837982178, 'timestamp': '2025-09-10 02:47:42.005537', 'step': 18318, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 2847885091968}, 'timestamp': '2025-09-10 02:47:42.035841', 'step': 18318, 'epoch': 3} {'type': 'loss', 'content': 0.09402341395616531, 'timestamp': '2025-09-10 02:47:42.038375', 'step': 18319, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 2847885091968}, 'timestamp': '2025-09-10 02:47:42.069267', 'step': 18319, 'epoch': 3} {'type': 'loss', 'content': 0.05448206141591072, 'timestamp': '2025-09-10 02:47:42.092990', 'step': 18320, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 2847885091968}, 'timestamp': '2025-09-10 02:47:42.123754', 'step': 18320, 'epoch': 3} {'type': 'loss', 'content': 0.06485527753829956, 'timestamp': '2025-09-10 02:47:42.126398', 'step': 18321, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 128], 'flops': 3797092544000}, 'timestamp': '2025-09-10 02:47:42.156737', 'step': 18321, 'epoch': 3} {'type': 'loss', 'content': 0.08690226823091507, 'timestamp': '2025-09-10 02:47:42.159498', 'step': 18322, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 144], 'flops': 4271696270016}, 'timestamp': '2025-09-10 02:47:42.191138', 'step': 18322, 'epoch': 3} {'type': 'loss', 'content': 0.07878755778074265, 'timestamp': '2025-09-10 02:47:42.193649', 'step': 18323, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 128], 'flops': 3797092544000}, 'timestamp': '2025-09-10 02:47:42.224325', 'step': 18323, 'epoch': 3} {'type': 'loss', 'content': 0.07011398673057556, 'timestamp': '2025-09-10 02:47:42.247935', 'step': 18324, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 2847885091968}, 'timestamp': '2025-09-10 02:47:42.278736', 'step': 18324, 'epoch': 3} {'type': 'loss', 'content': 0.06441564112901688, 'timestamp': '2025-09-10 02:47:42.281226', 'step': 18325, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 3322488817984}, 'timestamp': '2025-09-10 02:47:42.311717', 'step': 18325, 'epoch': 3} {'type': 'loss', 'content': 0.0702381283044815, 'timestamp': '2025-09-10 02:47:42.315294', 'step': 18326, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 160], 'flops': 4746299996032}, 'timestamp': '2025-09-10 02:47:42.354042', 'step': 18326, 'epoch': 3} {'type': 'loss', 'content': 0.06991460174322128, 'timestamp': '2025-09-10 02:47:42.356549', 'step': 18327, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 128], 'flops': 3797092544000}, 'timestamp': '2025-09-10 02:47:42.388039', 'step': 18327, 'epoch': 3} {'type': 'loss', 'content': 0.07935678213834763, 'timestamp': '2025-09-10 02:47:42.411732', 'step': 18328, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 3322488817984}, 'timestamp': '2025-09-10 02:47:42.442244', 'step': 18328, 'epoch': 3} {'type': 'loss', 'content': 0.029893988743424416, 'timestamp': '2025-09-10 02:47:42.444718', 'step': 18329, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 128], 'flops': 3797092544000}, 'timestamp': '2025-09-10 02:47:42.474857', 'step': 18329, 'epoch': 3} {'type': 'loss', 'content': 0.04872721806168556, 'timestamp': '2025-09-10 02:47:42.477468', 'step': 18330, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 128], 'flops': 3797092544000}, 'timestamp': '2025-09-10 02:47:42.507754', 'step': 18330, 'epoch': 3} {'type': 'loss', 'content': 0.03518354892730713, 'timestamp': '2025-09-10 02:47:42.510066', 'step': 18331, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 128], 'flops': 3797092544000}, 'timestamp': '2025-09-10 02:47:42.540141', 'step': 18331, 'epoch': 3} {'type': 'loss', 'content': 0.09381196647882462, 'timestamp': '2025-09-10 02:47:42.564210', 'step': 18332, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 2847885091968}, 'timestamp': '2025-09-10 02:47:42.596016', 'step': 18332, 'epoch': 3} {'type': 'loss', 'content': 0.09515571594238281, 'timestamp': '2025-09-10 02:47:42.598448', 'step': 18333, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 3322488817984}, 'timestamp': '2025-09-10 02:47:42.629820', 'step': 18333, 'epoch': 3} {'type': 'loss', 'content': 0.035775963217020035, 'timestamp': '2025-09-10 02:47:42.632357', 'step': 18334, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 2847885091968}, 'timestamp': '2025-09-10 02:47:42.662180', 'step': 18334, 'epoch': 3} {'type': 'loss', 'content': 0.09477297961711884, 'timestamp': '2025-09-10 02:47:42.664559', 'step': 18335, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 3322488817984}, 'timestamp': '2025-09-10 02:47:42.694819', 'step': 18335, 'epoch': 3} {'type': 'loss', 'content': 0.06608356535434723, 'timestamp': '2025-09-10 02:47:42.718278', 'step': 18336, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 3322488817984}, 'timestamp': '2025-09-10 02:47:42.749926', 'step': 18336, 'epoch': 3} {'type': 'loss', 'content': 0.0033615510910749435, 'timestamp': '2025-09-10 02:47:42.752329', 'step': 18337, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 2847885091968}, 'timestamp': '2025-09-10 02:47:42.782504', 'step': 18337, 'epoch': 3} {'type': 'loss', 'content': 0.04511314630508423, 'timestamp': '2025-09-10 02:47:42.785104', 'step': 18338, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 2847885091968}, 'timestamp': '2025-09-10 02:47:42.815194', 'step': 18338, 'epoch': 3} {'type': 'loss', 'content': 0.044043637812137604, 'timestamp': '2025-09-10 02:47:42.817992', 'step': 18339, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 2847885091968}, 'timestamp': '2025-09-10 02:47:42.849087', 'step': 18339, 'epoch': 3} {'type': 'loss', 'content': 0.021490616723895073, 'timestamp': '2025-09-10 02:47:42.872437', 'step': 18340, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 3322488817984}, 'timestamp': '2025-09-10 02:47:42.905045', 'step': 18340, 'epoch': 3} {'type': 'loss', 'content': 0.047023072838783264, 'timestamp': '2025-09-10 02:47:42.907424', 'step': 18341, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 3322488817984}, 'timestamp': '2025-09-10 02:47:42.938034', 'step': 18341, 'epoch': 3} {'type': 'loss', 'content': 0.05381257086992264, 'timestamp': '2025-09-10 02:47:42.940649', 'step': 18342, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 80], 'flops': 2373281365952}, 'timestamp': '2025-09-10 02:47:42.971164', 'step': 18342, 'epoch': 3} {'type': 'loss', 'content': 0.07986954599618912, 'timestamp': '2025-09-10 02:47:42.974097', 'step': 18343, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 2847885091968}, 'timestamp': '2025-09-10 02:47:43.004977', 'step': 18343, 'epoch': 3} {'type': 'loss', 'content': 0.06852677464485168, 'timestamp': '2025-09-10 02:47:43.028615', 'step': 18344, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 3322488817984}, 'timestamp': '2025-09-10 02:47:43.058892', 'step': 18344, 'epoch': 3} {'type': 'loss', 'content': 0.03832133859395981, 'timestamp': '2025-09-10 02:47:43.061417', 'step': 18345, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 3322488817984}, 'timestamp': '2025-09-10 02:47:43.091463', 'step': 18345, 'epoch': 3} {'type': 'loss', 'content': 0.04160891845822334, 'timestamp': '2025-09-10 02:47:43.094064', 'step': 18346, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 128], 'flops': 3797092544000}, 'timestamp': '2025-09-10 02:47:43.124918', 'step': 18346, 'epoch': 3} {'type': 'loss', 'content': 0.12260656803846359, 'timestamp': '2025-09-10 02:47:43.127524', 'step': 18347, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 2847885091968}, 'timestamp': '2025-09-10 02:47:43.159558', 'step': 18347, 'epoch': 3} {'type': 'loss', 'content': 0.16315214335918427, 'timestamp': '2025-09-10 02:47:43.183250', 'step': 18348, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 128], 'flops': 3797092544000}, 'timestamp': '2025-09-10 02:47:43.214668', 'step': 18348, 'epoch': 3} {'type': 'loss', 'content': 0.028134524822235107, 'timestamp': '2025-09-10 02:47:43.217399', 'step': 18349, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 144], 'flops': 4271696270016}, 'timestamp': '2025-09-10 02:47:43.247800', 'step': 18349, 'epoch': 3} {'type': 'loss', 'content': 0.07450472563505173, 'timestamp': '2025-09-10 02:47:43.250403', 'step': 18350, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 3322488817984}, 'timestamp': '2025-09-10 02:47:43.280490', 'step': 18350, 'epoch': 3} {'type': 'loss', 'content': 0.07729596644639969, 'timestamp': '2025-09-10 02:47:43.283134', 'step': 18351, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 3322488817984}, 'timestamp': '2025-09-10 02:47:43.313606', 'step': 18351, 'epoch': 3} {'type': 'loss', 'content': 0.047359053045511246, 'timestamp': '2025-09-10 02:47:43.337117', 'step': 18352, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 3322488817984}, 'timestamp': '2025-09-10 02:47:43.372305', 'step': 18352, 'epoch': 3} {'type': 'loss', 'content': 0.034382164478302, 'timestamp': '2025-09-10 02:47:43.374796', 'step': 18353, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 3322488817984}, 'timestamp': '2025-09-10 02:47:43.405335', 'step': 18353, 'epoch': 3} {'type': 'loss', 'content': 0.061599764972925186, 'timestamp': '2025-09-10 02:47:43.407769', 'step': 18354, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 3322488817984}, 'timestamp': '2025-09-10 02:47:43.440180', 'step': 18354, 'epoch': 3} {'type': 'loss', 'content': 0.034593455493450165, 'timestamp': '2025-09-10 02:47:43.442169', 'step': 18355, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 3322488817984}, 'timestamp': '2025-09-10 02:47:43.472245', 'step': 18355, 'epoch': 3} {'type': 'loss', 'content': 0.14101848006248474, 'timestamp': '2025-09-10 02:47:43.495897', 'step': 18356, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 80], 'flops': 2373281365952}, 'timestamp': '2025-09-10 02:47:43.526130', 'step': 18356, 'epoch': 3} {'type': 'loss', 'content': 0.034179966896772385, 'timestamp': '2025-09-10 02:47:43.529775', 'step': 18357, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 3322488817984}, 'timestamp': '2025-09-10 02:47:43.561773', 'step': 18357, 'epoch': 3} {'type': 'loss', 'content': 0.13777193427085876, 'timestamp': '2025-09-10 02:47:43.563805', 'step': 18358, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 144], 'flops': 4271696270016}, 'timestamp': '2025-09-10 02:47:43.594862', 'step': 18358, 'epoch': 3} {'type': 'loss', 'content': 0.04474344477057457, 'timestamp': '2025-09-10 02:47:43.597466', 'step': 18359, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 2847885091968}, 'timestamp': '2025-09-10 02:47:43.627713', 'step': 18359, 'epoch': 3} {'type': 'loss', 'content': 0.05409218370914459, 'timestamp': '2025-09-10 02:47:43.651430', 'step': 18360, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 144], 'flops': 4271696270016}, 'timestamp': '2025-09-10 02:47:43.683033', 'step': 18360, 'epoch': 3} {'type': 'loss', 'content': 0.06864108890295029, 'timestamp': '2025-09-10 02:47:43.685597', 'step': 18361, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 2847885091968}, 'timestamp': '2025-09-10 02:47:43.715970', 'step': 18361, 'epoch': 3} {'type': 'loss', 'content': 0.11791390180587769, 'timestamp': '2025-09-10 02:47:43.718249', 'step': 18362, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 3322488817984}, 'timestamp': '2025-09-10 02:47:43.748174', 'step': 18362, 'epoch': 3} {'type': 'loss', 'content': 0.06982356309890747, 'timestamp': '2025-09-10 02:47:43.750796', 'step': 18363, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 80], 'flops': 2373281365952}, 'timestamp': '2025-09-10 02:47:43.786633', 'step': 18363, 'epoch': 3} {'type': 'loss', 'content': 0.06167382374405861, 'timestamp': '2025-09-10 02:47:43.810174', 'step': 18364, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 128], 'flops': 3797092544000}, 'timestamp': '2025-09-10 02:47:43.840643', 'step': 18364, 'epoch': 3} {'type': 'loss', 'content': 0.10208633542060852, 'timestamp': '2025-09-10 02:47:43.842881', 'step': 18365, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 128], 'flops': 3797092544000}, 'timestamp': '2025-09-10 02:47:43.873432', 'step': 18365, 'epoch': 3} {'type': 'loss', 'content': 0.042852889746427536, 'timestamp': '2025-09-10 02:47:43.875864', 'step': 18366, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 3322488817984}, 'timestamp': '2025-09-10 02:47:43.906194', 'step': 18366, 'epoch': 3} {'type': 'loss', 'content': 0.024148857221007347, 'timestamp': '2025-09-10 02:47:43.908648', 'step': 18367, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 3322488817984}, 'timestamp': '2025-09-10 02:47:43.939038', 'step': 18367, 'epoch': 3} {'type': 'loss', 'content': 0.08994600921869278, 'timestamp': '2025-09-10 02:47:43.963754', 'step': 18368, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 144], 'flops': 4271696270016}, 'timestamp': '2025-09-10 02:47:43.994865', 'step': 18368, 'epoch': 3} {'type': 'loss', 'content': 0.08941750228404999, 'timestamp': '2025-09-10 02:47:43.997429', 'step': 18369, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 144], 'flops': 4271696270016}, 'timestamp': '2025-09-10 02:47:44.027683', 'step': 18369, 'epoch': 3} {'type': 'loss', 'content': 0.03914840891957283, 'timestamp': '2025-09-10 02:47:44.030223', 'step': 18370, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 2847885091968}, 'timestamp': '2025-09-10 02:47:44.060191', 'step': 18370, 'epoch': 3} {'type': 'loss', 'content': 0.08619825541973114, 'timestamp': '2025-09-10 02:47:44.062680', 'step': 18371, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 144], 'flops': 4271696270016}, 'timestamp': '2025-09-10 02:47:44.092752', 'step': 18371, 'epoch': 3} {'type': 'loss', 'content': 0.12789584696292877, 'timestamp': '2025-09-10 02:47:44.116480', 'step': 18372, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 128], 'flops': 3797092544000}, 'timestamp': '2025-09-10 02:47:44.146945', 'step': 18372, 'epoch': 3} {'type': 'loss', 'content': 0.040053270757198334, 'timestamp': '2025-09-10 02:47:44.149298', 'step': 18373, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 144], 'flops': 4271696270016}, 'timestamp': '2025-09-10 02:47:44.179613', 'step': 18373, 'epoch': 3} {'type': 'loss', 'content': 0.09597251564264297, 'timestamp': '2025-09-10 02:47:44.182031', 'step': 18374, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 3322488817984}, 'timestamp': '2025-09-10 02:47:44.212058', 'step': 18374, 'epoch': 3} {'type': 'loss', 'content': 0.04655282571911812, 'timestamp': '2025-09-10 02:47:44.214642', 'step': 18375, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 2847885091968}, 'timestamp': '2025-09-10 02:47:44.245342', 'step': 18375, 'epoch': 3} {'type': 'loss', 'content': 0.0905119851231575, 'timestamp': '2025-09-10 02:47:44.268779', 'step': 18376, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 3322488817984}, 'timestamp': '2025-09-10 02:47:44.299208', 'step': 18376, 'epoch': 3} {'type': 'loss', 'content': 0.016242507845163345, 'timestamp': '2025-09-10 02:47:44.301446', 'step': 18377, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 3322488817984}, 'timestamp': '2025-09-10 02:47:44.332534', 'step': 18377, 'epoch': 3} {'type': 'loss', 'content': 0.030293341726064682, 'timestamp': '2025-09-10 02:47:44.338200', 'step': 18378, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 3322488817984}, 'timestamp': '2025-09-10 02:47:44.372392', 'step': 18378, 'epoch': 3} {'type': 'loss', 'content': 0.04967827722430229, 'timestamp': '2025-09-10 02:47:44.374900', 'step': 18379, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 128], 'flops': 3797092544000}, 'timestamp': '2025-09-10 02:47:44.404790', 'step': 18379, 'epoch': 3} {'type': 'loss', 'content': 0.07905752211809158, 'timestamp': '2025-09-10 02:47:44.428572', 'step': 18380, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 2847885091968}, 'timestamp': '2025-09-10 02:47:44.458868', 'step': 18380, 'epoch': 3} {'type': 'loss', 'content': 0.06815782189369202, 'timestamp': '2025-09-10 02:47:44.460999', 'step': 18381, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 2847885091968}, 'timestamp': '2025-09-10 02:47:44.491282', 'step': 18381, 'epoch': 3} {'type': 'loss', 'content': 0.026233937591314316, 'timestamp': '2025-09-10 02:47:44.493526', 'step': 18382, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 3322488817984}, 'timestamp': '2025-09-10 02:47:44.523696', 'step': 18382, 'epoch': 3} {'type': 'loss', 'content': 0.07205720990896225, 'timestamp': '2025-09-10 02:47:44.525914', 'step': 18383, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 3322488817984}, 'timestamp': '2025-09-10 02:47:44.555904', 'step': 18383, 'epoch': 3} {'type': 'loss', 'content': 0.10324651002883911, 'timestamp': '2025-09-10 02:47:44.579699', 'step': 18384, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 3322488817984}, 'timestamp': '2025-09-10 02:47:44.609716', 'step': 18384, 'epoch': 3} {'type': 'loss', 'content': 0.07535766065120697, 'timestamp': '2025-09-10 02:47:44.612625', 'step': 18385, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 3322488817984}, 'timestamp': '2025-09-10 02:47:44.642788', 'step': 18385, 'epoch': 3} {'type': 'loss', 'content': 0.03717267885804176, 'timestamp': '2025-09-10 02:47:44.645453', 'step': 18386, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 3322488817984}, 'timestamp': '2025-09-10 02:47:44.677025', 'step': 18386, 'epoch': 3} {'type': 'loss', 'content': 0.12557046115398407, 'timestamp': '2025-09-10 02:47:44.679353', 'step': 18387, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 2847885091968}, 'timestamp': '2025-09-10 02:47:44.708919', 'step': 18387, 'epoch': 3} {'type': 'loss', 'content': 0.040032461285591125, 'timestamp': '2025-09-10 02:47:44.732584', 'step': 18388, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 2847885091968}, 'timestamp': '2025-09-10 02:47:44.763292', 'step': 18388, 'epoch': 3} {'type': 'loss', 'content': 0.08688309043645859, 'timestamp': '2025-09-10 02:47:44.765884', 'step': 18389, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 3322488817984}, 'timestamp': '2025-09-10 02:47:44.795586', 'step': 18389, 'epoch': 3} {'type': 'loss', 'content': 0.06290825456380844, 'timestamp': '2025-09-10 02:47:44.798098', 'step': 18390, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 144], 'flops': 4271696270016}, 'timestamp': '2025-09-10 02:47:44.828258', 'step': 18390, 'epoch': 3} {'type': 'loss', 'content': 0.15610913932323456, 'timestamp': '2025-09-10 02:47:44.830792', 'step': 18391, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 2847885091968}, 'timestamp': '2025-09-10 02:47:44.862447', 'step': 18391, 'epoch': 3} {'type': 'loss', 'content': 0.0566072091460228, 'timestamp': '2025-09-10 02:47:44.886219', 'step': 18392, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 128], 'flops': 3797092544000}, 'timestamp': '2025-09-10 02:47:44.917124', 'step': 18392, 'epoch': 3} {'type': 'loss', 'content': 0.063910111784935, 'timestamp': '2025-09-10 02:47:44.919421', 'step': 18393, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 128], 'flops': 3797092544000}, 'timestamp': '2025-09-10 02:47:44.949063', 'step': 18393, 'epoch': 3} {'type': 'loss', 'content': 0.06695085763931274, 'timestamp': '2025-09-10 02:47:44.951655', 'step': 18394, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 144], 'flops': 4271696270016}, 'timestamp': '2025-09-10 02:47:44.985183', 'step': 18394, 'epoch': 3} {'type': 'loss', 'content': 0.09961580485105515, 'timestamp': '2025-09-10 02:47:44.987656', 'step': 18395, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 2847885091968}, 'timestamp': '2025-09-10 02:47:45.018802', 'step': 18395, 'epoch': 3} {'type': 'loss', 'content': 0.17882320284843445, 'timestamp': '2025-09-10 02:47:45.042635', 'step': 18396, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 3322488817984}, 'timestamp': '2025-09-10 02:47:45.072919', 'step': 18396, 'epoch': 3} {'type': 'loss', 'content': 0.03410155698657036, 'timestamp': '2025-09-10 02:47:45.075836', 'step': 18397, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 3322488817984}, 'timestamp': '2025-09-10 02:47:45.106817', 'step': 18397, 'epoch': 3} {'type': 'loss', 'content': 0.12331449240446091, 'timestamp': '2025-09-10 02:47:45.109336', 'step': 18398, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 2847885091968}, 'timestamp': '2025-09-10 02:47:45.139368', 'step': 18398, 'epoch': 3} {'type': 'loss', 'content': 0.05078914016485214, 'timestamp': '2025-09-10 02:47:45.141971', 'step': 18399, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 2847885091968}, 'timestamp': '2025-09-10 02:47:45.172470', 'step': 18399, 'epoch': 3} {'type': 'loss', 'content': 0.07254752516746521, 'timestamp': '2025-09-10 02:47:45.196493', 'step': 18400, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 128], 'flops': 3797092544000}, 'timestamp': '2025-09-10 02:47:45.226828', 'step': 18400, 'epoch': 3} {'type': 'loss', 'content': 0.11787054687738419, 'timestamp': '2025-09-10 02:47:45.229311', 'step': 18401, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 128], 'flops': 3797092544000}, 'timestamp': '2025-09-10 02:47:45.259561', 'step': 18401, 'epoch': 3} {'type': 'loss', 'content': 0.14989152550697327, 'timestamp': '2025-09-10 02:47:45.261862', 'step': 18402, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 144], 'flops': 4271696270016}, 'timestamp': '2025-09-10 02:47:45.292644', 'step': 18402, 'epoch': 3} {'type': 'loss', 'content': 0.07172803580760956, 'timestamp': '2025-09-10 02:47:45.296099', 'step': 18403, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 2847885091968}, 'timestamp': '2025-09-10 02:47:45.327926', 'step': 18403, 'epoch': 3} {'type': 'loss', 'content': 0.051089122891426086, 'timestamp': '2025-09-10 02:47:45.351367', 'step': 18404, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 3322488817984}, 'timestamp': '2025-09-10 02:47:45.383964', 'step': 18404, 'epoch': 3} {'type': 'loss', 'content': 0.05095837265253067, 'timestamp': '2025-09-10 02:47:45.386344', 'step': 18405, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 3322488817984}, 'timestamp': '2025-09-10 02:47:45.416508', 'step': 18405, 'epoch': 3} {'type': 'loss', 'content': 0.054044224321842194, 'timestamp': '2025-09-10 02:47:45.419100', 'step': 18406, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 2847885091968}, 'timestamp': '2025-09-10 02:47:45.449088', 'step': 18406, 'epoch': 3} {'type': 'loss', 'content': 0.02659717947244644, 'timestamp': '2025-09-10 02:47:45.451424', 'step': 18407, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 144], 'flops': 4271696270016}, 'timestamp': '2025-09-10 02:47:45.481680', 'step': 18407, 'epoch': 3} {'type': 'loss', 'content': 0.0636930987238884, 'timestamp': '2025-09-10 02:47:45.505128', 'step': 18408, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 3322488817984}, 'timestamp': '2025-09-10 02:47:45.535728', 'step': 18408, 'epoch': 3} {'type': 'loss', 'content': 0.05706443637609482, 'timestamp': '2025-09-10 02:47:45.541153', 'step': 18409, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 144], 'flops': 4271696270016}, 'timestamp': '2025-09-10 02:47:45.579423', 'step': 18409, 'epoch': 3} {'type': 'loss', 'content': 0.023217573761940002, 'timestamp': '2025-09-10 02:47:45.581887', 'step': 18410, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 2847885091968}, 'timestamp': '2025-09-10 02:47:45.611927', 'step': 18410, 'epoch': 3} {'type': 'loss', 'content': 0.03839065134525299, 'timestamp': '2025-09-10 02:47:45.614246', 'step': 18411, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 3322488817984}, 'timestamp': '2025-09-10 02:47:45.644960', 'step': 18411, 'epoch': 3} {'type': 'loss', 'content': 0.08031896501779556, 'timestamp': '2025-09-10 02:47:45.668627', 'step': 18412, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 3322488817984}, 'timestamp': '2025-09-10 02:47:45.698699', 'step': 18412, 'epoch': 3} {'type': 'loss', 'content': 0.020005209371447563, 'timestamp': '2025-09-10 02:47:45.701641', 'step': 18413, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 3322488817984}, 'timestamp': '2025-09-10 02:47:45.732193', 'step': 18413, 'epoch': 3} {'type': 'loss', 'content': 0.0828813835978508, 'timestamp': '2025-09-10 02:47:45.734671', 'step': 18414, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 128], 'flops': 3797092544000}, 'timestamp': '2025-09-10 02:47:45.764452', 'step': 18414, 'epoch': 3} {'type': 'loss', 'content': 0.07913311570882797, 'timestamp': '2025-09-10 02:47:45.766877', 'step': 18415, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 128], 'flops': 3797092544000}, 'timestamp': '2025-09-10 02:47:45.797221', 'step': 18415, 'epoch': 3} {'type': 'loss', 'content': 0.11977769434452057, 'timestamp': '2025-09-10 02:47:45.820788', 'step': 18416, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 192], 'flops': 5695507448064}, 'timestamp': '2025-09-10 02:47:45.865811', 'step': 18416, 'epoch': 3} {'type': 'loss', 'content': 0.02370515652000904, 'timestamp': '2025-09-10 02:47:45.872924', 'step': 18417, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 3322488817984}, 'timestamp': '2025-09-10 02:47:45.911448', 'step': 18417, 'epoch': 3} {'type': 'loss', 'content': 0.0936414897441864, 'timestamp': '2025-09-10 02:47:45.914033', 'step': 18418, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 3322488817984}, 'timestamp': '2025-09-10 02:47:45.944161', 'step': 18418, 'epoch': 3} {'type': 'loss', 'content': 0.04941440373659134, 'timestamp': '2025-09-10 02:47:45.953296', 'step': 18419, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 2847885091968}, 'timestamp': '2025-09-10 02:47:45.991662', 'step': 18419, 'epoch': 3} {'type': 'loss', 'content': 0.06348226964473724, 'timestamp': '2025-09-10 02:47:46.015297', 'step': 18420, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 3322488817984}, 'timestamp': '2025-09-10 02:47:46.046568', 'step': 18420, 'epoch': 3} {'type': 'loss', 'content': 0.10140412300825119, 'timestamp': '2025-09-10 02:47:46.049703', 'step': 18421, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 2847885091968}, 'timestamp': '2025-09-10 02:47:46.082136', 'step': 18421, 'epoch': 3} {'type': 'loss', 'content': 0.0765300840139389, 'timestamp': '2025-09-10 02:47:46.085095', 'step': 18422, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 128], 'flops': 3797092544000}, 'timestamp': '2025-09-10 02:47:46.117130', 'step': 18422, 'epoch': 3} {'type': 'loss', 'content': 0.0465785451233387, 'timestamp': '2025-09-10 02:47:46.124075', 'step': 18423, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 128], 'flops': 3797092544000}, 'timestamp': '2025-09-10 02:47:46.156757', 'step': 18423, 'epoch': 3} {'type': 'loss', 'content': 0.04266038537025452, 'timestamp': '2025-09-10 02:47:46.182943', 'step': 18424, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 2847885091968}, 'timestamp': '2025-09-10 02:47:46.216928', 'step': 18424, 'epoch': 3} {'type': 'loss', 'content': 0.06387456506490707, 'timestamp': '2025-09-10 02:47:46.220038', 'step': 18425, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 144], 'flops': 4271696270016}, 'timestamp': '2025-09-10 02:47:46.258043', 'step': 18425, 'epoch': 3} {'type': 'loss', 'content': 0.048533838242292404, 'timestamp': '2025-09-10 02:47:46.269016', 'step': 18426, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 3322488817984}, 'timestamp': '2025-09-10 02:47:46.301823', 'step': 18426, 'epoch': 3} {'type': 'loss', 'content': 0.031070444732904434, 'timestamp': '2025-09-10 02:47:46.304916', 'step': 18427, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 2847885091968}, 'timestamp': '2025-09-10 02:47:46.343923', 'step': 18427, 'epoch': 3} {'type': 'loss', 'content': 0.07847457379102707, 'timestamp': '2025-09-10 02:47:46.368750', 'step': 18428, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 2847885091968}, 'timestamp': '2025-09-10 02:47:46.402207', 'step': 18428, 'epoch': 3} {'type': 'loss', 'content': 0.06660257279872894, 'timestamp': '2025-09-10 02:47:46.406317', 'step': 18429, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 3322488817984}, 'timestamp': '2025-09-10 02:47:46.438384', 'step': 18429, 'epoch': 3} {'type': 'loss', 'content': 0.09332308173179626, 'timestamp': '2025-09-10 02:47:46.440736', 'step': 18430, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 3322488817984}, 'timestamp': '2025-09-10 02:47:46.471516', 'step': 18430, 'epoch': 3} {'type': 'loss', 'content': 0.07931926846504211, 'timestamp': '2025-09-10 02:47:46.483576', 'step': 18431, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 128], 'flops': 3797092544000}, 'timestamp': '2025-09-10 02:47:46.522612', 'step': 18431, 'epoch': 3} {'type': 'loss', 'content': 0.0856763944029808, 'timestamp': '2025-09-10 02:47:46.546661', 'step': 18432, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 2847885091968}, 'timestamp': '2025-09-10 02:47:46.578657', 'step': 18432, 'epoch': 3} {'type': 'loss', 'content': 0.023447206243872643, 'timestamp': '2025-09-10 02:47:46.586085', 'step': 18433, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 128], 'flops': 3797092544000}, 'timestamp': '2025-09-10 02:47:46.618781', 'step': 18433, 'epoch': 3} {'type': 'loss', 'content': 0.11221164464950562, 'timestamp': '2025-09-10 02:47:46.622465', 'step': 18434, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 3322488817984}, 'timestamp': '2025-09-10 02:47:46.655667', 'step': 18434, 'epoch': 3} {'type': 'loss', 'content': 0.10031532496213913, 'timestamp': '2025-09-10 02:47:46.658597', 'step': 18435, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 3322488817984}, 'timestamp': '2025-09-10 02:47:46.691199', 'step': 18435, 'epoch': 3} {'type': 'loss', 'content': 0.051889464259147644, 'timestamp': '2025-09-10 02:47:46.714897', 'step': 18436, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 3322488817984}, 'timestamp': '2025-09-10 02:47:46.749571', 'step': 18436, 'epoch': 3} {'type': 'loss', 'content': 0.09662320464849472, 'timestamp': '2025-09-10 02:47:46.752708', 'step': 18437, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 160], 'flops': 4746299996032}, 'timestamp': '2025-09-10 02:47:46.803707', 'step': 18437, 'epoch': 3} {'type': 'loss', 'content': 0.08524682372808456, 'timestamp': '2025-09-10 02:47:46.806758', 'step': 18438, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 144], 'flops': 4271696270016}, 'timestamp': '2025-09-10 02:47:46.840931', 'step': 18438, 'epoch': 3} {'type': 'loss', 'content': 0.04986824467778206, 'timestamp': '2025-09-10 02:47:46.844682', 'step': 18439, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 2847885091968}, 'timestamp': '2025-09-10 02:47:46.880358', 'step': 18439, 'epoch': 3} {'type': 'loss', 'content': 0.09121320396661758, 'timestamp': '2025-09-10 02:47:46.904683', 'step': 18440, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 128], 'flops': 3797092544000}, 'timestamp': '2025-09-10 02:47:46.934846', 'step': 18440, 'epoch': 3} {'type': 'loss', 'content': 0.06178956851363182, 'timestamp': '2025-09-10 02:47:46.937603', 'step': 18441, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 3322488817984}, 'timestamp': '2025-09-10 02:47:46.970441', 'step': 18441, 'epoch': 3} {'type': 'loss', 'content': 0.05149182677268982, 'timestamp': '2025-09-10 02:47:46.972969', 'step': 18442, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 144], 'flops': 4271696270016}, 'timestamp': '2025-09-10 02:47:47.005189', 'step': 18442, 'epoch': 3} {'type': 'loss', 'content': 0.10572338104248047, 'timestamp': '2025-09-10 02:47:47.007692', 'step': 18443, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 2847885091968}, 'timestamp': '2025-09-10 02:47:47.038325', 'step': 18443, 'epoch': 3} {'type': 'loss', 'content': 0.03537307679653168, 'timestamp': '2025-09-10 02:47:47.061890', 'step': 18444, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 192], 'flops': 5695507448064}, 'timestamp': '2025-09-10 02:47:47.094212', 'step': 18444, 'epoch': 3} {'type': 'loss', 'content': 0.048697128891944885, 'timestamp': '2025-09-10 02:47:47.096551', 'step': 18445, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 3322488817984}, 'timestamp': '2025-09-10 02:47:47.129467', 'step': 18445, 'epoch': 3} {'type': 'loss', 'content': 0.055292993783950806, 'timestamp': '2025-09-10 02:47:47.135455', 'step': 18446, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 2847885091968}, 'timestamp': '2025-09-10 02:47:47.169496', 'step': 18446, 'epoch': 3} {'type': 'loss', 'content': 0.08097438514232635, 'timestamp': '2025-09-10 02:47:47.173493', 'step': 18447, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 160], 'flops': 4746299996032}, 'timestamp': '2025-09-10 02:47:47.224248', 'step': 18447, 'epoch': 3} {'type': 'loss', 'content': 0.02984885685145855, 'timestamp': '2025-09-10 02:47:47.248551', 'step': 18448, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 3322488817984}, 'timestamp': '2025-09-10 02:47:47.280972', 'step': 18448, 'epoch': 3} {'type': 'loss', 'content': 0.048456329852342606, 'timestamp': '2025-09-10 02:47:47.285419', 'step': 18449, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 3322488817984}, 'timestamp': '2025-09-10 02:47:47.316839', 'step': 18449, 'epoch': 3} {'type': 'loss', 'content': 0.04247664287686348, 'timestamp': '2025-09-10 02:47:47.319529', 'step': 18450, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 80], 'flops': 2373281365952}, 'timestamp': '2025-09-10 02:47:47.350764', 'step': 18450, 'epoch': 3} {'type': 'loss', 'content': 0.0247476976364851, 'timestamp': '2025-09-10 02:47:47.356794', 'step': 18451, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 3322488817984}, 'timestamp': '2025-09-10 02:47:47.401873', 'step': 18451, 'epoch': 3} {'type': 'loss', 'content': 0.04611649364233017, 'timestamp': '2025-09-10 02:47:47.425745', 'step': 18452, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 2847885091968}, 'timestamp': '2025-09-10 02:47:47.456378', 'step': 18452, 'epoch': 3} {'type': 'loss', 'content': 0.09261362254619598, 'timestamp': '2025-09-10 02:47:47.458735', 'step': 18453, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 128], 'flops': 3797092544000}, 'timestamp': '2025-09-10 02:47:47.488885', 'step': 18453, 'epoch': 3} {'type': 'loss', 'content': 0.09219043701887131, 'timestamp': '2025-09-10 02:47:47.491337', 'step': 18454, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 2847885091968}, 'timestamp': '2025-09-10 02:47:47.521283', 'step': 18454, 'epoch': 3} {'type': 'loss', 'content': 0.056960515677928925, 'timestamp': '2025-09-10 02:47:47.524306', 'step': 18455, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 2847885091968}, 'timestamp': '2025-09-10 02:47:47.554254', 'step': 18455, 'epoch': 3} {'type': 'loss', 'content': 0.09820318967103958, 'timestamp': '2025-09-10 02:47:47.578035', 'step': 18456, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 2847885091968}, 'timestamp': '2025-09-10 02:47:47.615174', 'step': 18456, 'epoch': 3} {'type': 'loss', 'content': 0.04966812580823898, 'timestamp': '2025-09-10 02:47:47.618753', 'step': 18457, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 128], 'flops': 3797092544000}, 'timestamp': '2025-09-10 02:47:47.659780', 'step': 18457, 'epoch': 3} {'type': 'loss', 'content': 0.01790342666208744, 'timestamp': '2025-09-10 02:47:47.662596', 'step': 18458, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 128], 'flops': 3797092544000}, 'timestamp': '2025-09-10 02:47:47.698957', 'step': 18458, 'epoch': 3} {'type': 'loss', 'content': 0.11075685918331146, 'timestamp': '2025-09-10 02:47:47.702701', 'step': 18459, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 3322488817984}, 'timestamp': '2025-09-10 02:47:47.738344', 'step': 18459, 'epoch': 3} {'type': 'loss', 'content': 0.10469913482666016, 'timestamp': '2025-09-10 02:47:47.763371', 'step': 18460, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 128], 'flops': 3797092544000}, 'timestamp': '2025-09-10 02:47:47.797859', 'step': 18460, 'epoch': 3} {'type': 'loss', 'content': 0.09042704850435257, 'timestamp': '2025-09-10 02:47:47.802207', 'step': 18461, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 2847885091968}, 'timestamp': '2025-09-10 02:47:47.838943', 'step': 18461, 'epoch': 3} {'type': 'loss', 'content': 0.08362485468387604, 'timestamp': '2025-09-10 02:47:47.841485', 'step': 18462, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 80], 'flops': 2373281365952}, 'timestamp': '2025-09-10 02:47:47.872046', 'step': 18462, 'epoch': 3} {'type': 'loss', 'content': 0.06875737011432648, 'timestamp': '2025-09-10 02:47:47.874465', 'step': 18463, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 3322488817984}, 'timestamp': '2025-09-10 02:47:47.905426', 'step': 18463, 'epoch': 3} {'type': 'loss', 'content': 0.0264749638736248, 'timestamp': '2025-09-10 02:47:47.929188', 'step': 18464, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 3322488817984}, 'timestamp': '2025-09-10 02:47:47.960644', 'step': 18464, 'epoch': 3} {'type': 'loss', 'content': 0.09596189111471176, 'timestamp': '2025-09-10 02:47:47.962905', 'step': 18465, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 3322488817984}, 'timestamp': '2025-09-10 02:47:47.993023', 'step': 18465, 'epoch': 3} {'type': 'loss', 'content': 0.11339928954839706, 'timestamp': '2025-09-10 02:47:47.995801', 'step': 18466, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 176], 'flops': 5220903722048}, 'timestamp': '2025-09-10 02:47:48.026217', 'step': 18466, 'epoch': 3} {'type': 'loss', 'content': 0.060601696372032166, 'timestamp': '2025-09-10 02:47:48.030481', 'step': 18467, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 144], 'flops': 4271696270016}, 'timestamp': '2025-09-10 02:47:48.062384', 'step': 18467, 'epoch': 3} {'type': 'loss', 'content': 0.04921824857592583, 'timestamp': '2025-09-10 02:47:48.085736', 'step': 18468, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 128], 'flops': 3797092544000}, 'timestamp': '2025-09-10 02:47:48.115459', 'step': 18468, 'epoch': 3} {'type': 'loss', 'content': 0.03659771755337715, 'timestamp': '2025-09-10 02:47:48.117821', 'step': 18469, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 3322488817984}, 'timestamp': '2025-09-10 02:47:48.147073', 'step': 18469, 'epoch': 3} {'type': 'loss', 'content': 0.06370529532432556, 'timestamp': '2025-09-10 02:47:48.149997', 'step': 18470, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 2847885091968}, 'timestamp': '2025-09-10 02:47:48.180706', 'step': 18470, 'epoch': 3} {'type': 'loss', 'content': 0.06279326230287552, 'timestamp': '2025-09-10 02:47:48.183122', 'step': 18471, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 128], 'flops': 3797092544000}, 'timestamp': '2025-09-10 02:47:48.213195', 'step': 18471, 'epoch': 3} {'type': 'loss', 'content': 0.04587852954864502, 'timestamp': '2025-09-10 02:47:48.237408', 'step': 18472, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 3322488817984}, 'timestamp': '2025-09-10 02:47:48.268300', 'step': 18472, 'epoch': 3} {'type': 'loss', 'content': 0.0437520332634449, 'timestamp': '2025-09-10 02:47:48.271198', 'step': 18473, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 144], 'flops': 4271696270016}, 'timestamp': '2025-09-10 02:47:48.302429', 'step': 18473, 'epoch': 3} {'type': 'loss', 'content': 0.05455788969993591, 'timestamp': '2025-09-10 02:47:48.304904', 'step': 18474, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 2847885091968}, 'timestamp': '2025-09-10 02:47:48.334969', 'step': 18474, 'epoch': 3} {'type': 'loss', 'content': 0.03838375210762024, 'timestamp': '2025-09-10 02:47:48.337498', 'step': 18475, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 2847885091968}, 'timestamp': '2025-09-10 02:47:48.368298', 'step': 18475, 'epoch': 3} {'type': 'loss', 'content': 0.01702861487865448, 'timestamp': '2025-09-10 02:47:48.395415', 'step': 18476, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 2847885091968}, 'timestamp': '2025-09-10 02:47:48.426208', 'step': 18476, 'epoch': 3} {'type': 'loss', 'content': 0.06111390143632889, 'timestamp': '2025-09-10 02:47:48.428667', 'step': 18477, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 3322488817984}, 'timestamp': '2025-09-10 02:47:48.458779', 'step': 18477, 'epoch': 3} {'type': 'loss', 'content': 0.08021865040063858, 'timestamp': '2025-09-10 02:47:48.461519', 'step': 18478, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 3322488817984}, 'timestamp': '2025-09-10 02:47:48.492886', 'step': 18478, 'epoch': 3} {'type': 'loss', 'content': 0.13497474789619446, 'timestamp': '2025-09-10 02:47:48.495118', 'step': 18479, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 3322488817984}, 'timestamp': '2025-09-10 02:47:48.526522', 'step': 18479, 'epoch': 3} {'type': 'loss', 'content': 0.03235220909118652, 'timestamp': '2025-09-10 02:47:48.550236', 'step': 18480, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 128], 'flops': 3797092544000}, 'timestamp': '2025-09-10 02:47:48.580738', 'step': 18480, 'epoch': 3} {'type': 'loss', 'content': 0.08841773122549057, 'timestamp': '2025-09-10 02:47:48.583184', 'step': 18481, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 144], 'flops': 4271696270016}, 'timestamp': '2025-09-10 02:47:48.615232', 'step': 18481, 'epoch': 3} {'type': 'loss', 'content': 0.08158135414123535, 'timestamp': '2025-09-10 02:47:48.617814', 'step': 18482, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 80], 'flops': 2373281365952}, 'timestamp': '2025-09-10 02:47:48.649014', 'step': 18482, 'epoch': 3} {'type': 'loss', 'content': 0.08478298038244247, 'timestamp': '2025-09-10 02:47:48.651459', 'step': 18483, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 3322488817984}, 'timestamp': '2025-09-10 02:47:48.681195', 'step': 18483, 'epoch': 3} {'type': 'loss', 'content': 0.04472634941339493, 'timestamp': '2025-09-10 02:47:48.706817', 'step': 18484, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 2847885091968}, 'timestamp': '2025-09-10 02:47:48.738546', 'step': 18484, 'epoch': 3} {'type': 'loss', 'content': 0.03767647221684456, 'timestamp': '2025-09-10 02:47:48.740835', 'step': 18485, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 144], 'flops': 4271696270016}, 'timestamp': '2025-09-10 02:47:48.771330', 'step': 18485, 'epoch': 3} {'type': 'loss', 'content': 0.12188857793807983, 'timestamp': '2025-09-10 02:47:48.773808', 'step': 18486, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 2847885091968}, 'timestamp': '2025-09-10 02:47:48.804124', 'step': 18486, 'epoch': 3} {'type': 'loss', 'content': 0.03922257572412491, 'timestamp': '2025-09-10 02:47:48.807880', 'step': 18487, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 2847885091968}, 'timestamp': '2025-09-10 02:47:48.838245', 'step': 18487, 'epoch': 3} {'type': 'loss', 'content': 0.050966352224349976, 'timestamp': '2025-09-10 02:47:48.861875', 'step': 18488, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 144], 'flops': 4271696270016}, 'timestamp': '2025-09-10 02:47:48.892571', 'step': 18488, 'epoch': 3} {'type': 'loss', 'content': 0.05398827791213989, 'timestamp': '2025-09-10 02:47:48.894971', 'step': 18489, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 2847885091968}, 'timestamp': '2025-09-10 02:47:48.925295', 'step': 18489, 'epoch': 3} {'type': 'loss', 'content': 0.10995467007160187, 'timestamp': '2025-09-10 02:47:48.927618', 'step': 18490, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 160], 'flops': 4746299996032}, 'timestamp': '2025-09-10 02:47:48.958059', 'step': 18490, 'epoch': 3} {'type': 'loss', 'content': 0.10763750970363617, 'timestamp': '2025-09-10 02:47:48.960721', 'step': 18491, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 128], 'flops': 3797092544000}, 'timestamp': '2025-09-10 02:47:48.991345', 'step': 18491, 'epoch': 3} {'type': 'loss', 'content': 0.1352282464504242, 'timestamp': '2025-09-10 02:47:49.014754', 'step': 18492, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 144], 'flops': 4271696270016}, 'timestamp': '2025-09-10 02:47:49.046548', 'step': 18492, 'epoch': 3} {'type': 'loss', 'content': 0.12209752947092056, 'timestamp': '2025-09-10 02:47:49.049332', 'step': 18493, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 3322488817984}, 'timestamp': '2025-09-10 02:47:49.079557', 'step': 18493, 'epoch': 3} {'type': 'loss', 'content': 0.0980573296546936, 'timestamp': '2025-09-10 02:47:49.082265', 'step': 18494, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 80], 'flops': 2373281365952}, 'timestamp': '2025-09-10 02:47:49.112211', 'step': 18494, 'epoch': 3} {'type': 'loss', 'content': 0.04862675815820694, 'timestamp': '2025-09-10 02:47:49.114504', 'step': 18495, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 2847885091968}, 'timestamp': '2025-09-10 02:47:49.145139', 'step': 18495, 'epoch': 3} {'type': 'loss', 'content': 0.04994743689894676, 'timestamp': '2025-09-10 02:47:49.168840', 'step': 18496, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 128], 'flops': 3797092544000}, 'timestamp': '2025-09-10 02:47:49.199189', 'step': 18496, 'epoch': 3} {'type': 'loss', 'content': 0.06682249158620834, 'timestamp': '2025-09-10 02:47:49.201506', 'step': 18497, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 128], 'flops': 3797092544000}, 'timestamp': '2025-09-10 02:47:49.231550', 'step': 18497, 'epoch': 3} {'type': 'loss', 'content': 0.09315917640924454, 'timestamp': '2025-09-10 02:47:49.234283', 'step': 18498, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 128], 'flops': 3797092544000}, 'timestamp': '2025-09-10 02:47:49.266151', 'step': 18498, 'epoch': 3} {'type': 'loss', 'content': 0.017410648986697197, 'timestamp': '2025-09-10 02:47:49.268737', 'step': 18499, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 2847885091968}, 'timestamp': '2025-09-10 02:47:49.299356', 'step': 18499, 'epoch': 3} {'type': 'loss', 'content': 0.07180987298488617, 'timestamp': '2025-09-10 02:47:49.323082', 'step': 18500, 'epoch': 3} {'type': 'info', 'content': 'Checkpoint saved at step 18500', 'timestamp': '2025-09-10 02:47:54.101376', 'step': 18500, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 3322488817984}, 'timestamp': '2025-09-10 02:47:54.132882', 'step': 18500, 'epoch': 3} {'type': 'loss', 'content': 0.11796268075704575, 'timestamp': '2025-09-10 02:47:54.135875', 'step': 18501, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 128], 'flops': 3797092544000}, 'timestamp': '2025-09-10 02:47:54.167862', 'step': 18501, 'epoch': 3} {'type': 'loss', 'content': 0.06552931666374207, 'timestamp': '2025-09-10 02:47:54.171791', 'step': 18502, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 2847885091968}, 'timestamp': '2025-09-10 02:47:54.205792', 'step': 18502, 'epoch': 3} {'type': 'loss', 'content': 0.04746072739362717, 'timestamp': '2025-09-10 02:47:54.208281', 'step': 18503, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 144], 'flops': 4271696270016}, 'timestamp': '2025-09-10 02:47:54.238237', 'step': 18503, 'epoch': 3} {'type': 'loss', 'content': 0.08441249281167984, 'timestamp': '2025-09-10 02:47:54.263914', 'step': 18504, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 2847885091968}, 'timestamp': '2025-09-10 02:47:54.296158', 'step': 18504, 'epoch': 3} {'type': 'loss', 'content': 0.1048399955034256, 'timestamp': '2025-09-10 02:47:54.300431', 'step': 18505, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 3322488817984}, 'timestamp': '2025-09-10 02:47:54.332724', 'step': 18505, 'epoch': 3} {'type': 'loss', 'content': 0.12935632467269897, 'timestamp': '2025-09-10 02:47:54.335974', 'step': 18506, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 2847885091968}, 'timestamp': '2025-09-10 02:47:54.368826', 'step': 18506, 'epoch': 3} {'type': 'loss', 'content': 0.02437315322458744, 'timestamp': '2025-09-10 02:47:54.371184', 'step': 18507, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 2847885091968}, 'timestamp': '2025-09-10 02:47:54.404184', 'step': 18507, 'epoch': 3} {'type': 'loss', 'content': 0.06152800843119621, 'timestamp': '2025-09-10 02:47:54.427621', 'step': 18508, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 3322488817984}, 'timestamp': '2025-09-10 02:47:54.462185', 'step': 18508, 'epoch': 3} {'type': 'loss', 'content': 0.08672314882278442, 'timestamp': '2025-09-10 02:47:54.464362', 'step': 18509, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 3322488817984}, 'timestamp': '2025-09-10 02:47:54.493936', 'step': 18509, 'epoch': 3} {'type': 'loss', 'content': 0.04400760307908058, 'timestamp': '2025-09-10 02:47:54.496779', 'step': 18510, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 3322488817984}, 'timestamp': '2025-09-10 02:47:54.527646', 'step': 18510, 'epoch': 3} {'type': 'loss', 'content': 0.03602626547217369, 'timestamp': '2025-09-10 02:47:54.530075', 'step': 18511, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 2847885091968}, 'timestamp': '2025-09-10 02:47:54.560639', 'step': 18511, 'epoch': 3} {'type': 'loss', 'content': 0.0351213775575161, 'timestamp': '2025-09-10 02:47:54.584381', 'step': 18512, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 144], 'flops': 4271696270016}, 'timestamp': '2025-09-10 02:47:54.615885', 'step': 18512, 'epoch': 3} {'type': 'loss', 'content': 0.040648143738508224, 'timestamp': '2025-09-10 02:47:54.618245', 'step': 18513, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 3322488817984}, 'timestamp': '2025-09-10 02:47:54.647803', 'step': 18513, 'epoch': 3} {'type': 'loss', 'content': 0.10435131937265396, 'timestamp': '2025-09-10 02:47:54.650169', 'step': 18514, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 128], 'flops': 3797092544000}, 'timestamp': '2025-09-10 02:47:54.680902', 'step': 18514, 'epoch': 3} {'type': 'loss', 'content': 0.09240395575761795, 'timestamp': '2025-09-10 02:47:54.683303', 'step': 18515, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 128], 'flops': 3797092544000}, 'timestamp': '2025-09-10 02:47:54.714418', 'step': 18515, 'epoch': 3} {'type': 'loss', 'content': 0.055285025388002396, 'timestamp': '2025-09-10 02:47:54.738247', 'step': 18516, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 144], 'flops': 4271696270016}, 'timestamp': '2025-09-10 02:47:54.769239', 'step': 18516, 'epoch': 3} {'type': 'loss', 'content': 0.10244414210319519, 'timestamp': '2025-09-10 02:47:54.771770', 'step': 18517, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 160], 'flops': 4746299996032}, 'timestamp': '2025-09-10 02:47:54.802547', 'step': 18517, 'epoch': 3} {'type': 'loss', 'content': 0.05640220269560814, 'timestamp': '2025-09-10 02:47:54.805496', 'step': 18518, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 3322488817984}, 'timestamp': '2025-09-10 02:47:54.835341', 'step': 18518, 'epoch': 3} {'type': 'loss', 'content': 0.04372916370630264, 'timestamp': '2025-09-10 02:47:54.837976', 'step': 18519, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 2847885091968}, 'timestamp': '2025-09-10 02:47:54.868722', 'step': 18519, 'epoch': 3} {'type': 'loss', 'content': 0.04993332177400589, 'timestamp': '2025-09-10 02:47:54.892381', 'step': 18520, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 3322488817984}, 'timestamp': '2025-09-10 02:47:54.923255', 'step': 18520, 'epoch': 3} {'type': 'loss', 'content': 0.09305174648761749, 'timestamp': '2025-09-10 02:47:54.925519', 'step': 18521, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 3322488817984}, 'timestamp': '2025-09-10 02:47:54.955540', 'step': 18521, 'epoch': 3} {'type': 'loss', 'content': 0.017237773165106773, 'timestamp': '2025-09-10 02:47:54.958042', 'step': 18522, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 128], 'flops': 3797092544000}, 'timestamp': '2025-09-10 02:47:54.988478', 'step': 18522, 'epoch': 3} {'type': 'loss', 'content': 0.03093300200998783, 'timestamp': '2025-09-10 02:47:54.990913', 'step': 18523, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 128], 'flops': 3797092544000}, 'timestamp': '2025-09-10 02:47:55.021917', 'step': 18523, 'epoch': 3} {'type': 'loss', 'content': 0.0442972294986248, 'timestamp': '2025-09-10 02:47:55.045307', 'step': 18524, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 3322488817984}, 'timestamp': '2025-09-10 02:47:55.076245', 'step': 18524, 'epoch': 3} {'type': 'loss', 'content': 0.05140835419297218, 'timestamp': '2025-09-10 02:47:55.078719', 'step': 18525, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 144], 'flops': 4271696270016}, 'timestamp': '2025-09-10 02:47:55.110532', 'step': 18525, 'epoch': 3} {'type': 'loss', 'content': 0.037348054349422455, 'timestamp': '2025-09-10 02:47:55.112938', 'step': 18526, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 3322488817984}, 'timestamp': '2025-09-10 02:47:55.143519', 'step': 18526, 'epoch': 3} {'type': 'loss', 'content': 0.15552212297916412, 'timestamp': '2025-09-10 02:47:55.145904', 'step': 18527, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 3322488817984}, 'timestamp': '2025-09-10 02:47:55.176243', 'step': 18527, 'epoch': 3} {'type': 'loss', 'content': 0.032564133405685425, 'timestamp': '2025-09-10 02:47:55.199889', 'step': 18528, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 3322488817984}, 'timestamp': '2025-09-10 02:47:55.230225', 'step': 18528, 'epoch': 3} {'type': 'loss', 'content': 0.09102137386798859, 'timestamp': '2025-09-10 02:47:55.232724', 'step': 18529, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 128], 'flops': 3797092544000}, 'timestamp': '2025-09-10 02:47:55.262491', 'step': 18529, 'epoch': 3} {'type': 'loss', 'content': 0.06011619418859482, 'timestamp': '2025-09-10 02:47:55.265068', 'step': 18530, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 3322488817984}, 'timestamp': '2025-09-10 02:47:55.295334', 'step': 18530, 'epoch': 3} {'type': 'loss', 'content': 0.11383664608001709, 'timestamp': '2025-09-10 02:47:55.298243', 'step': 18531, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 2847885091968}, 'timestamp': '2025-09-10 02:47:55.330138', 'step': 18531, 'epoch': 3} {'type': 'loss', 'content': 0.12432262301445007, 'timestamp': '2025-09-10 02:47:55.353800', 'step': 18532, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 80], 'flops': 2373281365952}, 'timestamp': '2025-09-10 02:47:55.385747', 'step': 18532, 'epoch': 3} {'type': 'loss', 'content': 0.08576596528291702, 'timestamp': '2025-09-10 02:47:55.389709', 'step': 18533, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 3322488817984}, 'timestamp': '2025-09-10 02:47:55.419973', 'step': 18533, 'epoch': 3} {'type': 'loss', 'content': 0.1674390733242035, 'timestamp': '2025-09-10 02:47:55.423529', 'step': 18534, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 144], 'flops': 4271696270016}, 'timestamp': '2025-09-10 02:47:55.454194', 'step': 18534, 'epoch': 3} {'type': 'loss', 'content': 0.06101491302251816, 'timestamp': '2025-09-10 02:47:55.456511', 'step': 18535, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 2847885091968}, 'timestamp': '2025-09-10 02:47:55.487363', 'step': 18535, 'epoch': 3} {'type': 'loss', 'content': 0.12837956845760345, 'timestamp': '2025-09-10 02:47:55.512458', 'step': 18536, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 80], 'flops': 2373281365952}, 'timestamp': '2025-09-10 02:47:55.542964', 'step': 18536, 'epoch': 3} {'type': 'loss', 'content': 0.05405651777982712, 'timestamp': '2025-09-10 02:47:55.545446', 'step': 18537, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 128], 'flops': 3797092544000}, 'timestamp': '2025-09-10 02:47:55.576031', 'step': 18537, 'epoch': 3} {'type': 'loss', 'content': 0.07505343109369278, 'timestamp': '2025-09-10 02:47:55.578538', 'step': 18538, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 3322488817984}, 'timestamp': '2025-09-10 02:47:55.609333', 'step': 18538, 'epoch': 3} {'type': 'loss', 'content': 0.04041478782892227, 'timestamp': '2025-09-10 02:47:55.611393', 'step': 18539, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 128], 'flops': 3797092544000}, 'timestamp': '2025-09-10 02:47:55.641602', 'step': 18539, 'epoch': 3} {'type': 'loss', 'content': 0.10462108254432678, 'timestamp': '2025-09-10 02:47:55.665339', 'step': 18540, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 2847885091968}, 'timestamp': '2025-09-10 02:47:55.696287', 'step': 18540, 'epoch': 3} {'type': 'loss', 'content': 0.08421371132135391, 'timestamp': '2025-09-10 02:47:55.700919', 'step': 18541, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 160], 'flops': 4746299996032}, 'timestamp': '2025-09-10 02:47:55.732619', 'step': 18541, 'epoch': 3} {'type': 'loss', 'content': 0.049783043563365936, 'timestamp': '2025-09-10 02:47:55.735460', 'step': 18542, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 128], 'flops': 3797092544000}, 'timestamp': '2025-09-10 02:47:55.765546', 'step': 18542, 'epoch': 3} {'type': 'loss', 'content': 0.07350283861160278, 'timestamp': '2025-09-10 02:47:55.768877', 'step': 18543, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 3322488817984}, 'timestamp': '2025-09-10 02:47:55.800638', 'step': 18543, 'epoch': 3} {'type': 'loss', 'content': 0.1440289318561554, 'timestamp': '2025-09-10 02:47:55.824412', 'step': 18544, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 3322488817984}, 'timestamp': '2025-09-10 02:47:55.855200', 'step': 18544, 'epoch': 3} {'type': 'loss', 'content': 0.04401614889502525, 'timestamp': '2025-09-10 02:47:55.857587', 'step': 18545, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 2847885091968}, 'timestamp': '2025-09-10 02:47:55.888945', 'step': 18545, 'epoch': 3} {'type': 'loss', 'content': 0.008389770984649658, 'timestamp': '2025-09-10 02:47:55.891550', 'step': 18546, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 3322488817984}, 'timestamp': '2025-09-10 02:47:55.923853', 'step': 18546, 'epoch': 3} {'type': 'loss', 'content': 0.01656700111925602, 'timestamp': '2025-09-10 02:47:55.927500', 'step': 18547, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 128], 'flops': 3797092544000}, 'timestamp': '2025-09-10 02:47:55.959138', 'step': 18547, 'epoch': 3} {'type': 'loss', 'content': 0.038882724940776825, 'timestamp': '2025-09-10 02:47:55.982540', 'step': 18548, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 128], 'flops': 3797092544000}, 'timestamp': '2025-09-10 02:47:56.013026', 'step': 18548, 'epoch': 3} {'type': 'loss', 'content': 0.12167329341173172, 'timestamp': '2025-09-10 02:47:56.015396', 'step': 18549, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 2847885091968}, 'timestamp': '2025-09-10 02:47:56.045443', 'step': 18549, 'epoch': 3} {'type': 'loss', 'content': 0.07207489013671875, 'timestamp': '2025-09-10 02:47:56.047746', 'step': 18550, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 128], 'flops': 3797092544000}, 'timestamp': '2025-09-10 02:47:56.078342', 'step': 18550, 'epoch': 3} {'type': 'loss', 'content': 0.07649926096200943, 'timestamp': '2025-09-10 02:47:56.080868', 'step': 18551, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 3322488817984}, 'timestamp': '2025-09-10 02:47:56.112644', 'step': 18551, 'epoch': 3} {'type': 'loss', 'content': 0.11370497196912766, 'timestamp': '2025-09-10 02:47:56.136325', 'step': 18552, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 3322488817984}, 'timestamp': '2025-09-10 02:47:56.167252', 'step': 18552, 'epoch': 3} {'type': 'loss', 'content': 0.07416355609893799, 'timestamp': '2025-09-10 02:47:56.169486', 'step': 18553, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 3322488817984}, 'timestamp': '2025-09-10 02:47:56.200475', 'step': 18553, 'epoch': 3} {'type': 'loss', 'content': 0.0720394104719162, 'timestamp': '2025-09-10 02:47:56.202659', 'step': 18554, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 80], 'flops': 2373281365952}, 'timestamp': '2025-09-10 02:47:56.233033', 'step': 18554, 'epoch': 3} {'type': 'loss', 'content': 0.032614048570394516, 'timestamp': '2025-09-10 02:47:56.235513', 'step': 18555, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 2847885091968}, 'timestamp': '2025-09-10 02:47:56.266405', 'step': 18555, 'epoch': 3} {'type': 'loss', 'content': 0.1254071146249771, 'timestamp': '2025-09-10 02:47:56.289902', 'step': 18556, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 160], 'flops': 4746299996032}, 'timestamp': '2025-09-10 02:47:56.321510', 'step': 18556, 'epoch': 3} {'type': 'loss', 'content': 0.06209436058998108, 'timestamp': '2025-09-10 02:47:56.324102', 'step': 18557, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 2847885091968}, 'timestamp': '2025-09-10 02:47:56.354948', 'step': 18557, 'epoch': 3} {'type': 'loss', 'content': 0.06913067400455475, 'timestamp': '2025-09-10 02:47:56.357853', 'step': 18558, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 128], 'flops': 3797092544000}, 'timestamp': '2025-09-10 02:47:56.388678', 'step': 18558, 'epoch': 3} {'type': 'loss', 'content': 0.05796970799565315, 'timestamp': '2025-09-10 02:47:56.390984', 'step': 18559, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 128], 'flops': 3797092544000}, 'timestamp': '2025-09-10 02:47:56.421311', 'step': 18559, 'epoch': 3} {'type': 'loss', 'content': 0.1032857820391655, 'timestamp': '2025-09-10 02:47:56.444833', 'step': 18560, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 144], 'flops': 4271696270016}, 'timestamp': '2025-09-10 02:47:56.476757', 'step': 18560, 'epoch': 3} {'type': 'loss', 'content': 0.07600761950016022, 'timestamp': '2025-09-10 02:47:56.479492', 'step': 18561, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 2847885091968}, 'timestamp': '2025-09-10 02:47:56.510026', 'step': 18561, 'epoch': 3} {'type': 'loss', 'content': 0.07392977178096771, 'timestamp': '2025-09-10 02:47:56.512625', 'step': 18562, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 3322488817984}, 'timestamp': '2025-09-10 02:47:56.543564', 'step': 18562, 'epoch': 3} {'type': 'loss', 'content': 0.048484645783901215, 'timestamp': '2025-09-10 02:47:56.546157', 'step': 18563, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 128], 'flops': 3797092544000}, 'timestamp': '2025-09-10 02:47:56.576969', 'step': 18563, 'epoch': 3} {'type': 'loss', 'content': 0.037066102027893066, 'timestamp': '2025-09-10 02:47:56.600827', 'step': 18564, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 144], 'flops': 4271696270016}, 'timestamp': '2025-09-10 02:47:56.634152', 'step': 18564, 'epoch': 3} {'type': 'loss', 'content': 0.048608023673295975, 'timestamp': '2025-09-10 02:47:56.636785', 'step': 18565, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 144], 'flops': 4271696270016}, 'timestamp': '2025-09-10 02:47:56.667500', 'step': 18565, 'epoch': 3} {'type': 'loss', 'content': 0.086269311606884, 'timestamp': '2025-09-10 02:47:56.670268', 'step': 18566, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 3322488817984}, 'timestamp': '2025-09-10 02:47:56.700390', 'step': 18566, 'epoch': 3} {'type': 'loss', 'content': 0.011043625883758068, 'timestamp': '2025-09-10 02:47:56.702871', 'step': 18567, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 128], 'flops': 3797092544000}, 'timestamp': '2025-09-10 02:47:56.735346', 'step': 18567, 'epoch': 3} {'type': 'loss', 'content': 0.14478081464767456, 'timestamp': '2025-09-10 02:47:56.759134', 'step': 18568, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 3322488817984}, 'timestamp': '2025-09-10 02:47:56.789027', 'step': 18568, 'epoch': 3} {'type': 'loss', 'content': 0.08771519362926483, 'timestamp': '2025-09-10 02:47:56.791387', 'step': 18569, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 144], 'flops': 4271696270016}, 'timestamp': '2025-09-10 02:47:56.821795', 'step': 18569, 'epoch': 3} {'type': 'loss', 'content': 0.1272115260362625, 'timestamp': '2025-09-10 02:47:56.824388', 'step': 18570, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 2847885091968}, 'timestamp': '2025-09-10 02:47:56.854455', 'step': 18570, 'epoch': 3} {'type': 'loss', 'content': 0.1100718230009079, 'timestamp': '2025-09-10 02:47:56.856890', 'step': 18571, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 352], 'flops': 10441544708224}, 'timestamp': '2025-09-10 02:47:56.890967', 'step': 18571, 'epoch': 3} {'type': 'loss', 'content': 0.04909875988960266, 'timestamp': '2025-09-10 02:47:56.925512', 'step': 18572, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 3322488817984}, 'timestamp': '2025-09-10 02:47:56.955782', 'step': 18572, 'epoch': 3} {'type': 'loss', 'content': 0.025153735652565956, 'timestamp': '2025-09-10 02:47:56.958182', 'step': 18573, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 2847885091968}, 'timestamp': '2025-09-10 02:47:56.988192', 'step': 18573, 'epoch': 3} {'type': 'loss', 'content': 0.07604772597551346, 'timestamp': '2025-09-10 02:47:56.990793', 'step': 18574, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 2847885091968}, 'timestamp': '2025-09-10 02:47:57.021189', 'step': 18574, 'epoch': 3} {'type': 'loss', 'content': 0.048516515642404556, 'timestamp': '2025-09-10 02:47:57.023609', 'step': 18575, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 2847885091968}, 'timestamp': '2025-09-10 02:47:57.053982', 'step': 18575, 'epoch': 3} {'type': 'loss', 'content': 0.1060388833284378, 'timestamp': '2025-09-10 02:47:57.077709', 'step': 18576, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 3322488817984}, 'timestamp': '2025-09-10 02:47:57.107932', 'step': 18576, 'epoch': 3} {'type': 'loss', 'content': 0.13913007080554962, 'timestamp': '2025-09-10 02:47:57.111755', 'step': 18577, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 2847885091968}, 'timestamp': '2025-09-10 02:47:57.141369', 'step': 18577, 'epoch': 3} {'type': 'loss', 'content': 0.04360166937112808, 'timestamp': '2025-09-10 02:47:57.144235', 'step': 18578, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 3322488817984}, 'timestamp': '2025-09-10 02:47:57.174519', 'step': 18578, 'epoch': 3} {'type': 'loss', 'content': 0.023534109815955162, 'timestamp': '2025-09-10 02:47:57.176767', 'step': 18579, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 144], 'flops': 4271696270016}, 'timestamp': '2025-09-10 02:47:57.206924', 'step': 18579, 'epoch': 3} {'type': 'loss', 'content': 0.16345907747745514, 'timestamp': '2025-09-10 02:47:57.230601', 'step': 18580, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 2847885091968}, 'timestamp': '2025-09-10 02:47:57.262180', 'step': 18580, 'epoch': 3} {'type': 'loss', 'content': 0.05577486380934715, 'timestamp': '2025-09-10 02:47:57.264596', 'step': 18581, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 144], 'flops': 4271696270016}, 'timestamp': '2025-09-10 02:47:57.295624', 'step': 18581, 'epoch': 3} {'type': 'loss', 'content': 0.05780642479658127, 'timestamp': '2025-09-10 02:47:57.298207', 'step': 18582, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 2847885091968}, 'timestamp': '2025-09-10 02:47:57.329125', 'step': 18582, 'epoch': 3} {'type': 'loss', 'content': 0.1763744354248047, 'timestamp': '2025-09-10 02:47:57.331280', 'step': 18583, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 2847885091968}, 'timestamp': '2025-09-10 02:47:57.361778', 'step': 18583, 'epoch': 3} {'type': 'loss', 'content': 0.08824390172958374, 'timestamp': '2025-09-10 02:47:57.385651', 'step': 18584, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 176], 'flops': 5220903722048}, 'timestamp': '2025-09-10 02:47:57.417292', 'step': 18584, 'epoch': 3} {'type': 'loss', 'content': 0.062132708728313446, 'timestamp': '2025-09-10 02:47:57.419436', 'step': 18585, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 128], 'flops': 3797092544000}, 'timestamp': '2025-09-10 02:47:57.449455', 'step': 18585, 'epoch': 3} {'type': 'loss', 'content': 0.11075904965400696, 'timestamp': '2025-09-10 02:47:57.455880', 'step': 18586, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 128], 'flops': 3797092544000}, 'timestamp': '2025-09-10 02:47:57.496728', 'step': 18586, 'epoch': 3} {'type': 'loss', 'content': 0.0799821987748146, 'timestamp': '2025-09-10 02:47:57.499546', 'step': 18587, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 2847885091968}, 'timestamp': '2025-09-10 02:47:57.530374', 'step': 18587, 'epoch': 3} {'type': 'loss', 'content': 0.0441400483250618, 'timestamp': '2025-09-10 02:47:57.553871', 'step': 18588, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 144], 'flops': 4271696270016}, 'timestamp': '2025-09-10 02:47:57.585947', 'step': 18588, 'epoch': 3} {'type': 'loss', 'content': 0.020137613639235497, 'timestamp': '2025-09-10 02:47:57.588494', 'step': 18589, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 128], 'flops': 3797092544000}, 'timestamp': '2025-09-10 02:47:57.618899', 'step': 18589, 'epoch': 3} {'type': 'loss', 'content': 0.07278461754322052, 'timestamp': '2025-09-10 02:47:57.621319', 'step': 18590, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 2847885091968}, 'timestamp': '2025-09-10 02:47:57.651266', 'step': 18590, 'epoch': 3} {'type': 'loss', 'content': 0.03504224121570587, 'timestamp': '2025-09-10 02:47:57.653705', 'step': 18591, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 144], 'flops': 4271696270016}, 'timestamp': '2025-09-10 02:47:57.686411', 'step': 18591, 'epoch': 3} {'type': 'loss', 'content': 0.09668990969657898, 'timestamp': '2025-09-10 02:47:57.711299', 'step': 18592, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 3322488817984}, 'timestamp': '2025-09-10 02:47:57.743416', 'step': 18592, 'epoch': 3} {'type': 'loss', 'content': 0.030789094045758247, 'timestamp': '2025-09-10 02:47:57.746023', 'step': 18593, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 128], 'flops': 3797092544000}, 'timestamp': '2025-09-10 02:47:57.776453', 'step': 18593, 'epoch': 3} {'type': 'loss', 'content': 0.07139734923839569, 'timestamp': '2025-09-10 02:47:57.779103', 'step': 18594, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 3322488817984}, 'timestamp': '2025-09-10 02:47:57.810960', 'step': 18594, 'epoch': 3} {'type': 'loss', 'content': 0.12777790427207947, 'timestamp': '2025-09-10 02:47:57.813444', 'step': 18595, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 128], 'flops': 3797092544000}, 'timestamp': '2025-09-10 02:47:57.845199', 'step': 18595, 'epoch': 3} {'type': 'loss', 'content': 0.03661179542541504, 'timestamp': '2025-09-10 02:47:57.869133', 'step': 18596, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 128], 'flops': 3797092544000}, 'timestamp': '2025-09-10 02:47:57.903842', 'step': 18596, 'epoch': 3} {'type': 'loss', 'content': 0.06477174907922745, 'timestamp': '2025-09-10 02:47:57.911414', 'step': 18597, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 2847885091968}, 'timestamp': '2025-09-10 02:47:57.946958', 'step': 18597, 'epoch': 3} {'type': 'loss', 'content': 0.05199337378144264, 'timestamp': '2025-09-10 02:47:57.949441', 'step': 18598, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 3322488817984}, 'timestamp': '2025-09-10 02:47:57.979349', 'step': 18598, 'epoch': 3} {'type': 'loss', 'content': 0.07821161299943924, 'timestamp': '2025-09-10 02:47:57.981685', 'step': 18599, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 3322488817984}, 'timestamp': '2025-09-10 02:47:58.012228', 'step': 18599, 'epoch': 3} {'type': 'loss', 'content': 0.12354303151369095, 'timestamp': '2025-09-10 02:47:58.036047', 'step': 18600, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 192], 'flops': 5695507448064}, 'timestamp': '2025-09-10 02:47:58.066881', 'step': 18600, 'epoch': 3} {'type': 'loss', 'content': 0.05277196317911148, 'timestamp': '2025-09-10 02:47:58.069282', 'step': 18601, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 3322488817984}, 'timestamp': '2025-09-10 02:47:58.099429', 'step': 18601, 'epoch': 3} {'type': 'loss', 'content': 0.038666099309921265, 'timestamp': '2025-09-10 02:47:58.102150', 'step': 18602, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 3322488817984}, 'timestamp': '2025-09-10 02:47:58.133489', 'step': 18602, 'epoch': 3} {'type': 'loss', 'content': 0.12172171473503113, 'timestamp': '2025-09-10 02:47:58.137298', 'step': 18603, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 3322488817984}, 'timestamp': '2025-09-10 02:47:58.167003', 'step': 18603, 'epoch': 3} {'type': 'loss', 'content': 0.08728164434432983, 'timestamp': '2025-09-10 02:47:58.190716', 'step': 18604, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 128], 'flops': 3797092544000}, 'timestamp': '2025-09-10 02:47:58.221136', 'step': 18604, 'epoch': 3} {'type': 'loss', 'content': 0.07928215712308884, 'timestamp': '2025-09-10 02:47:58.223809', 'step': 18605, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 3322488817984}, 'timestamp': '2025-09-10 02:47:58.255934', 'step': 18605, 'epoch': 3} {'type': 'loss', 'content': 0.18087227642536163, 'timestamp': '2025-09-10 02:47:58.258490', 'step': 18606, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 2847885091968}, 'timestamp': '2025-09-10 02:47:58.289527', 'step': 18606, 'epoch': 3} {'type': 'loss', 'content': 0.03204304724931717, 'timestamp': '2025-09-10 02:47:58.291749', 'step': 18607, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 3322488817984}, 'timestamp': '2025-09-10 02:47:58.324306', 'step': 18607, 'epoch': 3} {'type': 'loss', 'content': 0.05485662817955017, 'timestamp': '2025-09-10 02:47:58.349391', 'step': 18608, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 144], 'flops': 4271696270016}, 'timestamp': '2025-09-10 02:47:58.380302', 'step': 18608, 'epoch': 3} {'type': 'loss', 'content': 0.08074760437011719, 'timestamp': '2025-09-10 02:47:58.382712', 'step': 18609, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 128], 'flops': 3797092544000}, 'timestamp': '2025-09-10 02:47:58.412067', 'step': 18609, 'epoch': 3} {'type': 'loss', 'content': 0.09110236167907715, 'timestamp': '2025-09-10 02:47:58.414463', 'step': 18610, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 2847885091968}, 'timestamp': '2025-09-10 02:47:58.444439', 'step': 18610, 'epoch': 3} {'type': 'loss', 'content': 0.0707482248544693, 'timestamp': '2025-09-10 02:47:58.447719', 'step': 18611, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 128], 'flops': 3797092544000}, 'timestamp': '2025-09-10 02:47:58.477709', 'step': 18611, 'epoch': 3} {'type': 'loss', 'content': 0.059400659054517746, 'timestamp': '2025-09-10 02:47:58.503334', 'step': 18612, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 2847885091968}, 'timestamp': '2025-09-10 02:47:58.534330', 'step': 18612, 'epoch': 3} {'type': 'loss', 'content': 0.04790659621357918, 'timestamp': '2025-09-10 02:47:58.536887', 'step': 18613, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 3322488817984}, 'timestamp': '2025-09-10 02:47:58.567690', 'step': 18613, 'epoch': 3} {'type': 'loss', 'content': 0.15098771452903748, 'timestamp': '2025-09-10 02:47:58.570548', 'step': 18614, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 3322488817984}, 'timestamp': '2025-09-10 02:47:58.601343', 'step': 18614, 'epoch': 3} {'type': 'loss', 'content': 0.1548061966896057, 'timestamp': '2025-09-10 02:47:58.603803', 'step': 18615, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 3322488817984}, 'timestamp': '2025-09-10 02:47:58.638309', 'step': 18615, 'epoch': 3} {'type': 'loss', 'content': 0.051849059760570526, 'timestamp': '2025-09-10 02:47:58.663552', 'step': 18616, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 176], 'flops': 5220903722048}, 'timestamp': '2025-09-10 02:47:58.701129', 'step': 18616, 'epoch': 3} {'type': 'loss', 'content': 0.03142103925347328, 'timestamp': '2025-09-10 02:47:58.705632', 'step': 18617, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 3322488817984}, 'timestamp': '2025-09-10 02:47:58.741285', 'step': 18617, 'epoch': 3} {'type': 'loss', 'content': 0.059290364384651184, 'timestamp': '2025-09-10 02:47:58.743826', 'step': 18618, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 3322488817984}, 'timestamp': '2025-09-10 02:47:58.778899', 'step': 18618, 'epoch': 3} {'type': 'loss', 'content': 0.04255964234471321, 'timestamp': '2025-09-10 02:47:58.783797', 'step': 18619, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 144], 'flops': 4271696270016}, 'timestamp': '2025-09-10 02:47:58.822464', 'step': 18619, 'epoch': 3} {'type': 'loss', 'content': 0.050018664449453354, 'timestamp': '2025-09-10 02:47:58.847181', 'step': 18620, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 2847885091968}, 'timestamp': '2025-09-10 02:47:58.880281', 'step': 18620, 'epoch': 3} {'type': 'loss', 'content': 0.07968328148126602, 'timestamp': '2025-09-10 02:47:58.882525', 'step': 18621, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 3322488817984}, 'timestamp': '2025-09-10 02:47:58.913416', 'step': 18621, 'epoch': 3} {'type': 'loss', 'content': 0.08651235699653625, 'timestamp': '2025-09-10 02:47:58.916344', 'step': 18622, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 144], 'flops': 4271696270016}, 'timestamp': '2025-09-10 02:47:58.947601', 'step': 18622, 'epoch': 3} {'type': 'loss', 'content': 0.07035832107067108, 'timestamp': '2025-09-10 02:47:58.951085', 'step': 18623, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 2847885091968}, 'timestamp': '2025-09-10 02:47:58.981197', 'step': 18623, 'epoch': 3} {'type': 'loss', 'content': 0.07909318059682846, 'timestamp': '2025-09-10 02:47:59.004619', 'step': 18624, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 128], 'flops': 3797092544000}, 'timestamp': '2025-09-10 02:47:59.035032', 'step': 18624, 'epoch': 3} {'type': 'loss', 'content': 0.08540476858615875, 'timestamp': '2025-09-10 02:47:59.037995', 'step': 18625, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 3322488817984}, 'timestamp': '2025-09-10 02:47:59.067911', 'step': 18625, 'epoch': 3} {'type': 'loss', 'content': 0.07879263907670975, 'timestamp': '2025-09-10 02:47:59.070299', 'step': 18626, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 3322488817984}, 'timestamp': '2025-09-10 02:47:59.100020', 'step': 18626, 'epoch': 3} {'type': 'loss', 'content': 0.09886320680379868, 'timestamp': '2025-09-10 02:47:59.102157', 'step': 18627, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 3322488817984}, 'timestamp': '2025-09-10 02:47:59.132257', 'step': 18627, 'epoch': 3} {'type': 'loss', 'content': 0.11228740960359573, 'timestamp': '2025-09-10 02:47:59.155765', 'step': 18628, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 128], 'flops': 3797092544000}, 'timestamp': '2025-09-10 02:47:59.186856', 'step': 18628, 'epoch': 3} {'type': 'loss', 'content': 0.08658307790756226, 'timestamp': '2025-09-10 02:47:59.189534', 'step': 18629, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 144], 'flops': 4271696270016}, 'timestamp': '2025-09-10 02:47:59.220199', 'step': 18629, 'epoch': 3} {'type': 'loss', 'content': 0.03743886947631836, 'timestamp': '2025-09-10 02:47:59.222414', 'step': 18630, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 2847885091968}, 'timestamp': '2025-09-10 02:47:59.252858', 'step': 18630, 'epoch': 3} {'type': 'loss', 'content': 0.056281957775354385, 'timestamp': '2025-09-10 02:47:59.255116', 'step': 18631, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 128], 'flops': 3797092544000}, 'timestamp': '2025-09-10 02:47:59.284983', 'step': 18631, 'epoch': 3} {'type': 'loss', 'content': 0.042692359536886215, 'timestamp': '2025-09-10 02:47:59.308681', 'step': 18632, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 3322488817984}, 'timestamp': '2025-09-10 02:47:59.339468', 'step': 18632, 'epoch': 3} {'type': 'loss', 'content': 0.0973198264837265, 'timestamp': '2025-09-10 02:47:59.342318', 'step': 18633, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 3322488817984}, 'timestamp': '2025-09-10 02:47:59.373028', 'step': 18633, 'epoch': 3} {'type': 'loss', 'content': 0.14460289478302002, 'timestamp': '2025-09-10 02:47:59.375567', 'step': 18634, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 80], 'flops': 2373281365952}, 'timestamp': '2025-09-10 02:47:59.405953', 'step': 18634, 'epoch': 3} {'type': 'loss', 'content': 0.12993265688419342, 'timestamp': '2025-09-10 02:47:59.408307', 'step': 18635, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 3322488817984}, 'timestamp': '2025-09-10 02:47:59.438386', 'step': 18635, 'epoch': 3} {'type': 'loss', 'content': 0.08484496921300888, 'timestamp': '2025-09-10 02:47:59.463360', 'step': 18636, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 3322488817984}, 'timestamp': '2025-09-10 02:47:59.493876', 'step': 18636, 'epoch': 3} {'type': 'loss', 'content': 0.04301457107067108, 'timestamp': '2025-09-10 02:47:59.496546', 'step': 18637, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 2847885091968}, 'timestamp': '2025-09-10 02:47:59.526738', 'step': 18637, 'epoch': 3} {'type': 'loss', 'content': 0.03005499765276909, 'timestamp': '2025-09-10 02:47:59.529287', 'step': 18638, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 128], 'flops': 3797092544000}, 'timestamp': '2025-09-10 02:47:59.559122', 'step': 18638, 'epoch': 3} {'type': 'loss', 'content': 0.08474662154912949, 'timestamp': '2025-09-10 02:47:59.561459', 'step': 18639, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 3322488817984}, 'timestamp': '2025-09-10 02:47:59.591225', 'step': 18639, 'epoch': 3} {'type': 'loss', 'content': 0.07289140671491623, 'timestamp': '2025-09-10 02:47:59.614849', 'step': 18640, 'epoch': 3} {'type': 'flops', 'content': [{'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1582003754624}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1898404492032}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1898404492032}, {'type': 'perplexity', 'in_batch_dim': [8, 144], 'batch_size': 8, 'flops': 2847606704256}, {'type': 'perplexity', 'in_batch_dim': [8, 64], 'batch_size': 8, 'flops': 1265603017216}, {'type': 'perplexity', 'in_batch_dim': [8, 112], 'batch_size': 8, 'flops': 2214805229440}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1898404492032}, {'type': 'perplexity', 'in_batch_dim': [8, 112], 'batch_size': 8, 'flops': 2214805229440}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1898404492032}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1898404492032}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1898404492032}, {'type': 'perplexity', 'in_batch_dim': [8, 112], 'batch_size': 8, 'flops': 2214805229440}, {'type': 'perplexity', 'in_batch_dim': [8, 112], 'batch_size': 8, 'flops': 2214805229440}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1582003754624}, {'type': 'perplexity', 'in_batch_dim': [8, 144], 'batch_size': 8, 'flops': 2847606704256}, {'type': 'perplexity', 'in_batch_dim': [8, 112], 'batch_size': 8, 'flops': 2214805229440}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1582003754624}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1898404492032}, {'type': 'perplexity', 'in_batch_dim': [8, 64], 'batch_size': 8, 'flops': 1265603017216}, {'type': 'perplexity', 'in_batch_dim': [8, 64], 'batch_size': 8, 'flops': 1265603017216}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1898404492032}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1582003754624}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1582003754624}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1582003754624}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1582003754624}, {'type': 'perplexity', 'in_batch_dim': [8, 64], 'batch_size': 8, 'flops': 1265603017216}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1898404492032}, {'type': 'perplexity', 'in_batch_dim': [8, 64], 'batch_size': 8, 'flops': 1265603017216}, {'type': 'perplexity', 'in_batch_dim': [8, 64], 'batch_size': 8, 'flops': 1265603017216}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1582003754624}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1582003754624}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1582003754624}, {'type': 'perplexity', 'in_batch_dim': [8, 112], 'batch_size': 8, 'flops': 2214805229440}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1582003754624}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1582003754624}, {'type': 'perplexity', 'in_batch_dim': [8, 160], 'batch_size': 8, 'flops': 3164007441664}, {'type': 'perplexity', 'in_batch_dim': [8, 64], 'batch_size': 8, 'flops': 1265603017216}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1898404492032}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1898404492032}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1898404492032}, {'type': 'perplexity', 'in_batch_dim': [8, 64], 'batch_size': 8, 'flops': 1265603017216}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1582003754624}, {'type': 'perplexity', 'in_batch_dim': [8, 64], 'batch_size': 8, 'flops': 1265603017216}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1582003754624}, {'type': 'perplexity', 'in_batch_dim': [8, 64], 'batch_size': 8, 'flops': 1265603017216}, {'type': 'perplexity', 'in_batch_dim': [8, 112], 'batch_size': 8, 'flops': 2214805229440}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1582003754624}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1898404492032}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1898404492032}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1898404492032}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1898404492032}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1898404492032}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1898404492032}, {'type': 'perplexity', 'in_batch_dim': [8, 64], 'batch_size': 8, 'flops': 1265603017216}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1898404492032}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1582003754624}, {'type': 'perplexity', 'in_batch_dim': [8, 64], 'batch_size': 8, 'flops': 1265603017216}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1898404492032}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1898404492032}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1898404492032}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1898404492032}, {'type': 'perplexity', 'in_batch_dim': [8, 128], 'batch_size': 8, 'flops': 2531205966848}, {'type': 'perplexity', 'in_batch_dim': [8, 112], 'batch_size': 8, 'flops': 2214805229440}, {'type': 'perplexity', 'in_batch_dim': [8, 64], 'batch_size': 8, 'flops': 1265603017216}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1582003754624}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1898404492032}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1582003754624}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1898404492032}, {'type': 'perplexity', 'in_batch_dim': [8, 64], 'batch_size': 8, 'flops': 1265603017216}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1898404492032}, {'type': 'perplexity', 'in_batch_dim': [8, 112], 'batch_size': 8, 'flops': 2214805229440}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1898404492032}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1582003754624}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1582003754624}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1582003754624}, {'type': 'perplexity', 'in_batch_dim': [8, 64], 'batch_size': 8, 'flops': 1265603017216}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1582003754624}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1898404492032}, {'type': 'perplexity', 'in_batch_dim': [8, 64], 'batch_size': 8, 'flops': 1265603017216}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1582003754624}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1898404492032}, {'type': 'perplexity', 'in_batch_dim': [8, 64], 'batch_size': 8, 'flops': 1265603017216}, {'type': 'perplexity', 'in_batch_dim': [8, 112], 'batch_size': 8, 'flops': 2214805229440}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1582003754624}, {'type': 'perplexity', 'in_batch_dim': [8, 144], 'batch_size': 8, 'flops': 2847606704256}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1582003754624}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1582003754624}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1582003754624}, {'type': 'perplexity', 'in_batch_dim': [8, 64], 'batch_size': 8, 'flops': 1265603017216}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1898404492032}, {'type': 'perplexity', 'in_batch_dim': [8, 112], 'batch_size': 8, 'flops': 2214805229440}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1898404492032}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1582003754624}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1582003754624}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1582003754624}, {'type': 'perplexity', 'in_batch_dim': [8, 112], 'batch_size': 8, 'flops': 2214805229440}, {'type': 'perplexity', 'in_batch_dim': [8, 64], 'batch_size': 8, 'flops': 1265603017216}, {'type': 'perplexity', 'in_batch_dim': [8, 112], 'batch_size': 8, 'flops': 2214805229440}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1582003754624}, {'type': 'perplexity', 'in_batch_dim': [8, 64], 'batch_size': 8, 'flops': 1265603017216}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1582003754624}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1582003754624}, {'type': 'perplexity', 'in_batch_dim': [8, 64], 'batch_size': 8, 'flops': 1265603017216}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1582003754624}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1582003754624}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1582003754624}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1898404492032}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1582003754624}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1582003754624}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1898404492032}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1898404492032}, {'type': 'perplexity', 'in_batch_dim': [8, 112], 'batch_size': 8, 'flops': 2214805229440}, {'type': 'perplexity', 'in_batch_dim': [8, 64], 'batch_size': 8, 'flops': 1265603017216}, {'type': 'perplexity', 'in_batch_dim': [8, 112], 'batch_size': 8, 'flops': 2214805229440}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1898404492032}, {'type': 'perplexity', 'in_batch_dim': [8, 112], 'batch_size': 8, 'flops': 2214805229440}, {'type': 'perplexity', 'in_batch_dim': [8, 128], 'batch_size': 8, 'flops': 2531205966848}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1582003754624}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1898404492032}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1898404492032}, {'type': 'perplexity', 'in_batch_dim': [8, 192], 'batch_size': 8, 'flops': 3796808916480}, {'type': 'perplexity', 'in_batch_dim': [8, 64], 'batch_size': 8, 'flops': 1265603017216}, {'type': 'perplexity', 'in_batch_dim': [8, 144], 'batch_size': 8, 'flops': 2847606704256}, {'type': 'perplexity', 'in_batch_dim': [8, 64], 'batch_size': 8, 'flops': 1265603017216}, {'type': 'perplexity', 'in_batch_dim': [8, 144], 'batch_size': 8, 'flops': 2847606704256}, {'type': 'perplexity', 'in_batch_dim': [8, 64], 'batch_size': 8, 'flops': 1265603017216}, {'type': 'perplexity', 'in_batch_dim': [8, 64], 'batch_size': 8, 'flops': 1265603017216}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1898404492032}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1898404492032}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1582003754624}, {'type': 'perplexity', 'in_batch_dim': [8, 128], 'batch_size': 8, 'flops': 2531205966848}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1898404492032}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1898404492032}, {'type': 'perplexity', 'in_batch_dim': [8, 112], 'batch_size': 8, 'flops': 2214805229440}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1582003754624}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1582003754624}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1898404492032}, {'type': 'perplexity', 'in_batch_dim': [8, 64], 'batch_size': 8, 'flops': 1265603017216}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1898404492032}, {'type': 'perplexity', 'in_batch_dim': [8, 112], 'batch_size': 8, 'flops': 2214805229440}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1898404492032}, {'type': 'perplexity', 'in_batch_dim': [8, 64], 'batch_size': 8, 'flops': 1265603017216}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1582003754624}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1898404492032}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1898404492032}, {'type': 'perplexity', 'in_batch_dim': [8, 64], 'batch_size': 8, 'flops': 1265603017216}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1898404492032}, {'type': 'perplexity', 'in_batch_dim': [8, 64], 'batch_size': 8, 'flops': 1265603017216}, {'type': 'perplexity', 'in_batch_dim': [8, 112], 'batch_size': 8, 'flops': 2214805229440}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1582003754624}, {'type': 'perplexity', 'in_batch_dim': [8, 128], 'batch_size': 8, 'flops': 2531205966848}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1582003754624}, {'type': 'perplexity', 'in_batch_dim': [8, 112], 'batch_size': 8, 'flops': 2214805229440}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1898404492032}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1582003754624}, {'type': 'perplexity', 'in_batch_dim': [8, 112], 'batch_size': 8, 'flops': 2214805229440}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1582003754624}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1898404492032}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1898404492032}, {'type': 'perplexity', 'in_batch_dim': [8, 128], 'batch_size': 8, 'flops': 2531205966848}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1898404492032}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1898404492032}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1582003754624}, {'type': 'perplexity', 'in_batch_dim': [8, 112], 'batch_size': 8, 'flops': 2214805229440}, {'type': 'perplexity', 'in_batch_dim': [8, 128], 'batch_size': 8, 'flops': 2531205966848}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1898404492032}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1582003754624}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1582003754624}, {'type': 'perplexity', 'in_batch_dim': [8, 128], 'batch_size': 8, 'flops': 2531205966848}, {'type': 'perplexity', 'in_batch_dim': [8, 112], 'batch_size': 8, 'flops': 2214805229440}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1582003754624}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1582003754624}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1898404492032}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1582003754624}, {'type': 'perplexity', 'in_batch_dim': [8, 64], 'batch_size': 8, 'flops': 1265603017216}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1582003754624}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1582003754624}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1582003754624}, {'type': 'perplexity', 'in_batch_dim': [8, 112], 'batch_size': 8, 'flops': 2214805229440}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1582003754624}, {'type': 'perplexity', 'in_batch_dim': [8, 64], 'batch_size': 8, 'flops': 1265603017216}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1582003754624}, {'type': 'perplexity', 'in_batch_dim': [8, 128], 'batch_size': 8, 'flops': 2531205966848}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1898404492032}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1898404492032}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1582003754624}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1898404492032}, {'type': 'perplexity', 'in_batch_dim': [8, 64], 'batch_size': 8, 'flops': 1265603017216}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1898404492032}, {'type': 'perplexity', 'in_batch_dim': [8, 112], 'batch_size': 8, 'flops': 2214805229440}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1898404492032}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1582003754624}, {'type': 'perplexity', 'in_batch_dim': [8, 112], 'batch_size': 8, 'flops': 2214805229440}, {'type': 'perplexity', 'in_batch_dim': [8, 112], 'batch_size': 8, 'flops': 2214805229440}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1898404492032}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1898404492032}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1898404492032}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1898404492032}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1582003754624}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1582003754624}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1582003754624}, {'type': 'perplexity', 'in_batch_dim': [8, 112], 'batch_size': 8, 'flops': 2214805229440}, {'type': 'perplexity', 'in_batch_dim': [8, 64], 'batch_size': 8, 'flops': 1265603017216}, {'type': 'perplexity', 'in_batch_dim': [8, 144], 'batch_size': 8, 'flops': 2847606704256}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1898404492032}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1898404492032}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1898404492032}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1898404492032}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1898404492032}, {'type': 'perplexity', 'in_batch_dim': [8, 112], 'batch_size': 8, 'flops': 2214805229440}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1582003754624}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1582003754624}, {'type': 'perplexity', 'in_batch_dim': [8, 64], 'batch_size': 8, 'flops': 1265603017216}, {'type': 'perplexity', 'in_batch_dim': [8, 112], 'batch_size': 8, 'flops': 2214805229440}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1582003754624}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1898404492032}, {'type': 'perplexity', 'in_batch_dim': [8, 64], 'batch_size': 8, 'flops': 1265603017216}, {'type': 'perplexity', 'in_batch_dim': [8, 112], 'batch_size': 8, 'flops': 2214805229440}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1898404492032}, {'type': 'perplexity', 'in_batch_dim': [8, 64], 'batch_size': 8, 'flops': 1265603017216}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1582003754624}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1582003754624}, {'type': 'perplexity', 'in_batch_dim': [8, 64], 'batch_size': 8, 'flops': 1265603017216}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1898404492032}, {'type': 'perplexity', 'in_batch_dim': [8, 64], 'batch_size': 8, 'flops': 1265603017216}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1582003754624}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1898404492032}, {'type': 'perplexity', 'in_batch_dim': [8, 112], 'batch_size': 8, 'flops': 2214805229440}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1582003754624}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1898404492032}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1582003754624}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1898404492032}, {'type': 'perplexity', 'in_batch_dim': [8, 112], 'batch_size': 8, 'flops': 2214805229440}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1898404492032}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1898404492032}, {'type': 'perplexity', 'in_batch_dim': [8, 64], 'batch_size': 8, 'flops': 1265603017216}, {'type': 'perplexity', 'in_batch_dim': [8, 64], 'batch_size': 8, 'flops': 1265603017216}, {'type': 'perplexity', 'in_batch_dim': [8, 128], 'batch_size': 8, 'flops': 2531205966848}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1582003754624}, {'type': 'perplexity', 'in_batch_dim': [8, 64], 'batch_size': 8, 'flops': 1265603017216}, {'type': 'perplexity', 'in_batch_dim': [8, 112], 'batch_size': 8, 'flops': 2214805229440}, {'type': 'perplexity', 'in_batch_dim': [8, 112], 'batch_size': 8, 'flops': 2214805229440}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1898404492032}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1582003754624}, {'type': 'perplexity', 'in_batch_dim': [8, 128], 'batch_size': 8, 'flops': 2531205966848}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1898404492032}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1582003754624}, {'type': 'perplexity', 'in_batch_dim': [8, 112], 'batch_size': 8, 'flops': 2214805229440}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1898404492032}, {'type': 'perplexity', 'in_batch_dim': [8, 64], 'batch_size': 8, 'flops': 1265603017216}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1582003754624}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1898404492032}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1898404492032}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1582003754624}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1582003754624}, {'type': 'perplexity', 'in_batch_dim': [8, 64], 'batch_size': 8, 'flops': 1265603017216}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1898404492032}, {'type': 'perplexity', 'in_batch_dim': [8, 112], 'batch_size': 8, 'flops': 2214805229440}, {'type': 'perplexity', 'in_batch_dim': [8, 64], 'batch_size': 8, 'flops': 1265603017216}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1898404492032}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1582003754624}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1582003754624}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1898404492032}, {'type': 'perplexity', 'in_batch_dim': [8, 112], 'batch_size': 8, 'flops': 2214805229440}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1898404492032}, {'type': 'perplexity', 'in_batch_dim': [8, 112], 'batch_size': 8, 'flops': 2214805229440}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1582003754624}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1582003754624}, {'type': 'perplexity', 'in_batch_dim': [8, 64], 'batch_size': 8, 'flops': 1265603017216}, {'type': 'perplexity', 'in_batch_dim': [8, 112], 'batch_size': 8, 'flops': 2214805229440}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1582003754624}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1898404492032}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1582003754624}, {'type': 'perplexity', 'in_batch_dim': [8, 112], 'batch_size': 8, 'flops': 2214805229440}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1898404492032}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1898404492032}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1582003754624}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1898404492032}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1582003754624}, {'type': 'perplexity', 'in_batch_dim': [8, 112], 'batch_size': 8, 'flops': 2214805229440}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1582003754624}, {'type': 'perplexity', 'in_batch_dim': [8, 64], 'batch_size': 8, 'flops': 1265603017216}, {'type': 'perplexity', 'in_batch_dim': [8, 64], 'batch_size': 8, 'flops': 1265603017216}, {'type': 'perplexity', 'in_batch_dim': [8, 48], 'batch_size': 8, 'flops': 949202279808}, {'type': 'perplexity', 'in_batch_dim': [8, 112], 'batch_size': 8, 'flops': 2214805229440}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1582003754624}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1898404492032}, {'type': 'perplexity', 'in_batch_dim': [8, 128], 'batch_size': 8, 'flops': 2531205966848}, {'type': 'perplexity', 'in_batch_dim': [8, 112], 'batch_size': 8, 'flops': 2214805229440}, {'type': 'perplexity', 'in_batch_dim': [8, 128], 'batch_size': 8, 'flops': 2531205966848}, {'type': 'perplexity', 'in_batch_dim': [8, 112], 'batch_size': 8, 'flops': 2214805229440}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1582003754624}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1898404492032}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1582003754624}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1582003754624}, {'type': 'perplexity', 'in_batch_dim': [8, 112], 'batch_size': 8, 'flops': 2214805229440}, {'type': 'perplexity', 'in_batch_dim': [8, 112], 'batch_size': 8, 'flops': 2214805229440}, {'type': 'perplexity', 'in_batch_dim': [8, 64], 'batch_size': 8, 'flops': 1265603017216}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1898404492032}, {'type': 'perplexity', 'in_batch_dim': [8, 112], 'batch_size': 8, 'flops': 2214805229440}, {'type': 'perplexity', 'in_batch_dim': [8, 64], 'batch_size': 8, 'flops': 1265603017216}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1898404492032}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1582003754624}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1898404492032}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1898404492032}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1898404492032}, {'type': 'perplexity', 'in_batch_dim': [8, 48], 'batch_size': 8, 'flops': 949202279808}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1898404492032}, {'type': 'perplexity', 'in_batch_dim': [8, 112], 'batch_size': 8, 'flops': 2214805229440}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1582003754624}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1898404492032}, {'type': 'perplexity', 'in_batch_dim': [8, 64], 'batch_size': 8, 'flops': 1265603017216}, {'type': 'perplexity', 'in_batch_dim': [8, 112], 'batch_size': 8, 'flops': 2214805229440}, {'type': 'perplexity', 'in_batch_dim': [8, 64], 'batch_size': 8, 'flops': 1265603017216}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1582003754624}, {'type': 'perplexity', 'in_batch_dim': [8, 64], 'batch_size': 8, 'flops': 1265603017216}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1582003754624}, {'type': 'perplexity', 'in_batch_dim': [8, 112], 'batch_size': 8, 'flops': 2214805229440}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1582003754624}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1582003754624}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1582003754624}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1582003754624}, {'type': 'perplexity', 'in_batch_dim': [8, 112], 'batch_size': 8, 'flops': 2214805229440}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1582003754624}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1898404492032}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1898404492032}, {'type': 'perplexity', 'in_batch_dim': [8, 112], 'batch_size': 8, 'flops': 2214805229440}, {'type': 'perplexity', 'in_batch_dim': [8, 144], 'batch_size': 8, 'flops': 2847606704256}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1582003754624}, {'type': 'perplexity', 'in_batch_dim': [8, 64], 'batch_size': 8, 'flops': 1265603017216}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1898404492032}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1898404492032}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1582003754624}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1898404492032}, {'type': 'perplexity', 'in_batch_dim': [8, 128], 'batch_size': 8, 'flops': 2531205966848}, {'type': 'perplexity', 'in_batch_dim': [8, 112], 'batch_size': 8, 'flops': 2214805229440}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1898404492032}, {'type': 'perplexity', 'in_batch_dim': [8, 64], 'batch_size': 8, 'flops': 1265603017216}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1898404492032}, {'type': 'perplexity', 'in_batch_dim': [8, 160], 'batch_size': 8, 'flops': 3164007441664}, {'type': 'perplexity', 'in_batch_dim': [8, 64], 'batch_size': 8, 'flops': 1265603017216}, {'type': 'perplexity', 'in_batch_dim': [8, 112], 'batch_size': 8, 'flops': 2214805229440}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1898404492032}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1898404492032}, {'type': 'perplexity', 'in_batch_dim': [8, 144], 'batch_size': 8, 'flops': 2847606704256}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1582003754624}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1898404492032}, {'type': 'perplexity', 'in_batch_dim': [8, 64], 'batch_size': 8, 'flops': 1265603017216}, {'type': 'perplexity', 'in_batch_dim': [8, 64], 'batch_size': 8, 'flops': 1265603017216}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1898404492032}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1898404492032}, {'type': 'perplexity', 'in_batch_dim': [8, 112], 'batch_size': 8, 'flops': 2214805229440}, {'type': 'perplexity', 'in_batch_dim': [8, 112], 'batch_size': 8, 'flops': 2214805229440}, {'type': 'perplexity', 'in_batch_dim': [8, 128], 'batch_size': 8, 'flops': 2531205966848}, {'type': 'perplexity', 'in_batch_dim': [8, 112], 'batch_size': 8, 'flops': 2214805229440}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1582003754624}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1582003754624}, {'type': 'perplexity', 'in_batch_dim': [8, 112], 'batch_size': 8, 'flops': 2214805229440}, {'type': 'perplexity', 'in_batch_dim': [8, 128], 'batch_size': 8, 'flops': 2531205966848}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1582003754624}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1582003754624}, {'type': 'perplexity', 'in_batch_dim': [8, 64], 'batch_size': 8, 'flops': 1265603017216}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1898404492032}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1582003754624}, {'type': 'perplexity', 'in_batch_dim': [8, 112], 'batch_size': 8, 'flops': 2214805229440}, {'type': 'perplexity', 'in_batch_dim': [8, 64], 'batch_size': 8, 'flops': 1265603017216}, {'type': 'perplexity', 'in_batch_dim': [8, 64], 'batch_size': 8, 'flops': 1265603017216}, {'type': 'perplexity', 'in_batch_dim': [8, 128], 'batch_size': 8, 'flops': 2531205966848}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1898404492032}, {'type': 'perplexity', 'in_batch_dim': [8, 112], 'batch_size': 8, 'flops': 2214805229440}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1582003754624}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1582003754624}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1582003754624}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1582003754624}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1582003754624}, {'type': 'perplexity', 'in_batch_dim': [8, 112], 'batch_size': 8, 'flops': 2214805229440}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1898404492032}, {'type': 'perplexity', 'in_batch_dim': [8, 112], 'batch_size': 8, 'flops': 2214805229440}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1898404492032}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1582003754624}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1582003754624}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1898404492032}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1898404492032}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1582003754624}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1582003754624}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1582003754624}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1582003754624}, {'type': 'perplexity', 'in_batch_dim': [8, 48], 'batch_size': 8, 'flops': 949202279808}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1582003754624}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1898404492032}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1582003754624}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1898404492032}, {'type': 'perplexity', 'in_batch_dim': [8, 112], 'batch_size': 8, 'flops': 2214805229440}, {'type': 'perplexity', 'in_batch_dim': [8, 128], 'batch_size': 8, 'flops': 2531205966848}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1898404492032}, {'type': 'perplexity', 'in_batch_dim': [8, 64], 'batch_size': 8, 'flops': 1265603017216}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1898404492032}, {'type': 'perplexity', 'in_batch_dim': [8, 64], 'batch_size': 8, 'flops': 1265603017216}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1582003754624}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1582003754624}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1582003754624}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1582003754624}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1898404492032}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1582003754624}, {'type': 'perplexity', 'in_batch_dim': [8, 112], 'batch_size': 8, 'flops': 2214805229440}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1898404492032}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1582003754624}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1898404492032}, {'type': 'perplexity', 'in_batch_dim': [8, 64], 'batch_size': 8, 'flops': 1265603017216}, {'type': 'perplexity', 'in_batch_dim': [8, 64], 'batch_size': 8, 'flops': 1265603017216}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1898404492032}, {'type': 'perplexity', 'in_batch_dim': [8, 64], 'batch_size': 8, 'flops': 1265603017216}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1582003754624}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1582003754624}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1582003754624}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1582003754624}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1582003754624}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1582003754624}, {'type': 'perplexity', 'in_batch_dim': [8, 64], 'batch_size': 8, 'flops': 1265603017216}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1582003754624}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1582003754624}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1898404492032}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1898404492032}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1898404492032}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1898404492032}, {'type': 'perplexity', 'in_batch_dim': [8, 128], 'batch_size': 8, 'flops': 2531205966848}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1582003754624}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1898404492032}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1898404492032}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1898404492032}, {'type': 'perplexity', 'in_batch_dim': [8, 64], 'batch_size': 8, 'flops': 1265603017216}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1898404492032}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1898404492032}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1582003754624}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1898404492032}, {'type': 'perplexity', 'in_batch_dim': [8, 112], 'batch_size': 8, 'flops': 2214805229440}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1898404492032}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1582003754624}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1898404492032}, {'type': 'perplexity', 'in_batch_dim': [8, 64], 'batch_size': 8, 'flops': 1265603017216}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1898404492032}, {'type': 'perplexity', 'in_batch_dim': [8, 112], 'batch_size': 8, 'flops': 2214805229440}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1898404492032}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1898404492032}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1582003754624}, {'type': 'perplexity', 'in_batch_dim': [8, 64], 'batch_size': 8, 'flops': 1265603017216}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1582003754624}, {'type': 'perplexity', 'in_batch_dim': [8, 128], 'batch_size': 8, 'flops': 2531205966848}, {'type': 'perplexity', 'in_batch_dim': [8, 64], 'batch_size': 8, 'flops': 1265603017216}, {'type': 'perplexity', 'in_batch_dim': [8, 128], 'batch_size': 8, 'flops': 2531205966848}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1898404492032}, {'type': 'perplexity', 'in_batch_dim': [8, 112], 'batch_size': 8, 'flops': 2214805229440}, {'type': 'perplexity', 'in_batch_dim': [8, 112], 'batch_size': 8, 'flops': 2214805229440}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1898404492032}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1898404492032}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1582003754624}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1582003754624}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1582003754624}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1898404492032}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1582003754624}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1582003754624}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1898404492032}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1582003754624}, {'type': 'perplexity', 'in_batch_dim': [8, 64], 'batch_size': 8, 'flops': 1265603017216}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1898404492032}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1898404492032}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1898404492032}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1898404492032}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1582003754624}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1898404492032}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1582003754624}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1582003754624}, {'type': 'perplexity', 'in_batch_dim': [8, 112], 'batch_size': 8, 'flops': 2214805229440}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1582003754624}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1898404492032}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1582003754624}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1582003754624}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1582003754624}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1898404492032}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1582003754624}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1898404492032}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1582003754624}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1582003754624}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1898404492032}, {'type': 'perplexity', 'in_batch_dim': [8, 112], 'batch_size': 8, 'flops': 2214805229440}, {'type': 'perplexity', 'in_batch_dim': [8, 128], 'batch_size': 8, 'flops': 2531205966848}, {'type': 'perplexity', 'in_batch_dim': [8, 112], 'batch_size': 8, 'flops': 2214805229440}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1582003754624}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1898404492032}, {'type': 'perplexity', 'in_batch_dim': [8, 64], 'batch_size': 8, 'flops': 1265603017216}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1898404492032}, {'type': 'perplexity', 'in_batch_dim': [8, 64], 'batch_size': 8, 'flops': 1265603017216}, {'type': 'perplexity', 'in_batch_dim': [8, 144], 'batch_size': 8, 'flops': 2847606704256}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1582003754624}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1898404492032}, {'type': 'perplexity', 'in_batch_dim': [8, 112], 'batch_size': 8, 'flops': 2214805229440}, {'type': 'perplexity', 'in_batch_dim': [8, 144], 'batch_size': 8, 'flops': 2847606704256}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1582003754624}, {'type': 'perplexity', 'in_batch_dim': [8, 112], 'batch_size': 8, 'flops': 2214805229440}, {'type': 'perplexity', 'in_batch_dim': [8, 64], 'batch_size': 8, 'flops': 1265603017216}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1582003754624}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1898404492032}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1898404492032}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1582003754624}, {'type': 'perplexity', 'in_batch_dim': [8, 112], 'batch_size': 8, 'flops': 2214805229440}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1582003754624}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1582003754624}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1582003754624}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1582003754624}, {'type': 'perplexity', 'in_batch_dim': [8, 112], 'batch_size': 8, 'flops': 2214805229440}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1898404492032}, {'type': 'perplexity', 'in_batch_dim': [8, 128], 'batch_size': 8, 'flops': 2531205966848}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1582003754624}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1582003754624}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1898404492032}, {'type': 'perplexity', 'in_batch_dim': [8, 64], 'batch_size': 8, 'flops': 1265603017216}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1898404492032}, {'type': 'perplexity', 'in_batch_dim': [8, 64], 'batch_size': 8, 'flops': 1265603017216}, {'type': 'perplexity', 'in_batch_dim': [8, 112], 'batch_size': 8, 'flops': 2214805229440}, {'type': 'perplexity', 'in_batch_dim': [8, 112], 'batch_size': 8, 'flops': 2214805229440}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1582003754624}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1898404492032}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1582003754624}, {'type': 'perplexity', 'in_batch_dim': [8, 64], 'batch_size': 8, 'flops': 1265603017216}, {'type': 'perplexity', 'in_batch_dim': [8, 64], 'batch_size': 8, 'flops': 1265603017216}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1898404492032}, {'type': 'perplexity', 'in_batch_dim': [8, 64], 'batch_size': 8, 'flops': 1265603017216}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1898404492032}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1898404492032}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1582003754624}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1898404492032}, {'type': 'perplexity', 'in_batch_dim': [8, 144], 'batch_size': 8, 'flops': 2847606704256}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1898404492032}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1582003754624}, {'type': 'perplexity', 'in_batch_dim': [8, 128], 'batch_size': 8, 'flops': 2531205966848}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1898404492032}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1898404492032}, {'type': 'perplexity', 'in_batch_dim': [8, 112], 'batch_size': 8, 'flops': 2214805229440}, {'type': 'perplexity', 'in_batch_dim': [8, 112], 'batch_size': 8, 'flops': 2214805229440}, {'type': 'perplexity', 'in_batch_dim': [8, 64], 'batch_size': 8, 'flops': 1265603017216}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1582003754624}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1898404492032}, {'type': 'perplexity', 'in_batch_dim': [8, 128], 'batch_size': 8, 'flops': 2531205966848}, {'type': 'perplexity', 'in_batch_dim': [8, 144], 'batch_size': 8, 'flops': 2847606704256}, {'type': 'perplexity', 'in_batch_dim': [8, 112], 'batch_size': 8, 'flops': 2214805229440}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1582003754624}, {'type': 'perplexity', 'in_batch_dim': [8, 64], 'batch_size': 8, 'flops': 1265603017216}, {'type': 'perplexity', 'in_batch_dim': [8, 64], 'batch_size': 8, 'flops': 1265603017216}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1898404492032}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1582003754624}, {'type': 'perplexity', 'in_batch_dim': [8, 64], 'batch_size': 8, 'flops': 1265603017216}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1898404492032}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1582003754624}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1898404492032}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1898404492032}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1582003754624}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1582003754624}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1582003754624}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1582003754624}, {'type': 'perplexity', 'in_batch_dim': [3, 48], 'batch_size': 8, 'flops': 949202279808}], 'timestamp': '2025-09-10 02:48:07.518385', 'step': 18640, 'epoch': 3} {'type': 'pplx', 'content': 13380.243941353498, 'timestamp': '2025-09-10 02:48:07.521454', 'step': 18640, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 160], 'flops': 4746299996032}, 'timestamp': '2025-09-10 02:48:07.551497', 'step': 18640, 'epoch': 3} {'type': 'loss', 'content': 0.09496772289276123, 'timestamp': '2025-09-10 02:48:07.557840', 'step': 18641, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 2847885091968}, 'timestamp': '2025-09-10 02:48:07.594810', 'step': 18641, 'epoch': 3} {'type': 'loss', 'content': 0.04094555601477623, 'timestamp': '2025-09-10 02:48:07.597387', 'step': 18642, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 2847885091968}, 'timestamp': '2025-09-10 02:48:07.627517', 'step': 18642, 'epoch': 3} {'type': 'loss', 'content': 0.090330570936203, 'timestamp': '2025-09-10 02:48:07.630522', 'step': 18643, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 2847885091968}, 'timestamp': '2025-09-10 02:48:07.660747', 'step': 18643, 'epoch': 3} {'type': 'loss', 'content': 0.05506884306669235, 'timestamp': '2025-09-10 02:48:07.684617', 'step': 18644, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 2847885091968}, 'timestamp': '2025-09-10 02:48:07.714894', 'step': 18644, 'epoch': 3} {'type': 'loss', 'content': 0.1307094693183899, 'timestamp': '2025-09-10 02:48:07.717647', 'step': 18645, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 128], 'flops': 3797092544000}, 'timestamp': '2025-09-10 02:48:07.748373', 'step': 18645, 'epoch': 3} {'type': 'loss', 'content': 0.15142980217933655, 'timestamp': '2025-09-10 02:48:07.750422', 'step': 18646, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 2847885091968}, 'timestamp': '2025-09-10 02:48:07.780388', 'step': 18646, 'epoch': 3} {'type': 'loss', 'content': 0.09062241017818451, 'timestamp': '2025-09-10 02:48:07.782554', 'step': 18647, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 3322488817984}, 'timestamp': '2025-09-10 02:48:07.814909', 'step': 18647, 'epoch': 3} {'type': 'loss', 'content': 0.07399950921535492, 'timestamp': '2025-09-10 02:48:07.839080', 'step': 18648, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 144], 'flops': 4271696270016}, 'timestamp': '2025-09-10 02:48:07.877210', 'step': 18648, 'epoch': 3} {'type': 'loss', 'content': 0.018407946452498436, 'timestamp': '2025-09-10 02:48:07.879589', 'step': 18649, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 128], 'flops': 3797092544000}, 'timestamp': '2025-09-10 02:48:07.909785', 'step': 18649, 'epoch': 3} {'type': 'loss', 'content': 0.10018540173768997, 'timestamp': '2025-09-10 02:48:07.912353', 'step': 18650, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 2847885091968}, 'timestamp': '2025-09-10 02:48:07.942505', 'step': 18650, 'epoch': 3} {'type': 'loss', 'content': 0.06360498815774918, 'timestamp': '2025-09-10 02:48:07.945138', 'step': 18651, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 2847885091968}, 'timestamp': '2025-09-10 02:48:07.977135', 'step': 18651, 'epoch': 3} {'type': 'loss', 'content': 0.18269146978855133, 'timestamp': '2025-09-10 02:48:08.001911', 'step': 18652, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 3322488817984}, 'timestamp': '2025-09-10 02:48:08.042599', 'step': 18652, 'epoch': 3} {'type': 'loss', 'content': 0.13431143760681152, 'timestamp': '2025-09-10 02:48:08.045132', 'step': 18653, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 3322488817984}, 'timestamp': '2025-09-10 02:48:08.075858', 'step': 18653, 'epoch': 3} {'type': 'loss', 'content': 0.04986056685447693, 'timestamp': '2025-09-10 02:48:08.077982', 'step': 18654, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 80], 'flops': 2373281365952}, 'timestamp': '2025-09-10 02:48:08.107379', 'step': 18654, 'epoch': 3} {'type': 'loss', 'content': 0.03926416113972664, 'timestamp': '2025-09-10 02:48:08.109544', 'step': 18655, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 3322488817984}, 'timestamp': '2025-09-10 02:48:08.139610', 'step': 18655, 'epoch': 3} {'type': 'loss', 'content': 0.11089618504047394, 'timestamp': '2025-09-10 02:48:08.163605', 'step': 18656, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 3322488817984}, 'timestamp': '2025-09-10 02:48:08.196467', 'step': 18656, 'epoch': 3} {'type': 'loss', 'content': 0.06718460470438004, 'timestamp': '2025-09-10 02:48:08.198909', 'step': 18657, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 2847885091968}, 'timestamp': '2025-09-10 02:48:08.228871', 'step': 18657, 'epoch': 3} {'type': 'loss', 'content': 0.010452382266521454, 'timestamp': '2025-09-10 02:48:08.230939', 'step': 18658, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 144], 'flops': 4271696270016}, 'timestamp': '2025-09-10 02:48:08.268006', 'step': 18658, 'epoch': 3} {'type': 'loss', 'content': 0.07511045783758163, 'timestamp': '2025-09-10 02:48:08.270478', 'step': 18659, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 128], 'flops': 3797092544000}, 'timestamp': '2025-09-10 02:48:08.301596', 'step': 18659, 'epoch': 3} {'type': 'loss', 'content': 0.08081257343292236, 'timestamp': '2025-09-10 02:48:08.325165', 'step': 18660, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 128], 'flops': 3797092544000}, 'timestamp': '2025-09-10 02:48:08.357087', 'step': 18660, 'epoch': 3} {'type': 'loss', 'content': 0.1028493270277977, 'timestamp': '2025-09-10 02:48:08.359913', 'step': 18661, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 128], 'flops': 3797092544000}, 'timestamp': '2025-09-10 02:48:08.389659', 'step': 18661, 'epoch': 3} {'type': 'loss', 'content': 0.01903277449309826, 'timestamp': '2025-09-10 02:48:08.391917', 'step': 18662, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 2847885091968}, 'timestamp': '2025-09-10 02:48:08.422023', 'step': 18662, 'epoch': 3} {'type': 'loss', 'content': 0.09425336867570877, 'timestamp': '2025-09-10 02:48:08.424242', 'step': 18663, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 128], 'flops': 3797092544000}, 'timestamp': '2025-09-10 02:48:08.461815', 'step': 18663, 'epoch': 3} {'type': 'loss', 'content': 0.12056479603052139, 'timestamp': '2025-09-10 02:48:08.487347', 'step': 18664, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 2847885091968}, 'timestamp': '2025-09-10 02:48:08.521619', 'step': 18664, 'epoch': 3} {'type': 'loss', 'content': 0.08944518119096756, 'timestamp': '2025-09-10 02:48:08.523902', 'step': 18665, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 3322488817984}, 'timestamp': '2025-09-10 02:48:08.554641', 'step': 18665, 'epoch': 3} {'type': 'loss', 'content': 0.05884061008691788, 'timestamp': '2025-09-10 02:48:08.558256', 'step': 18666, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 3322488817984}, 'timestamp': '2025-09-10 02:48:08.599175', 'step': 18666, 'epoch': 3} {'type': 'loss', 'content': 0.04685073718428612, 'timestamp': '2025-09-10 02:48:08.603382', 'step': 18667, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 2847885091968}, 'timestamp': '2025-09-10 02:48:08.635040', 'step': 18667, 'epoch': 3} {'type': 'loss', 'content': 0.071722611784935, 'timestamp': '2025-09-10 02:48:08.658797', 'step': 18668, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 128], 'flops': 3797092544000}, 'timestamp': '2025-09-10 02:48:08.689070', 'step': 18668, 'epoch': 3} {'type': 'loss', 'content': 0.1280824840068817, 'timestamp': '2025-09-10 02:48:08.691438', 'step': 18669, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 3322488817984}, 'timestamp': '2025-09-10 02:48:08.724130', 'step': 18669, 'epoch': 3} {'type': 'loss', 'content': 0.08542173355817795, 'timestamp': '2025-09-10 02:48:08.726417', 'step': 18670, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 128], 'flops': 3797092544000}, 'timestamp': '2025-09-10 02:48:08.756403', 'step': 18670, 'epoch': 3} {'type': 'loss', 'content': 0.07475455850362778, 'timestamp': '2025-09-10 02:48:08.760427', 'step': 18671, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 3322488817984}, 'timestamp': '2025-09-10 02:48:08.790776', 'step': 18671, 'epoch': 3} {'type': 'loss', 'content': 0.04310975223779678, 'timestamp': '2025-09-10 02:48:08.814595', 'step': 18672, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 128], 'flops': 3797092544000}, 'timestamp': '2025-09-10 02:48:08.845879', 'step': 18672, 'epoch': 3} {'type': 'loss', 'content': 0.06081370636820793, 'timestamp': '2025-09-10 02:48:08.848211', 'step': 18673, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 128], 'flops': 3797092544000}, 'timestamp': '2025-09-10 02:48:08.878703', 'step': 18673, 'epoch': 3} {'type': 'loss', 'content': 0.07700738310813904, 'timestamp': '2025-09-10 02:48:08.882906', 'step': 18674, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 144], 'flops': 4271696270016}, 'timestamp': '2025-09-10 02:48:08.913461', 'step': 18674, 'epoch': 3} {'type': 'loss', 'content': 0.05271846428513527, 'timestamp': '2025-09-10 02:48:08.916363', 'step': 18675, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 2847885091968}, 'timestamp': '2025-09-10 02:48:08.948453', 'step': 18675, 'epoch': 3} {'type': 'loss', 'content': 0.07854156941175461, 'timestamp': '2025-09-10 02:48:08.975102', 'step': 18676, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 128], 'flops': 3797092544000}, 'timestamp': '2025-09-10 02:48:09.013798', 'step': 18676, 'epoch': 3} {'type': 'loss', 'content': 0.05327591672539711, 'timestamp': '2025-09-10 02:48:09.017498', 'step': 18677, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 192], 'flops': 5695507448064}, 'timestamp': '2025-09-10 02:48:09.051694', 'step': 18677, 'epoch': 3} {'type': 'loss', 'content': 0.1168607622385025, 'timestamp': '2025-09-10 02:48:09.056219', 'step': 18678, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 3322488817984}, 'timestamp': '2025-09-10 02:48:09.087575', 'step': 18678, 'epoch': 3} {'type': 'loss', 'content': 0.04605667293071747, 'timestamp': '2025-09-10 02:48:09.090091', 'step': 18679, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 2847885091968}, 'timestamp': '2025-09-10 02:48:09.120316', 'step': 18679, 'epoch': 3} {'type': 'loss', 'content': 0.0369722917675972, 'timestamp': '2025-09-10 02:48:09.143749', 'step': 18680, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 2847885091968}, 'timestamp': '2025-09-10 02:48:09.176301', 'step': 18680, 'epoch': 3} {'type': 'loss', 'content': 0.03446877375245094, 'timestamp': '2025-09-10 02:48:09.178412', 'step': 18681, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 160], 'flops': 4746299996032}, 'timestamp': '2025-09-10 02:48:09.208567', 'step': 18681, 'epoch': 3} {'type': 'loss', 'content': 0.06459138542413712, 'timestamp': '2025-09-10 02:48:09.211422', 'step': 18682, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 3322488817984}, 'timestamp': '2025-09-10 02:48:09.241554', 'step': 18682, 'epoch': 3} {'type': 'loss', 'content': 0.060131534934043884, 'timestamp': '2025-09-10 02:48:09.244158', 'step': 18683, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 128], 'flops': 3797092544000}, 'timestamp': '2025-09-10 02:48:09.277544', 'step': 18683, 'epoch': 3} {'type': 'loss', 'content': 0.06463360041379929, 'timestamp': '2025-09-10 02:48:09.301187', 'step': 18684, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 80], 'flops': 2373281365952}, 'timestamp': '2025-09-10 02:48:09.331515', 'step': 18684, 'epoch': 3} {'type': 'loss', 'content': 0.021023934707045555, 'timestamp': '2025-09-10 02:48:09.334417', 'step': 18685, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 128], 'flops': 3797092544000}, 'timestamp': '2025-09-10 02:48:09.367998', 'step': 18685, 'epoch': 3} {'type': 'loss', 'content': 0.048610102385282516, 'timestamp': '2025-09-10 02:48:09.370796', 'step': 18686, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 3322488817984}, 'timestamp': '2025-09-10 02:48:09.410970', 'step': 18686, 'epoch': 3} {'type': 'loss', 'content': 0.03371569514274597, 'timestamp': '2025-09-10 02:48:09.413634', 'step': 18687, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 2847885091968}, 'timestamp': '2025-09-10 02:48:09.444217', 'step': 18687, 'epoch': 3} {'type': 'loss', 'content': 0.13915084302425385, 'timestamp': '2025-09-10 02:48:09.470444', 'step': 18688, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 2847885091968}, 'timestamp': '2025-09-10 02:48:09.507914', 'step': 18688, 'epoch': 3} {'type': 'loss', 'content': 0.0730111375451088, 'timestamp': '2025-09-10 02:48:09.511521', 'step': 18689, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 160], 'flops': 4746299996032}, 'timestamp': '2025-09-10 02:48:09.542555', 'step': 18689, 'epoch': 3} {'type': 'loss', 'content': 0.03814692795276642, 'timestamp': '2025-09-10 02:48:09.549912', 'step': 18690, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 3322488817984}, 'timestamp': '2025-09-10 02:48:09.583947', 'step': 18690, 'epoch': 3} {'type': 'loss', 'content': 0.03183085471391678, 'timestamp': '2025-09-10 02:48:09.586545', 'step': 18691, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 3322488817984}, 'timestamp': '2025-09-10 02:48:09.619366', 'step': 18691, 'epoch': 3} {'type': 'loss', 'content': 0.07709673792123795, 'timestamp': '2025-09-10 02:48:09.643054', 'step': 18692, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 3322488817984}, 'timestamp': '2025-09-10 02:48:09.675572', 'step': 18692, 'epoch': 3} {'type': 'loss', 'content': 0.059252940118312836, 'timestamp': '2025-09-10 02:48:09.678327', 'step': 18693, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 128], 'flops': 3797092544000}, 'timestamp': '2025-09-10 02:48:09.708918', 'step': 18693, 'epoch': 3} {'type': 'loss', 'content': 0.10258642584085464, 'timestamp': '2025-09-10 02:48:09.711512', 'step': 18694, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 3322488817984}, 'timestamp': '2025-09-10 02:48:09.741789', 'step': 18694, 'epoch': 3} {'type': 'loss', 'content': 0.018268244341015816, 'timestamp': '2025-09-10 02:48:09.744034', 'step': 18695, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 2847885091968}, 'timestamp': '2025-09-10 02:48:09.777401', 'step': 18695, 'epoch': 3} {'type': 'loss', 'content': 0.024661289528012276, 'timestamp': '2025-09-10 02:48:09.800750', 'step': 18696, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 128], 'flops': 3797092544000}, 'timestamp': '2025-09-10 02:48:09.832341', 'step': 18696, 'epoch': 3} {'type': 'loss', 'content': 0.0808074101805687, 'timestamp': '2025-09-10 02:48:09.835240', 'step': 18697, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 3322488817984}, 'timestamp': '2025-09-10 02:48:09.865406', 'step': 18697, 'epoch': 3} {'type': 'loss', 'content': 0.051277633756399155, 'timestamp': '2025-09-10 02:48:09.867908', 'step': 18698, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 128], 'flops': 3797092544000}, 'timestamp': '2025-09-10 02:48:09.899316', 'step': 18698, 'epoch': 3} {'type': 'loss', 'content': 0.028487958014011383, 'timestamp': '2025-09-10 02:48:09.902124', 'step': 18699, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 128], 'flops': 3797092544000}, 'timestamp': '2025-09-10 02:48:09.933394', 'step': 18699, 'epoch': 3} {'type': 'loss', 'content': 0.051226526498794556, 'timestamp': '2025-09-10 02:48:09.958011', 'step': 18700, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 128], 'flops': 3797092544000}, 'timestamp': '2025-09-10 02:48:09.989246', 'step': 18700, 'epoch': 3} {'type': 'loss', 'content': 0.06565312296152115, 'timestamp': '2025-09-10 02:48:09.991915', 'step': 18701, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 2847885091968}, 'timestamp': '2025-09-10 02:48:10.023362', 'step': 18701, 'epoch': 3} {'type': 'loss', 'content': 0.03635573759675026, 'timestamp': '2025-09-10 02:48:10.025966', 'step': 18702, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 3322488817984}, 'timestamp': '2025-09-10 02:48:10.062528', 'step': 18702, 'epoch': 3} {'type': 'loss', 'content': 0.07198487222194672, 'timestamp': '2025-09-10 02:48:10.065469', 'step': 18703, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 3322488817984}, 'timestamp': '2025-09-10 02:48:10.097445', 'step': 18703, 'epoch': 3} {'type': 'loss', 'content': 0.0420905277132988, 'timestamp': '2025-09-10 02:48:10.122484', 'step': 18704, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 128], 'flops': 3797092544000}, 'timestamp': '2025-09-10 02:48:10.154247', 'step': 18704, 'epoch': 3} {'type': 'loss', 'content': 0.09026452898979187, 'timestamp': '2025-09-10 02:48:10.156235', 'step': 18705, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 3322488817984}, 'timestamp': '2025-09-10 02:48:10.190427', 'step': 18705, 'epoch': 3} {'type': 'loss', 'content': 0.030023086816072464, 'timestamp': '2025-09-10 02:48:10.192813', 'step': 18706, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 3322488817984}, 'timestamp': '2025-09-10 02:48:10.223476', 'step': 18706, 'epoch': 3} {'type': 'loss', 'content': 0.05940943956375122, 'timestamp': '2025-09-10 02:48:10.226008', 'step': 18707, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 176], 'flops': 5220903722048}, 'timestamp': '2025-09-10 02:48:10.256530', 'step': 18707, 'epoch': 3} {'type': 'loss', 'content': 0.11610844731330872, 'timestamp': '2025-09-10 02:48:10.281443', 'step': 18708, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 80], 'flops': 2373281365952}, 'timestamp': '2025-09-10 02:48:10.312139', 'step': 18708, 'epoch': 3} {'type': 'loss', 'content': 0.05550816282629967, 'timestamp': '2025-09-10 02:48:10.314386', 'step': 18709, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 144], 'flops': 4271696270016}, 'timestamp': '2025-09-10 02:48:10.345312', 'step': 18709, 'epoch': 3} {'type': 'loss', 'content': 0.04128225892782211, 'timestamp': '2025-09-10 02:48:10.347997', 'step': 18710, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 144], 'flops': 4271696270016}, 'timestamp': '2025-09-10 02:48:10.378691', 'step': 18710, 'epoch': 3} {'type': 'loss', 'content': 0.06870297342538834, 'timestamp': '2025-09-10 02:48:10.382555', 'step': 18711, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 2847885091968}, 'timestamp': '2025-09-10 02:48:10.413372', 'step': 18711, 'epoch': 3} {'type': 'loss', 'content': 0.04120820388197899, 'timestamp': '2025-09-10 02:48:10.437012', 'step': 18712, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 128], 'flops': 3797092544000}, 'timestamp': '2025-09-10 02:48:10.467056', 'step': 18712, 'epoch': 3} {'type': 'loss', 'content': 0.08346500992774963, 'timestamp': '2025-09-10 02:48:10.469353', 'step': 18713, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 128], 'flops': 3797092544000}, 'timestamp': '2025-09-10 02:48:10.499880', 'step': 18713, 'epoch': 3} {'type': 'loss', 'content': 0.0832042321562767, 'timestamp': '2025-09-10 02:48:10.502374', 'step': 18714, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 3322488817984}, 'timestamp': '2025-09-10 02:48:10.533027', 'step': 18714, 'epoch': 3} {'type': 'loss', 'content': 0.1151147335767746, 'timestamp': '2025-09-10 02:48:10.535433', 'step': 18715, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 2847885091968}, 'timestamp': '2025-09-10 02:48:10.567604', 'step': 18715, 'epoch': 3} {'type': 'loss', 'content': 0.038103923201560974, 'timestamp': '2025-09-10 02:48:10.594711', 'step': 18716, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 3322488817984}, 'timestamp': '2025-09-10 02:48:10.624867', 'step': 18716, 'epoch': 3} {'type': 'loss', 'content': 0.01284710131585598, 'timestamp': '2025-09-10 02:48:10.627341', 'step': 18717, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 128], 'flops': 3797092544000}, 'timestamp': '2025-09-10 02:48:10.658477', 'step': 18717, 'epoch': 3} {'type': 'loss', 'content': 0.025685595348477364, 'timestamp': '2025-09-10 02:48:10.660841', 'step': 18718, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 2847885091968}, 'timestamp': '2025-09-10 02:48:10.691626', 'step': 18718, 'epoch': 3} {'type': 'loss', 'content': 0.10573450475931168, 'timestamp': '2025-09-10 02:48:10.694480', 'step': 18719, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 2847885091968}, 'timestamp': '2025-09-10 02:48:10.726137', 'step': 18719, 'epoch': 3} {'type': 'loss', 'content': 0.06451596319675446, 'timestamp': '2025-09-10 02:48:10.750012', 'step': 18720, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 3322488817984}, 'timestamp': '2025-09-10 02:48:10.781256', 'step': 18720, 'epoch': 3} {'type': 'loss', 'content': 0.03424627333879471, 'timestamp': '2025-09-10 02:48:10.784286', 'step': 18721, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 3322488817984}, 'timestamp': '2025-09-10 02:48:10.818033', 'step': 18721, 'epoch': 3} {'type': 'loss', 'content': 0.05963751673698425, 'timestamp': '2025-09-10 02:48:10.820533', 'step': 18722, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 3322488817984}, 'timestamp': '2025-09-10 02:48:10.850582', 'step': 18722, 'epoch': 3} {'type': 'loss', 'content': 0.03644998371601105, 'timestamp': '2025-09-10 02:48:10.852746', 'step': 18723, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 2847885091968}, 'timestamp': '2025-09-10 02:48:10.883267', 'step': 18723, 'epoch': 3} {'type': 'loss', 'content': 0.14795254170894623, 'timestamp': '2025-09-10 02:48:10.907304', 'step': 18724, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 2847885091968}, 'timestamp': '2025-09-10 02:48:10.937646', 'step': 18724, 'epoch': 3} {'type': 'loss', 'content': 0.0249562356621027, 'timestamp': '2025-09-10 02:48:10.940239', 'step': 18725, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 2847885091968}, 'timestamp': '2025-09-10 02:48:10.971200', 'step': 18725, 'epoch': 3} {'type': 'loss', 'content': 0.03975750133395195, 'timestamp': '2025-09-10 02:48:10.973413', 'step': 18726, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 2847885091968}, 'timestamp': '2025-09-10 02:48:11.003483', 'step': 18726, 'epoch': 3} {'type': 'loss', 'content': 0.04755030572414398, 'timestamp': '2025-09-10 02:48:11.006312', 'step': 18727, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 128], 'flops': 3797092544000}, 'timestamp': '2025-09-10 02:48:11.040385', 'step': 18727, 'epoch': 3} {'type': 'loss', 'content': 0.007783561013638973, 'timestamp': '2025-09-10 02:48:11.064558', 'step': 18728, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 3322488817984}, 'timestamp': '2025-09-10 02:48:11.094786', 'step': 18728, 'epoch': 3} {'type': 'loss', 'content': 0.08532199263572693, 'timestamp': '2025-09-10 02:48:11.097183', 'step': 18729, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 128], 'flops': 3797092544000}, 'timestamp': '2025-09-10 02:48:11.127257', 'step': 18729, 'epoch': 3} {'type': 'loss', 'content': 0.08870534598827362, 'timestamp': '2025-09-10 02:48:11.129684', 'step': 18730, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 128], 'flops': 3797092544000}, 'timestamp': '2025-09-10 02:48:11.161238', 'step': 18730, 'epoch': 3} {'type': 'loss', 'content': 0.0972835123538971, 'timestamp': '2025-09-10 02:48:11.163562', 'step': 18731, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 3322488817984}, 'timestamp': '2025-09-10 02:48:11.193358', 'step': 18731, 'epoch': 3} {'type': 'loss', 'content': 0.08596319705247879, 'timestamp': '2025-09-10 02:48:11.217159', 'step': 18732, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 2847885091968}, 'timestamp': '2025-09-10 02:48:11.249443', 'step': 18732, 'epoch': 3} {'type': 'loss', 'content': 0.06788544356822968, 'timestamp': '2025-09-10 02:48:11.251941', 'step': 18733, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 3322488817984}, 'timestamp': '2025-09-10 02:48:11.282019', 'step': 18733, 'epoch': 3} {'type': 'loss', 'content': 0.010145852342247963, 'timestamp': '2025-09-10 02:48:11.284427', 'step': 18734, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 3322488817984}, 'timestamp': '2025-09-10 02:48:11.316986', 'step': 18734, 'epoch': 3} {'type': 'loss', 'content': 0.03903115540742874, 'timestamp': '2025-09-10 02:48:11.319444', 'step': 18735, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 144], 'flops': 4271696270016}, 'timestamp': '2025-09-10 02:48:11.350058', 'step': 18735, 'epoch': 3} {'type': 'loss', 'content': 0.06306007504463196, 'timestamp': '2025-09-10 02:48:11.373642', 'step': 18736, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 2847885091968}, 'timestamp': '2025-09-10 02:48:11.404141', 'step': 18736, 'epoch': 3} {'type': 'loss', 'content': 0.09474281221628189, 'timestamp': '2025-09-10 02:48:11.406595', 'step': 18737, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 144], 'flops': 4271696270016}, 'timestamp': '2025-09-10 02:48:11.436191', 'step': 18737, 'epoch': 3} {'type': 'loss', 'content': 0.016369884833693504, 'timestamp': '2025-09-10 02:48:11.439028', 'step': 18738, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 128], 'flops': 3797092544000}, 'timestamp': '2025-09-10 02:48:11.469570', 'step': 18738, 'epoch': 3} {'type': 'loss', 'content': 0.08294815570116043, 'timestamp': '2025-09-10 02:48:11.472033', 'step': 18739, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 3322488817984}, 'timestamp': '2025-09-10 02:48:11.501720', 'step': 18739, 'epoch': 3} {'type': 'loss', 'content': 0.03441642224788666, 'timestamp': '2025-09-10 02:48:11.525941', 'step': 18740, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 3322488817984}, 'timestamp': '2025-09-10 02:48:11.556130', 'step': 18740, 'epoch': 3} {'type': 'loss', 'content': 0.05632282793521881, 'timestamp': '2025-09-10 02:48:11.558611', 'step': 18741, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 3322488817984}, 'timestamp': '2025-09-10 02:48:11.589955', 'step': 18741, 'epoch': 3} {'type': 'loss', 'content': 0.09179098159074783, 'timestamp': '2025-09-10 02:48:11.592203', 'step': 18742, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 3322488817984}, 'timestamp': '2025-09-10 02:48:11.624068', 'step': 18742, 'epoch': 3} {'type': 'loss', 'content': 0.0223077405244112, 'timestamp': '2025-09-10 02:48:11.626500', 'step': 18743, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 3322488817984}, 'timestamp': '2025-09-10 02:48:11.656699', 'step': 18743, 'epoch': 3} {'type': 'loss', 'content': 0.0798417329788208, 'timestamp': '2025-09-10 02:48:11.680714', 'step': 18744, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 2847885091968}, 'timestamp': '2025-09-10 02:48:11.710732', 'step': 18744, 'epoch': 3} {'type': 'loss', 'content': 0.027876660227775574, 'timestamp': '2025-09-10 02:48:11.713702', 'step': 18745, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 160], 'flops': 4746299996032}, 'timestamp': '2025-09-10 02:48:11.744287', 'step': 18745, 'epoch': 3} {'type': 'loss', 'content': 0.12603437900543213, 'timestamp': '2025-09-10 02:48:11.746986', 'step': 18746, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 3322488817984}, 'timestamp': '2025-09-10 02:48:11.776733', 'step': 18746, 'epoch': 3} {'type': 'loss', 'content': 0.015942055732011795, 'timestamp': '2025-09-10 02:48:11.779199', 'step': 18747, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 2847885091968}, 'timestamp': '2025-09-10 02:48:11.809488', 'step': 18747, 'epoch': 3} {'type': 'loss', 'content': 0.1267050951719284, 'timestamp': '2025-09-10 02:48:11.833350', 'step': 18748, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 128], 'flops': 3797092544000}, 'timestamp': '2025-09-10 02:48:11.865249', 'step': 18748, 'epoch': 3} {'type': 'loss', 'content': 0.0649469643831253, 'timestamp': '2025-09-10 02:48:11.867795', 'step': 18749, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 2847885091968}, 'timestamp': '2025-09-10 02:48:11.899471', 'step': 18749, 'epoch': 3} {'type': 'loss', 'content': 0.06638117134571075, 'timestamp': '2025-09-10 02:48:11.902248', 'step': 18750, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 2847885091968}, 'timestamp': '2025-09-10 02:48:11.932285', 'step': 18750, 'epoch': 3} {'type': 'loss', 'content': 0.0309689212590456, 'timestamp': '2025-09-10 02:48:11.934993', 'step': 18751, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 128], 'flops': 3797092544000}, 'timestamp': '2025-09-10 02:48:11.966669', 'step': 18751, 'epoch': 3} {'type': 'loss', 'content': 0.09982658177614212, 'timestamp': '2025-09-10 02:48:11.990164', 'step': 18752, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 2847885091968}, 'timestamp': '2025-09-10 02:48:12.020871', 'step': 18752, 'epoch': 3} {'type': 'loss', 'content': 0.06842248141765594, 'timestamp': '2025-09-10 02:48:12.023160', 'step': 18753, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 2847885091968}, 'timestamp': '2025-09-10 02:48:12.053057', 'step': 18753, 'epoch': 3} {'type': 'loss', 'content': 0.06932445615530014, 'timestamp': '2025-09-10 02:48:12.055363', 'step': 18754, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 3322488817984}, 'timestamp': '2025-09-10 02:48:12.085575', 'step': 18754, 'epoch': 3} {'type': 'loss', 'content': 0.05362563207745552, 'timestamp': '2025-09-10 02:48:12.088261', 'step': 18755, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 3322488817984}, 'timestamp': '2025-09-10 02:48:12.119631', 'step': 18755, 'epoch': 3} {'type': 'loss', 'content': 0.08807078748941422, 'timestamp': '2025-09-10 02:48:12.143520', 'step': 18756, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 2847885091968}, 'timestamp': '2025-09-10 02:48:12.176283', 'step': 18756, 'epoch': 3} {'type': 'loss', 'content': 0.053807057440280914, 'timestamp': '2025-09-10 02:48:12.178678', 'step': 18757, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 3322488817984}, 'timestamp': '2025-09-10 02:48:12.211466', 'step': 18757, 'epoch': 3} {'type': 'loss', 'content': 0.07331748306751251, 'timestamp': '2025-09-10 02:48:12.213721', 'step': 18758, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 2847885091968}, 'timestamp': '2025-09-10 02:48:12.244784', 'step': 18758, 'epoch': 3} {'type': 'loss', 'content': 0.04246189817786217, 'timestamp': '2025-09-10 02:48:12.247171', 'step': 18759, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 2847885091968}, 'timestamp': '2025-09-10 02:48:12.277151', 'step': 18759, 'epoch': 3} {'type': 'loss', 'content': 0.08126094937324524, 'timestamp': '2025-09-10 02:48:12.300854', 'step': 18760, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 144], 'flops': 4271696270016}, 'timestamp': '2025-09-10 02:48:12.334347', 'step': 18760, 'epoch': 3} {'type': 'loss', 'content': 0.05365295335650444, 'timestamp': '2025-09-10 02:48:12.336787', 'step': 18761, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 3322488817984}, 'timestamp': '2025-09-10 02:48:12.367305', 'step': 18761, 'epoch': 3} {'type': 'loss', 'content': 0.0574171356856823, 'timestamp': '2025-09-10 02:48:12.369904', 'step': 18762, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 2847885091968}, 'timestamp': '2025-09-10 02:48:12.400326', 'step': 18762, 'epoch': 3} {'type': 'loss', 'content': 0.013763176277279854, 'timestamp': '2025-09-10 02:48:12.403135', 'step': 18763, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 128], 'flops': 3797092544000}, 'timestamp': '2025-09-10 02:48:12.437026', 'step': 18763, 'epoch': 3} {'type': 'loss', 'content': 0.13044866919517517, 'timestamp': '2025-09-10 02:48:12.461474', 'step': 18764, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 3322488817984}, 'timestamp': '2025-09-10 02:48:12.491952', 'step': 18764, 'epoch': 3} {'type': 'loss', 'content': 0.11371420323848724, 'timestamp': '2025-09-10 02:48:12.494418', 'step': 18765, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 128], 'flops': 3797092544000}, 'timestamp': '2025-09-10 02:48:12.524883', 'step': 18765, 'epoch': 3} {'type': 'loss', 'content': 0.03211908042430878, 'timestamp': '2025-09-10 02:48:12.528727', 'step': 18766, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 2847885091968}, 'timestamp': '2025-09-10 02:48:12.559451', 'step': 18766, 'epoch': 3} {'type': 'loss', 'content': 0.11525150388479233, 'timestamp': '2025-09-10 02:48:12.561834', 'step': 18767, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 3322488817984}, 'timestamp': '2025-09-10 02:48:12.592575', 'step': 18767, 'epoch': 3} {'type': 'loss', 'content': 0.0836675688624382, 'timestamp': '2025-09-10 02:48:12.620583', 'step': 18768, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 3322488817984}, 'timestamp': '2025-09-10 02:48:12.650935', 'step': 18768, 'epoch': 3} {'type': 'loss', 'content': 0.0818910300731659, 'timestamp': '2025-09-10 02:48:12.653678', 'step': 18769, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 3322488817984}, 'timestamp': '2025-09-10 02:48:12.685129', 'step': 18769, 'epoch': 3} {'type': 'loss', 'content': 0.05317720025777817, 'timestamp': '2025-09-10 02:48:12.687528', 'step': 18770, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 2847885091968}, 'timestamp': '2025-09-10 02:48:12.717318', 'step': 18770, 'epoch': 3} {'type': 'loss', 'content': 0.05837756767868996, 'timestamp': '2025-09-10 02:48:12.719945', 'step': 18771, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 3322488817984}, 'timestamp': '2025-09-10 02:48:12.750382', 'step': 18771, 'epoch': 3} {'type': 'loss', 'content': 0.05211959779262543, 'timestamp': '2025-09-10 02:48:12.774167', 'step': 18772, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 2847885091968}, 'timestamp': '2025-09-10 02:48:12.805463', 'step': 18772, 'epoch': 3} {'type': 'loss', 'content': 0.13280627131462097, 'timestamp': '2025-09-10 02:48:12.808187', 'step': 18773, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 144], 'flops': 4271696270016}, 'timestamp': '2025-09-10 02:48:12.839770', 'step': 18773, 'epoch': 3} {'type': 'loss', 'content': 0.049018941819667816, 'timestamp': '2025-09-10 02:48:12.842130', 'step': 18774, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 128], 'flops': 3797092544000}, 'timestamp': '2025-09-10 02:48:12.872628', 'step': 18774, 'epoch': 3} {'type': 'loss', 'content': 0.06964464485645294, 'timestamp': '2025-09-10 02:48:12.875220', 'step': 18775, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 3322488817984}, 'timestamp': '2025-09-10 02:48:12.906448', 'step': 18775, 'epoch': 3} {'type': 'loss', 'content': 0.08955905586481094, 'timestamp': '2025-09-10 02:48:12.930213', 'step': 18776, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 3322488817984}, 'timestamp': '2025-09-10 02:48:12.961931', 'step': 18776, 'epoch': 3} {'type': 'loss', 'content': 0.1015981137752533, 'timestamp': '2025-09-10 02:48:12.964806', 'step': 18777, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 3322488817984}, 'timestamp': '2025-09-10 02:48:12.996131', 'step': 18777, 'epoch': 3} {'type': 'loss', 'content': 0.08468210697174072, 'timestamp': '2025-09-10 02:48:12.998678', 'step': 18778, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 2847885091968}, 'timestamp': '2025-09-10 02:48:13.029275', 'step': 18778, 'epoch': 3} {'type': 'loss', 'content': 0.04239824414253235, 'timestamp': '2025-09-10 02:48:13.031547', 'step': 18779, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 144], 'flops': 4271696270016}, 'timestamp': '2025-09-10 02:48:13.061663', 'step': 18779, 'epoch': 3} {'type': 'loss', 'content': 0.04182415455579758, 'timestamp': '2025-09-10 02:48:13.085671', 'step': 18780, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 3322488817984}, 'timestamp': '2025-09-10 02:48:13.116276', 'step': 18780, 'epoch': 3} {'type': 'loss', 'content': 0.09618107974529266, 'timestamp': '2025-09-10 02:48:13.118580', 'step': 18781, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 3322488817984}, 'timestamp': '2025-09-10 02:48:13.149144', 'step': 18781, 'epoch': 3} {'type': 'loss', 'content': 0.06610900908708572, 'timestamp': '2025-09-10 02:48:13.151601', 'step': 18782, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 144], 'flops': 4271696270016}, 'timestamp': '2025-09-10 02:48:13.182858', 'step': 18782, 'epoch': 3} {'type': 'loss', 'content': 0.046437229961156845, 'timestamp': '2025-09-10 02:48:13.186558', 'step': 18783, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 80], 'flops': 2373281365952}, 'timestamp': '2025-09-10 02:48:13.218384', 'step': 18783, 'epoch': 3} {'type': 'loss', 'content': 0.032316211611032486, 'timestamp': '2025-09-10 02:48:13.241837', 'step': 18784, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 144], 'flops': 4271696270016}, 'timestamp': '2025-09-10 02:48:13.272720', 'step': 18784, 'epoch': 3} {'type': 'loss', 'content': 0.15820203721523285, 'timestamp': '2025-09-10 02:48:13.275276', 'step': 18785, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 128], 'flops': 3797092544000}, 'timestamp': '2025-09-10 02:48:13.306030', 'step': 18785, 'epoch': 3} {'type': 'loss', 'content': 0.1442049890756607, 'timestamp': '2025-09-10 02:48:13.308387', 'step': 18786, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 3322488817984}, 'timestamp': '2025-09-10 02:48:13.339741', 'step': 18786, 'epoch': 3} {'type': 'loss', 'content': 0.03990212827920914, 'timestamp': '2025-09-10 02:48:13.346553', 'step': 18787, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 144], 'flops': 4271696270016}, 'timestamp': '2025-09-10 02:48:13.388302', 'step': 18787, 'epoch': 3} {'type': 'loss', 'content': 0.03557370603084564, 'timestamp': '2025-09-10 02:48:13.417097', 'step': 18788, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 3322488817984}, 'timestamp': '2025-09-10 02:48:13.449815', 'step': 18788, 'epoch': 3} {'type': 'loss', 'content': 0.0508963018655777, 'timestamp': '2025-09-10 02:48:13.452131', 'step': 18789, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 2847885091968}, 'timestamp': '2025-09-10 02:48:13.482995', 'step': 18789, 'epoch': 3} {'type': 'loss', 'content': 0.06573055684566498, 'timestamp': '2025-09-10 02:48:13.485577', 'step': 18790, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 3322488817984}, 'timestamp': '2025-09-10 02:48:13.516257', 'step': 18790, 'epoch': 3} {'type': 'loss', 'content': 0.014517208561301231, 'timestamp': '2025-09-10 02:48:13.518856', 'step': 18791, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 80], 'flops': 2373281365952}, 'timestamp': '2025-09-10 02:48:13.551236', 'step': 18791, 'epoch': 3} {'type': 'loss', 'content': 0.08470145612955093, 'timestamp': '2025-09-10 02:48:13.574913', 'step': 18792, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 128], 'flops': 3797092544000}, 'timestamp': '2025-09-10 02:48:13.605471', 'step': 18792, 'epoch': 3} {'type': 'loss', 'content': 0.12321082502603531, 'timestamp': '2025-09-10 02:48:13.607975', 'step': 18793, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 128], 'flops': 3797092544000}, 'timestamp': '2025-09-10 02:48:13.638463', 'step': 18793, 'epoch': 3} {'type': 'loss', 'content': 0.07718940079212189, 'timestamp': '2025-09-10 02:48:13.643080', 'step': 18794, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 128], 'flops': 3797092544000}, 'timestamp': '2025-09-10 02:48:13.673206', 'step': 18794, 'epoch': 3} {'type': 'loss', 'content': 0.01880164071917534, 'timestamp': '2025-09-10 02:48:13.676695', 'step': 18795, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 128], 'flops': 3797092544000}, 'timestamp': '2025-09-10 02:48:13.706709', 'step': 18795, 'epoch': 3} {'type': 'loss', 'content': 0.003965005278587341, 'timestamp': '2025-09-10 02:48:13.730300', 'step': 18796, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 2847885091968}, 'timestamp': '2025-09-10 02:48:13.761180', 'step': 18796, 'epoch': 3} {'type': 'loss', 'content': 0.026654846966266632, 'timestamp': '2025-09-10 02:48:13.763453', 'step': 18797, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 3322488817984}, 'timestamp': '2025-09-10 02:48:13.793290', 'step': 18797, 'epoch': 3} {'type': 'loss', 'content': 0.04259197786450386, 'timestamp': '2025-09-10 02:48:13.795802', 'step': 18798, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 2847885091968}, 'timestamp': '2025-09-10 02:48:13.826165', 'step': 18798, 'epoch': 3} {'type': 'loss', 'content': 0.06816104799509048, 'timestamp': '2025-09-10 02:48:13.829549', 'step': 18799, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 2847885091968}, 'timestamp': '2025-09-10 02:48:13.860464', 'step': 18799, 'epoch': 3} {'type': 'loss', 'content': 0.048528630286455154, 'timestamp': '2025-09-10 02:48:13.884284', 'step': 18800, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 2847885091968}, 'timestamp': '2025-09-10 02:48:13.916742', 'step': 18800, 'epoch': 3} {'type': 'loss', 'content': 0.075502410531044, 'timestamp': '2025-09-10 02:48:13.919409', 'step': 18801, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 128], 'flops': 3797092544000}, 'timestamp': '2025-09-10 02:48:13.950675', 'step': 18801, 'epoch': 3} {'type': 'loss', 'content': 0.07115732133388519, 'timestamp': '2025-09-10 02:48:13.953381', 'step': 18802, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 3322488817984}, 'timestamp': '2025-09-10 02:48:13.985497', 'step': 18802, 'epoch': 3} {'type': 'loss', 'content': 0.028545329347252846, 'timestamp': '2025-09-10 02:48:13.988001', 'step': 18803, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 3322488817984}, 'timestamp': '2025-09-10 02:48:14.018542', 'step': 18803, 'epoch': 3} {'type': 'loss', 'content': 0.11371724307537079, 'timestamp': '2025-09-10 02:48:14.042050', 'step': 18804, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 128], 'flops': 3797092544000}, 'timestamp': '2025-09-10 02:48:14.072854', 'step': 18804, 'epoch': 3} {'type': 'loss', 'content': 0.0959671288728714, 'timestamp': '2025-09-10 02:48:14.075873', 'step': 18805, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 128], 'flops': 3797092544000}, 'timestamp': '2025-09-10 02:48:14.107120', 'step': 18805, 'epoch': 3} {'type': 'loss', 'content': 0.03199559450149536, 'timestamp': '2025-09-10 02:48:14.109461', 'step': 18806, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 2847885091968}, 'timestamp': '2025-09-10 02:48:14.139310', 'step': 18806, 'epoch': 3} {'type': 'loss', 'content': 0.0883968248963356, 'timestamp': '2025-09-10 02:48:14.143312', 'step': 18807, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 128], 'flops': 3797092544000}, 'timestamp': '2025-09-10 02:48:14.173448', 'step': 18807, 'epoch': 3} {'type': 'loss', 'content': 0.08554422855377197, 'timestamp': '2025-09-10 02:48:14.197467', 'step': 18808, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 3322488817984}, 'timestamp': '2025-09-10 02:48:14.228575', 'step': 18808, 'epoch': 3} {'type': 'loss', 'content': 0.09388472884893417, 'timestamp': '2025-09-10 02:48:14.231751', 'step': 18809, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 3322488817984}, 'timestamp': '2025-09-10 02:48:14.262332', 'step': 18809, 'epoch': 3} {'type': 'loss', 'content': 0.060255371034145355, 'timestamp': '2025-09-10 02:48:14.265103', 'step': 18810, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 128], 'flops': 3797092544000}, 'timestamp': '2025-09-10 02:48:14.299398', 'step': 18810, 'epoch': 3} {'type': 'loss', 'content': 0.05910592898726463, 'timestamp': '2025-09-10 02:48:14.301907', 'step': 18811, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 160], 'flops': 4746299996032}, 'timestamp': '2025-09-10 02:48:14.332594', 'step': 18811, 'epoch': 3} {'type': 'loss', 'content': 0.09202300012111664, 'timestamp': '2025-09-10 02:48:14.356454', 'step': 18812, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 128], 'flops': 3797092544000}, 'timestamp': '2025-09-10 02:48:14.386414', 'step': 18812, 'epoch': 3} {'type': 'loss', 'content': 0.0276175606995821, 'timestamp': '2025-09-10 02:48:14.389004', 'step': 18813, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 144], 'flops': 4271696270016}, 'timestamp': '2025-09-10 02:48:14.420673', 'step': 18813, 'epoch': 3} {'type': 'loss', 'content': 0.0799989178776741, 'timestamp': '2025-09-10 02:48:14.423116', 'step': 18814, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 144], 'flops': 4271696270016}, 'timestamp': '2025-09-10 02:48:14.453581', 'step': 18814, 'epoch': 3} {'type': 'loss', 'content': 0.10548657923936844, 'timestamp': '2025-09-10 02:48:14.456084', 'step': 18815, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 3322488817984}, 'timestamp': '2025-09-10 02:48:14.486305', 'step': 18815, 'epoch': 3} {'type': 'loss', 'content': 0.044154975563287735, 'timestamp': '2025-09-10 02:48:14.510168', 'step': 18816, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 3322488817984}, 'timestamp': '2025-09-10 02:48:14.541200', 'step': 18816, 'epoch': 3} {'type': 'loss', 'content': 0.07913178950548172, 'timestamp': '2025-09-10 02:48:14.543759', 'step': 18817, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 128], 'flops': 3797092544000}, 'timestamp': '2025-09-10 02:48:14.574464', 'step': 18817, 'epoch': 3} {'type': 'loss', 'content': 0.10394464433193207, 'timestamp': '2025-09-10 02:48:14.576977', 'step': 18818, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 2847885091968}, 'timestamp': '2025-09-10 02:48:14.607451', 'step': 18818, 'epoch': 3} {'type': 'loss', 'content': 0.07736646384000778, 'timestamp': '2025-09-10 02:48:14.610354', 'step': 18819, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 80], 'flops': 2373281365952}, 'timestamp': '2025-09-10 02:48:14.641706', 'step': 18819, 'epoch': 3} {'type': 'loss', 'content': 0.16217383742332458, 'timestamp': '2025-09-10 02:48:14.665548', 'step': 18820, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 128], 'flops': 3797092544000}, 'timestamp': '2025-09-10 02:48:14.696317', 'step': 18820, 'epoch': 3} {'type': 'loss', 'content': 0.12737613916397095, 'timestamp': '2025-09-10 02:48:14.698899', 'step': 18821, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 128], 'flops': 3797092544000}, 'timestamp': '2025-09-10 02:48:14.729455', 'step': 18821, 'epoch': 3} {'type': 'loss', 'content': 0.033374447375535965, 'timestamp': '2025-09-10 02:48:14.731996', 'step': 18822, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 128], 'flops': 3797092544000}, 'timestamp': '2025-09-10 02:48:14.764376', 'step': 18822, 'epoch': 3} {'type': 'loss', 'content': 0.06053517758846283, 'timestamp': '2025-09-10 02:48:14.766604', 'step': 18823, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 128], 'flops': 3797092544000}, 'timestamp': '2025-09-10 02:48:14.796489', 'step': 18823, 'epoch': 3} {'type': 'loss', 'content': 0.0446225106716156, 'timestamp': '2025-09-10 02:48:14.820005', 'step': 18824, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 3322488817984}, 'timestamp': '2025-09-10 02:48:14.850806', 'step': 18824, 'epoch': 3} {'type': 'loss', 'content': 0.09500816464424133, 'timestamp': '2025-09-10 02:48:14.853226', 'step': 18825, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 2847885091968}, 'timestamp': '2025-09-10 02:48:14.883460', 'step': 18825, 'epoch': 3} {'type': 'loss', 'content': 0.06927503645420074, 'timestamp': '2025-09-10 02:48:14.886074', 'step': 18826, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 144], 'flops': 4271696270016}, 'timestamp': '2025-09-10 02:48:14.917630', 'step': 18826, 'epoch': 3} {'type': 'loss', 'content': 0.07951920479536057, 'timestamp': '2025-09-10 02:48:14.920215', 'step': 18827, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 144], 'flops': 4271696270016}, 'timestamp': '2025-09-10 02:48:14.950722', 'step': 18827, 'epoch': 3} {'type': 'loss', 'content': 0.035800255835056305, 'timestamp': '2025-09-10 02:48:14.974531', 'step': 18828, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 2847885091968}, 'timestamp': '2025-09-10 02:48:15.005141', 'step': 18828, 'epoch': 3} {'type': 'loss', 'content': 0.11996503174304962, 'timestamp': '2025-09-10 02:48:15.007666', 'step': 18829, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 3322488817984}, 'timestamp': '2025-09-10 02:48:15.038294', 'step': 18829, 'epoch': 3} {'type': 'loss', 'content': 0.08898269385099411, 'timestamp': '2025-09-10 02:48:15.042027', 'step': 18830, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 3322488817984}, 'timestamp': '2025-09-10 02:48:15.073210', 'step': 18830, 'epoch': 3} {'type': 'loss', 'content': 0.04535531625151634, 'timestamp': '2025-09-10 02:48:15.075558', 'step': 18831, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 144], 'flops': 4271696270016}, 'timestamp': '2025-09-10 02:48:15.106497', 'step': 18831, 'epoch': 3} {'type': 'loss', 'content': 0.07795042544603348, 'timestamp': '2025-09-10 02:48:15.130299', 'step': 18832, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 3322488817984}, 'timestamp': '2025-09-10 02:48:15.160808', 'step': 18832, 'epoch': 3} {'type': 'loss', 'content': 0.08238529413938522, 'timestamp': '2025-09-10 02:48:15.162965', 'step': 18833, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 128], 'flops': 3797092544000}, 'timestamp': '2025-09-10 02:48:15.192964', 'step': 18833, 'epoch': 3} {'type': 'loss', 'content': 0.053737517446279526, 'timestamp': '2025-09-10 02:48:15.196471', 'step': 18834, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 3322488817984}, 'timestamp': '2025-09-10 02:48:15.226369', 'step': 18834, 'epoch': 3} {'type': 'loss', 'content': 0.09155584126710892, 'timestamp': '2025-09-10 02:48:15.229280', 'step': 18835, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 2847885091968}, 'timestamp': '2025-09-10 02:48:15.259708', 'step': 18835, 'epoch': 3} {'type': 'loss', 'content': 0.04840362071990967, 'timestamp': '2025-09-10 02:48:15.282872', 'step': 18836, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 144], 'flops': 4271696270016}, 'timestamp': '2025-09-10 02:48:15.313751', 'step': 18836, 'epoch': 3} {'type': 'loss', 'content': 0.04100877791643143, 'timestamp': '2025-09-10 02:48:15.316098', 'step': 18837, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 144], 'flops': 4271696270016}, 'timestamp': '2025-09-10 02:48:15.346289', 'step': 18837, 'epoch': 3} {'type': 'loss', 'content': 0.1074187159538269, 'timestamp': '2025-09-10 02:48:15.348699', 'step': 18838, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 3322488817984}, 'timestamp': '2025-09-10 02:48:15.379525', 'step': 18838, 'epoch': 3} {'type': 'loss', 'content': 0.12687720358371735, 'timestamp': '2025-09-10 02:48:15.382239', 'step': 18839, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 2847885091968}, 'timestamp': '2025-09-10 02:48:15.412752', 'step': 18839, 'epoch': 3} {'type': 'loss', 'content': 0.0953001156449318, 'timestamp': '2025-09-10 02:48:15.436371', 'step': 18840, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 144], 'flops': 4271696270016}, 'timestamp': '2025-09-10 02:48:15.469206', 'step': 18840, 'epoch': 3} {'type': 'loss', 'content': 0.06453904509544373, 'timestamp': '2025-09-10 02:48:15.471859', 'step': 18841, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 3322488817984}, 'timestamp': '2025-09-10 02:48:15.502436', 'step': 18841, 'epoch': 3} {'type': 'loss', 'content': 0.08458796888589859, 'timestamp': '2025-09-10 02:48:15.504732', 'step': 18842, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 160], 'flops': 4746299996032}, 'timestamp': '2025-09-10 02:48:15.535773', 'step': 18842, 'epoch': 3} {'type': 'loss', 'content': 0.07045024633407593, 'timestamp': '2025-09-10 02:48:15.538477', 'step': 18843, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 2847885091968}, 'timestamp': '2025-09-10 02:48:15.571303', 'step': 18843, 'epoch': 3} {'type': 'loss', 'content': 0.07185564190149307, 'timestamp': '2025-09-10 02:48:15.595142', 'step': 18844, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 3322488817984}, 'timestamp': '2025-09-10 02:48:15.626011', 'step': 18844, 'epoch': 3} {'type': 'loss', 'content': 0.045461397618055344, 'timestamp': '2025-09-10 02:48:15.628545', 'step': 18845, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 3322488817984}, 'timestamp': '2025-09-10 02:48:15.659575', 'step': 18845, 'epoch': 3} {'type': 'loss', 'content': 0.10838337987661362, 'timestamp': '2025-09-10 02:48:15.661956', 'step': 18846, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 2847885091968}, 'timestamp': '2025-09-10 02:48:15.693364', 'step': 18846, 'epoch': 3} {'type': 'loss', 'content': 0.06078747287392616, 'timestamp': '2025-09-10 02:48:15.695775', 'step': 18847, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 3322488817984}, 'timestamp': '2025-09-10 02:48:15.726499', 'step': 18847, 'epoch': 3} {'type': 'loss', 'content': 0.03920937702059746, 'timestamp': '2025-09-10 02:48:15.750226', 'step': 18848, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 3322488817984}, 'timestamp': '2025-09-10 02:48:15.781072', 'step': 18848, 'epoch': 3} {'type': 'loss', 'content': 0.08644165843725204, 'timestamp': '2025-09-10 02:48:15.783655', 'step': 18849, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 128], 'flops': 3797092544000}, 'timestamp': '2025-09-10 02:48:15.815233', 'step': 18849, 'epoch': 3} {'type': 'loss', 'content': 0.06159226968884468, 'timestamp': '2025-09-10 02:48:15.817710', 'step': 18850, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 2847885091968}, 'timestamp': '2025-09-10 02:48:15.848270', 'step': 18850, 'epoch': 3} {'type': 'loss', 'content': 0.09834428131580353, 'timestamp': '2025-09-10 02:48:15.851583', 'step': 18851, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 3322488817984}, 'timestamp': '2025-09-10 02:48:15.882134', 'step': 18851, 'epoch': 3} {'type': 'loss', 'content': 0.0610186830163002, 'timestamp': '2025-09-10 02:48:15.906049', 'step': 18852, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 128], 'flops': 3797092544000}, 'timestamp': '2025-09-10 02:48:15.936421', 'step': 18852, 'epoch': 3} {'type': 'loss', 'content': 0.0747421607375145, 'timestamp': '2025-09-10 02:48:15.939341', 'step': 18853, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 2847885091968}, 'timestamp': '2025-09-10 02:48:15.970065', 'step': 18853, 'epoch': 3} {'type': 'loss', 'content': 0.06898565590381622, 'timestamp': '2025-09-10 02:48:15.972152', 'step': 18854, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 2847885091968}, 'timestamp': '2025-09-10 02:48:16.002537', 'step': 18854, 'epoch': 3} {'type': 'loss', 'content': 0.0992373451590538, 'timestamp': '2025-09-10 02:48:16.005271', 'step': 18855, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 2847885091968}, 'timestamp': '2025-09-10 02:48:16.037494', 'step': 18855, 'epoch': 3} {'type': 'loss', 'content': 0.04305363446474075, 'timestamp': '2025-09-10 02:48:16.061536', 'step': 18856, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 3322488817984}, 'timestamp': '2025-09-10 02:48:16.092651', 'step': 18856, 'epoch': 3} {'type': 'loss', 'content': 0.11777249723672867, 'timestamp': '2025-09-10 02:48:16.094977', 'step': 18857, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 2847885091968}, 'timestamp': '2025-09-10 02:48:16.125212', 'step': 18857, 'epoch': 3} {'type': 'loss', 'content': 0.10331939160823822, 'timestamp': '2025-09-10 02:48:16.127495', 'step': 18858, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 128], 'flops': 3797092544000}, 'timestamp': '2025-09-10 02:48:16.158158', 'step': 18858, 'epoch': 3} {'type': 'loss', 'content': 0.08833914250135422, 'timestamp': '2025-09-10 02:48:16.160809', 'step': 18859, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 80], 'flops': 2373281365952}, 'timestamp': '2025-09-10 02:48:16.191469', 'step': 18859, 'epoch': 3} {'type': 'loss', 'content': 0.05314430594444275, 'timestamp': '2025-09-10 02:48:16.215197', 'step': 18860, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 128], 'flops': 3797092544000}, 'timestamp': '2025-09-10 02:48:16.246574', 'step': 18860, 'epoch': 3} {'type': 'loss', 'content': 0.05954224243760109, 'timestamp': '2025-09-10 02:48:16.248945', 'step': 18861, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 3322488817984}, 'timestamp': '2025-09-10 02:48:16.280660', 'step': 18861, 'epoch': 3} {'type': 'loss', 'content': 0.08782782405614853, 'timestamp': '2025-09-10 02:48:16.283172', 'step': 18862, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 128], 'flops': 3797092544000}, 'timestamp': '2025-09-10 02:48:16.312966', 'step': 18862, 'epoch': 3} {'type': 'loss', 'content': 0.09721800684928894, 'timestamp': '2025-09-10 02:48:16.315094', 'step': 18863, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 3322488817984}, 'timestamp': '2025-09-10 02:48:16.344574', 'step': 18863, 'epoch': 3} {'type': 'loss', 'content': 0.05710596218705177, 'timestamp': '2025-09-10 02:48:16.368586', 'step': 18864, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 2847885091968}, 'timestamp': '2025-09-10 02:48:16.399136', 'step': 18864, 'epoch': 3} {'type': 'loss', 'content': 0.10334523022174835, 'timestamp': '2025-09-10 02:48:16.401721', 'step': 18865, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 128], 'flops': 3797092544000}, 'timestamp': '2025-09-10 02:48:16.432198', 'step': 18865, 'epoch': 3} {'type': 'loss', 'content': 0.12429442256689072, 'timestamp': '2025-09-10 02:48:16.434792', 'step': 18866, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 160], 'flops': 4746299996032}, 'timestamp': '2025-09-10 02:48:16.465672', 'step': 18866, 'epoch': 3} {'type': 'loss', 'content': 0.14315025508403778, 'timestamp': '2025-09-10 02:48:16.468675', 'step': 18867, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 3322488817984}, 'timestamp': '2025-09-10 02:48:16.499154', 'step': 18867, 'epoch': 3} {'type': 'loss', 'content': 0.10807112604379654, 'timestamp': '2025-09-10 02:48:16.522733', 'step': 18868, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 160], 'flops': 4746299996032}, 'timestamp': '2025-09-10 02:48:16.564573', 'step': 18868, 'epoch': 3} {'type': 'loss', 'content': 0.028274504467844963, 'timestamp': '2025-09-10 02:48:16.567072', 'step': 18869, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 3322488817984}, 'timestamp': '2025-09-10 02:48:16.597718', 'step': 18869, 'epoch': 3} {'type': 'loss', 'content': 0.07201463729143143, 'timestamp': '2025-09-10 02:48:16.600371', 'step': 18870, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 3322488817984}, 'timestamp': '2025-09-10 02:48:16.630501', 'step': 18870, 'epoch': 3} {'type': 'loss', 'content': 0.1458897739648819, 'timestamp': '2025-09-10 02:48:16.632913', 'step': 18871, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 128], 'flops': 3797092544000}, 'timestamp': '2025-09-10 02:48:16.663134', 'step': 18871, 'epoch': 3} {'type': 'loss', 'content': 0.033668238669633865, 'timestamp': '2025-09-10 02:48:16.686399', 'step': 18872, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 3322488817984}, 'timestamp': '2025-09-10 02:48:16.716885', 'step': 18872, 'epoch': 3} {'type': 'loss', 'content': 0.1153675764799118, 'timestamp': '2025-09-10 02:48:16.719236', 'step': 18873, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 144], 'flops': 4271696270016}, 'timestamp': '2025-09-10 02:48:16.749345', 'step': 18873, 'epoch': 3} {'type': 'loss', 'content': 0.008840029127895832, 'timestamp': '2025-09-10 02:48:16.751904', 'step': 18874, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 128], 'flops': 3797092544000}, 'timestamp': '2025-09-10 02:48:16.782295', 'step': 18874, 'epoch': 3} {'type': 'loss', 'content': 0.04214111343026161, 'timestamp': '2025-09-10 02:48:16.784500', 'step': 18875, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 3322488817984}, 'timestamp': '2025-09-10 02:48:16.815204', 'step': 18875, 'epoch': 3} {'type': 'loss', 'content': 0.04572918638586998, 'timestamp': '2025-09-10 02:48:16.838807', 'step': 18876, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 128], 'flops': 3797092544000}, 'timestamp': '2025-09-10 02:48:16.869185', 'step': 18876, 'epoch': 3} {'type': 'loss', 'content': 0.07129481434822083, 'timestamp': '2025-09-10 02:48:16.871619', 'step': 18877, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 2847885091968}, 'timestamp': '2025-09-10 02:48:16.902371', 'step': 18877, 'epoch': 3} {'type': 'loss', 'content': 0.11350362002849579, 'timestamp': '2025-09-10 02:48:16.904898', 'step': 18878, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 3322488817984}, 'timestamp': '2025-09-10 02:48:16.935689', 'step': 18878, 'epoch': 3} {'type': 'loss', 'content': 0.07432883977890015, 'timestamp': '2025-09-10 02:48:16.938109', 'step': 18879, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 2847885091968}, 'timestamp': '2025-09-10 02:48:16.968913', 'step': 18879, 'epoch': 3} {'type': 'loss', 'content': 0.044239506125450134, 'timestamp': '2025-09-10 02:48:16.992450', 'step': 18880, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 2847885091968}, 'timestamp': '2025-09-10 02:48:17.025064', 'step': 18880, 'epoch': 3} {'type': 'loss', 'content': 0.09690190851688385, 'timestamp': '2025-09-10 02:48:17.028077', 'step': 18881, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 3322488817984}, 'timestamp': '2025-09-10 02:48:17.060841', 'step': 18881, 'epoch': 3} {'type': 'loss', 'content': 0.04822307080030441, 'timestamp': '2025-09-10 02:48:17.064010', 'step': 18882, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 144], 'flops': 4271696270016}, 'timestamp': '2025-09-10 02:48:17.098275', 'step': 18882, 'epoch': 3} {'type': 'loss', 'content': 0.08108025789260864, 'timestamp': '2025-09-10 02:48:17.101708', 'step': 18883, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 128], 'flops': 3797092544000}, 'timestamp': '2025-09-10 02:48:17.133457', 'step': 18883, 'epoch': 3} {'type': 'loss', 'content': 0.08489597588777542, 'timestamp': '2025-09-10 02:48:17.157922', 'step': 18884, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 2847885091968}, 'timestamp': '2025-09-10 02:48:17.187846', 'step': 18884, 'epoch': 3} {'type': 'loss', 'content': 0.037849970161914825, 'timestamp': '2025-09-10 02:48:17.190310', 'step': 18885, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 2847885091968}, 'timestamp': '2025-09-10 02:48:17.220389', 'step': 18885, 'epoch': 3} {'type': 'loss', 'content': 0.18399393558502197, 'timestamp': '2025-09-10 02:48:17.223052', 'step': 18886, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 3322488817984}, 'timestamp': '2025-09-10 02:48:17.253234', 'step': 18886, 'epoch': 3} {'type': 'loss', 'content': 0.07662129402160645, 'timestamp': '2025-09-10 02:48:17.255476', 'step': 18887, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 128], 'flops': 3797092544000}, 'timestamp': '2025-09-10 02:48:17.286515', 'step': 18887, 'epoch': 3} {'type': 'loss', 'content': 0.048825327306985855, 'timestamp': '2025-09-10 02:48:17.310204', 'step': 18888, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 3322488817984}, 'timestamp': '2025-09-10 02:48:17.340657', 'step': 18888, 'epoch': 3} {'type': 'loss', 'content': 0.13192850351333618, 'timestamp': '2025-09-10 02:48:17.343300', 'step': 18889, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 2847885091968}, 'timestamp': '2025-09-10 02:48:17.374087', 'step': 18889, 'epoch': 3} {'type': 'loss', 'content': 0.15357252955436707, 'timestamp': '2025-09-10 02:48:17.376438', 'step': 18890, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 160], 'flops': 4746299996032}, 'timestamp': '2025-09-10 02:48:17.406969', 'step': 18890, 'epoch': 3} {'type': 'loss', 'content': 0.0691952183842659, 'timestamp': '2025-09-10 02:48:17.409799', 'step': 18891, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 144], 'flops': 4271696270016}, 'timestamp': '2025-09-10 02:48:17.441470', 'step': 18891, 'epoch': 3} {'type': 'loss', 'content': 0.09360410273075104, 'timestamp': '2025-09-10 02:48:17.465057', 'step': 18892, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 144], 'flops': 4271696270016}, 'timestamp': '2025-09-10 02:48:17.497202', 'step': 18892, 'epoch': 3} {'type': 'loss', 'content': 0.10831620544195175, 'timestamp': '2025-09-10 02:48:17.499663', 'step': 18893, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 3322488817984}, 'timestamp': '2025-09-10 02:48:17.529862', 'step': 18893, 'epoch': 3} {'type': 'loss', 'content': 0.07245960831642151, 'timestamp': '2025-09-10 02:48:17.532356', 'step': 18894, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 144], 'flops': 4271696270016}, 'timestamp': '2025-09-10 02:48:17.563204', 'step': 18894, 'epoch': 3} {'type': 'loss', 'content': 0.04362696409225464, 'timestamp': '2025-09-10 02:48:17.565697', 'step': 18895, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 144], 'flops': 4271696270016}, 'timestamp': '2025-09-10 02:48:17.597828', 'step': 18895, 'epoch': 3} {'type': 'loss', 'content': 0.05251847952604294, 'timestamp': '2025-09-10 02:48:17.621490', 'step': 18896, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 2847885091968}, 'timestamp': '2025-09-10 02:48:17.661439', 'step': 18896, 'epoch': 3} {'type': 'loss', 'content': 0.10823345184326172, 'timestamp': '2025-09-10 02:48:17.663601', 'step': 18897, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 2847885091968}, 'timestamp': '2025-09-10 02:48:17.693966', 'step': 18897, 'epoch': 3} {'type': 'loss', 'content': 0.051664773374795914, 'timestamp': '2025-09-10 02:48:17.696254', 'step': 18898, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 128], 'flops': 3797092544000}, 'timestamp': '2025-09-10 02:48:17.726374', 'step': 18898, 'epoch': 3} {'type': 'loss', 'content': 0.13767963647842407, 'timestamp': '2025-09-10 02:48:17.728952', 'step': 18899, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 128], 'flops': 3797092544000}, 'timestamp': '2025-09-10 02:48:17.758950', 'step': 18899, 'epoch': 3} {'type': 'loss', 'content': 0.02335231751203537, 'timestamp': '2025-09-10 02:48:17.782671', 'step': 18900, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 128], 'flops': 3797092544000}, 'timestamp': '2025-09-10 02:48:17.812587', 'step': 18900, 'epoch': 3} {'type': 'loss', 'content': 0.05031890794634819, 'timestamp': '2025-09-10 02:48:17.815088', 'step': 18901, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 192], 'flops': 5695507448064}, 'timestamp': '2025-09-10 02:48:17.845346', 'step': 18901, 'epoch': 3} {'type': 'loss', 'content': 0.08714832365512848, 'timestamp': '2025-09-10 02:48:17.849827', 'step': 18902, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 3322488817984}, 'timestamp': '2025-09-10 02:48:17.881963', 'step': 18902, 'epoch': 3} {'type': 'loss', 'content': 0.05635204538702965, 'timestamp': '2025-09-10 02:48:17.884255', 'step': 18903, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 3322488817984}, 'timestamp': '2025-09-10 02:48:17.914221', 'step': 18903, 'epoch': 3} {'type': 'loss', 'content': 0.035919614136219025, 'timestamp': '2025-09-10 02:48:17.937914', 'step': 18904, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 3322488817984}, 'timestamp': '2025-09-10 02:48:17.967537', 'step': 18904, 'epoch': 3} {'type': 'loss', 'content': 0.0786515399813652, 'timestamp': '2025-09-10 02:48:17.969804', 'step': 18905, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 144], 'flops': 4271696270016}, 'timestamp': '2025-09-10 02:48:18.001938', 'step': 18905, 'epoch': 3} {'type': 'loss', 'content': 0.03082900680601597, 'timestamp': '2025-09-10 02:48:18.004387', 'step': 18906, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 3322488817984}, 'timestamp': '2025-09-10 02:48:18.035400', 'step': 18906, 'epoch': 3} {'type': 'loss', 'content': 0.056451957672834396, 'timestamp': '2025-09-10 02:48:18.037974', 'step': 18907, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 2847885091968}, 'timestamp': '2025-09-10 02:48:18.067791', 'step': 18907, 'epoch': 3} {'type': 'loss', 'content': 0.04595545306801796, 'timestamp': '2025-09-10 02:48:18.091728', 'step': 18908, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 3322488817984}, 'timestamp': '2025-09-10 02:48:18.124198', 'step': 18908, 'epoch': 3} {'type': 'loss', 'content': 0.05014800280332565, 'timestamp': '2025-09-10 02:48:18.126820', 'step': 18909, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 176], 'flops': 5220903722048}, 'timestamp': '2025-09-10 02:48:18.157647', 'step': 18909, 'epoch': 3} {'type': 'loss', 'content': 0.06117439270019531, 'timestamp': '2025-09-10 02:48:18.161701', 'step': 18910, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 2847885091968}, 'timestamp': '2025-09-10 02:48:18.191915', 'step': 18910, 'epoch': 3} {'type': 'loss', 'content': 0.05180088058114052, 'timestamp': '2025-09-10 02:48:18.194193', 'step': 18911, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 128], 'flops': 3797092544000}, 'timestamp': '2025-09-10 02:48:18.224028', 'step': 18911, 'epoch': 3} {'type': 'loss', 'content': 0.045082107186317444, 'timestamp': '2025-09-10 02:48:18.247755', 'step': 18912, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 2847885091968}, 'timestamp': '2025-09-10 02:48:18.279380', 'step': 18912, 'epoch': 3} {'type': 'loss', 'content': 0.055705804377794266, 'timestamp': '2025-09-10 02:48:18.282915', 'step': 18913, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 128], 'flops': 3797092544000}, 'timestamp': '2025-09-10 02:48:18.313119', 'step': 18913, 'epoch': 3} {'type': 'loss', 'content': 0.04075030982494354, 'timestamp': '2025-09-10 02:48:18.315485', 'step': 18914, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 128], 'flops': 3797092544000}, 'timestamp': '2025-09-10 02:48:18.345371', 'step': 18914, 'epoch': 3} {'type': 'loss', 'content': 0.042163606733083725, 'timestamp': '2025-09-10 02:48:18.347487', 'step': 18915, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 3322488817984}, 'timestamp': '2025-09-10 02:48:18.377369', 'step': 18915, 'epoch': 3} {'type': 'loss', 'content': 0.1010575145483017, 'timestamp': '2025-09-10 02:48:18.400873', 'step': 18916, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 2847885091968}, 'timestamp': '2025-09-10 02:48:18.431205', 'step': 18916, 'epoch': 3} {'type': 'loss', 'content': 0.029278624802827835, 'timestamp': '2025-09-10 02:48:18.433924', 'step': 18917, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 128], 'flops': 3797092544000}, 'timestamp': '2025-09-10 02:48:18.464474', 'step': 18917, 'epoch': 3} {'type': 'loss', 'content': 0.09354449063539505, 'timestamp': '2025-09-10 02:48:18.467126', 'step': 18918, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 3322488817984}, 'timestamp': '2025-09-10 02:48:18.497656', 'step': 18918, 'epoch': 3} {'type': 'loss', 'content': 0.05805119872093201, 'timestamp': '2025-09-10 02:48:18.500569', 'step': 18919, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 80], 'flops': 2373281365952}, 'timestamp': '2025-09-10 02:48:18.531416', 'step': 18919, 'epoch': 3} {'type': 'loss', 'content': 0.1273532211780548, 'timestamp': '2025-09-10 02:48:18.554743', 'step': 18920, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 128], 'flops': 3797092544000}, 'timestamp': '2025-09-10 02:48:18.585386', 'step': 18920, 'epoch': 3} {'type': 'loss', 'content': 0.03873029723763466, 'timestamp': '2025-09-10 02:48:18.587564', 'step': 18921, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 2847885091968}, 'timestamp': '2025-09-10 02:48:18.617347', 'step': 18921, 'epoch': 3} {'type': 'loss', 'content': 0.011620807461440563, 'timestamp': '2025-09-10 02:48:18.619794', 'step': 18922, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 3322488817984}, 'timestamp': '2025-09-10 02:48:18.654441', 'step': 18922, 'epoch': 3} {'type': 'loss', 'content': 0.04885847494006157, 'timestamp': '2025-09-10 02:48:18.656662', 'step': 18923, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 144], 'flops': 4271696270016}, 'timestamp': '2025-09-10 02:48:18.688506', 'step': 18923, 'epoch': 3} {'type': 'loss', 'content': 0.037591371685266495, 'timestamp': '2025-09-10 02:48:18.712251', 'step': 18924, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 144], 'flops': 4271696270016}, 'timestamp': '2025-09-10 02:48:18.742485', 'step': 18924, 'epoch': 3} {'type': 'loss', 'content': 0.15256059169769287, 'timestamp': '2025-09-10 02:48:18.744967', 'step': 18925, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 128], 'flops': 3797092544000}, 'timestamp': '2025-09-10 02:48:18.774901', 'step': 18925, 'epoch': 3} {'type': 'loss', 'content': 0.09697418659925461, 'timestamp': '2025-09-10 02:48:18.777517', 'step': 18926, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 2847885091968}, 'timestamp': '2025-09-10 02:48:18.807878', 'step': 18926, 'epoch': 3} {'type': 'loss', 'content': 0.03617951273918152, 'timestamp': '2025-09-10 02:48:18.810200', 'step': 18927, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 3322488817984}, 'timestamp': '2025-09-10 02:48:18.840198', 'step': 18927, 'epoch': 3} {'type': 'loss', 'content': 0.05909401550889015, 'timestamp': '2025-09-10 02:48:18.863799', 'step': 18928, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 2847885091968}, 'timestamp': '2025-09-10 02:48:18.893973', 'step': 18928, 'epoch': 3} {'type': 'loss', 'content': 0.13412567973136902, 'timestamp': '2025-09-10 02:48:18.896362', 'step': 18929, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 128], 'flops': 3797092544000}, 'timestamp': '2025-09-10 02:48:18.927027', 'step': 18929, 'epoch': 3} {'type': 'loss', 'content': 0.046146947890520096, 'timestamp': '2025-09-10 02:48:18.929517', 'step': 18930, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 80], 'flops': 2373281365952}, 'timestamp': '2025-09-10 02:48:18.959135', 'step': 18930, 'epoch': 3} {'type': 'loss', 'content': 0.03321600705385208, 'timestamp': '2025-09-10 02:48:18.961458', 'step': 18931, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 2847885091968}, 'timestamp': '2025-09-10 02:48:18.991841', 'step': 18931, 'epoch': 3} {'type': 'loss', 'content': 0.051246337592601776, 'timestamp': '2025-09-10 02:48:19.015209', 'step': 18932, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 2847885091968}, 'timestamp': '2025-09-10 02:48:19.046128', 'step': 18932, 'epoch': 3} {'type': 'loss', 'content': 0.11893787980079651, 'timestamp': '2025-09-10 02:48:19.048320', 'step': 18933, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 128], 'flops': 3797092544000}, 'timestamp': '2025-09-10 02:48:19.078454', 'step': 18933, 'epoch': 3} {'type': 'loss', 'content': 0.06887947767972946, 'timestamp': '2025-09-10 02:48:19.080676', 'step': 18934, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 144], 'flops': 4271696270016}, 'timestamp': '2025-09-10 02:48:19.110972', 'step': 18934, 'epoch': 3} {'type': 'loss', 'content': 0.07501108944416046, 'timestamp': '2025-09-10 02:48:19.113305', 'step': 18935, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 144], 'flops': 4271696270016}, 'timestamp': '2025-09-10 02:48:19.143728', 'step': 18935, 'epoch': 3} {'type': 'loss', 'content': 0.07349599152803421, 'timestamp': '2025-09-10 02:48:19.172487', 'step': 18936, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 3322488817984}, 'timestamp': '2025-09-10 02:48:19.219039', 'step': 18936, 'epoch': 3} {'type': 'loss', 'content': 0.04053186997771263, 'timestamp': '2025-09-10 02:48:19.221598', 'step': 18937, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 128], 'flops': 3797092544000}, 'timestamp': '2025-09-10 02:48:19.252211', 'step': 18937, 'epoch': 3} {'type': 'loss', 'content': 0.09816841781139374, 'timestamp': '2025-09-10 02:48:19.255194', 'step': 18938, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 3322488817984}, 'timestamp': '2025-09-10 02:48:19.285762', 'step': 18938, 'epoch': 3} {'type': 'loss', 'content': 0.09864754229784012, 'timestamp': '2025-09-10 02:48:19.287911', 'step': 18939, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 3322488817984}, 'timestamp': '2025-09-10 02:48:19.319247', 'step': 18939, 'epoch': 3} {'type': 'loss', 'content': 0.020240843296051025, 'timestamp': '2025-09-10 02:48:19.342851', 'step': 18940, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 2847885091968}, 'timestamp': '2025-09-10 02:48:19.372683', 'step': 18940, 'epoch': 3} {'type': 'loss', 'content': 0.07986029982566833, 'timestamp': '2025-09-10 02:48:19.375450', 'step': 18941, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 128], 'flops': 3797092544000}, 'timestamp': '2025-09-10 02:48:19.407406', 'step': 18941, 'epoch': 3} {'type': 'loss', 'content': 0.1456737071275711, 'timestamp': '2025-09-10 02:48:19.409650', 'step': 18942, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 3322488817984}, 'timestamp': '2025-09-10 02:48:19.439645', 'step': 18942, 'epoch': 3} {'type': 'loss', 'content': 0.06798539310693741, 'timestamp': '2025-09-10 02:48:19.442560', 'step': 18943, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 128], 'flops': 3797092544000}, 'timestamp': '2025-09-10 02:48:19.472505', 'step': 18943, 'epoch': 3} {'type': 'loss', 'content': 0.07414235174655914, 'timestamp': '2025-09-10 02:48:19.496406', 'step': 18944, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 3322488817984}, 'timestamp': '2025-09-10 02:48:19.526792', 'step': 18944, 'epoch': 3} {'type': 'loss', 'content': 0.055645450949668884, 'timestamp': '2025-09-10 02:48:19.529173', 'step': 18945, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 2847885091968}, 'timestamp': '2025-09-10 02:48:19.559293', 'step': 18945, 'epoch': 3} {'type': 'loss', 'content': 0.0505400076508522, 'timestamp': '2025-09-10 02:48:19.561725', 'step': 18946, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 3322488817984}, 'timestamp': '2025-09-10 02:48:19.592352', 'step': 18946, 'epoch': 3} {'type': 'loss', 'content': 0.013074349611997604, 'timestamp': '2025-09-10 02:48:19.595164', 'step': 18947, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 2847885091968}, 'timestamp': '2025-09-10 02:48:19.626898', 'step': 18947, 'epoch': 3} {'type': 'loss', 'content': 0.051812924444675446, 'timestamp': '2025-09-10 02:48:19.653330', 'step': 18948, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 3322488817984}, 'timestamp': '2025-09-10 02:48:19.685379', 'step': 18948, 'epoch': 3} {'type': 'loss', 'content': 0.043108608573675156, 'timestamp': '2025-09-10 02:48:19.687594', 'step': 18949, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 128], 'flops': 3797092544000}, 'timestamp': '2025-09-10 02:48:19.717846', 'step': 18949, 'epoch': 3} {'type': 'loss', 'content': 0.057685330510139465, 'timestamp': '2025-09-10 02:48:19.720407', 'step': 18950, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 144], 'flops': 4271696270016}, 'timestamp': '2025-09-10 02:48:19.750765', 'step': 18950, 'epoch': 3} {'type': 'loss', 'content': 0.06599534302949905, 'timestamp': '2025-09-10 02:48:19.753124', 'step': 18951, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 128], 'flops': 3797092544000}, 'timestamp': '2025-09-10 02:48:19.784267', 'step': 18951, 'epoch': 3} {'type': 'loss', 'content': 0.06619737297296524, 'timestamp': '2025-09-10 02:48:19.807824', 'step': 18952, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 128], 'flops': 3797092544000}, 'timestamp': '2025-09-10 02:48:19.838125', 'step': 18952, 'epoch': 3} {'type': 'loss', 'content': 0.07001247256994247, 'timestamp': '2025-09-10 02:48:19.840397', 'step': 18953, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 144], 'flops': 4271696270016}, 'timestamp': '2025-09-10 02:48:19.870460', 'step': 18953, 'epoch': 3} {'type': 'loss', 'content': 0.06272921711206436, 'timestamp': '2025-09-10 02:48:19.872884', 'step': 18954, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 3322488817984}, 'timestamp': '2025-09-10 02:48:19.903032', 'step': 18954, 'epoch': 3} {'type': 'loss', 'content': 0.060595277696847916, 'timestamp': '2025-09-10 02:48:19.905469', 'step': 18955, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 80], 'flops': 2373281365952}, 'timestamp': '2025-09-10 02:48:19.935965', 'step': 18955, 'epoch': 3} {'type': 'loss', 'content': 0.07702245563268661, 'timestamp': '2025-09-10 02:48:19.959380', 'step': 18956, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 3322488817984}, 'timestamp': '2025-09-10 02:48:19.990133', 'step': 18956, 'epoch': 3} {'type': 'loss', 'content': 0.04294012114405632, 'timestamp': '2025-09-10 02:48:19.992419', 'step': 18957, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 3322488817984}, 'timestamp': '2025-09-10 02:48:20.022664', 'step': 18957, 'epoch': 3} {'type': 'loss', 'content': 0.04718182235956192, 'timestamp': '2025-09-10 02:48:20.025176', 'step': 18958, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 3322488817984}, 'timestamp': '2025-09-10 02:48:20.056298', 'step': 18958, 'epoch': 3} {'type': 'loss', 'content': 0.05966220423579216, 'timestamp': '2025-09-10 02:48:20.059800', 'step': 18959, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 2847885091968}, 'timestamp': '2025-09-10 02:48:20.090139', 'step': 18959, 'epoch': 3} {'type': 'loss', 'content': 0.03676318749785423, 'timestamp': '2025-09-10 02:48:20.114104', 'step': 18960, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 3322488817984}, 'timestamp': '2025-09-10 02:48:20.145154', 'step': 18960, 'epoch': 3} {'type': 'loss', 'content': 0.11286964267492294, 'timestamp': '2025-09-10 02:48:20.147414', 'step': 18961, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 3322488817984}, 'timestamp': '2025-09-10 02:48:20.176891', 'step': 18961, 'epoch': 3} {'type': 'loss', 'content': 0.08369944989681244, 'timestamp': '2025-09-10 02:48:20.179295', 'step': 18962, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 3322488817984}, 'timestamp': '2025-09-10 02:48:20.209062', 'step': 18962, 'epoch': 3} {'type': 'loss', 'content': 0.07846172899007797, 'timestamp': '2025-09-10 02:48:20.214252', 'step': 18963, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 128], 'flops': 3797092544000}, 'timestamp': '2025-09-10 02:48:20.245920', 'step': 18963, 'epoch': 3} {'type': 'loss', 'content': 0.03597599267959595, 'timestamp': '2025-09-10 02:48:20.269677', 'step': 18964, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 144], 'flops': 4271696270016}, 'timestamp': '2025-09-10 02:48:20.299948', 'step': 18964, 'epoch': 3} {'type': 'loss', 'content': 0.022769832983613014, 'timestamp': '2025-09-10 02:48:20.302237', 'step': 18965, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 3322488817984}, 'timestamp': '2025-09-10 02:48:20.334240', 'step': 18965, 'epoch': 3} {'type': 'loss', 'content': 0.03338027000427246, 'timestamp': '2025-09-10 02:48:20.336488', 'step': 18966, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 2847885091968}, 'timestamp': '2025-09-10 02:48:20.366411', 'step': 18966, 'epoch': 3} {'type': 'loss', 'content': 0.07096115499734879, 'timestamp': '2025-09-10 02:48:20.368658', 'step': 18967, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 2847885091968}, 'timestamp': '2025-09-10 02:48:20.398297', 'step': 18967, 'epoch': 3} {'type': 'loss', 'content': 0.0723176822066307, 'timestamp': '2025-09-10 02:48:20.422076', 'step': 18968, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 3322488817984}, 'timestamp': '2025-09-10 02:48:20.452490', 'step': 18968, 'epoch': 3} {'type': 'loss', 'content': 0.024307144805788994, 'timestamp': '2025-09-10 02:48:20.454877', 'step': 18969, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 3322488817984}, 'timestamp': '2025-09-10 02:48:20.485471', 'step': 18969, 'epoch': 3} {'type': 'loss', 'content': 0.09870200604200363, 'timestamp': '2025-09-10 02:48:20.487809', 'step': 18970, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 3322488817984}, 'timestamp': '2025-09-10 02:48:20.517704', 'step': 18970, 'epoch': 3} {'type': 'loss', 'content': 0.030263928696513176, 'timestamp': '2025-09-10 02:48:20.519940', 'step': 18971, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 128], 'flops': 3797092544000}, 'timestamp': '2025-09-10 02:48:20.550204', 'step': 18971, 'epoch': 3} {'type': 'loss', 'content': 0.09070596843957901, 'timestamp': '2025-09-10 02:48:20.573683', 'step': 18972, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 2847885091968}, 'timestamp': '2025-09-10 02:48:20.605627', 'step': 18972, 'epoch': 3} {'type': 'loss', 'content': 0.01597869209945202, 'timestamp': '2025-09-10 02:48:20.607730', 'step': 18973, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 3322488817984}, 'timestamp': '2025-09-10 02:48:20.639346', 'step': 18973, 'epoch': 3} {'type': 'loss', 'content': 0.06506404280662537, 'timestamp': '2025-09-10 02:48:20.642094', 'step': 18974, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 80], 'flops': 2373281365952}, 'timestamp': '2025-09-10 02:48:20.672659', 'step': 18974, 'epoch': 3} {'type': 'loss', 'content': 0.05908278003334999, 'timestamp': '2025-09-10 02:48:20.675199', 'step': 18975, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 3322488817984}, 'timestamp': '2025-09-10 02:48:20.706217', 'step': 18975, 'epoch': 3} {'type': 'loss', 'content': 0.04191029816865921, 'timestamp': '2025-09-10 02:48:20.729852', 'step': 18976, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 3322488817984}, 'timestamp': '2025-09-10 02:48:20.760517', 'step': 18976, 'epoch': 3} {'type': 'loss', 'content': 0.09788208454847336, 'timestamp': '2025-09-10 02:48:20.763184', 'step': 18977, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 3322488817984}, 'timestamp': '2025-09-10 02:48:20.793682', 'step': 18977, 'epoch': 3} {'type': 'loss', 'content': 0.06969744712114334, 'timestamp': '2025-09-10 02:48:20.796048', 'step': 18978, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 144], 'flops': 4271696270016}, 'timestamp': '2025-09-10 02:48:20.826321', 'step': 18978, 'epoch': 3} {'type': 'loss', 'content': 0.07823554426431656, 'timestamp': '2025-09-10 02:48:20.828782', 'step': 18979, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 3322488817984}, 'timestamp': '2025-09-10 02:48:20.858986', 'step': 18979, 'epoch': 3} {'type': 'loss', 'content': 0.09922467917203903, 'timestamp': '2025-09-10 02:48:20.882999', 'step': 18980, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 2847885091968}, 'timestamp': '2025-09-10 02:48:20.912432', 'step': 18980, 'epoch': 3} {'type': 'loss', 'content': 0.07834690809249878, 'timestamp': '2025-09-10 02:48:20.916176', 'step': 18981, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 128], 'flops': 3797092544000}, 'timestamp': '2025-09-10 02:48:20.947032', 'step': 18981, 'epoch': 3} {'type': 'loss', 'content': 0.09367426484823227, 'timestamp': '2025-09-10 02:48:20.949247', 'step': 18982, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 3322488817984}, 'timestamp': '2025-09-10 02:48:20.979409', 'step': 18982, 'epoch': 3} {'type': 'loss', 'content': 0.047060150653123856, 'timestamp': '2025-09-10 02:48:20.981704', 'step': 18983, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 128], 'flops': 3797092544000}, 'timestamp': '2025-09-10 02:48:21.012040', 'step': 18983, 'epoch': 3} {'type': 'loss', 'content': 0.06982867419719696, 'timestamp': '2025-09-10 02:48:21.035383', 'step': 18984, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 3322488817984}, 'timestamp': '2025-09-10 02:48:21.066194', 'step': 18984, 'epoch': 3} {'type': 'loss', 'content': 0.0627535954117775, 'timestamp': '2025-09-10 02:48:21.068215', 'step': 18985, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 3322488817984}, 'timestamp': '2025-09-10 02:48:21.097731', 'step': 18985, 'epoch': 3} {'type': 'loss', 'content': 0.03435923904180527, 'timestamp': '2025-09-10 02:48:21.099935', 'step': 18986, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 128], 'flops': 3797092544000}, 'timestamp': '2025-09-10 02:48:21.129959', 'step': 18986, 'epoch': 3} {'type': 'loss', 'content': 0.07055177539587021, 'timestamp': '2025-09-10 02:48:21.132251', 'step': 18987, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 3322488817984}, 'timestamp': '2025-09-10 02:48:21.162023', 'step': 18987, 'epoch': 3} {'type': 'loss', 'content': 0.020803365856409073, 'timestamp': '2025-09-10 02:48:21.185620', 'step': 18988, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 128], 'flops': 3797092544000}, 'timestamp': '2025-09-10 02:48:21.215855', 'step': 18988, 'epoch': 3} {'type': 'loss', 'content': 0.09444063901901245, 'timestamp': '2025-09-10 02:48:21.219828', 'step': 18989, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 160], 'flops': 4746299996032}, 'timestamp': '2025-09-10 02:48:21.250808', 'step': 18989, 'epoch': 3} {'type': 'loss', 'content': 0.030692288652062416, 'timestamp': '2025-09-10 02:48:21.253809', 'step': 18990, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 3322488817984}, 'timestamp': '2025-09-10 02:48:21.286451', 'step': 18990, 'epoch': 3} {'type': 'loss', 'content': 0.020281216129660606, 'timestamp': '2025-09-10 02:48:21.288766', 'step': 18991, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 3322488817984}, 'timestamp': '2025-09-10 02:48:21.319492', 'step': 18991, 'epoch': 3} {'type': 'loss', 'content': 0.07024551928043365, 'timestamp': '2025-09-10 02:48:21.343247', 'step': 18992, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 3322488817984}, 'timestamp': '2025-09-10 02:48:21.373164', 'step': 18992, 'epoch': 3} {'type': 'loss', 'content': 0.05631498992443085, 'timestamp': '2025-09-10 02:48:21.375688', 'step': 18993, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 2847885091968}, 'timestamp': '2025-09-10 02:48:21.405705', 'step': 18993, 'epoch': 3} {'type': 'loss', 'content': 0.007963938638567924, 'timestamp': '2025-09-10 02:48:21.407883', 'step': 18994, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 80], 'flops': 2373281365952}, 'timestamp': '2025-09-10 02:48:21.437948', 'step': 18994, 'epoch': 3} {'type': 'loss', 'content': 0.06374180316925049, 'timestamp': '2025-09-10 02:48:21.440344', 'step': 18995, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 144], 'flops': 4271696270016}, 'timestamp': '2025-09-10 02:48:21.470631', 'step': 18995, 'epoch': 3} {'type': 'loss', 'content': 0.06647304445505142, 'timestamp': '2025-09-10 02:48:21.494000', 'step': 18996, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 128], 'flops': 3797092544000}, 'timestamp': '2025-09-10 02:48:21.524365', 'step': 18996, 'epoch': 3} {'type': 'loss', 'content': 0.05530610680580139, 'timestamp': '2025-09-10 02:48:21.526554', 'step': 18997, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 3322488817984}, 'timestamp': '2025-09-10 02:48:21.560955', 'step': 18997, 'epoch': 3} {'type': 'loss', 'content': 0.0523701049387455, 'timestamp': '2025-09-10 02:48:21.563309', 'step': 18998, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 3322488817984}, 'timestamp': '2025-09-10 02:48:21.593531', 'step': 18998, 'epoch': 3} {'type': 'loss', 'content': 0.018116284161806107, 'timestamp': '2025-09-10 02:48:21.595980', 'step': 18999, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 3322488817984}, 'timestamp': '2025-09-10 02:48:21.625806', 'step': 18999, 'epoch': 3} {'type': 'loss', 'content': 0.0823214054107666, 'timestamp': '2025-09-10 02:48:21.649503', 'step': 19000, 'epoch': 3} {'type': 'info', 'content': 'Checkpoint saved at step 19000', 'timestamp': '2025-09-10 02:48:26.412712', 'step': 19000, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 3322488817984}, 'timestamp': '2025-09-10 02:48:26.464704', 'step': 19000, 'epoch': 3} {'type': 'loss', 'content': 0.05608511343598366, 'timestamp': '2025-09-10 02:48:26.467225', 'step': 19001, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 3322488817984}, 'timestamp': '2025-09-10 02:48:26.500433', 'step': 19001, 'epoch': 3} {'type': 'loss', 'content': 0.10182137787342072, 'timestamp': '2025-09-10 02:48:26.502696', 'step': 19002, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 3322488817984}, 'timestamp': '2025-09-10 02:48:26.534598', 'step': 19002, 'epoch': 3} {'type': 'loss', 'content': 0.03888915479183197, 'timestamp': '2025-09-10 02:48:26.537166', 'step': 19003, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 144], 'flops': 4271696270016}, 'timestamp': '2025-09-10 02:48:26.568909', 'step': 19003, 'epoch': 3} {'type': 'loss', 'content': 0.05959493666887283, 'timestamp': '2025-09-10 02:48:26.593168', 'step': 19004, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 128], 'flops': 3797092544000}, 'timestamp': '2025-09-10 02:48:26.625389', 'step': 19004, 'epoch': 3} {'type': 'loss', 'content': 0.046678345650434494, 'timestamp': '2025-09-10 02:48:26.627795', 'step': 19005, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 3322488817984}, 'timestamp': '2025-09-10 02:48:26.659451', 'step': 19005, 'epoch': 3} {'type': 'loss', 'content': 0.060457270592451096, 'timestamp': '2025-09-10 02:48:26.661763', 'step': 19006, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 128], 'flops': 3797092544000}, 'timestamp': '2025-09-10 02:48:26.692415', 'step': 19006, 'epoch': 3} {'type': 'loss', 'content': 0.09913922846317291, 'timestamp': '2025-09-10 02:48:26.694824', 'step': 19007, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 128], 'flops': 3797092544000}, 'timestamp': '2025-09-10 02:48:26.724555', 'step': 19007, 'epoch': 3} {'type': 'loss', 'content': 0.06636497378349304, 'timestamp': '2025-09-10 02:48:26.748422', 'step': 19008, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 3322488817984}, 'timestamp': '2025-09-10 02:48:26.779159', 'step': 19008, 'epoch': 3} {'type': 'loss', 'content': 0.013867747038602829, 'timestamp': '2025-09-10 02:48:26.781571', 'step': 19009, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 2847885091968}, 'timestamp': '2025-09-10 02:48:26.813196', 'step': 19009, 'epoch': 3} {'type': 'loss', 'content': 0.018796345219016075, 'timestamp': '2025-09-10 02:48:26.815691', 'step': 19010, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 2847885091968}, 'timestamp': '2025-09-10 02:48:26.846098', 'step': 19010, 'epoch': 3} {'type': 'loss', 'content': 0.01692596636712551, 'timestamp': '2025-09-10 02:48:26.848250', 'step': 19011, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 160], 'flops': 4746299996032}, 'timestamp': '2025-09-10 02:48:26.878266', 'step': 19011, 'epoch': 3} {'type': 'loss', 'content': 0.02632349729537964, 'timestamp': '2025-09-10 02:48:26.902076', 'step': 19012, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 2847885091968}, 'timestamp': '2025-09-10 02:48:26.932049', 'step': 19012, 'epoch': 3} {'type': 'loss', 'content': 0.012837685644626617, 'timestamp': '2025-09-10 02:48:26.934348', 'step': 19013, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 3322488817984}, 'timestamp': '2025-09-10 02:48:26.965237', 'step': 19013, 'epoch': 3} {'type': 'loss', 'content': 0.046173691749572754, 'timestamp': '2025-09-10 02:48:26.967733', 'step': 19014, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 2847885091968}, 'timestamp': '2025-09-10 02:48:27.001614', 'step': 19014, 'epoch': 3} {'type': 'loss', 'content': 0.0511200949549675, 'timestamp': '2025-09-10 02:48:27.003808', 'step': 19015, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 3322488817984}, 'timestamp': '2025-09-10 02:48:27.034626', 'step': 19015, 'epoch': 3} {'type': 'loss', 'content': 0.09871721267700195, 'timestamp': '2025-09-10 02:48:27.058545', 'step': 19016, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 3322488817984}, 'timestamp': '2025-09-10 02:48:27.089117', 'step': 19016, 'epoch': 3} {'type': 'loss', 'content': 0.04905545711517334, 'timestamp': '2025-09-10 02:48:27.091867', 'step': 19017, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 160], 'flops': 4746299996032}, 'timestamp': '2025-09-10 02:48:27.123942', 'step': 19017, 'epoch': 3} {'type': 'loss', 'content': 0.0794524997472763, 'timestamp': '2025-09-10 02:48:27.126923', 'step': 19018, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 3322488817984}, 'timestamp': '2025-09-10 02:48:27.158052', 'step': 19018, 'epoch': 3} {'type': 'loss', 'content': 0.105211041867733, 'timestamp': '2025-09-10 02:48:27.160690', 'step': 19019, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 3322488817984}, 'timestamp': '2025-09-10 02:48:27.190712', 'step': 19019, 'epoch': 3} {'type': 'loss', 'content': 0.019086115062236786, 'timestamp': '2025-09-10 02:48:27.215722', 'step': 19020, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 3322488817984}, 'timestamp': '2025-09-10 02:48:27.247129', 'step': 19020, 'epoch': 3} {'type': 'loss', 'content': 0.04784698039293289, 'timestamp': '2025-09-10 02:48:27.249776', 'step': 19021, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 144], 'flops': 4271696270016}, 'timestamp': '2025-09-10 02:48:27.280459', 'step': 19021, 'epoch': 3} {'type': 'loss', 'content': 0.07763869315385818, 'timestamp': '2025-09-10 02:48:27.282663', 'step': 19022, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 3322488817984}, 'timestamp': '2025-09-10 02:48:27.312932', 'step': 19022, 'epoch': 3} {'type': 'loss', 'content': 0.05546921119093895, 'timestamp': '2025-09-10 02:48:27.315507', 'step': 19023, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 2847885091968}, 'timestamp': '2025-09-10 02:48:27.345343', 'step': 19023, 'epoch': 3} {'type': 'loss', 'content': 0.037235796451568604, 'timestamp': '2025-09-10 02:48:27.368714', 'step': 19024, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 144], 'flops': 4271696270016}, 'timestamp': '2025-09-10 02:48:27.400410', 'step': 19024, 'epoch': 3} {'type': 'loss', 'content': 0.04318489134311676, 'timestamp': '2025-09-10 02:48:27.402749', 'step': 19025, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 128], 'flops': 3797092544000}, 'timestamp': '2025-09-10 02:48:27.433326', 'step': 19025, 'epoch': 3} {'type': 'loss', 'content': 0.11851891875267029, 'timestamp': '2025-09-10 02:48:27.435460', 'step': 19026, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 2847885091968}, 'timestamp': '2025-09-10 02:48:27.465509', 'step': 19026, 'epoch': 3} {'type': 'loss', 'content': 0.07331334054470062, 'timestamp': '2025-09-10 02:48:27.468234', 'step': 19027, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 3322488817984}, 'timestamp': '2025-09-10 02:48:27.498192', 'step': 19027, 'epoch': 3} {'type': 'loss', 'content': 0.04961371794342995, 'timestamp': '2025-09-10 02:48:27.522017', 'step': 19028, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 128], 'flops': 3797092544000}, 'timestamp': '2025-09-10 02:48:27.553271', 'step': 19028, 'epoch': 3} {'type': 'loss', 'content': 0.05374801158905029, 'timestamp': '2025-09-10 02:48:27.555506', 'step': 19029, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 2847885091968}, 'timestamp': '2025-09-10 02:48:27.586279', 'step': 19029, 'epoch': 3} {'type': 'loss', 'content': 0.048996005207300186, 'timestamp': '2025-09-10 02:48:27.588462', 'step': 19030, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 2847885091968}, 'timestamp': '2025-09-10 02:48:27.618791', 'step': 19030, 'epoch': 3} {'type': 'loss', 'content': 0.1335342973470688, 'timestamp': '2025-09-10 02:48:27.621521', 'step': 19031, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 128], 'flops': 3797092544000}, 'timestamp': '2025-09-10 02:48:27.653109', 'step': 19031, 'epoch': 3} {'type': 'loss', 'content': 0.039226233959198, 'timestamp': '2025-09-10 02:48:27.676821', 'step': 19032, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 3322488817984}, 'timestamp': '2025-09-10 02:48:27.714229', 'step': 19032, 'epoch': 3} {'type': 'loss', 'content': 0.0642828494310379, 'timestamp': '2025-09-10 02:48:27.716680', 'step': 19033, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 160], 'flops': 4746299996032}, 'timestamp': '2025-09-10 02:48:27.747649', 'step': 19033, 'epoch': 3} {'type': 'loss', 'content': 0.05446493625640869, 'timestamp': '2025-09-10 02:48:27.750452', 'step': 19034, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 2847885091968}, 'timestamp': '2025-09-10 02:48:27.780881', 'step': 19034, 'epoch': 3} {'type': 'loss', 'content': 0.04233946651220322, 'timestamp': '2025-09-10 02:48:27.783222', 'step': 19035, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 80], 'flops': 2373281365952}, 'timestamp': '2025-09-10 02:48:27.814224', 'step': 19035, 'epoch': 3} {'type': 'loss', 'content': 0.004158328752964735, 'timestamp': '2025-09-10 02:48:27.837737', 'step': 19036, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 2847885091968}, 'timestamp': '2025-09-10 02:48:27.867953', 'step': 19036, 'epoch': 3} {'type': 'loss', 'content': 0.06389589607715607, 'timestamp': '2025-09-10 02:48:27.870243', 'step': 19037, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 144], 'flops': 4271696270016}, 'timestamp': '2025-09-10 02:48:27.900364', 'step': 19037, 'epoch': 3} {'type': 'loss', 'content': 0.027210678905248642, 'timestamp': '2025-09-10 02:48:27.904226', 'step': 19038, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 3322488817984}, 'timestamp': '2025-09-10 02:48:27.934153', 'step': 19038, 'epoch': 3} {'type': 'loss', 'content': 0.09910930693149567, 'timestamp': '2025-09-10 02:48:27.936345', 'step': 19039, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 3322488817984}, 'timestamp': '2025-09-10 02:48:27.967422', 'step': 19039, 'epoch': 3} {'type': 'loss', 'content': 0.009636728093028069, 'timestamp': '2025-09-10 02:48:27.991203', 'step': 19040, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 3322488817984}, 'timestamp': '2025-09-10 02:48:28.021442', 'step': 19040, 'epoch': 3} {'type': 'loss', 'content': 0.04837043955922127, 'timestamp': '2025-09-10 02:48:28.023751', 'step': 19041, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 128], 'flops': 3797092544000}, 'timestamp': '2025-09-10 02:48:28.054342', 'step': 19041, 'epoch': 3} {'type': 'loss', 'content': 0.034405600279569626, 'timestamp': '2025-09-10 02:48:28.056773', 'step': 19042, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 3322488817984}, 'timestamp': '2025-09-10 02:48:28.086983', 'step': 19042, 'epoch': 3} {'type': 'loss', 'content': 0.02791529707610607, 'timestamp': '2025-09-10 02:48:28.089241', 'step': 19043, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 3322488817984}, 'timestamp': '2025-09-10 02:48:28.119535', 'step': 19043, 'epoch': 3} {'type': 'loss', 'content': 0.047994717955589294, 'timestamp': '2025-09-10 02:48:28.143186', 'step': 19044, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 128], 'flops': 3797092544000}, 'timestamp': '2025-09-10 02:48:28.173637', 'step': 19044, 'epoch': 3} {'type': 'loss', 'content': 0.07221318036317825, 'timestamp': '2025-09-10 02:48:28.176231', 'step': 19045, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 3322488817984}, 'timestamp': '2025-09-10 02:48:28.208046', 'step': 19045, 'epoch': 3} {'type': 'loss', 'content': 0.08317908644676208, 'timestamp': '2025-09-10 02:48:28.210403', 'step': 19046, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 2847885091968}, 'timestamp': '2025-09-10 02:48:28.240357', 'step': 19046, 'epoch': 3} {'type': 'loss', 'content': 0.04970146715641022, 'timestamp': '2025-09-10 02:48:28.242885', 'step': 19047, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 80], 'flops': 2373281365952}, 'timestamp': '2025-09-10 02:48:28.272942', 'step': 19047, 'epoch': 3} {'type': 'loss', 'content': 0.03181871026754379, 'timestamp': '2025-09-10 02:48:28.296845', 'step': 19048, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 128], 'flops': 3797092544000}, 'timestamp': '2025-09-10 02:48:28.327131', 'step': 19048, 'epoch': 3} {'type': 'loss', 'content': 0.014592989347875118, 'timestamp': '2025-09-10 02:48:28.329200', 'step': 19049, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 2847885091968}, 'timestamp': '2025-09-10 02:48:28.358578', 'step': 19049, 'epoch': 3} {'type': 'loss', 'content': 0.06348411738872528, 'timestamp': '2025-09-10 02:48:28.360929', 'step': 19050, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 3322488817984}, 'timestamp': '2025-09-10 02:48:28.390973', 'step': 19050, 'epoch': 3} {'type': 'loss', 'content': 0.024342499673366547, 'timestamp': '2025-09-10 02:48:28.393802', 'step': 19051, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 128], 'flops': 3797092544000}, 'timestamp': '2025-09-10 02:48:28.425235', 'step': 19051, 'epoch': 3} {'type': 'loss', 'content': 0.04107552021741867, 'timestamp': '2025-09-10 02:48:28.448790', 'step': 19052, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 128], 'flops': 3797092544000}, 'timestamp': '2025-09-10 02:48:28.478999', 'step': 19052, 'epoch': 3} {'type': 'loss', 'content': 0.08869902044534683, 'timestamp': '2025-09-10 02:48:28.481241', 'step': 19053, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 144], 'flops': 4271696270016}, 'timestamp': '2025-09-10 02:48:28.511467', 'step': 19053, 'epoch': 3} {'type': 'loss', 'content': 0.1040671244263649, 'timestamp': '2025-09-10 02:48:28.513897', 'step': 19054, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 3322488817984}, 'timestamp': '2025-09-10 02:48:28.543505', 'step': 19054, 'epoch': 3} {'type': 'loss', 'content': 0.03302314877510071, 'timestamp': '2025-09-10 02:48:28.545747', 'step': 19055, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 3322488817984}, 'timestamp': '2025-09-10 02:48:28.575595', 'step': 19055, 'epoch': 3} {'type': 'loss', 'content': 0.058352623134851456, 'timestamp': '2025-09-10 02:48:28.599059', 'step': 19056, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 3322488817984}, 'timestamp': '2025-09-10 02:48:28.633118', 'step': 19056, 'epoch': 3} {'type': 'loss', 'content': 0.07908383011817932, 'timestamp': '2025-09-10 02:48:28.635489', 'step': 19057, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 144], 'flops': 4271696270016}, 'timestamp': '2025-09-10 02:48:28.666684', 'step': 19057, 'epoch': 3} {'type': 'loss', 'content': 0.09090310335159302, 'timestamp': '2025-09-10 02:48:28.669813', 'step': 19058, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 2847885091968}, 'timestamp': '2025-09-10 02:48:28.701689', 'step': 19058, 'epoch': 3} {'type': 'loss', 'content': 0.09201010316610336, 'timestamp': '2025-09-10 02:48:28.705889', 'step': 19059, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 3322488817984}, 'timestamp': '2025-09-10 02:48:28.741506', 'step': 19059, 'epoch': 3} {'type': 'loss', 'content': 0.049480240792036057, 'timestamp': '2025-09-10 02:48:28.765298', 'step': 19060, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 2847885091968}, 'timestamp': '2025-09-10 02:48:28.795927', 'step': 19060, 'epoch': 3} {'type': 'loss', 'content': 0.011016963049769402, 'timestamp': '2025-09-10 02:48:28.798326', 'step': 19061, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 3322488817984}, 'timestamp': '2025-09-10 02:48:28.828522', 'step': 19061, 'epoch': 3} {'type': 'loss', 'content': 0.015836454927921295, 'timestamp': '2025-09-10 02:48:28.831624', 'step': 19062, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 2847885091968}, 'timestamp': '2025-09-10 02:48:28.862545', 'step': 19062, 'epoch': 3} {'type': 'loss', 'content': 0.05959492549300194, 'timestamp': '2025-09-10 02:48:28.864954', 'step': 19063, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 3322488817984}, 'timestamp': '2025-09-10 02:48:28.895179', 'step': 19063, 'epoch': 3} {'type': 'loss', 'content': 0.023212196305394173, 'timestamp': '2025-09-10 02:48:28.918785', 'step': 19064, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 2847885091968}, 'timestamp': '2025-09-10 02:48:28.948769', 'step': 19064, 'epoch': 3} {'type': 'loss', 'content': 0.025816593319177628, 'timestamp': '2025-09-10 02:48:28.951478', 'step': 19065, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 3322488817984}, 'timestamp': '2025-09-10 02:48:28.982306', 'step': 19065, 'epoch': 3} {'type': 'loss', 'content': 0.07167306542396545, 'timestamp': '2025-09-10 02:48:28.985541', 'step': 19066, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 160], 'flops': 4746299996032}, 'timestamp': '2025-09-10 02:48:29.016285', 'step': 19066, 'epoch': 3} {'type': 'loss', 'content': 0.06847294420003891, 'timestamp': '2025-09-10 02:48:29.019269', 'step': 19067, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 3322488817984}, 'timestamp': '2025-09-10 02:48:29.049145', 'step': 19067, 'epoch': 3} {'type': 'loss', 'content': 0.03687131777405739, 'timestamp': '2025-09-10 02:48:29.073899', 'step': 19068, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 80], 'flops': 2373281365952}, 'timestamp': '2025-09-10 02:48:29.104874', 'step': 19068, 'epoch': 3} {'type': 'loss', 'content': 0.04716460034251213, 'timestamp': '2025-09-10 02:48:29.107189', 'step': 19069, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 144], 'flops': 4271696270016}, 'timestamp': '2025-09-10 02:48:29.137192', 'step': 19069, 'epoch': 3} {'type': 'loss', 'content': 0.04604177176952362, 'timestamp': '2025-09-10 02:48:29.139626', 'step': 19070, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 2847885091968}, 'timestamp': '2025-09-10 02:48:29.169576', 'step': 19070, 'epoch': 3} {'type': 'loss', 'content': 0.05768972635269165, 'timestamp': '2025-09-10 02:48:29.172998', 'step': 19071, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 2847885091968}, 'timestamp': '2025-09-10 02:48:29.203388', 'step': 19071, 'epoch': 3} {'type': 'loss', 'content': 0.03421531617641449, 'timestamp': '2025-09-10 02:48:29.226915', 'step': 19072, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 128], 'flops': 3797092544000}, 'timestamp': '2025-09-10 02:48:29.257542', 'step': 19072, 'epoch': 3} {'type': 'loss', 'content': 0.09849473834037781, 'timestamp': '2025-09-10 02:48:29.260102', 'step': 19073, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 128], 'flops': 3797092544000}, 'timestamp': '2025-09-10 02:48:29.291170', 'step': 19073, 'epoch': 3} {'type': 'loss', 'content': 0.04342586174607277, 'timestamp': '2025-09-10 02:48:29.293490', 'step': 19074, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 3322488817984}, 'timestamp': '2025-09-10 02:48:29.323490', 'step': 19074, 'epoch': 3} {'type': 'loss', 'content': 0.029132336378097534, 'timestamp': '2025-09-10 02:48:29.325903', 'step': 19075, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 3322488817984}, 'timestamp': '2025-09-10 02:48:29.356349', 'step': 19075, 'epoch': 3} {'type': 'loss', 'content': 0.02872881479561329, 'timestamp': '2025-09-10 02:48:29.379922', 'step': 19076, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 128], 'flops': 3797092544000}, 'timestamp': '2025-09-10 02:48:29.410925', 'step': 19076, 'epoch': 3} {'type': 'loss', 'content': 0.1201339066028595, 'timestamp': '2025-09-10 02:48:29.413421', 'step': 19077, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 160], 'flops': 4746299996032}, 'timestamp': '2025-09-10 02:48:29.448073', 'step': 19077, 'epoch': 3} {'type': 'loss', 'content': 0.06879444420337677, 'timestamp': '2025-09-10 02:48:29.450800', 'step': 19078, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 3322488817984}, 'timestamp': '2025-09-10 02:48:29.481937', 'step': 19078, 'epoch': 3} {'type': 'loss', 'content': 0.0688166692852974, 'timestamp': '2025-09-10 02:48:29.484908', 'step': 19079, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 3322488817984}, 'timestamp': '2025-09-10 02:48:29.515765', 'step': 19079, 'epoch': 3} {'type': 'loss', 'content': 0.08884435147047043, 'timestamp': '2025-09-10 02:48:29.539447', 'step': 19080, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 3322488817984}, 'timestamp': '2025-09-10 02:48:29.569179', 'step': 19080, 'epoch': 3} {'type': 'loss', 'content': 0.034344010055065155, 'timestamp': '2025-09-10 02:48:29.571628', 'step': 19081, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 144], 'flops': 4271696270016}, 'timestamp': '2025-09-10 02:48:29.602141', 'step': 19081, 'epoch': 3} {'type': 'loss', 'content': 0.11929844319820404, 'timestamp': '2025-09-10 02:48:29.604527', 'step': 19082, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 3322488817984}, 'timestamp': '2025-09-10 02:48:29.635284', 'step': 19082, 'epoch': 3} {'type': 'loss', 'content': 0.0621907077729702, 'timestamp': '2025-09-10 02:48:29.637916', 'step': 19083, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 80], 'flops': 2373281365952}, 'timestamp': '2025-09-10 02:48:29.667953', 'step': 19083, 'epoch': 3} {'type': 'loss', 'content': 0.04365050047636032, 'timestamp': '2025-09-10 02:48:29.691489', 'step': 19084, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 3322488817984}, 'timestamp': '2025-09-10 02:48:29.726564', 'step': 19084, 'epoch': 3} {'type': 'loss', 'content': 0.07979188859462738, 'timestamp': '2025-09-10 02:48:29.729023', 'step': 19085, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 3322488817984}, 'timestamp': '2025-09-10 02:48:29.759455', 'step': 19085, 'epoch': 3} {'type': 'loss', 'content': 0.04054112732410431, 'timestamp': '2025-09-10 02:48:29.761602', 'step': 19086, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 3322488817984}, 'timestamp': '2025-09-10 02:48:29.792237', 'step': 19086, 'epoch': 3} {'type': 'loss', 'content': 0.07452773302793503, 'timestamp': '2025-09-10 02:48:29.794614', 'step': 19087, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 3322488817984}, 'timestamp': '2025-09-10 02:48:29.824658', 'step': 19087, 'epoch': 3} {'type': 'loss', 'content': 0.04271985590457916, 'timestamp': '2025-09-10 02:48:29.849894', 'step': 19088, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 80], 'flops': 2373281365952}, 'timestamp': '2025-09-10 02:48:29.880491', 'step': 19088, 'epoch': 3} {'type': 'loss', 'content': 0.04125801473855972, 'timestamp': '2025-09-10 02:48:29.882894', 'step': 19089, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 2847885091968}, 'timestamp': '2025-09-10 02:48:29.913647', 'step': 19089, 'epoch': 3} {'type': 'loss', 'content': 0.05621355026960373, 'timestamp': '2025-09-10 02:48:29.916646', 'step': 19090, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 2847885091968}, 'timestamp': '2025-09-10 02:48:29.947742', 'step': 19090, 'epoch': 3} {'type': 'loss', 'content': 0.008986725471913815, 'timestamp': '2025-09-10 02:48:29.949983', 'step': 19091, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 128], 'flops': 3797092544000}, 'timestamp': '2025-09-10 02:48:29.979939', 'step': 19091, 'epoch': 3} {'type': 'loss', 'content': 0.06470377743244171, 'timestamp': '2025-09-10 02:48:30.003413', 'step': 19092, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 2847885091968}, 'timestamp': '2025-09-10 02:48:30.033717', 'step': 19092, 'epoch': 3} {'type': 'loss', 'content': 0.0812499076128006, 'timestamp': '2025-09-10 02:48:30.036472', 'step': 19093, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 2847885091968}, 'timestamp': '2025-09-10 02:48:30.066352', 'step': 19093, 'epoch': 3} {'type': 'loss', 'content': 0.011136537417769432, 'timestamp': '2025-09-10 02:48:30.068674', 'step': 19094, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 3322488817984}, 'timestamp': '2025-09-10 02:48:30.098643', 'step': 19094, 'epoch': 3} {'type': 'loss', 'content': 0.058394815772771835, 'timestamp': '2025-09-10 02:48:30.100968', 'step': 19095, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 144], 'flops': 4271696270016}, 'timestamp': '2025-09-10 02:48:30.131187', 'step': 19095, 'epoch': 3} {'type': 'loss', 'content': 0.06325100362300873, 'timestamp': '2025-09-10 02:48:30.155523', 'step': 19096, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 3322488817984}, 'timestamp': '2025-09-10 02:48:30.186024', 'step': 19096, 'epoch': 3} {'type': 'loss', 'content': 0.03864593058824539, 'timestamp': '2025-09-10 02:48:30.188189', 'step': 19097, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 3322488817984}, 'timestamp': '2025-09-10 02:48:30.218198', 'step': 19097, 'epoch': 3} {'type': 'loss', 'content': 0.10115756839513779, 'timestamp': '2025-09-10 02:48:30.220471', 'step': 19098, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 128], 'flops': 3797092544000}, 'timestamp': '2025-09-10 02:48:30.250173', 'step': 19098, 'epoch': 3} {'type': 'loss', 'content': 0.06796716898679733, 'timestamp': '2025-09-10 02:48:30.252696', 'step': 19099, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 128], 'flops': 3797092544000}, 'timestamp': '2025-09-10 02:48:30.282623', 'step': 19099, 'epoch': 3} {'type': 'loss', 'content': 0.07003993541002274, 'timestamp': '2025-09-10 02:48:30.305924', 'step': 19100, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 3322488817984}, 'timestamp': '2025-09-10 02:48:30.349323', 'step': 19100, 'epoch': 3} {'type': 'loss', 'content': 0.048044025897979736, 'timestamp': '2025-09-10 02:48:30.352196', 'step': 19101, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 3322488817984}, 'timestamp': '2025-09-10 02:48:30.383541', 'step': 19101, 'epoch': 3} {'type': 'loss', 'content': 0.053297389298677444, 'timestamp': '2025-09-10 02:48:30.385869', 'step': 19102, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 3322488817984}, 'timestamp': '2025-09-10 02:48:30.415900', 'step': 19102, 'epoch': 3} {'type': 'loss', 'content': 0.06847625970840454, 'timestamp': '2025-09-10 02:48:30.418259', 'step': 19103, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 128], 'flops': 3797092544000}, 'timestamp': '2025-09-10 02:48:30.449543', 'step': 19103, 'epoch': 3} {'type': 'loss', 'content': 0.03360799700021744, 'timestamp': '2025-09-10 02:48:30.473695', 'step': 19104, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 2847885091968}, 'timestamp': '2025-09-10 02:48:30.505056', 'step': 19104, 'epoch': 3} {'type': 'loss', 'content': 0.10594863444566727, 'timestamp': '2025-09-10 02:48:30.507902', 'step': 19105, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 2847885091968}, 'timestamp': '2025-09-10 02:48:30.538059', 'step': 19105, 'epoch': 3} {'type': 'loss', 'content': 0.033131688833236694, 'timestamp': '2025-09-10 02:48:30.540387', 'step': 19106, 'epoch': 3} {'type': 'flops', 'content': [{'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1582003754624}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1898404492032}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1898404492032}, {'type': 'perplexity', 'in_batch_dim': [8, 144], 'batch_size': 8, 'flops': 2847606704256}, {'type': 'perplexity', 'in_batch_dim': [8, 64], 'batch_size': 8, 'flops': 1265603017216}, {'type': 'perplexity', 'in_batch_dim': [8, 112], 'batch_size': 8, 'flops': 2214805229440}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1898404492032}, {'type': 'perplexity', 'in_batch_dim': [8, 112], 'batch_size': 8, 'flops': 2214805229440}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1898404492032}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1898404492032}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1898404492032}, {'type': 'perplexity', 'in_batch_dim': [8, 112], 'batch_size': 8, 'flops': 2214805229440}, {'type': 'perplexity', 'in_batch_dim': [8, 112], 'batch_size': 8, 'flops': 2214805229440}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1582003754624}, {'type': 'perplexity', 'in_batch_dim': [8, 144], 'batch_size': 8, 'flops': 2847606704256}, {'type': 'perplexity', 'in_batch_dim': [8, 112], 'batch_size': 8, 'flops': 2214805229440}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1582003754624}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1898404492032}, {'type': 'perplexity', 'in_batch_dim': [8, 64], 'batch_size': 8, 'flops': 1265603017216}, {'type': 'perplexity', 'in_batch_dim': [8, 64], 'batch_size': 8, 'flops': 1265603017216}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1898404492032}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1582003754624}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1582003754624}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1582003754624}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1582003754624}, {'type': 'perplexity', 'in_batch_dim': [8, 64], 'batch_size': 8, 'flops': 1265603017216}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1898404492032}, {'type': 'perplexity', 'in_batch_dim': [8, 64], 'batch_size': 8, 'flops': 1265603017216}, {'type': 'perplexity', 'in_batch_dim': [8, 64], 'batch_size': 8, 'flops': 1265603017216}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1582003754624}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1582003754624}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1582003754624}, {'type': 'perplexity', 'in_batch_dim': [8, 112], 'batch_size': 8, 'flops': 2214805229440}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1582003754624}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1582003754624}, {'type': 'perplexity', 'in_batch_dim': [8, 160], 'batch_size': 8, 'flops': 3164007441664}, {'type': 'perplexity', 'in_batch_dim': [8, 64], 'batch_size': 8, 'flops': 1265603017216}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1898404492032}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1898404492032}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1898404492032}, {'type': 'perplexity', 'in_batch_dim': [8, 64], 'batch_size': 8, 'flops': 1265603017216}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1582003754624}, {'type': 'perplexity', 'in_batch_dim': [8, 64], 'batch_size': 8, 'flops': 1265603017216}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1582003754624}, {'type': 'perplexity', 'in_batch_dim': [8, 64], 'batch_size': 8, 'flops': 1265603017216}, {'type': 'perplexity', 'in_batch_dim': [8, 112], 'batch_size': 8, 'flops': 2214805229440}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1582003754624}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1898404492032}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1898404492032}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1898404492032}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1898404492032}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1898404492032}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1898404492032}, {'type': 'perplexity', 'in_batch_dim': [8, 64], 'batch_size': 8, 'flops': 1265603017216}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1898404492032}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1582003754624}, {'type': 'perplexity', 'in_batch_dim': [8, 64], 'batch_size': 8, 'flops': 1265603017216}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1898404492032}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1898404492032}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1898404492032}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1898404492032}, {'type': 'perplexity', 'in_batch_dim': [8, 128], 'batch_size': 8, 'flops': 2531205966848}, {'type': 'perplexity', 'in_batch_dim': [8, 112], 'batch_size': 8, 'flops': 2214805229440}, {'type': 'perplexity', 'in_batch_dim': [8, 64], 'batch_size': 8, 'flops': 1265603017216}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1582003754624}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1898404492032}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1582003754624}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1898404492032}, {'type': 'perplexity', 'in_batch_dim': [8, 64], 'batch_size': 8, 'flops': 1265603017216}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1898404492032}, {'type': 'perplexity', 'in_batch_dim': [8, 112], 'batch_size': 8, 'flops': 2214805229440}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1898404492032}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1582003754624}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1582003754624}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1582003754624}, {'type': 'perplexity', 'in_batch_dim': [8, 64], 'batch_size': 8, 'flops': 1265603017216}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1582003754624}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1898404492032}, {'type': 'perplexity', 'in_batch_dim': [8, 64], 'batch_size': 8, 'flops': 1265603017216}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1582003754624}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1898404492032}, {'type': 'perplexity', 'in_batch_dim': [8, 64], 'batch_size': 8, 'flops': 1265603017216}, {'type': 'perplexity', 'in_batch_dim': [8, 112], 'batch_size': 8, 'flops': 2214805229440}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1582003754624}, {'type': 'perplexity', 'in_batch_dim': [8, 144], 'batch_size': 8, 'flops': 2847606704256}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1582003754624}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1582003754624}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1582003754624}, {'type': 'perplexity', 'in_batch_dim': [8, 64], 'batch_size': 8, 'flops': 1265603017216}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1898404492032}, {'type': 'perplexity', 'in_batch_dim': [8, 112], 'batch_size': 8, 'flops': 2214805229440}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1898404492032}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1582003754624}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1582003754624}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1582003754624}, {'type': 'perplexity', 'in_batch_dim': [8, 112], 'batch_size': 8, 'flops': 2214805229440}, {'type': 'perplexity', 'in_batch_dim': [8, 64], 'batch_size': 8, 'flops': 1265603017216}, {'type': 'perplexity', 'in_batch_dim': [8, 112], 'batch_size': 8, 'flops': 2214805229440}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1582003754624}, {'type': 'perplexity', 'in_batch_dim': [8, 64], 'batch_size': 8, 'flops': 1265603017216}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1582003754624}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1582003754624}, {'type': 'perplexity', 'in_batch_dim': [8, 64], 'batch_size': 8, 'flops': 1265603017216}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1582003754624}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1582003754624}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1582003754624}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1898404492032}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1582003754624}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1582003754624}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1898404492032}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1898404492032}, {'type': 'perplexity', 'in_batch_dim': [8, 112], 'batch_size': 8, 'flops': 2214805229440}, {'type': 'perplexity', 'in_batch_dim': [8, 64], 'batch_size': 8, 'flops': 1265603017216}, {'type': 'perplexity', 'in_batch_dim': [8, 112], 'batch_size': 8, 'flops': 2214805229440}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1898404492032}, {'type': 'perplexity', 'in_batch_dim': [8, 112], 'batch_size': 8, 'flops': 2214805229440}, {'type': 'perplexity', 'in_batch_dim': [8, 128], 'batch_size': 8, 'flops': 2531205966848}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1582003754624}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1898404492032}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1898404492032}, {'type': 'perplexity', 'in_batch_dim': [8, 192], 'batch_size': 8, 'flops': 3796808916480}, {'type': 'perplexity', 'in_batch_dim': [8, 64], 'batch_size': 8, 'flops': 1265603017216}, {'type': 'perplexity', 'in_batch_dim': [8, 144], 'batch_size': 8, 'flops': 2847606704256}, {'type': 'perplexity', 'in_batch_dim': [8, 64], 'batch_size': 8, 'flops': 1265603017216}, {'type': 'perplexity', 'in_batch_dim': [8, 144], 'batch_size': 8, 'flops': 2847606704256}, {'type': 'perplexity', 'in_batch_dim': [8, 64], 'batch_size': 8, 'flops': 1265603017216}, {'type': 'perplexity', 'in_batch_dim': [8, 64], 'batch_size': 8, 'flops': 1265603017216}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1898404492032}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1898404492032}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1582003754624}, {'type': 'perplexity', 'in_batch_dim': [8, 128], 'batch_size': 8, 'flops': 2531205966848}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1898404492032}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1898404492032}, {'type': 'perplexity', 'in_batch_dim': [8, 112], 'batch_size': 8, 'flops': 2214805229440}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1582003754624}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1582003754624}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1898404492032}, {'type': 'perplexity', 'in_batch_dim': [8, 64], 'batch_size': 8, 'flops': 1265603017216}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1898404492032}, {'type': 'perplexity', 'in_batch_dim': [8, 112], 'batch_size': 8, 'flops': 2214805229440}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1898404492032}, {'type': 'perplexity', 'in_batch_dim': [8, 64], 'batch_size': 8, 'flops': 1265603017216}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1582003754624}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1898404492032}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1898404492032}, {'type': 'perplexity', 'in_batch_dim': [8, 64], 'batch_size': 8, 'flops': 1265603017216}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1898404492032}, {'type': 'perplexity', 'in_batch_dim': [8, 64], 'batch_size': 8, 'flops': 1265603017216}, {'type': 'perplexity', 'in_batch_dim': [8, 112], 'batch_size': 8, 'flops': 2214805229440}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1582003754624}, {'type': 'perplexity', 'in_batch_dim': [8, 128], 'batch_size': 8, 'flops': 2531205966848}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1582003754624}, {'type': 'perplexity', 'in_batch_dim': [8, 112], 'batch_size': 8, 'flops': 2214805229440}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1898404492032}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1582003754624}, {'type': 'perplexity', 'in_batch_dim': [8, 112], 'batch_size': 8, 'flops': 2214805229440}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1582003754624}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1898404492032}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1898404492032}, {'type': 'perplexity', 'in_batch_dim': [8, 128], 'batch_size': 8, 'flops': 2531205966848}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1898404492032}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1898404492032}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1582003754624}, {'type': 'perplexity', 'in_batch_dim': [8, 112], 'batch_size': 8, 'flops': 2214805229440}, {'type': 'perplexity', 'in_batch_dim': [8, 128], 'batch_size': 8, 'flops': 2531205966848}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1898404492032}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1582003754624}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1582003754624}, {'type': 'perplexity', 'in_batch_dim': [8, 128], 'batch_size': 8, 'flops': 2531205966848}, {'type': 'perplexity', 'in_batch_dim': [8, 112], 'batch_size': 8, 'flops': 2214805229440}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1582003754624}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1582003754624}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1898404492032}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1582003754624}, {'type': 'perplexity', 'in_batch_dim': [8, 64], 'batch_size': 8, 'flops': 1265603017216}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1582003754624}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1582003754624}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1582003754624}, {'type': 'perplexity', 'in_batch_dim': [8, 112], 'batch_size': 8, 'flops': 2214805229440}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1582003754624}, {'type': 'perplexity', 'in_batch_dim': [8, 64], 'batch_size': 8, 'flops': 1265603017216}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1582003754624}, {'type': 'perplexity', 'in_batch_dim': [8, 128], 'batch_size': 8, 'flops': 2531205966848}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1898404492032}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1898404492032}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1582003754624}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1898404492032}, {'type': 'perplexity', 'in_batch_dim': [8, 64], 'batch_size': 8, 'flops': 1265603017216}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1898404492032}, {'type': 'perplexity', 'in_batch_dim': [8, 112], 'batch_size': 8, 'flops': 2214805229440}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1898404492032}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1582003754624}, {'type': 'perplexity', 'in_batch_dim': [8, 112], 'batch_size': 8, 'flops': 2214805229440}, {'type': 'perplexity', 'in_batch_dim': [8, 112], 'batch_size': 8, 'flops': 2214805229440}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1898404492032}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1898404492032}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1898404492032}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1898404492032}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1582003754624}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1582003754624}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1582003754624}, {'type': 'perplexity', 'in_batch_dim': [8, 112], 'batch_size': 8, 'flops': 2214805229440}, {'type': 'perplexity', 'in_batch_dim': [8, 64], 'batch_size': 8, 'flops': 1265603017216}, {'type': 'perplexity', 'in_batch_dim': [8, 144], 'batch_size': 8, 'flops': 2847606704256}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1898404492032}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1898404492032}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1898404492032}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1898404492032}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1898404492032}, {'type': 'perplexity', 'in_batch_dim': [8, 112], 'batch_size': 8, 'flops': 2214805229440}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1582003754624}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1582003754624}, {'type': 'perplexity', 'in_batch_dim': [8, 64], 'batch_size': 8, 'flops': 1265603017216}, {'type': 'perplexity', 'in_batch_dim': [8, 112], 'batch_size': 8, 'flops': 2214805229440}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1582003754624}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1898404492032}, {'type': 'perplexity', 'in_batch_dim': [8, 64], 'batch_size': 8, 'flops': 1265603017216}, {'type': 'perplexity', 'in_batch_dim': [8, 112], 'batch_size': 8, 'flops': 2214805229440}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1898404492032}, {'type': 'perplexity', 'in_batch_dim': [8, 64], 'batch_size': 8, 'flops': 1265603017216}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1582003754624}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1582003754624}, {'type': 'perplexity', 'in_batch_dim': [8, 64], 'batch_size': 8, 'flops': 1265603017216}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1898404492032}, {'type': 'perplexity', 'in_batch_dim': [8, 64], 'batch_size': 8, 'flops': 1265603017216}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1582003754624}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1898404492032}, {'type': 'perplexity', 'in_batch_dim': [8, 112], 'batch_size': 8, 'flops': 2214805229440}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1582003754624}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1898404492032}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1582003754624}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1898404492032}, {'type': 'perplexity', 'in_batch_dim': [8, 112], 'batch_size': 8, 'flops': 2214805229440}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1898404492032}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1898404492032}, {'type': 'perplexity', 'in_batch_dim': [8, 64], 'batch_size': 8, 'flops': 1265603017216}, {'type': 'perplexity', 'in_batch_dim': [8, 64], 'batch_size': 8, 'flops': 1265603017216}, {'type': 'perplexity', 'in_batch_dim': [8, 128], 'batch_size': 8, 'flops': 2531205966848}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1582003754624}, {'type': 'perplexity', 'in_batch_dim': [8, 64], 'batch_size': 8, 'flops': 1265603017216}, {'type': 'perplexity', 'in_batch_dim': [8, 112], 'batch_size': 8, 'flops': 2214805229440}, {'type': 'perplexity', 'in_batch_dim': [8, 112], 'batch_size': 8, 'flops': 2214805229440}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1898404492032}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1582003754624}, {'type': 'perplexity', 'in_batch_dim': [8, 128], 'batch_size': 8, 'flops': 2531205966848}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1898404492032}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1582003754624}, {'type': 'perplexity', 'in_batch_dim': [8, 112], 'batch_size': 8, 'flops': 2214805229440}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1898404492032}, {'type': 'perplexity', 'in_batch_dim': [8, 64], 'batch_size': 8, 'flops': 1265603017216}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1582003754624}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1898404492032}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1898404492032}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1582003754624}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1582003754624}, {'type': 'perplexity', 'in_batch_dim': [8, 64], 'batch_size': 8, 'flops': 1265603017216}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1898404492032}, {'type': 'perplexity', 'in_batch_dim': [8, 112], 'batch_size': 8, 'flops': 2214805229440}, {'type': 'perplexity', 'in_batch_dim': [8, 64], 'batch_size': 8, 'flops': 1265603017216}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1898404492032}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1582003754624}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1582003754624}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1898404492032}, {'type': 'perplexity', 'in_batch_dim': [8, 112], 'batch_size': 8, 'flops': 2214805229440}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1898404492032}, {'type': 'perplexity', 'in_batch_dim': [8, 112], 'batch_size': 8, 'flops': 2214805229440}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1582003754624}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1582003754624}, {'type': 'perplexity', 'in_batch_dim': [8, 64], 'batch_size': 8, 'flops': 1265603017216}, {'type': 'perplexity', 'in_batch_dim': [8, 112], 'batch_size': 8, 'flops': 2214805229440}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1582003754624}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1898404492032}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1582003754624}, {'type': 'perplexity', 'in_batch_dim': [8, 112], 'batch_size': 8, 'flops': 2214805229440}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1898404492032}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1898404492032}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1582003754624}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1898404492032}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1582003754624}, {'type': 'perplexity', 'in_batch_dim': [8, 112], 'batch_size': 8, 'flops': 2214805229440}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1582003754624}, {'type': 'perplexity', 'in_batch_dim': [8, 64], 'batch_size': 8, 'flops': 1265603017216}, {'type': 'perplexity', 'in_batch_dim': [8, 64], 'batch_size': 8, 'flops': 1265603017216}, {'type': 'perplexity', 'in_batch_dim': [8, 48], 'batch_size': 8, 'flops': 949202279808}, {'type': 'perplexity', 'in_batch_dim': [8, 112], 'batch_size': 8, 'flops': 2214805229440}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1582003754624}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1898404492032}, {'type': 'perplexity', 'in_batch_dim': [8, 128], 'batch_size': 8, 'flops': 2531205966848}, {'type': 'perplexity', 'in_batch_dim': [8, 112], 'batch_size': 8, 'flops': 2214805229440}, {'type': 'perplexity', 'in_batch_dim': [8, 128], 'batch_size': 8, 'flops': 2531205966848}, {'type': 'perplexity', 'in_batch_dim': [8, 112], 'batch_size': 8, 'flops': 2214805229440}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1582003754624}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1898404492032}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1582003754624}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1582003754624}, {'type': 'perplexity', 'in_batch_dim': [8, 112], 'batch_size': 8, 'flops': 2214805229440}, {'type': 'perplexity', 'in_batch_dim': [8, 112], 'batch_size': 8, 'flops': 2214805229440}, {'type': 'perplexity', 'in_batch_dim': [8, 64], 'batch_size': 8, 'flops': 1265603017216}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1898404492032}, {'type': 'perplexity', 'in_batch_dim': [8, 112], 'batch_size': 8, 'flops': 2214805229440}, {'type': 'perplexity', 'in_batch_dim': [8, 64], 'batch_size': 8, 'flops': 1265603017216}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1898404492032}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1582003754624}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1898404492032}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1898404492032}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1898404492032}, {'type': 'perplexity', 'in_batch_dim': [8, 48], 'batch_size': 8, 'flops': 949202279808}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1898404492032}, {'type': 'perplexity', 'in_batch_dim': [8, 112], 'batch_size': 8, 'flops': 2214805229440}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1582003754624}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1898404492032}, {'type': 'perplexity', 'in_batch_dim': [8, 64], 'batch_size': 8, 'flops': 1265603017216}, {'type': 'perplexity', 'in_batch_dim': [8, 112], 'batch_size': 8, 'flops': 2214805229440}, {'type': 'perplexity', 'in_batch_dim': [8, 64], 'batch_size': 8, 'flops': 1265603017216}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1582003754624}, {'type': 'perplexity', 'in_batch_dim': [8, 64], 'batch_size': 8, 'flops': 1265603017216}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1582003754624}, {'type': 'perplexity', 'in_batch_dim': [8, 112], 'batch_size': 8, 'flops': 2214805229440}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1582003754624}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1582003754624}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1582003754624}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1582003754624}, {'type': 'perplexity', 'in_batch_dim': [8, 112], 'batch_size': 8, 'flops': 2214805229440}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1582003754624}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1898404492032}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1898404492032}, {'type': 'perplexity', 'in_batch_dim': [8, 112], 'batch_size': 8, 'flops': 2214805229440}, {'type': 'perplexity', 'in_batch_dim': [8, 144], 'batch_size': 8, 'flops': 2847606704256}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1582003754624}, {'type': 'perplexity', 'in_batch_dim': [8, 64], 'batch_size': 8, 'flops': 1265603017216}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1898404492032}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1898404492032}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1582003754624}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1898404492032}, {'type': 'perplexity', 'in_batch_dim': [8, 128], 'batch_size': 8, 'flops': 2531205966848}, {'type': 'perplexity', 'in_batch_dim': [8, 112], 'batch_size': 8, 'flops': 2214805229440}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1898404492032}, {'type': 'perplexity', 'in_batch_dim': [8, 64], 'batch_size': 8, 'flops': 1265603017216}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1898404492032}, {'type': 'perplexity', 'in_batch_dim': [8, 160], 'batch_size': 8, 'flops': 3164007441664}, {'type': 'perplexity', 'in_batch_dim': [8, 64], 'batch_size': 8, 'flops': 1265603017216}, {'type': 'perplexity', 'in_batch_dim': [8, 112], 'batch_size': 8, 'flops': 2214805229440}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1898404492032}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1898404492032}, {'type': 'perplexity', 'in_batch_dim': [8, 144], 'batch_size': 8, 'flops': 2847606704256}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1582003754624}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1898404492032}, {'type': 'perplexity', 'in_batch_dim': [8, 64], 'batch_size': 8, 'flops': 1265603017216}, {'type': 'perplexity', 'in_batch_dim': [8, 64], 'batch_size': 8, 'flops': 1265603017216}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1898404492032}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1898404492032}, {'type': 'perplexity', 'in_batch_dim': [8, 112], 'batch_size': 8, 'flops': 2214805229440}, {'type': 'perplexity', 'in_batch_dim': [8, 112], 'batch_size': 8, 'flops': 2214805229440}, {'type': 'perplexity', 'in_batch_dim': [8, 128], 'batch_size': 8, 'flops': 2531205966848}, {'type': 'perplexity', 'in_batch_dim': [8, 112], 'batch_size': 8, 'flops': 2214805229440}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1582003754624}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1582003754624}, {'type': 'perplexity', 'in_batch_dim': [8, 112], 'batch_size': 8, 'flops': 2214805229440}, {'type': 'perplexity', 'in_batch_dim': [8, 128], 'batch_size': 8, 'flops': 2531205966848}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1582003754624}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1582003754624}, {'type': 'perplexity', 'in_batch_dim': [8, 64], 'batch_size': 8, 'flops': 1265603017216}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1898404492032}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1582003754624}, {'type': 'perplexity', 'in_batch_dim': [8, 112], 'batch_size': 8, 'flops': 2214805229440}, {'type': 'perplexity', 'in_batch_dim': [8, 64], 'batch_size': 8, 'flops': 1265603017216}, {'type': 'perplexity', 'in_batch_dim': [8, 64], 'batch_size': 8, 'flops': 1265603017216}, {'type': 'perplexity', 'in_batch_dim': [8, 128], 'batch_size': 8, 'flops': 2531205966848}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1898404492032}, {'type': 'perplexity', 'in_batch_dim': [8, 112], 'batch_size': 8, 'flops': 2214805229440}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1582003754624}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1582003754624}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1582003754624}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1582003754624}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1582003754624}, {'type': 'perplexity', 'in_batch_dim': [8, 112], 'batch_size': 8, 'flops': 2214805229440}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1898404492032}, {'type': 'perplexity', 'in_batch_dim': [8, 112], 'batch_size': 8, 'flops': 2214805229440}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1898404492032}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1582003754624}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1582003754624}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1898404492032}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1898404492032}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1582003754624}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1582003754624}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1582003754624}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1582003754624}, {'type': 'perplexity', 'in_batch_dim': [8, 48], 'batch_size': 8, 'flops': 949202279808}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1582003754624}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1898404492032}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1582003754624}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1898404492032}, {'type': 'perplexity', 'in_batch_dim': [8, 112], 'batch_size': 8, 'flops': 2214805229440}, {'type': 'perplexity', 'in_batch_dim': [8, 128], 'batch_size': 8, 'flops': 2531205966848}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1898404492032}, {'type': 'perplexity', 'in_batch_dim': [8, 64], 'batch_size': 8, 'flops': 1265603017216}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1898404492032}, {'type': 'perplexity', 'in_batch_dim': [8, 64], 'batch_size': 8, 'flops': 1265603017216}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1582003754624}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1582003754624}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1582003754624}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1582003754624}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1898404492032}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1582003754624}, {'type': 'perplexity', 'in_batch_dim': [8, 112], 'batch_size': 8, 'flops': 2214805229440}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1898404492032}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1582003754624}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1898404492032}, {'type': 'perplexity', 'in_batch_dim': [8, 64], 'batch_size': 8, 'flops': 1265603017216}, {'type': 'perplexity', 'in_batch_dim': [8, 64], 'batch_size': 8, 'flops': 1265603017216}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1898404492032}, {'type': 'perplexity', 'in_batch_dim': [8, 64], 'batch_size': 8, 'flops': 1265603017216}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1582003754624}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1582003754624}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1582003754624}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1582003754624}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1582003754624}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1582003754624}, {'type': 'perplexity', 'in_batch_dim': [8, 64], 'batch_size': 8, 'flops': 1265603017216}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1582003754624}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1582003754624}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1898404492032}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1898404492032}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1898404492032}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1898404492032}, {'type': 'perplexity', 'in_batch_dim': [8, 128], 'batch_size': 8, 'flops': 2531205966848}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1582003754624}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1898404492032}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1898404492032}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1898404492032}, {'type': 'perplexity', 'in_batch_dim': [8, 64], 'batch_size': 8, 'flops': 1265603017216}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1898404492032}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1898404492032}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1582003754624}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1898404492032}, {'type': 'perplexity', 'in_batch_dim': [8, 112], 'batch_size': 8, 'flops': 2214805229440}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1898404492032}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1582003754624}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1898404492032}, {'type': 'perplexity', 'in_batch_dim': [8, 64], 'batch_size': 8, 'flops': 1265603017216}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1898404492032}, {'type': 'perplexity', 'in_batch_dim': [8, 112], 'batch_size': 8, 'flops': 2214805229440}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1898404492032}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1898404492032}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1582003754624}, {'type': 'perplexity', 'in_batch_dim': [8, 64], 'batch_size': 8, 'flops': 1265603017216}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1582003754624}, {'type': 'perplexity', 'in_batch_dim': [8, 128], 'batch_size': 8, 'flops': 2531205966848}, {'type': 'perplexity', 'in_batch_dim': [8, 64], 'batch_size': 8, 'flops': 1265603017216}, {'type': 'perplexity', 'in_batch_dim': [8, 128], 'batch_size': 8, 'flops': 2531205966848}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1898404492032}, {'type': 'perplexity', 'in_batch_dim': [8, 112], 'batch_size': 8, 'flops': 2214805229440}, {'type': 'perplexity', 'in_batch_dim': [8, 112], 'batch_size': 8, 'flops': 2214805229440}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1898404492032}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1898404492032}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1582003754624}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1582003754624}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1582003754624}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1898404492032}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1582003754624}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1582003754624}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1898404492032}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1582003754624}, {'type': 'perplexity', 'in_batch_dim': [8, 64], 'batch_size': 8, 'flops': 1265603017216}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1898404492032}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1898404492032}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1898404492032}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1898404492032}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1582003754624}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1898404492032}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1582003754624}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1582003754624}, {'type': 'perplexity', 'in_batch_dim': [8, 112], 'batch_size': 8, 'flops': 2214805229440}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1582003754624}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1898404492032}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1582003754624}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1582003754624}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1582003754624}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1898404492032}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1582003754624}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1898404492032}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1582003754624}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1582003754624}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1898404492032}, {'type': 'perplexity', 'in_batch_dim': [8, 112], 'batch_size': 8, 'flops': 2214805229440}, {'type': 'perplexity', 'in_batch_dim': [8, 128], 'batch_size': 8, 'flops': 2531205966848}, {'type': 'perplexity', 'in_batch_dim': [8, 112], 'batch_size': 8, 'flops': 2214805229440}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1582003754624}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1898404492032}, {'type': 'perplexity', 'in_batch_dim': [8, 64], 'batch_size': 8, 'flops': 1265603017216}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1898404492032}, {'type': 'perplexity', 'in_batch_dim': [8, 64], 'batch_size': 8, 'flops': 1265603017216}, {'type': 'perplexity', 'in_batch_dim': [8, 144], 'batch_size': 8, 'flops': 2847606704256}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1582003754624}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1898404492032}, {'type': 'perplexity', 'in_batch_dim': [8, 112], 'batch_size': 8, 'flops': 2214805229440}, {'type': 'perplexity', 'in_batch_dim': [8, 144], 'batch_size': 8, 'flops': 2847606704256}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1582003754624}, {'type': 'perplexity', 'in_batch_dim': [8, 112], 'batch_size': 8, 'flops': 2214805229440}, {'type': 'perplexity', 'in_batch_dim': [8, 64], 'batch_size': 8, 'flops': 1265603017216}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1582003754624}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1898404492032}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1898404492032}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1582003754624}, {'type': 'perplexity', 'in_batch_dim': [8, 112], 'batch_size': 8, 'flops': 2214805229440}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1582003754624}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1582003754624}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1582003754624}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1582003754624}, {'type': 'perplexity', 'in_batch_dim': [8, 112], 'batch_size': 8, 'flops': 2214805229440}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1898404492032}, {'type': 'perplexity', 'in_batch_dim': [8, 128], 'batch_size': 8, 'flops': 2531205966848}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1582003754624}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1582003754624}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1898404492032}, {'type': 'perplexity', 'in_batch_dim': [8, 64], 'batch_size': 8, 'flops': 1265603017216}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1898404492032}, {'type': 'perplexity', 'in_batch_dim': [8, 64], 'batch_size': 8, 'flops': 1265603017216}, {'type': 'perplexity', 'in_batch_dim': [8, 112], 'batch_size': 8, 'flops': 2214805229440}, {'type': 'perplexity', 'in_batch_dim': [8, 112], 'batch_size': 8, 'flops': 2214805229440}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1582003754624}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1898404492032}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1582003754624}, {'type': 'perplexity', 'in_batch_dim': [8, 64], 'batch_size': 8, 'flops': 1265603017216}, {'type': 'perplexity', 'in_batch_dim': [8, 64], 'batch_size': 8, 'flops': 1265603017216}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1898404492032}, {'type': 'perplexity', 'in_batch_dim': [8, 64], 'batch_size': 8, 'flops': 1265603017216}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1898404492032}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1898404492032}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1582003754624}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1898404492032}, {'type': 'perplexity', 'in_batch_dim': [8, 144], 'batch_size': 8, 'flops': 2847606704256}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1898404492032}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1582003754624}, {'type': 'perplexity', 'in_batch_dim': [8, 128], 'batch_size': 8, 'flops': 2531205966848}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1898404492032}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1898404492032}, {'type': 'perplexity', 'in_batch_dim': [8, 112], 'batch_size': 8, 'flops': 2214805229440}, {'type': 'perplexity', 'in_batch_dim': [8, 112], 'batch_size': 8, 'flops': 2214805229440}, {'type': 'perplexity', 'in_batch_dim': [8, 64], 'batch_size': 8, 'flops': 1265603017216}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1582003754624}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1898404492032}, {'type': 'perplexity', 'in_batch_dim': [8, 128], 'batch_size': 8, 'flops': 2531205966848}, {'type': 'perplexity', 'in_batch_dim': [8, 144], 'batch_size': 8, 'flops': 2847606704256}, {'type': 'perplexity', 'in_batch_dim': [8, 112], 'batch_size': 8, 'flops': 2214805229440}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1582003754624}, {'type': 'perplexity', 'in_batch_dim': [8, 64], 'batch_size': 8, 'flops': 1265603017216}, {'type': 'perplexity', 'in_batch_dim': [8, 64], 'batch_size': 8, 'flops': 1265603017216}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1898404492032}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1582003754624}, {'type': 'perplexity', 'in_batch_dim': [8, 64], 'batch_size': 8, 'flops': 1265603017216}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1898404492032}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1582003754624}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1898404492032}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1898404492032}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1582003754624}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1582003754624}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1582003754624}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1582003754624}, {'type': 'perplexity', 'in_batch_dim': [3, 48], 'batch_size': 8, 'flops': 949202279808}], 'timestamp': '2025-09-10 02:48:38.458690', 'step': 19106, 'epoch': 3} {'type': 'pplx', 'content': 12701.378624124734, 'timestamp': '2025-09-10 02:48:38.461725', 'step': 19106, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 3322488817984}, 'timestamp': '2025-09-10 02:48:38.491419', 'step': 19106, 'epoch': 3} {'type': 'loss', 'content': 0.04198008030653, 'timestamp': '2025-09-10 02:48:38.493901', 'step': 19107, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 3322488817984}, 'timestamp': '2025-09-10 02:48:38.524863', 'step': 19107, 'epoch': 3} {'type': 'loss', 'content': 0.06717891246080399, 'timestamp': '2025-09-10 02:48:38.548764', 'step': 19108, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 128], 'flops': 3797092544000}, 'timestamp': '2025-09-10 02:48:38.578539', 'step': 19108, 'epoch': 3} {'type': 'loss', 'content': 0.008526971563696861, 'timestamp': '2025-09-10 02:48:38.581097', 'step': 19109, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 144], 'flops': 4271696270016}, 'timestamp': '2025-09-10 02:48:38.612091', 'step': 19109, 'epoch': 3} {'type': 'loss', 'content': 0.1237865537405014, 'timestamp': '2025-09-10 02:48:38.614444', 'step': 19110, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 3322488817984}, 'timestamp': '2025-09-10 02:48:38.645471', 'step': 19110, 'epoch': 3} {'type': 'loss', 'content': 0.06452104449272156, 'timestamp': '2025-09-10 02:48:38.647693', 'step': 19111, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 3322488817984}, 'timestamp': '2025-09-10 02:48:38.678409', 'step': 19111, 'epoch': 3} {'type': 'loss', 'content': 0.0880214273929596, 'timestamp': '2025-09-10 02:48:38.701985', 'step': 19112, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 2847885091968}, 'timestamp': '2025-09-10 02:48:38.733137', 'step': 19112, 'epoch': 3} {'type': 'loss', 'content': 0.07345866411924362, 'timestamp': '2025-09-10 02:48:38.735722', 'step': 19113, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 80], 'flops': 2373281365952}, 'timestamp': '2025-09-10 02:48:38.768216', 'step': 19113, 'epoch': 3} {'type': 'loss', 'content': 0.13787886500358582, 'timestamp': '2025-09-10 02:48:38.770964', 'step': 19114, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 128], 'flops': 3797092544000}, 'timestamp': '2025-09-10 02:48:38.801289', 'step': 19114, 'epoch': 3} {'type': 'loss', 'content': 0.04425819218158722, 'timestamp': '2025-09-10 02:48:38.803648', 'step': 19115, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 128], 'flops': 3797092544000}, 'timestamp': '2025-09-10 02:48:38.834527', 'step': 19115, 'epoch': 3} {'type': 'loss', 'content': 0.05629982799291611, 'timestamp': '2025-09-10 02:48:38.858498', 'step': 19116, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 3322488817984}, 'timestamp': '2025-09-10 02:48:38.888890', 'step': 19116, 'epoch': 3} {'type': 'loss', 'content': 0.029755957424640656, 'timestamp': '2025-09-10 02:48:38.891183', 'step': 19117, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 3322488817984}, 'timestamp': '2025-09-10 02:48:38.921220', 'step': 19117, 'epoch': 3} {'type': 'loss', 'content': 0.061509545892477036, 'timestamp': '2025-09-10 02:48:38.923448', 'step': 19118, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 2847885091968}, 'timestamp': '2025-09-10 02:48:38.953503', 'step': 19118, 'epoch': 3} {'type': 'loss', 'content': 0.04406418651342392, 'timestamp': '2025-09-10 02:48:38.956113', 'step': 19119, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 3322488817984}, 'timestamp': '2025-09-10 02:48:38.989169', 'step': 19119, 'epoch': 3} {'type': 'loss', 'content': 0.03253832831978798, 'timestamp': '2025-09-10 02:48:39.012887', 'step': 19120, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 3322488817984}, 'timestamp': '2025-09-10 02:48:39.045425', 'step': 19120, 'epoch': 3} {'type': 'loss', 'content': 0.07919682562351227, 'timestamp': '2025-09-10 02:48:39.048313', 'step': 19121, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 3322488817984}, 'timestamp': '2025-09-10 02:48:39.080573', 'step': 19121, 'epoch': 3} {'type': 'loss', 'content': 0.06703097373247147, 'timestamp': '2025-09-10 02:48:39.083063', 'step': 19122, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 3322488817984}, 'timestamp': '2025-09-10 02:48:39.113796', 'step': 19122, 'epoch': 3} {'type': 'loss', 'content': 0.03810673579573631, 'timestamp': '2025-09-10 02:48:39.116098', 'step': 19123, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 144], 'flops': 4271696270016}, 'timestamp': '2025-09-10 02:48:39.146523', 'step': 19123, 'epoch': 3} {'type': 'loss', 'content': 0.09962056577205658, 'timestamp': '2025-09-10 02:48:39.170499', 'step': 19124, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 2847885091968}, 'timestamp': '2025-09-10 02:48:39.200880', 'step': 19124, 'epoch': 3} {'type': 'loss', 'content': 0.04403471574187279, 'timestamp': '2025-09-10 02:48:39.203271', 'step': 19125, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 2847885091968}, 'timestamp': '2025-09-10 02:48:39.233196', 'step': 19125, 'epoch': 3} {'type': 'loss', 'content': 0.07161562144756317, 'timestamp': '2025-09-10 02:48:39.235473', 'step': 19126, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 2847885091968}, 'timestamp': '2025-09-10 02:48:39.267073', 'step': 19126, 'epoch': 3} {'type': 'loss', 'content': 0.03177061304450035, 'timestamp': '2025-09-10 02:48:39.269462', 'step': 19127, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 2847885091968}, 'timestamp': '2025-09-10 02:48:39.301412', 'step': 19127, 'epoch': 3} {'type': 'loss', 'content': 0.040490131825208664, 'timestamp': '2025-09-10 02:48:39.325815', 'step': 19128, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 3322488817984}, 'timestamp': '2025-09-10 02:48:39.355687', 'step': 19128, 'epoch': 3} {'type': 'loss', 'content': 0.05087510123848915, 'timestamp': '2025-09-10 02:48:39.358128', 'step': 19129, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 3322488817984}, 'timestamp': '2025-09-10 02:48:39.388219', 'step': 19129, 'epoch': 3} {'type': 'loss', 'content': 0.08421554416418076, 'timestamp': '2025-09-10 02:48:39.390387', 'step': 19130, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 2847885091968}, 'timestamp': '2025-09-10 02:48:39.420236', 'step': 19130, 'epoch': 3} {'type': 'loss', 'content': 0.06623401492834091, 'timestamp': '2025-09-10 02:48:39.422716', 'step': 19131, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 2847885091968}, 'timestamp': '2025-09-10 02:48:39.452908', 'step': 19131, 'epoch': 3} {'type': 'loss', 'content': 0.14084531366825104, 'timestamp': '2025-09-10 02:48:39.476485', 'step': 19132, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 2847885091968}, 'timestamp': '2025-09-10 02:48:39.507257', 'step': 19132, 'epoch': 3} {'type': 'loss', 'content': 0.02091645635664463, 'timestamp': '2025-09-10 02:48:39.509520', 'step': 19133, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 3322488817984}, 'timestamp': '2025-09-10 02:48:39.539249', 'step': 19133, 'epoch': 3} {'type': 'loss', 'content': 0.028243469074368477, 'timestamp': '2025-09-10 02:48:39.541561', 'step': 19134, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 2847885091968}, 'timestamp': '2025-09-10 02:48:39.572031', 'step': 19134, 'epoch': 3} {'type': 'loss', 'content': 0.08990766853094101, 'timestamp': '2025-09-10 02:48:39.574640', 'step': 19135, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 3322488817984}, 'timestamp': '2025-09-10 02:48:39.606004', 'step': 19135, 'epoch': 3} {'type': 'loss', 'content': 0.03530310094356537, 'timestamp': '2025-09-10 02:48:39.629734', 'step': 19136, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 2847885091968}, 'timestamp': '2025-09-10 02:48:39.660891', 'step': 19136, 'epoch': 3} {'type': 'loss', 'content': 0.0942995622754097, 'timestamp': '2025-09-10 02:48:39.663315', 'step': 19137, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 3322488817984}, 'timestamp': '2025-09-10 02:48:39.693151', 'step': 19137, 'epoch': 3} {'type': 'loss', 'content': 0.0185394324362278, 'timestamp': '2025-09-10 02:48:39.695783', 'step': 19138, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 2847885091968}, 'timestamp': '2025-09-10 02:48:39.727385', 'step': 19138, 'epoch': 3} {'type': 'loss', 'content': 0.09331807494163513, 'timestamp': '2025-09-10 02:48:39.729581', 'step': 19139, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 160], 'flops': 4746299996032}, 'timestamp': '2025-09-10 02:48:39.759397', 'step': 19139, 'epoch': 3} {'type': 'loss', 'content': 0.05074889212846756, 'timestamp': '2025-09-10 02:48:39.783555', 'step': 19140, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 3322488817984}, 'timestamp': '2025-09-10 02:48:39.814470', 'step': 19140, 'epoch': 3} {'type': 'loss', 'content': 0.056110940873622894, 'timestamp': '2025-09-10 02:48:39.817036', 'step': 19141, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 2847885091968}, 'timestamp': '2025-09-10 02:48:39.846838', 'step': 19141, 'epoch': 3} {'type': 'loss', 'content': 0.07813546806573868, 'timestamp': '2025-09-10 02:48:39.849262', 'step': 19142, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 3322488817984}, 'timestamp': '2025-09-10 02:48:39.882529', 'step': 19142, 'epoch': 3} {'type': 'loss', 'content': 0.06448081880807877, 'timestamp': '2025-09-10 02:48:39.885689', 'step': 19143, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 3322488817984}, 'timestamp': '2025-09-10 02:48:39.917631', 'step': 19143, 'epoch': 3} {'type': 'loss', 'content': 0.07284185290336609, 'timestamp': '2025-09-10 02:48:39.942215', 'step': 19144, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 3322488817984}, 'timestamp': '2025-09-10 02:48:39.974610', 'step': 19144, 'epoch': 3} {'type': 'loss', 'content': 0.05508007854223251, 'timestamp': '2025-09-10 02:48:39.978389', 'step': 19145, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 144], 'flops': 4271696270016}, 'timestamp': '2025-09-10 02:48:40.012590', 'step': 19145, 'epoch': 3} {'type': 'loss', 'content': 0.03553667664527893, 'timestamp': '2025-09-10 02:48:40.016076', 'step': 19146, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 2847885091968}, 'timestamp': '2025-09-10 02:48:40.048469', 'step': 19146, 'epoch': 3} {'type': 'loss', 'content': 0.09487085789442062, 'timestamp': '2025-09-10 02:48:40.050584', 'step': 19147, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 3322488817984}, 'timestamp': '2025-09-10 02:48:40.083728', 'step': 19147, 'epoch': 3} {'type': 'loss', 'content': 0.0865933746099472, 'timestamp': '2025-09-10 02:48:40.107225', 'step': 19148, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 144], 'flops': 4271696270016}, 'timestamp': '2025-09-10 02:48:40.138522', 'step': 19148, 'epoch': 3} {'type': 'loss', 'content': 0.04577663913369179, 'timestamp': '2025-09-10 02:48:40.140645', 'step': 19149, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 128], 'flops': 3797092544000}, 'timestamp': '2025-09-10 02:48:40.170753', 'step': 19149, 'epoch': 3} {'type': 'loss', 'content': 0.05484703555703163, 'timestamp': '2025-09-10 02:48:40.173327', 'step': 19150, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 128], 'flops': 3797092544000}, 'timestamp': '2025-09-10 02:48:40.202939', 'step': 19150, 'epoch': 3} {'type': 'loss', 'content': 0.017892558127641678, 'timestamp': '2025-09-10 02:48:40.205564', 'step': 19151, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 3322488817984}, 'timestamp': '2025-09-10 02:48:40.236326', 'step': 19151, 'epoch': 3} {'type': 'loss', 'content': 0.058120742440223694, 'timestamp': '2025-09-10 02:48:40.259898', 'step': 19152, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 2847885091968}, 'timestamp': '2025-09-10 02:48:40.292209', 'step': 19152, 'epoch': 3} {'type': 'loss', 'content': 0.07234311103820801, 'timestamp': '2025-09-10 02:48:40.294689', 'step': 19153, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 3322488817984}, 'timestamp': '2025-09-10 02:48:40.325197', 'step': 19153, 'epoch': 3} {'type': 'loss', 'content': 0.07019048929214478, 'timestamp': '2025-09-10 02:48:40.327718', 'step': 19154, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 128], 'flops': 3797092544000}, 'timestamp': '2025-09-10 02:48:40.358464', 'step': 19154, 'epoch': 3} {'type': 'loss', 'content': 0.13181249797344208, 'timestamp': '2025-09-10 02:48:40.362094', 'step': 19155, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 2847885091968}, 'timestamp': '2025-09-10 02:48:40.392667', 'step': 19155, 'epoch': 3} {'type': 'loss', 'content': 0.13848818838596344, 'timestamp': '2025-09-10 02:48:40.416297', 'step': 19156, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 2847885091968}, 'timestamp': '2025-09-10 02:48:40.446841', 'step': 19156, 'epoch': 3} {'type': 'loss', 'content': 0.08307890594005585, 'timestamp': '2025-09-10 02:48:40.449301', 'step': 19157, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 2847885091968}, 'timestamp': '2025-09-10 02:48:40.479870', 'step': 19157, 'epoch': 3} {'type': 'loss', 'content': 0.05988037958741188, 'timestamp': '2025-09-10 02:48:40.482149', 'step': 19158, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 2847885091968}, 'timestamp': '2025-09-10 02:48:40.511356', 'step': 19158, 'epoch': 3} {'type': 'loss', 'content': 0.03860514238476753, 'timestamp': '2025-09-10 02:48:40.513681', 'step': 19159, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 2847885091968}, 'timestamp': '2025-09-10 02:48:40.543863', 'step': 19159, 'epoch': 3} {'type': 'loss', 'content': 0.013015316799283028, 'timestamp': '2025-09-10 02:48:40.567184', 'step': 19160, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 128], 'flops': 3797092544000}, 'timestamp': '2025-09-10 02:48:40.598043', 'step': 19160, 'epoch': 3} {'type': 'loss', 'content': 0.023247843608260155, 'timestamp': '2025-09-10 02:48:40.599879', 'step': 19161, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 3322488817984}, 'timestamp': '2025-09-10 02:48:40.630216', 'step': 19161, 'epoch': 3} {'type': 'loss', 'content': 0.019267167896032333, 'timestamp': '2025-09-10 02:48:40.632342', 'step': 19162, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 144], 'flops': 4271696270016}, 'timestamp': '2025-09-10 02:48:40.662754', 'step': 19162, 'epoch': 3} {'type': 'loss', 'content': 0.07473720610141754, 'timestamp': '2025-09-10 02:48:40.666787', 'step': 19163, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 2847885091968}, 'timestamp': '2025-09-10 02:48:40.697197', 'step': 19163, 'epoch': 3} {'type': 'loss', 'content': 0.09501234441995621, 'timestamp': '2025-09-10 02:48:40.720812', 'step': 19164, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 2847885091968}, 'timestamp': '2025-09-10 02:48:40.751567', 'step': 19164, 'epoch': 3} {'type': 'loss', 'content': 0.041774410754442215, 'timestamp': '2025-09-10 02:48:40.755439', 'step': 19165, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 2847885091968}, 'timestamp': '2025-09-10 02:48:40.788011', 'step': 19165, 'epoch': 3} {'type': 'loss', 'content': 0.06462651491165161, 'timestamp': '2025-09-10 02:48:40.790889', 'step': 19166, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 2847885091968}, 'timestamp': '2025-09-10 02:48:40.821039', 'step': 19166, 'epoch': 3} {'type': 'loss', 'content': 0.10669902712106705, 'timestamp': '2025-09-10 02:48:40.823614', 'step': 19167, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 128], 'flops': 3797092544000}, 'timestamp': '2025-09-10 02:48:40.854246', 'step': 19167, 'epoch': 3} {'type': 'loss', 'content': 0.08620646595954895, 'timestamp': '2025-09-10 02:48:40.877778', 'step': 19168, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 2847885091968}, 'timestamp': '2025-09-10 02:48:40.908020', 'step': 19168, 'epoch': 3} {'type': 'loss', 'content': 0.10868848860263824, 'timestamp': '2025-09-10 02:48:40.910282', 'step': 19169, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 144], 'flops': 4271696270016}, 'timestamp': '2025-09-10 02:48:40.940316', 'step': 19169, 'epoch': 3} {'type': 'loss', 'content': 0.11002533882856369, 'timestamp': '2025-09-10 02:48:40.942737', 'step': 19170, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 2847885091968}, 'timestamp': '2025-09-10 02:48:40.973623', 'step': 19170, 'epoch': 3} {'type': 'loss', 'content': 0.07124481350183487, 'timestamp': '2025-09-10 02:48:40.976084', 'step': 19171, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 3322488817984}, 'timestamp': '2025-09-10 02:48:41.006452', 'step': 19171, 'epoch': 3} {'type': 'loss', 'content': 0.05622541904449463, 'timestamp': '2025-09-10 02:48:41.029991', 'step': 19172, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 128], 'flops': 3797092544000}, 'timestamp': '2025-09-10 02:48:41.061577', 'step': 19172, 'epoch': 3} {'type': 'loss', 'content': 0.05307547003030777, 'timestamp': '2025-09-10 02:48:41.064400', 'step': 19173, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 3322488817984}, 'timestamp': '2025-09-10 02:48:41.095926', 'step': 19173, 'epoch': 3} {'type': 'loss', 'content': 0.08009903877973557, 'timestamp': '2025-09-10 02:48:41.098478', 'step': 19174, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 128], 'flops': 3797092544000}, 'timestamp': '2025-09-10 02:48:41.130003', 'step': 19174, 'epoch': 3} {'type': 'loss', 'content': 0.10264410823583603, 'timestamp': '2025-09-10 02:48:41.132405', 'step': 19175, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 176], 'flops': 5220903722048}, 'timestamp': '2025-09-10 02:48:41.162892', 'step': 19175, 'epoch': 3} {'type': 'loss', 'content': 0.02842663787305355, 'timestamp': '2025-09-10 02:48:41.187698', 'step': 19176, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 160], 'flops': 4746299996032}, 'timestamp': '2025-09-10 02:48:41.218339', 'step': 19176, 'epoch': 3} {'type': 'loss', 'content': 0.05978763848543167, 'timestamp': '2025-09-10 02:48:41.220912', 'step': 19177, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 2847885091968}, 'timestamp': '2025-09-10 02:48:41.251769', 'step': 19177, 'epoch': 3} {'type': 'loss', 'content': 0.17376530170440674, 'timestamp': '2025-09-10 02:48:41.254257', 'step': 19178, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 128], 'flops': 3797092544000}, 'timestamp': '2025-09-10 02:48:41.284718', 'step': 19178, 'epoch': 3} {'type': 'loss', 'content': 0.05734998360276222, 'timestamp': '2025-09-10 02:48:41.287072', 'step': 19179, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 128], 'flops': 3797092544000}, 'timestamp': '2025-09-10 02:48:41.317023', 'step': 19179, 'epoch': 3} {'type': 'loss', 'content': 0.09087777882814407, 'timestamp': '2025-09-10 02:48:41.340757', 'step': 19180, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 144], 'flops': 4271696270016}, 'timestamp': '2025-09-10 02:48:41.373084', 'step': 19180, 'epoch': 3} {'type': 'loss', 'content': 0.0898226946592331, 'timestamp': '2025-09-10 02:48:41.376465', 'step': 19181, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 2847885091968}, 'timestamp': '2025-09-10 02:48:41.406197', 'step': 19181, 'epoch': 3} {'type': 'loss', 'content': 0.04321182519197464, 'timestamp': '2025-09-10 02:48:41.408267', 'step': 19182, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 3322488817984}, 'timestamp': '2025-09-10 02:48:41.438613', 'step': 19182, 'epoch': 3} {'type': 'loss', 'content': 0.0639280453324318, 'timestamp': '2025-09-10 02:48:41.442257', 'step': 19183, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 2847885091968}, 'timestamp': '2025-09-10 02:48:41.473252', 'step': 19183, 'epoch': 3} {'type': 'loss', 'content': 0.020867345854640007, 'timestamp': '2025-09-10 02:48:41.498798', 'step': 19184, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 2847885091968}, 'timestamp': '2025-09-10 02:48:41.530652', 'step': 19184, 'epoch': 3} {'type': 'loss', 'content': 0.025756938382983208, 'timestamp': '2025-09-10 02:48:41.533180', 'step': 19185, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 3322488817984}, 'timestamp': '2025-09-10 02:48:41.564985', 'step': 19185, 'epoch': 3} {'type': 'loss', 'content': 0.10546664148569107, 'timestamp': '2025-09-10 02:48:41.567513', 'step': 19186, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 160], 'flops': 4746299996032}, 'timestamp': '2025-09-10 02:48:41.598365', 'step': 19186, 'epoch': 3} {'type': 'loss', 'content': 0.05241827666759491, 'timestamp': '2025-09-10 02:48:41.601086', 'step': 19187, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 144], 'flops': 4271696270016}, 'timestamp': '2025-09-10 02:48:41.631885', 'step': 19187, 'epoch': 3} {'type': 'loss', 'content': 0.08341584354639053, 'timestamp': '2025-09-10 02:48:41.655046', 'step': 19188, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 144], 'flops': 4271696270016}, 'timestamp': '2025-09-10 02:48:41.686742', 'step': 19188, 'epoch': 3} {'type': 'loss', 'content': 0.12854845821857452, 'timestamp': '2025-09-10 02:48:41.689466', 'step': 19189, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 2847885091968}, 'timestamp': '2025-09-10 02:48:41.720106', 'step': 19189, 'epoch': 3} {'type': 'loss', 'content': 0.03434206545352936, 'timestamp': '2025-09-10 02:48:41.722493', 'step': 19190, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 3322488817984}, 'timestamp': '2025-09-10 02:48:41.752697', 'step': 19190, 'epoch': 3} {'type': 'loss', 'content': 0.04611142352223396, 'timestamp': '2025-09-10 02:48:41.755383', 'step': 19191, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 2847885091968}, 'timestamp': '2025-09-10 02:48:41.789844', 'step': 19191, 'epoch': 3} {'type': 'loss', 'content': 0.08602587878704071, 'timestamp': '2025-09-10 02:48:41.813890', 'step': 19192, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 3322488817984}, 'timestamp': '2025-09-10 02:48:41.844010', 'step': 19192, 'epoch': 3} {'type': 'loss', 'content': 0.07918565720319748, 'timestamp': '2025-09-10 02:48:41.846213', 'step': 19193, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 2847885091968}, 'timestamp': '2025-09-10 02:48:41.875990', 'step': 19193, 'epoch': 3} {'type': 'loss', 'content': 0.11319970339536667, 'timestamp': '2025-09-10 02:48:41.878710', 'step': 19194, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 144], 'flops': 4271696270016}, 'timestamp': '2025-09-10 02:48:41.911546', 'step': 19194, 'epoch': 3} {'type': 'loss', 'content': 0.07597419619560242, 'timestamp': '2025-09-10 02:48:41.914077', 'step': 19195, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 2847885091968}, 'timestamp': '2025-09-10 02:48:41.943792', 'step': 19195, 'epoch': 3} {'type': 'loss', 'content': 0.02938532643020153, 'timestamp': '2025-09-10 02:48:41.967556', 'step': 19196, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 3322488817984}, 'timestamp': '2025-09-10 02:48:41.998130', 'step': 19196, 'epoch': 3} {'type': 'loss', 'content': 0.0759168416261673, 'timestamp': '2025-09-10 02:48:42.001235', 'step': 19197, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 144], 'flops': 4271696270016}, 'timestamp': '2025-09-10 02:48:42.031582', 'step': 19197, 'epoch': 3} {'type': 'loss', 'content': 0.012301920913159847, 'timestamp': '2025-09-10 02:48:42.034242', 'step': 19198, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 144], 'flops': 4271696270016}, 'timestamp': '2025-09-10 02:48:42.065125', 'step': 19198, 'epoch': 3} {'type': 'loss', 'content': 0.04292070120573044, 'timestamp': '2025-09-10 02:48:42.067559', 'step': 19199, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 2847885091968}, 'timestamp': '2025-09-10 02:48:42.099326', 'step': 19199, 'epoch': 3} {'type': 'loss', 'content': 0.14776299893856049, 'timestamp': '2025-09-10 02:48:42.122912', 'step': 19200, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 3322488817984}, 'timestamp': '2025-09-10 02:48:42.153160', 'step': 19200, 'epoch': 3} {'type': 'loss', 'content': 0.09686114639043808, 'timestamp': '2025-09-10 02:48:42.155485', 'step': 19201, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 2847885091968}, 'timestamp': '2025-09-10 02:48:42.185817', 'step': 19201, 'epoch': 3} {'type': 'loss', 'content': 0.015442077070474625, 'timestamp': '2025-09-10 02:48:42.188075', 'step': 19202, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 144], 'flops': 4271696270016}, 'timestamp': '2025-09-10 02:48:42.219134', 'step': 19202, 'epoch': 3} {'type': 'loss', 'content': 0.06998641043901443, 'timestamp': '2025-09-10 02:48:42.221571', 'step': 19203, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 128], 'flops': 3797092544000}, 'timestamp': '2025-09-10 02:48:42.252196', 'step': 19203, 'epoch': 3} {'type': 'loss', 'content': 0.04847234860062599, 'timestamp': '2025-09-10 02:48:42.275739', 'step': 19204, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 3322488817984}, 'timestamp': '2025-09-10 02:48:42.307566', 'step': 19204, 'epoch': 3} {'type': 'loss', 'content': 0.054882749915122986, 'timestamp': '2025-09-10 02:48:42.310554', 'step': 19205, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 3322488817984}, 'timestamp': '2025-09-10 02:48:42.341226', 'step': 19205, 'epoch': 3} {'type': 'loss', 'content': 0.04717802256345749, 'timestamp': '2025-09-10 02:48:42.343726', 'step': 19206, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 80], 'flops': 2373281365952}, 'timestamp': '2025-09-10 02:48:42.374090', 'step': 19206, 'epoch': 3} {'type': 'loss', 'content': 0.11735262721776962, 'timestamp': '2025-09-10 02:48:42.376489', 'step': 19207, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 3322488817984}, 'timestamp': '2025-09-10 02:48:42.407085', 'step': 19207, 'epoch': 3} {'type': 'loss', 'content': 0.07441867887973785, 'timestamp': '2025-09-10 02:48:42.430686', 'step': 19208, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 2847885091968}, 'timestamp': '2025-09-10 02:48:42.461491', 'step': 19208, 'epoch': 3} {'type': 'loss', 'content': 0.047996360808610916, 'timestamp': '2025-09-10 02:48:42.463997', 'step': 19209, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 3322488817984}, 'timestamp': '2025-09-10 02:48:42.494175', 'step': 19209, 'epoch': 3} {'type': 'loss', 'content': 0.22314982116222382, 'timestamp': '2025-09-10 02:48:42.496378', 'step': 19210, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 2847885091968}, 'timestamp': '2025-09-10 02:48:42.527995', 'step': 19210, 'epoch': 3} {'type': 'loss', 'content': 0.0625433698296547, 'timestamp': '2025-09-10 02:48:42.530913', 'step': 19211, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 3322488817984}, 'timestamp': '2025-09-10 02:48:42.561449', 'step': 19211, 'epoch': 3} {'type': 'loss', 'content': 0.05931266397237778, 'timestamp': '2025-09-10 02:48:42.584838', 'step': 19212, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 128], 'flops': 3797092544000}, 'timestamp': '2025-09-10 02:48:42.615232', 'step': 19212, 'epoch': 3} {'type': 'loss', 'content': 0.17087262868881226, 'timestamp': '2025-09-10 02:48:42.617491', 'step': 19213, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 128], 'flops': 3797092544000}, 'timestamp': '2025-09-10 02:48:42.647507', 'step': 19213, 'epoch': 3} {'type': 'loss', 'content': 0.04703966900706291, 'timestamp': '2025-09-10 02:48:42.649988', 'step': 19214, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 160], 'flops': 4746299996032}, 'timestamp': '2025-09-10 02:48:42.681501', 'step': 19214, 'epoch': 3} {'type': 'loss', 'content': 0.078749880194664, 'timestamp': '2025-09-10 02:48:42.684345', 'step': 19215, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 128], 'flops': 3797092544000}, 'timestamp': '2025-09-10 02:48:42.716211', 'step': 19215, 'epoch': 3} {'type': 'loss', 'content': 0.12652766704559326, 'timestamp': '2025-09-10 02:48:42.739818', 'step': 19216, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 3322488817984}, 'timestamp': '2025-09-10 02:48:42.774040', 'step': 19216, 'epoch': 3} {'type': 'loss', 'content': 0.09076354652643204, 'timestamp': '2025-09-10 02:48:42.781071', 'step': 19217, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 144], 'flops': 4271696270016}, 'timestamp': '2025-09-10 02:48:42.821926', 'step': 19217, 'epoch': 3} {'type': 'loss', 'content': 0.048183657228946686, 'timestamp': '2025-09-10 02:48:42.824370', 'step': 19218, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 3322488817984}, 'timestamp': '2025-09-10 02:48:42.854184', 'step': 19218, 'epoch': 3} {'type': 'loss', 'content': 0.11128063499927521, 'timestamp': '2025-09-10 02:48:42.856470', 'step': 19219, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 3322488817984}, 'timestamp': '2025-09-10 02:48:42.886865', 'step': 19219, 'epoch': 3} {'type': 'loss', 'content': 0.14177316427230835, 'timestamp': '2025-09-10 02:48:42.911290', 'step': 19220, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 176], 'flops': 5220903722048}, 'timestamp': '2025-09-10 02:48:42.941738', 'step': 19220, 'epoch': 3} {'type': 'loss', 'content': 0.07270704209804535, 'timestamp': '2025-09-10 02:48:42.943914', 'step': 19221, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 80], 'flops': 2373281365952}, 'timestamp': '2025-09-10 02:48:42.973918', 'step': 19221, 'epoch': 3} {'type': 'loss', 'content': 0.039845507591962814, 'timestamp': '2025-09-10 02:48:42.976616', 'step': 19222, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 3322488817984}, 'timestamp': '2025-09-10 02:48:43.006886', 'step': 19222, 'epoch': 3} {'type': 'loss', 'content': 0.06328928470611572, 'timestamp': '2025-09-10 02:48:43.009125', 'step': 19223, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 3322488817984}, 'timestamp': '2025-09-10 02:48:43.040823', 'step': 19223, 'epoch': 3} {'type': 'loss', 'content': 0.06485055387020111, 'timestamp': '2025-09-10 02:48:43.064838', 'step': 19224, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 3322488817984}, 'timestamp': '2025-09-10 02:48:43.097559', 'step': 19224, 'epoch': 3} {'type': 'loss', 'content': 0.03373809903860092, 'timestamp': '2025-09-10 02:48:43.099880', 'step': 19225, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 128], 'flops': 3797092544000}, 'timestamp': '2025-09-10 02:48:43.129988', 'step': 19225, 'epoch': 3} {'type': 'loss', 'content': 0.028922615572810173, 'timestamp': '2025-09-10 02:48:43.132094', 'step': 19226, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 160], 'flops': 4746299996032}, 'timestamp': '2025-09-10 02:48:43.162074', 'step': 19226, 'epoch': 3} {'type': 'loss', 'content': 0.058155160397291183, 'timestamp': '2025-09-10 02:48:43.164960', 'step': 19227, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 160], 'flops': 4746299996032}, 'timestamp': '2025-09-10 02:48:43.194772', 'step': 19227, 'epoch': 3} {'type': 'loss', 'content': 0.13647063076496124, 'timestamp': '2025-09-10 02:48:43.219961', 'step': 19228, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 3322488817984}, 'timestamp': '2025-09-10 02:48:43.250734', 'step': 19228, 'epoch': 3} {'type': 'loss', 'content': 0.1375242918729782, 'timestamp': '2025-09-10 02:48:43.253095', 'step': 19229, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 2847885091968}, 'timestamp': '2025-09-10 02:48:43.287331', 'step': 19229, 'epoch': 3} {'type': 'loss', 'content': 0.08488039672374725, 'timestamp': '2025-09-10 02:48:43.292771', 'step': 19230, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 2847885091968}, 'timestamp': '2025-09-10 02:48:43.325938', 'step': 19230, 'epoch': 3} {'type': 'loss', 'content': 0.008478579111397266, 'timestamp': '2025-09-10 02:48:43.328363', 'step': 19231, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 3322488817984}, 'timestamp': '2025-09-10 02:48:43.358707', 'step': 19231, 'epoch': 3} {'type': 'loss', 'content': 0.09705711901187897, 'timestamp': '2025-09-10 02:48:43.382241', 'step': 19232, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 3322488817984}, 'timestamp': '2025-09-10 02:48:43.429183', 'step': 19232, 'epoch': 3} {'type': 'loss', 'content': 0.04303998127579689, 'timestamp': '2025-09-10 02:48:43.433582', 'step': 19233, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 3322488817984}, 'timestamp': '2025-09-10 02:48:43.470439', 'step': 19233, 'epoch': 3} {'type': 'loss', 'content': 0.09430381655693054, 'timestamp': '2025-09-10 02:48:43.474435', 'step': 19234, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 128], 'flops': 3797092544000}, 'timestamp': '2025-09-10 02:48:43.511115', 'step': 19234, 'epoch': 3} {'type': 'loss', 'content': 0.15461556613445282, 'timestamp': '2025-09-10 02:48:43.516249', 'step': 19235, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 2847885091968}, 'timestamp': '2025-09-10 02:48:43.551946', 'step': 19235, 'epoch': 3} {'type': 'loss', 'content': 0.07816258817911148, 'timestamp': '2025-09-10 02:48:43.577472', 'step': 19236, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 144], 'flops': 4271696270016}, 'timestamp': '2025-09-10 02:48:43.607963', 'step': 19236, 'epoch': 3} {'type': 'loss', 'content': 0.12849411368370056, 'timestamp': '2025-09-10 02:48:43.610637', 'step': 19237, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 80], 'flops': 2373281365952}, 'timestamp': '2025-09-10 02:48:43.642445', 'step': 19237, 'epoch': 3} {'type': 'loss', 'content': 0.03767715394496918, 'timestamp': '2025-09-10 02:48:43.644858', 'step': 19238, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 3322488817984}, 'timestamp': '2025-09-10 02:48:43.675668', 'step': 19238, 'epoch': 3} {'type': 'loss', 'content': 0.1008601114153862, 'timestamp': '2025-09-10 02:48:43.678181', 'step': 19239, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 2847885091968}, 'timestamp': '2025-09-10 02:48:43.708841', 'step': 19239, 'epoch': 3} {'type': 'loss', 'content': 0.08003515750169754, 'timestamp': '2025-09-10 02:48:43.733107', 'step': 19240, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 2847885091968}, 'timestamp': '2025-09-10 02:48:43.764119', 'step': 19240, 'epoch': 3} {'type': 'loss', 'content': 0.06308946013450623, 'timestamp': '2025-09-10 02:48:43.767273', 'step': 19241, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 2847885091968}, 'timestamp': '2025-09-10 02:48:43.798029', 'step': 19241, 'epoch': 3} {'type': 'loss', 'content': 0.07406557351350784, 'timestamp': '2025-09-10 02:48:43.800836', 'step': 19242, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 3322488817984}, 'timestamp': '2025-09-10 02:48:43.833955', 'step': 19242, 'epoch': 3} {'type': 'loss', 'content': 0.12006097286939621, 'timestamp': '2025-09-10 02:48:43.836530', 'step': 19243, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 160], 'flops': 4746299996032}, 'timestamp': '2025-09-10 02:48:43.867893', 'step': 19243, 'epoch': 3} {'type': 'loss', 'content': 0.05721208453178406, 'timestamp': '2025-09-10 02:48:43.891746', 'step': 19244, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 144], 'flops': 4271696270016}, 'timestamp': '2025-09-10 02:48:43.922597', 'step': 19244, 'epoch': 3} {'type': 'loss', 'content': 0.05631643906235695, 'timestamp': '2025-09-10 02:48:43.924656', 'step': 19245, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 3322488817984}, 'timestamp': '2025-09-10 02:48:43.955866', 'step': 19245, 'epoch': 3} {'type': 'loss', 'content': 0.11256150156259537, 'timestamp': '2025-09-10 02:48:43.958121', 'step': 19246, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 2847885091968}, 'timestamp': '2025-09-10 02:48:43.988991', 'step': 19246, 'epoch': 3} {'type': 'loss', 'content': 0.05241985246539116, 'timestamp': '2025-09-10 02:48:43.991724', 'step': 19247, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 3322488817984}, 'timestamp': '2025-09-10 02:48:44.022802', 'step': 19247, 'epoch': 3} {'type': 'loss', 'content': 0.060198962688446045, 'timestamp': '2025-09-10 02:48:44.046265', 'step': 19248, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 3322488817984}, 'timestamp': '2025-09-10 02:48:44.077441', 'step': 19248, 'epoch': 3} {'type': 'loss', 'content': 0.10994675010442734, 'timestamp': '2025-09-10 02:48:44.079811', 'step': 19249, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 128], 'flops': 3797092544000}, 'timestamp': '2025-09-10 02:48:44.111873', 'step': 19249, 'epoch': 3} {'type': 'loss', 'content': 0.12252040207386017, 'timestamp': '2025-09-10 02:48:44.114287', 'step': 19250, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 2847885091968}, 'timestamp': '2025-09-10 02:48:44.144900', 'step': 19250, 'epoch': 3} {'type': 'loss', 'content': 0.13915058970451355, 'timestamp': '2025-09-10 02:48:44.147506', 'step': 19251, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 3322488817984}, 'timestamp': '2025-09-10 02:48:44.177969', 'step': 19251, 'epoch': 3} {'type': 'loss', 'content': 0.08413765579462051, 'timestamp': '2025-09-10 02:48:44.201507', 'step': 19252, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 3322488817984}, 'timestamp': '2025-09-10 02:48:44.232080', 'step': 19252, 'epoch': 3} {'type': 'loss', 'content': 0.07688248157501221, 'timestamp': '2025-09-10 02:48:44.234381', 'step': 19253, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 2847885091968}, 'timestamp': '2025-09-10 02:48:44.264757', 'step': 19253, 'epoch': 3} {'type': 'loss', 'content': 0.05380469933152199, 'timestamp': '2025-09-10 02:48:44.267600', 'step': 19254, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 3322488817984}, 'timestamp': '2025-09-10 02:48:44.298666', 'step': 19254, 'epoch': 3} {'type': 'loss', 'content': 0.06012981757521629, 'timestamp': '2025-09-10 02:48:44.302312', 'step': 19255, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 128], 'flops': 3797092544000}, 'timestamp': '2025-09-10 02:48:44.336764', 'step': 19255, 'epoch': 3} {'type': 'loss', 'content': 0.11607500910758972, 'timestamp': '2025-09-10 02:48:44.360308', 'step': 19256, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 2847885091968}, 'timestamp': '2025-09-10 02:48:44.391672', 'step': 19256, 'epoch': 3} {'type': 'loss', 'content': 0.09132696688175201, 'timestamp': '2025-09-10 02:48:44.394192', 'step': 19257, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 3322488817984}, 'timestamp': '2025-09-10 02:48:44.425408', 'step': 19257, 'epoch': 3} {'type': 'loss', 'content': 0.046711258590221405, 'timestamp': '2025-09-10 02:48:44.427576', 'step': 19258, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 3322488817984}, 'timestamp': '2025-09-10 02:48:44.458501', 'step': 19258, 'epoch': 3} {'type': 'loss', 'content': 0.10478048026561737, 'timestamp': '2025-09-10 02:48:44.462660', 'step': 19259, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 3322488817984}, 'timestamp': '2025-09-10 02:48:44.498159', 'step': 19259, 'epoch': 3} {'type': 'loss', 'content': 0.09580198675394058, 'timestamp': '2025-09-10 02:48:44.524191', 'step': 19260, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 144], 'flops': 4271696270016}, 'timestamp': '2025-09-10 02:48:44.560451', 'step': 19260, 'epoch': 3} {'type': 'loss', 'content': 0.06567066162824631, 'timestamp': '2025-09-10 02:48:44.564759', 'step': 19261, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 144], 'flops': 4271696270016}, 'timestamp': '2025-09-10 02:48:44.599557', 'step': 19261, 'epoch': 3} {'type': 'loss', 'content': 0.05791787430644035, 'timestamp': '2025-09-10 02:48:44.603906', 'step': 19262, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 3322488817984}, 'timestamp': '2025-09-10 02:48:44.636990', 'step': 19262, 'epoch': 3} {'type': 'loss', 'content': 0.05692851543426514, 'timestamp': '2025-09-10 02:48:44.639677', 'step': 19263, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 2847885091968}, 'timestamp': '2025-09-10 02:48:44.670329', 'step': 19263, 'epoch': 3} {'type': 'loss', 'content': 0.07317167520523071, 'timestamp': '2025-09-10 02:48:44.693868', 'step': 19264, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 3322488817984}, 'timestamp': '2025-09-10 02:48:44.724749', 'step': 19264, 'epoch': 3} {'type': 'loss', 'content': 0.03895227238535881, 'timestamp': '2025-09-10 02:48:44.726807', 'step': 19265, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 2847885091968}, 'timestamp': '2025-09-10 02:48:44.758222', 'step': 19265, 'epoch': 3} {'type': 'loss', 'content': 0.13296926021575928, 'timestamp': '2025-09-10 02:48:44.760466', 'step': 19266, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 144], 'flops': 4271696270016}, 'timestamp': '2025-09-10 02:48:44.790311', 'step': 19266, 'epoch': 3} {'type': 'loss', 'content': 0.12909232079982758, 'timestamp': '2025-09-10 02:48:44.793422', 'step': 19267, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 2847885091968}, 'timestamp': '2025-09-10 02:48:44.825135', 'step': 19267, 'epoch': 3} {'type': 'loss', 'content': 0.08617392927408218, 'timestamp': '2025-09-10 02:48:44.849217', 'step': 19268, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 128], 'flops': 3797092544000}, 'timestamp': '2025-09-10 02:48:44.879208', 'step': 19268, 'epoch': 3} {'type': 'loss', 'content': 0.060326654464006424, 'timestamp': '2025-09-10 02:48:44.881731', 'step': 19269, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 3322488817984}, 'timestamp': '2025-09-10 02:48:44.913204', 'step': 19269, 'epoch': 3} {'type': 'loss', 'content': 0.12106550484895706, 'timestamp': '2025-09-10 02:48:44.916206', 'step': 19270, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 144], 'flops': 4271696270016}, 'timestamp': '2025-09-10 02:48:44.949488', 'step': 19270, 'epoch': 3} {'type': 'loss', 'content': 0.04395296797156334, 'timestamp': '2025-09-10 02:48:44.952128', 'step': 19271, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 2847885091968}, 'timestamp': '2025-09-10 02:48:44.982060', 'step': 19271, 'epoch': 3} {'type': 'loss', 'content': 0.06139020994305611, 'timestamp': '2025-09-10 02:48:45.008324', 'step': 19272, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 2847885091968}, 'timestamp': '2025-09-10 02:48:45.039945', 'step': 19272, 'epoch': 3} {'type': 'loss', 'content': 0.05465216189622879, 'timestamp': '2025-09-10 02:48:45.042570', 'step': 19273, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 3322488817984}, 'timestamp': '2025-09-10 02:48:45.074955', 'step': 19273, 'epoch': 3} {'type': 'loss', 'content': 0.07273908704519272, 'timestamp': '2025-09-10 02:48:45.077637', 'step': 19274, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 2847885091968}, 'timestamp': '2025-09-10 02:48:45.108577', 'step': 19274, 'epoch': 3} {'type': 'loss', 'content': 0.10584577918052673, 'timestamp': '2025-09-10 02:48:45.111853', 'step': 19275, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 128], 'flops': 3797092544000}, 'timestamp': '2025-09-10 02:48:45.149817', 'step': 19275, 'epoch': 3} {'type': 'loss', 'content': 0.01814878173172474, 'timestamp': '2025-09-10 02:48:45.173613', 'step': 19276, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 3322488817984}, 'timestamp': '2025-09-10 02:48:45.204084', 'step': 19276, 'epoch': 3} {'type': 'loss', 'content': 0.09092716872692108, 'timestamp': '2025-09-10 02:48:45.206364', 'step': 19277, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 3322488817984}, 'timestamp': '2025-09-10 02:48:45.236617', 'step': 19277, 'epoch': 3} {'type': 'loss', 'content': 0.09676668792963028, 'timestamp': '2025-09-10 02:48:45.239199', 'step': 19278, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 3322488817984}, 'timestamp': '2025-09-10 02:48:45.270454', 'step': 19278, 'epoch': 3} {'type': 'loss', 'content': 0.09462124854326248, 'timestamp': '2025-09-10 02:48:45.272787', 'step': 19279, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 128], 'flops': 3797092544000}, 'timestamp': '2025-09-10 02:48:45.303118', 'step': 19279, 'epoch': 3} {'type': 'loss', 'content': 0.01153801754117012, 'timestamp': '2025-09-10 02:48:45.326849', 'step': 19280, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 144], 'flops': 4271696270016}, 'timestamp': '2025-09-10 02:48:45.358277', 'step': 19280, 'epoch': 3} {'type': 'loss', 'content': 0.03358398377895355, 'timestamp': '2025-09-10 02:48:45.360823', 'step': 19281, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 128], 'flops': 3797092544000}, 'timestamp': '2025-09-10 02:48:45.390744', 'step': 19281, 'epoch': 3} {'type': 'loss', 'content': 0.08079423010349274, 'timestamp': '2025-09-10 02:48:45.394787', 'step': 19282, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 2847885091968}, 'timestamp': '2025-09-10 02:48:45.432320', 'step': 19282, 'epoch': 3} {'type': 'loss', 'content': 0.05311921238899231, 'timestamp': '2025-09-10 02:48:45.434594', 'step': 19283, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 160], 'flops': 4746299996032}, 'timestamp': '2025-09-10 02:48:45.467550', 'step': 19283, 'epoch': 3} {'type': 'loss', 'content': 0.08624530583620071, 'timestamp': '2025-09-10 02:48:45.491090', 'step': 19284, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 2847885091968}, 'timestamp': '2025-09-10 02:48:45.521131', 'step': 19284, 'epoch': 3} {'type': 'loss', 'content': 0.13468404114246368, 'timestamp': '2025-09-10 02:48:45.523427', 'step': 19285, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 3322488817984}, 'timestamp': '2025-09-10 02:48:45.554489', 'step': 19285, 'epoch': 3} {'type': 'loss', 'content': 0.10756807774305344, 'timestamp': '2025-09-10 02:48:45.557137', 'step': 19286, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 3322488817984}, 'timestamp': '2025-09-10 02:48:45.587293', 'step': 19286, 'epoch': 3} {'type': 'loss', 'content': 0.04994962736964226, 'timestamp': '2025-09-10 02:48:45.589616', 'step': 19287, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 144], 'flops': 4271696270016}, 'timestamp': '2025-09-10 02:48:45.620787', 'step': 19287, 'epoch': 3} {'type': 'loss', 'content': 0.12685319781303406, 'timestamp': '2025-09-10 02:48:45.644149', 'step': 19288, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 3322488817984}, 'timestamp': '2025-09-10 02:48:45.674954', 'step': 19288, 'epoch': 3} {'type': 'loss', 'content': 0.05144122242927551, 'timestamp': '2025-09-10 02:48:45.677256', 'step': 19289, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 144], 'flops': 4271696270016}, 'timestamp': '2025-09-10 02:48:45.707655', 'step': 19289, 'epoch': 3} {'type': 'loss', 'content': 0.15267881751060486, 'timestamp': '2025-09-10 02:48:45.710455', 'step': 19290, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 3322488817984}, 'timestamp': '2025-09-10 02:48:45.743205', 'step': 19290, 'epoch': 3} {'type': 'loss', 'content': 0.09687663614749908, 'timestamp': '2025-09-10 02:48:45.745769', 'step': 19291, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 128], 'flops': 3797092544000}, 'timestamp': '2025-09-10 02:48:45.775623', 'step': 19291, 'epoch': 3} {'type': 'loss', 'content': 0.1167965978384018, 'timestamp': '2025-09-10 02:48:45.799417', 'step': 19292, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 3322488817984}, 'timestamp': '2025-09-10 02:48:45.829770', 'step': 19292, 'epoch': 3} {'type': 'loss', 'content': 0.07047083973884583, 'timestamp': '2025-09-10 02:48:45.833746', 'step': 19293, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 128], 'flops': 3797092544000}, 'timestamp': '2025-09-10 02:48:45.863763', 'step': 19293, 'epoch': 3} {'type': 'loss', 'content': 0.1124575212597847, 'timestamp': '2025-09-10 02:48:45.866492', 'step': 19294, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 128], 'flops': 3797092544000}, 'timestamp': '2025-09-10 02:48:45.896681', 'step': 19294, 'epoch': 3} {'type': 'loss', 'content': 0.08237746357917786, 'timestamp': '2025-09-10 02:48:45.899063', 'step': 19295, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 2847885091968}, 'timestamp': '2025-09-10 02:48:45.929220', 'step': 19295, 'epoch': 3} {'type': 'loss', 'content': 0.026162615045905113, 'timestamp': '2025-09-10 02:48:45.952856', 'step': 19296, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 160], 'flops': 4746299996032}, 'timestamp': '2025-09-10 02:48:45.983786', 'step': 19296, 'epoch': 3} {'type': 'loss', 'content': 0.03844953700900078, 'timestamp': '2025-09-10 02:48:45.985716', 'step': 19297, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 3322488817984}, 'timestamp': '2025-09-10 02:48:46.015659', 'step': 19297, 'epoch': 3} {'type': 'loss', 'content': 0.050564222037792206, 'timestamp': '2025-09-10 02:48:46.018448', 'step': 19298, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 144], 'flops': 4271696270016}, 'timestamp': '2025-09-10 02:48:46.049120', 'step': 19298, 'epoch': 3} {'type': 'loss', 'content': 0.07624387741088867, 'timestamp': '2025-09-10 02:48:46.051834', 'step': 19299, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 2847885091968}, 'timestamp': '2025-09-10 02:48:46.081752', 'step': 19299, 'epoch': 3} {'type': 'loss', 'content': 0.07894077897071838, 'timestamp': '2025-09-10 02:48:46.105422', 'step': 19300, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 128], 'flops': 3797092544000}, 'timestamp': '2025-09-10 02:48:46.136081', 'step': 19300, 'epoch': 3} {'type': 'loss', 'content': 0.09209629148244858, 'timestamp': '2025-09-10 02:48:46.139222', 'step': 19301, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 3322488817984}, 'timestamp': '2025-09-10 02:48:46.170803', 'step': 19301, 'epoch': 3} {'type': 'loss', 'content': 0.11197932809591293, 'timestamp': '2025-09-10 02:48:46.173130', 'step': 19302, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 3322488817984}, 'timestamp': '2025-09-10 02:48:46.203032', 'step': 19302, 'epoch': 3} {'type': 'loss', 'content': 0.05420530214905739, 'timestamp': '2025-09-10 02:48:46.205353', 'step': 19303, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 3322488817984}, 'timestamp': '2025-09-10 02:48:46.235424', 'step': 19303, 'epoch': 3} {'type': 'loss', 'content': 0.03949444741010666, 'timestamp': '2025-09-10 02:48:46.260838', 'step': 19304, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 2847885091968}, 'timestamp': '2025-09-10 02:48:46.291133', 'step': 19304, 'epoch': 3} {'type': 'loss', 'content': 0.12495091557502747, 'timestamp': '2025-09-10 02:48:46.293313', 'step': 19305, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 3322488817984}, 'timestamp': '2025-09-10 02:48:46.325478', 'step': 19305, 'epoch': 3} {'type': 'loss', 'content': 0.08832329511642456, 'timestamp': '2025-09-10 02:48:46.328356', 'step': 19306, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 2847885091968}, 'timestamp': '2025-09-10 02:48:46.358249', 'step': 19306, 'epoch': 3} {'type': 'loss', 'content': 0.06539738178253174, 'timestamp': '2025-09-10 02:48:46.360617', 'step': 19307, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 2847885091968}, 'timestamp': '2025-09-10 02:48:46.391609', 'step': 19307, 'epoch': 3} {'type': 'loss', 'content': 0.02352817915380001, 'timestamp': '2025-09-10 02:48:46.414974', 'step': 19308, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 3322488817984}, 'timestamp': '2025-09-10 02:48:46.445557', 'step': 19308, 'epoch': 3} {'type': 'loss', 'content': 0.10947355628013611, 'timestamp': '2025-09-10 02:48:46.448058', 'step': 19309, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 2847885091968}, 'timestamp': '2025-09-10 02:48:46.479030', 'step': 19309, 'epoch': 3} {'type': 'loss', 'content': 0.029077179729938507, 'timestamp': '2025-09-10 02:48:46.481640', 'step': 19310, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 128], 'flops': 3797092544000}, 'timestamp': '2025-09-10 02:48:46.512349', 'step': 19310, 'epoch': 3} {'type': 'loss', 'content': 0.08505763858556747, 'timestamp': '2025-09-10 02:48:46.514650', 'step': 19311, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 128], 'flops': 3797092544000}, 'timestamp': '2025-09-10 02:48:46.545065', 'step': 19311, 'epoch': 3} {'type': 'loss', 'content': 0.10194335877895355, 'timestamp': '2025-09-10 02:48:46.569874', 'step': 19312, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 3322488817984}, 'timestamp': '2025-09-10 02:48:46.608064', 'step': 19312, 'epoch': 3} {'type': 'loss', 'content': 0.01930478774011135, 'timestamp': '2025-09-10 02:48:46.610494', 'step': 19313, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 2847885091968}, 'timestamp': '2025-09-10 02:48:46.640574', 'step': 19313, 'epoch': 3} {'type': 'loss', 'content': 0.1178673580288887, 'timestamp': '2025-09-10 02:48:46.647890', 'step': 19314, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 3322488817984}, 'timestamp': '2025-09-10 02:48:46.679021', 'step': 19314, 'epoch': 3} {'type': 'loss', 'content': 0.09402605146169662, 'timestamp': '2025-09-10 02:48:46.681306', 'step': 19315, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 3322488817984}, 'timestamp': '2025-09-10 02:48:46.711317', 'step': 19315, 'epoch': 3} {'type': 'loss', 'content': 0.1049952432513237, 'timestamp': '2025-09-10 02:48:46.735304', 'step': 19316, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 128], 'flops': 3797092544000}, 'timestamp': '2025-09-10 02:48:46.765591', 'step': 19316, 'epoch': 3} {'type': 'loss', 'content': 0.0857122614979744, 'timestamp': '2025-09-10 02:48:46.768324', 'step': 19317, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 3322488817984}, 'timestamp': '2025-09-10 02:48:46.799301', 'step': 19317, 'epoch': 3} {'type': 'loss', 'content': 0.10397718846797943, 'timestamp': '2025-09-10 02:48:46.802585', 'step': 19318, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 2847885091968}, 'timestamp': '2025-09-10 02:48:46.833180', 'step': 19318, 'epoch': 3} {'type': 'loss', 'content': 0.0725763738155365, 'timestamp': '2025-09-10 02:48:46.835745', 'step': 19319, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 3322488817984}, 'timestamp': '2025-09-10 02:48:46.865724', 'step': 19319, 'epoch': 3} {'type': 'loss', 'content': 0.07671061158180237, 'timestamp': '2025-09-10 02:48:46.889239', 'step': 19320, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 3322488817984}, 'timestamp': '2025-09-10 02:48:46.919529', 'step': 19320, 'epoch': 3} {'type': 'loss', 'content': 0.08194548636674881, 'timestamp': '2025-09-10 02:48:46.922188', 'step': 19321, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 128], 'flops': 3797092544000}, 'timestamp': '2025-09-10 02:48:46.952469', 'step': 19321, 'epoch': 3} {'type': 'loss', 'content': 0.16773808002471924, 'timestamp': '2025-09-10 02:48:46.955075', 'step': 19322, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 80], 'flops': 2373281365952}, 'timestamp': '2025-09-10 02:48:46.985001', 'step': 19322, 'epoch': 3} {'type': 'loss', 'content': 0.07585833966732025, 'timestamp': '2025-09-10 02:48:46.988931', 'step': 19323, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 3322488817984}, 'timestamp': '2025-09-10 02:48:47.023032', 'step': 19323, 'epoch': 3} {'type': 'loss', 'content': 0.051653943955898285, 'timestamp': '2025-09-10 02:48:47.046902', 'step': 19324, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 2847885091968}, 'timestamp': '2025-09-10 02:48:47.087521', 'step': 19324, 'epoch': 3} {'type': 'loss', 'content': 0.019848644733428955, 'timestamp': '2025-09-10 02:48:47.091262', 'step': 19325, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 128], 'flops': 3797092544000}, 'timestamp': '2025-09-10 02:48:47.128334', 'step': 19325, 'epoch': 3} {'type': 'loss', 'content': 0.11454550176858902, 'timestamp': '2025-09-10 02:48:47.132735', 'step': 19326, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 3322488817984}, 'timestamp': '2025-09-10 02:48:47.169549', 'step': 19326, 'epoch': 3} {'type': 'loss', 'content': 0.04165548458695412, 'timestamp': '2025-09-10 02:48:47.176707', 'step': 19327, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 144], 'flops': 4271696270016}, 'timestamp': '2025-09-10 02:48:47.217924', 'step': 19327, 'epoch': 3} {'type': 'loss', 'content': 0.09045151621103287, 'timestamp': '2025-09-10 02:48:47.244320', 'step': 19328, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 128], 'flops': 3797092544000}, 'timestamp': '2025-09-10 02:48:47.281758', 'step': 19328, 'epoch': 3} {'type': 'loss', 'content': 0.044540952891111374, 'timestamp': '2025-09-10 02:48:47.284488', 'step': 19329, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 2847885091968}, 'timestamp': '2025-09-10 02:48:47.315362', 'step': 19329, 'epoch': 3} {'type': 'loss', 'content': 0.08890929073095322, 'timestamp': '2025-09-10 02:48:47.318155', 'step': 19330, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 160], 'flops': 4746299996032}, 'timestamp': '2025-09-10 02:48:47.348932', 'step': 19330, 'epoch': 3} {'type': 'loss', 'content': 0.10222836583852768, 'timestamp': '2025-09-10 02:48:47.351882', 'step': 19331, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 80], 'flops': 2373281365952}, 'timestamp': '2025-09-10 02:48:47.384395', 'step': 19331, 'epoch': 3} {'type': 'loss', 'content': 0.0858575701713562, 'timestamp': '2025-09-10 02:48:47.408005', 'step': 19332, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 80], 'flops': 2373281365952}, 'timestamp': '2025-09-10 02:48:47.438325', 'step': 19332, 'epoch': 3} {'type': 'loss', 'content': 0.10982795804738998, 'timestamp': '2025-09-10 02:48:47.441091', 'step': 19333, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 3322488817984}, 'timestamp': '2025-09-10 02:48:47.471552', 'step': 19333, 'epoch': 3} {'type': 'loss', 'content': 0.08377280086278915, 'timestamp': '2025-09-10 02:48:47.473852', 'step': 19334, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 2847885091968}, 'timestamp': '2025-09-10 02:48:47.504303', 'step': 19334, 'epoch': 3} {'type': 'loss', 'content': 0.020500587299466133, 'timestamp': '2025-09-10 02:48:47.506806', 'step': 19335, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 3322488817984}, 'timestamp': '2025-09-10 02:48:47.537254', 'step': 19335, 'epoch': 3} {'type': 'loss', 'content': 0.08207502216100693, 'timestamp': '2025-09-10 02:48:47.560875', 'step': 19336, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 3322488817984}, 'timestamp': '2025-09-10 02:48:47.592004', 'step': 19336, 'epoch': 3} {'type': 'loss', 'content': 0.06669751554727554, 'timestamp': '2025-09-10 02:48:47.594505', 'step': 19337, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 3322488817984}, 'timestamp': '2025-09-10 02:48:47.624399', 'step': 19337, 'epoch': 3} {'type': 'loss', 'content': 0.08863107115030289, 'timestamp': '2025-09-10 02:48:47.628386', 'step': 19338, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 2847885091968}, 'timestamp': '2025-09-10 02:48:47.660770', 'step': 19338, 'epoch': 3} {'type': 'loss', 'content': 0.0912296399474144, 'timestamp': '2025-09-10 02:48:47.663368', 'step': 19339, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 3322488817984}, 'timestamp': '2025-09-10 02:48:47.694160', 'step': 19339, 'epoch': 3} {'type': 'loss', 'content': 0.10140367597341537, 'timestamp': '2025-09-10 02:48:47.717874', 'step': 19340, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 3322488817984}, 'timestamp': '2025-09-10 02:48:47.747923', 'step': 19340, 'epoch': 3} {'type': 'loss', 'content': 0.14089106023311615, 'timestamp': '2025-09-10 02:48:47.750988', 'step': 19341, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 160], 'flops': 4746299996032}, 'timestamp': '2025-09-10 02:48:47.782514', 'step': 19341, 'epoch': 3} {'type': 'loss', 'content': 0.053240448236465454, 'timestamp': '2025-09-10 02:48:47.785623', 'step': 19342, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 3322488817984}, 'timestamp': '2025-09-10 02:48:47.816158', 'step': 19342, 'epoch': 3} {'type': 'loss', 'content': 0.10624685138463974, 'timestamp': '2025-09-10 02:48:47.820873', 'step': 19343, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 144], 'flops': 4271696270016}, 'timestamp': '2025-09-10 02:48:47.856033', 'step': 19343, 'epoch': 3} {'type': 'loss', 'content': 0.08220528066158295, 'timestamp': '2025-09-10 02:48:47.881258', 'step': 19344, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 2847885091968}, 'timestamp': '2025-09-10 02:48:47.912076', 'step': 19344, 'epoch': 3} {'type': 'loss', 'content': 0.05040305480360985, 'timestamp': '2025-09-10 02:48:47.914696', 'step': 19345, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 3322488817984}, 'timestamp': '2025-09-10 02:48:47.946482', 'step': 19345, 'epoch': 3} {'type': 'loss', 'content': 0.08308278024196625, 'timestamp': '2025-09-10 02:48:47.948911', 'step': 19346, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 128], 'flops': 3797092544000}, 'timestamp': '2025-09-10 02:48:47.979186', 'step': 19346, 'epoch': 3} {'type': 'loss', 'content': 0.04121299460530281, 'timestamp': '2025-09-10 02:48:47.982101', 'step': 19347, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 3322488817984}, 'timestamp': '2025-09-10 02:48:48.011907', 'step': 19347, 'epoch': 3} {'type': 'loss', 'content': 0.03045836091041565, 'timestamp': '2025-09-10 02:48:48.038333', 'step': 19348, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 2847885091968}, 'timestamp': '2025-09-10 02:48:48.069336', 'step': 19348, 'epoch': 3} {'type': 'loss', 'content': 0.09606265276670456, 'timestamp': '2025-09-10 02:48:48.071921', 'step': 19349, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 2847885091968}, 'timestamp': '2025-09-10 02:48:48.101822', 'step': 19349, 'epoch': 3} {'type': 'loss', 'content': 0.05840054899454117, 'timestamp': '2025-09-10 02:48:48.104305', 'step': 19350, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 128], 'flops': 3797092544000}, 'timestamp': '2025-09-10 02:48:48.135474', 'step': 19350, 'epoch': 3} {'type': 'loss', 'content': 0.06607435643672943, 'timestamp': '2025-09-10 02:48:48.137860', 'step': 19351, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 2847885091968}, 'timestamp': '2025-09-10 02:48:48.167565', 'step': 19351, 'epoch': 3} {'type': 'loss', 'content': 0.09804403781890869, 'timestamp': '2025-09-10 02:48:48.191369', 'step': 19352, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 3322488817984}, 'timestamp': '2025-09-10 02:48:48.221618', 'step': 19352, 'epoch': 3} {'type': 'loss', 'content': 0.07850398123264313, 'timestamp': '2025-09-10 02:48:48.224336', 'step': 19353, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 2847885091968}, 'timestamp': '2025-09-10 02:48:48.254404', 'step': 19353, 'epoch': 3} {'type': 'loss', 'content': 0.053071435540914536, 'timestamp': '2025-09-10 02:48:48.256650', 'step': 19354, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 3322488817984}, 'timestamp': '2025-09-10 02:48:48.286605', 'step': 19354, 'epoch': 3} {'type': 'loss', 'content': 0.14435559511184692, 'timestamp': '2025-09-10 02:48:48.289298', 'step': 19355, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 2847885091968}, 'timestamp': '2025-09-10 02:48:48.319506', 'step': 19355, 'epoch': 3} {'type': 'loss', 'content': 0.044551391154527664, 'timestamp': '2025-09-10 02:48:48.343393', 'step': 19356, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 2847885091968}, 'timestamp': '2025-09-10 02:48:48.374662', 'step': 19356, 'epoch': 3} {'type': 'loss', 'content': 0.1444026231765747, 'timestamp': '2025-09-10 02:48:48.377195', 'step': 19357, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 2847885091968}, 'timestamp': '2025-09-10 02:48:48.407151', 'step': 19357, 'epoch': 3} {'type': 'loss', 'content': 0.05071796476840973, 'timestamp': '2025-09-10 02:48:48.409859', 'step': 19358, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 128], 'flops': 3797092544000}, 'timestamp': '2025-09-10 02:48:48.440224', 'step': 19358, 'epoch': 3} {'type': 'loss', 'content': 0.026332784444093704, 'timestamp': '2025-09-10 02:48:48.444861', 'step': 19359, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 3322488817984}, 'timestamp': '2025-09-10 02:48:48.480135', 'step': 19359, 'epoch': 3} {'type': 'loss', 'content': 0.034820906817913055, 'timestamp': '2025-09-10 02:48:48.503800', 'step': 19360, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 144], 'flops': 4271696270016}, 'timestamp': '2025-09-10 02:48:48.534585', 'step': 19360, 'epoch': 3} {'type': 'loss', 'content': 0.09122586995363235, 'timestamp': '2025-09-10 02:48:48.539774', 'step': 19361, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 3322488817984}, 'timestamp': '2025-09-10 02:48:48.570405', 'step': 19361, 'epoch': 3} {'type': 'loss', 'content': 0.10149520635604858, 'timestamp': '2025-09-10 02:48:48.573099', 'step': 19362, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 3322488817984}, 'timestamp': '2025-09-10 02:48:48.603948', 'step': 19362, 'epoch': 3} {'type': 'loss', 'content': 0.10199449211359024, 'timestamp': '2025-09-10 02:48:48.606519', 'step': 19363, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 3322488817984}, 'timestamp': '2025-09-10 02:48:48.653679', 'step': 19363, 'epoch': 3} {'type': 'loss', 'content': 0.10127916187047958, 'timestamp': '2025-09-10 02:48:48.677062', 'step': 19364, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 128], 'flops': 3797092544000}, 'timestamp': '2025-09-10 02:48:48.707127', 'step': 19364, 'epoch': 3} {'type': 'loss', 'content': 0.06920137256383896, 'timestamp': '2025-09-10 02:48:48.709718', 'step': 19365, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 3322488817984}, 'timestamp': '2025-09-10 02:48:48.739965', 'step': 19365, 'epoch': 3} {'type': 'loss', 'content': 0.05687365680932999, 'timestamp': '2025-09-10 02:48:48.742597', 'step': 19366, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 3322488817984}, 'timestamp': '2025-09-10 02:48:48.777906', 'step': 19366, 'epoch': 3} {'type': 'loss', 'content': 0.04103178158402443, 'timestamp': '2025-09-10 02:48:48.780525', 'step': 19367, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 2847885091968}, 'timestamp': '2025-09-10 02:48:48.812621', 'step': 19367, 'epoch': 3} {'type': 'loss', 'content': 0.08682236820459366, 'timestamp': '2025-09-10 02:48:48.840097', 'step': 19368, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 128], 'flops': 3797092544000}, 'timestamp': '2025-09-10 02:48:48.872407', 'step': 19368, 'epoch': 3} {'type': 'loss', 'content': 0.06514507532119751, 'timestamp': '2025-09-10 02:48:48.874662', 'step': 19369, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 80], 'flops': 2373281365952}, 'timestamp': '2025-09-10 02:48:48.904945', 'step': 19369, 'epoch': 3} {'type': 'loss', 'content': 0.1003376692533493, 'timestamp': '2025-09-10 02:48:48.907851', 'step': 19370, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 2847885091968}, 'timestamp': '2025-09-10 02:48:48.939594', 'step': 19370, 'epoch': 3} {'type': 'loss', 'content': 0.08151499181985855, 'timestamp': '2025-09-10 02:48:48.942444', 'step': 19371, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 160], 'flops': 4746299996032}, 'timestamp': '2025-09-10 02:48:48.973733', 'step': 19371, 'epoch': 3} {'type': 'loss', 'content': 0.03683076798915863, 'timestamp': '2025-09-10 02:48:48.997922', 'step': 19372, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 3322488817984}, 'timestamp': '2025-09-10 02:48:49.029003', 'step': 19372, 'epoch': 3} {'type': 'loss', 'content': 0.07461484521627426, 'timestamp': '2025-09-10 02:48:49.032128', 'step': 19373, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 3322488817984}, 'timestamp': '2025-09-10 02:48:49.066787', 'step': 19373, 'epoch': 3} {'type': 'loss', 'content': 0.05907796323299408, 'timestamp': '2025-09-10 02:48:49.069687', 'step': 19374, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 128], 'flops': 3797092544000}, 'timestamp': '2025-09-10 02:48:49.103252', 'step': 19374, 'epoch': 3} {'type': 'loss', 'content': 0.10156810283660889, 'timestamp': '2025-09-10 02:48:49.106259', 'step': 19375, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 3322488817984}, 'timestamp': '2025-09-10 02:48:49.138944', 'step': 19375, 'epoch': 3} {'type': 'loss', 'content': 0.05241777375340462, 'timestamp': '2025-09-10 02:48:49.162773', 'step': 19376, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 3322488817984}, 'timestamp': '2025-09-10 02:48:49.194754', 'step': 19376, 'epoch': 3} {'type': 'loss', 'content': 0.04487645626068115, 'timestamp': '2025-09-10 02:48:49.199193', 'step': 19377, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 2847885091968}, 'timestamp': '2025-09-10 02:48:49.230758', 'step': 19377, 'epoch': 3} {'type': 'loss', 'content': 0.04800876975059509, 'timestamp': '2025-09-10 02:48:49.233482', 'step': 19378, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 2847885091968}, 'timestamp': '2025-09-10 02:48:49.264156', 'step': 19378, 'epoch': 3} {'type': 'loss', 'content': 0.025349382311105728, 'timestamp': '2025-09-10 02:48:49.267341', 'step': 19379, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 3322488817984}, 'timestamp': '2025-09-10 02:48:49.298190', 'step': 19379, 'epoch': 3} {'type': 'loss', 'content': 0.0785260796546936, 'timestamp': '2025-09-10 02:48:49.321852', 'step': 19380, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 3322488817984}, 'timestamp': '2025-09-10 02:48:49.353822', 'step': 19380, 'epoch': 3} {'type': 'loss', 'content': 0.12803475558757782, 'timestamp': '2025-09-10 02:48:49.358906', 'step': 19381, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 3322488817984}, 'timestamp': '2025-09-10 02:48:49.389157', 'step': 19381, 'epoch': 3} {'type': 'loss', 'content': 0.07263067364692688, 'timestamp': '2025-09-10 02:48:49.391359', 'step': 19382, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 3322488817984}, 'timestamp': '2025-09-10 02:48:49.422329', 'step': 19382, 'epoch': 3} {'type': 'loss', 'content': 0.09159103780984879, 'timestamp': '2025-09-10 02:48:49.424802', 'step': 19383, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 3322488817984}, 'timestamp': '2025-09-10 02:48:49.455375', 'step': 19383, 'epoch': 3} {'type': 'loss', 'content': 0.0906955674290657, 'timestamp': '2025-09-10 02:48:49.479249', 'step': 19384, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 3322488817984}, 'timestamp': '2025-09-10 02:48:49.509962', 'step': 19384, 'epoch': 3} {'type': 'loss', 'content': 0.03454575687646866, 'timestamp': '2025-09-10 02:48:49.512401', 'step': 19385, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 3322488817984}, 'timestamp': '2025-09-10 02:48:49.542474', 'step': 19385, 'epoch': 3} {'type': 'loss', 'content': 0.07592419534921646, 'timestamp': '2025-09-10 02:48:49.545235', 'step': 19386, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 128], 'flops': 3797092544000}, 'timestamp': '2025-09-10 02:48:49.576550', 'step': 19386, 'epoch': 3} {'type': 'loss', 'content': 0.08632758259773254, 'timestamp': '2025-09-10 02:48:49.579003', 'step': 19387, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 208], 'flops': 6170111174080}, 'timestamp': '2025-09-10 02:48:49.611418', 'step': 19387, 'epoch': 3} {'type': 'loss', 'content': 0.04363848641514778, 'timestamp': '2025-09-10 02:48:49.639157', 'step': 19388, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 2847885091968}, 'timestamp': '2025-09-10 02:48:49.669889', 'step': 19388, 'epoch': 3} {'type': 'loss', 'content': 0.019642043858766556, 'timestamp': '2025-09-10 02:48:49.672067', 'step': 19389, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 128], 'flops': 3797092544000}, 'timestamp': '2025-09-10 02:48:49.702548', 'step': 19389, 'epoch': 3} {'type': 'loss', 'content': 0.05738285928964615, 'timestamp': '2025-09-10 02:48:49.704887', 'step': 19390, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 2847885091968}, 'timestamp': '2025-09-10 02:48:49.735277', 'step': 19390, 'epoch': 3} {'type': 'loss', 'content': 0.087480828166008, 'timestamp': '2025-09-10 02:48:49.737678', 'step': 19391, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 2847885091968}, 'timestamp': '2025-09-10 02:48:49.768137', 'step': 19391, 'epoch': 3} {'type': 'loss', 'content': 0.04782296344637871, 'timestamp': '2025-09-10 02:48:49.792027', 'step': 19392, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 3322488817984}, 'timestamp': '2025-09-10 02:48:49.823365', 'step': 19392, 'epoch': 3} {'type': 'loss', 'content': 0.173273965716362, 'timestamp': '2025-09-10 02:48:49.826874', 'step': 19393, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 80], 'flops': 2373281365952}, 'timestamp': '2025-09-10 02:48:49.858302', 'step': 19393, 'epoch': 3} {'type': 'loss', 'content': 0.057375457137823105, 'timestamp': '2025-09-10 02:48:49.860732', 'step': 19394, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 2847885091968}, 'timestamp': '2025-09-10 02:48:49.891036', 'step': 19394, 'epoch': 3} {'type': 'loss', 'content': 0.04788488149642944, 'timestamp': '2025-09-10 02:48:49.894614', 'step': 19395, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 144], 'flops': 4271696270016}, 'timestamp': '2025-09-10 02:48:49.925327', 'step': 19395, 'epoch': 3} {'type': 'loss', 'content': 0.029097188264131546, 'timestamp': '2025-09-10 02:48:49.949079', 'step': 19396, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 3322488817984}, 'timestamp': '2025-09-10 02:48:49.980270', 'step': 19396, 'epoch': 3} {'type': 'loss', 'content': 0.021349117159843445, 'timestamp': '2025-09-10 02:48:49.982731', 'step': 19397, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 3322488817984}, 'timestamp': '2025-09-10 02:48:50.015032', 'step': 19397, 'epoch': 3} {'type': 'loss', 'content': 0.051804523915052414, 'timestamp': '2025-09-10 02:48:50.017412', 'step': 19398, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 2847885091968}, 'timestamp': '2025-09-10 02:48:50.049117', 'step': 19398, 'epoch': 3} {'type': 'loss', 'content': 0.038687702268362045, 'timestamp': '2025-09-10 02:48:50.051918', 'step': 19399, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 128], 'flops': 3797092544000}, 'timestamp': '2025-09-10 02:48:50.084168', 'step': 19399, 'epoch': 3} {'type': 'loss', 'content': 0.10397158563137054, 'timestamp': '2025-09-10 02:48:50.111071', 'step': 19400, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 3322488817984}, 'timestamp': '2025-09-10 02:48:50.151928', 'step': 19400, 'epoch': 3} {'type': 'loss', 'content': 0.10094787180423737, 'timestamp': '2025-09-10 02:48:50.156489', 'step': 19401, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 3322488817984}, 'timestamp': '2025-09-10 02:48:50.194167', 'step': 19401, 'epoch': 3} {'type': 'loss', 'content': 0.026663657277822495, 'timestamp': '2025-09-10 02:48:50.199115', 'step': 19402, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 3322488817984}, 'timestamp': '2025-09-10 02:48:50.238442', 'step': 19402, 'epoch': 3} {'type': 'loss', 'content': 0.058055877685546875, 'timestamp': '2025-09-10 02:48:50.241394', 'step': 19403, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 3322488817984}, 'timestamp': '2025-09-10 02:48:50.272127', 'step': 19403, 'epoch': 3} {'type': 'loss', 'content': 0.09309644997119904, 'timestamp': '2025-09-10 02:48:50.295991', 'step': 19404, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 2847885091968}, 'timestamp': '2025-09-10 02:48:50.326773', 'step': 19404, 'epoch': 3} {'type': 'loss', 'content': 0.018516717478632927, 'timestamp': '2025-09-10 02:48:50.330898', 'step': 19405, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 2847885091968}, 'timestamp': '2025-09-10 02:48:50.361022', 'step': 19405, 'epoch': 3} {'type': 'loss', 'content': 0.09920413792133331, 'timestamp': '2025-09-10 02:48:50.363577', 'step': 19406, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 128], 'flops': 3797092544000}, 'timestamp': '2025-09-10 02:48:50.394219', 'step': 19406, 'epoch': 3} {'type': 'loss', 'content': 0.17180421948432922, 'timestamp': '2025-09-10 02:48:50.396869', 'step': 19407, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 144], 'flops': 4271696270016}, 'timestamp': '2025-09-10 02:48:50.428192', 'step': 19407, 'epoch': 3} {'type': 'loss', 'content': 0.039574481546878815, 'timestamp': '2025-09-10 02:48:50.452028', 'step': 19408, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 2847885091968}, 'timestamp': '2025-09-10 02:48:50.482399', 'step': 19408, 'epoch': 3} {'type': 'loss', 'content': 0.04657182842493057, 'timestamp': '2025-09-10 02:48:50.484798', 'step': 19409, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 160], 'flops': 4746299996032}, 'timestamp': '2025-09-10 02:48:50.517200', 'step': 19409, 'epoch': 3} {'type': 'loss', 'content': 0.0653224065899849, 'timestamp': '2025-09-10 02:48:50.519956', 'step': 19410, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 2847885091968}, 'timestamp': '2025-09-10 02:48:50.550018', 'step': 19410, 'epoch': 3} {'type': 'loss', 'content': 0.05063161253929138, 'timestamp': '2025-09-10 02:48:50.552407', 'step': 19411, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 2847885091968}, 'timestamp': '2025-09-10 02:48:50.583315', 'step': 19411, 'epoch': 3} {'type': 'loss', 'content': 0.034574251621961594, 'timestamp': '2025-09-10 02:48:50.606792', 'step': 19412, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 3322488817984}, 'timestamp': '2025-09-10 02:48:50.637202', 'step': 19412, 'epoch': 3} {'type': 'loss', 'content': 0.08145594596862793, 'timestamp': '2025-09-10 02:48:50.639582', 'step': 19413, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 128], 'flops': 3797092544000}, 'timestamp': '2025-09-10 02:48:50.670281', 'step': 19413, 'epoch': 3} {'type': 'loss', 'content': 0.02663748897612095, 'timestamp': '2025-09-10 02:48:50.672706', 'step': 19414, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 3322488817984}, 'timestamp': '2025-09-10 02:48:50.704821', 'step': 19414, 'epoch': 3} {'type': 'loss', 'content': 0.07290364056825638, 'timestamp': '2025-09-10 02:48:50.707325', 'step': 19415, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 2847885091968}, 'timestamp': '2025-09-10 02:48:50.737865', 'step': 19415, 'epoch': 3} {'type': 'loss', 'content': 0.07383248209953308, 'timestamp': '2025-09-10 02:48:50.761864', 'step': 19416, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 128], 'flops': 3797092544000}, 'timestamp': '2025-09-10 02:48:50.792203', 'step': 19416, 'epoch': 3} {'type': 'loss', 'content': 0.15830843150615692, 'timestamp': '2025-09-10 02:48:50.794533', 'step': 19417, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 2847885091968}, 'timestamp': '2025-09-10 02:48:50.825878', 'step': 19417, 'epoch': 3} {'type': 'loss', 'content': 0.10581548511981964, 'timestamp': '2025-09-10 02:48:50.830276', 'step': 19418, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 3322488817984}, 'timestamp': '2025-09-10 02:48:50.860990', 'step': 19418, 'epoch': 3} {'type': 'loss', 'content': 0.09672942012548447, 'timestamp': '2025-09-10 02:48:50.863758', 'step': 19419, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 128], 'flops': 3797092544000}, 'timestamp': '2025-09-10 02:48:50.893859', 'step': 19419, 'epoch': 3} {'type': 'loss', 'content': 0.043200284242630005, 'timestamp': '2025-09-10 02:48:50.919309', 'step': 19420, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 2847885091968}, 'timestamp': '2025-09-10 02:48:50.950120', 'step': 19420, 'epoch': 3} {'type': 'loss', 'content': 0.11435951292514801, 'timestamp': '2025-09-10 02:48:50.952920', 'step': 19421, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 128], 'flops': 3797092544000}, 'timestamp': '2025-09-10 02:48:50.983390', 'step': 19421, 'epoch': 3} {'type': 'loss', 'content': 0.048009440302848816, 'timestamp': '2025-09-10 02:48:50.985743', 'step': 19422, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 3322488817984}, 'timestamp': '2025-09-10 02:48:51.016907', 'step': 19422, 'epoch': 3} {'type': 'loss', 'content': 0.11089132726192474, 'timestamp': '2025-09-10 02:48:51.019290', 'step': 19423, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 2847885091968}, 'timestamp': '2025-09-10 02:48:51.050322', 'step': 19423, 'epoch': 3} {'type': 'loss', 'content': 0.04723536968231201, 'timestamp': '2025-09-10 02:48:51.073906', 'step': 19424, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 3322488817984}, 'timestamp': '2025-09-10 02:48:51.105875', 'step': 19424, 'epoch': 3} {'type': 'loss', 'content': 0.07924444228410721, 'timestamp': '2025-09-10 02:48:51.108216', 'step': 19425, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 3322488817984}, 'timestamp': '2025-09-10 02:48:51.138848', 'step': 19425, 'epoch': 3} {'type': 'loss', 'content': 0.068213552236557, 'timestamp': '2025-09-10 02:48:51.141247', 'step': 19426, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 3322488817984}, 'timestamp': '2025-09-10 02:48:51.171419', 'step': 19426, 'epoch': 3} {'type': 'loss', 'content': 0.07050170004367828, 'timestamp': '2025-09-10 02:48:51.173886', 'step': 19427, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 3322488817984}, 'timestamp': '2025-09-10 02:48:51.203946', 'step': 19427, 'epoch': 3} {'type': 'loss', 'content': 0.10740558803081512, 'timestamp': '2025-09-10 02:48:51.227857', 'step': 19428, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 128], 'flops': 3797092544000}, 'timestamp': '2025-09-10 02:48:51.260669', 'step': 19428, 'epoch': 3} {'type': 'loss', 'content': 0.05890129879117012, 'timestamp': '2025-09-10 02:48:51.262882', 'step': 19429, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 2847885091968}, 'timestamp': '2025-09-10 02:48:51.294303', 'step': 19429, 'epoch': 3} {'type': 'loss', 'content': 0.1279762238264084, 'timestamp': '2025-09-10 02:48:51.296937', 'step': 19430, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 128], 'flops': 3797092544000}, 'timestamp': '2025-09-10 02:48:51.328926', 'step': 19430, 'epoch': 3} {'type': 'loss', 'content': 0.09351638704538345, 'timestamp': '2025-09-10 02:48:51.332702', 'step': 19431, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 80], 'flops': 2373281365952}, 'timestamp': '2025-09-10 02:48:51.363803', 'step': 19431, 'epoch': 3} {'type': 'loss', 'content': 0.10740767419338226, 'timestamp': '2025-09-10 02:48:51.387518', 'step': 19432, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 3322488817984}, 'timestamp': '2025-09-10 02:48:51.418520', 'step': 19432, 'epoch': 3} {'type': 'loss', 'content': 0.10336814075708389, 'timestamp': '2025-09-10 02:48:51.421836', 'step': 19433, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 2847885091968}, 'timestamp': '2025-09-10 02:48:51.453119', 'step': 19433, 'epoch': 3} {'type': 'loss', 'content': 0.06599732488393784, 'timestamp': '2025-09-10 02:48:51.455903', 'step': 19434, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 2847885091968}, 'timestamp': '2025-09-10 02:48:51.486579', 'step': 19434, 'epoch': 3} {'type': 'loss', 'content': 0.08134635537862778, 'timestamp': '2025-09-10 02:48:51.488950', 'step': 19435, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 2847885091968}, 'timestamp': '2025-09-10 02:48:51.519348', 'step': 19435, 'epoch': 3} {'type': 'loss', 'content': 0.08587048202753067, 'timestamp': '2025-09-10 02:48:51.543293', 'step': 19436, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 3322488817984}, 'timestamp': '2025-09-10 02:48:51.574073', 'step': 19436, 'epoch': 3} {'type': 'loss', 'content': 0.038644950836896896, 'timestamp': '2025-09-10 02:48:51.576455', 'step': 19437, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 2847885091968}, 'timestamp': '2025-09-10 02:48:51.606206', 'step': 19437, 'epoch': 3} {'type': 'loss', 'content': 0.07197285443544388, 'timestamp': '2025-09-10 02:48:51.609078', 'step': 19438, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 2847885091968}, 'timestamp': '2025-09-10 02:48:51.639429', 'step': 19438, 'epoch': 3} {'type': 'loss', 'content': 0.07590305060148239, 'timestamp': '2025-09-10 02:48:51.642064', 'step': 19439, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 128], 'flops': 3797092544000}, 'timestamp': '2025-09-10 02:48:51.672259', 'step': 19439, 'epoch': 3} {'type': 'loss', 'content': 0.08366833627223969, 'timestamp': '2025-09-10 02:48:51.696180', 'step': 19440, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 128], 'flops': 3797092544000}, 'timestamp': '2025-09-10 02:48:51.726729', 'step': 19440, 'epoch': 3} {'type': 'loss', 'content': 0.056096382439136505, 'timestamp': '2025-09-10 02:48:51.729129', 'step': 19441, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 3322488817984}, 'timestamp': '2025-09-10 02:48:51.760870', 'step': 19441, 'epoch': 3} {'type': 'loss', 'content': 0.07934166491031647, 'timestamp': '2025-09-10 02:48:51.763544', 'step': 19442, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 3322488817984}, 'timestamp': '2025-09-10 02:48:51.795372', 'step': 19442, 'epoch': 3} {'type': 'loss', 'content': 0.09612488001585007, 'timestamp': '2025-09-10 02:48:51.797715', 'step': 19443, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 3322488817984}, 'timestamp': '2025-09-10 02:48:51.828394', 'step': 19443, 'epoch': 3} {'type': 'loss', 'content': 0.04024121165275574, 'timestamp': '2025-09-10 02:48:51.856734', 'step': 19444, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 80], 'flops': 2373281365952}, 'timestamp': '2025-09-10 02:48:51.937602', 'step': 19444, 'epoch': 3} {'type': 'loss', 'content': 0.027443917468190193, 'timestamp': '2025-09-10 02:48:51.960863', 'step': 19445, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 2847885091968}, 'timestamp': '2025-09-10 02:48:52.042535', 'step': 19445, 'epoch': 3} {'type': 'loss', 'content': 0.05732251703739166, 'timestamp': '2025-09-10 02:48:52.055791', 'step': 19446, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 128], 'flops': 3797092544000}, 'timestamp': '2025-09-10 02:48:52.111822', 'step': 19446, 'epoch': 3} {'type': 'loss', 'content': 0.08544761687517166, 'timestamp': '2025-09-10 02:48:52.116958', 'step': 19447, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 160], 'flops': 4746299996032}, 'timestamp': '2025-09-10 02:48:52.152185', 'step': 19447, 'epoch': 3} {'type': 'loss', 'content': 0.035865914076566696, 'timestamp': '2025-09-10 02:48:52.175853', 'step': 19448, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 144], 'flops': 4271696270016}, 'timestamp': '2025-09-10 02:48:52.206949', 'step': 19448, 'epoch': 3} {'type': 'loss', 'content': 0.0608549639582634, 'timestamp': '2025-09-10 02:48:52.210392', 'step': 19449, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 128], 'flops': 3797092544000}, 'timestamp': '2025-09-10 02:48:52.248494', 'step': 19449, 'epoch': 3} {'type': 'loss', 'content': 0.1013403981924057, 'timestamp': '2025-09-10 02:48:52.250926', 'step': 19450, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 144], 'flops': 4271696270016}, 'timestamp': '2025-09-10 02:48:52.283043', 'step': 19450, 'epoch': 3} {'type': 'loss', 'content': 0.06288434565067291, 'timestamp': '2025-09-10 02:48:52.286746', 'step': 19451, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 80], 'flops': 2373281365952}, 'timestamp': '2025-09-10 02:48:52.317713', 'step': 19451, 'epoch': 3} {'type': 'loss', 'content': 0.1077338382601738, 'timestamp': '2025-09-10 02:48:52.344501', 'step': 19452, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 144], 'flops': 4271696270016}, 'timestamp': '2025-09-10 02:48:52.376146', 'step': 19452, 'epoch': 3} {'type': 'loss', 'content': 0.05011548474431038, 'timestamp': '2025-09-10 02:48:52.379254', 'step': 19453, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 2847885091968}, 'timestamp': '2025-09-10 02:48:52.410886', 'step': 19453, 'epoch': 3} {'type': 'loss', 'content': 0.15673455595970154, 'timestamp': '2025-09-10 02:48:52.413261', 'step': 19454, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 3322488817984}, 'timestamp': '2025-09-10 02:48:52.445556', 'step': 19454, 'epoch': 3} {'type': 'loss', 'content': 0.097437284886837, 'timestamp': '2025-09-10 02:48:52.448095', 'step': 19455, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 2847885091968}, 'timestamp': '2025-09-10 02:48:52.480342', 'step': 19455, 'epoch': 3} {'type': 'loss', 'content': 0.08724893629550934, 'timestamp': '2025-09-10 02:48:52.504542', 'step': 19456, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 3322488817984}, 'timestamp': '2025-09-10 02:48:52.536897', 'step': 19456, 'epoch': 3} {'type': 'loss', 'content': 0.05864039435982704, 'timestamp': '2025-09-10 02:48:52.539640', 'step': 19457, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 128], 'flops': 3797092544000}, 'timestamp': '2025-09-10 02:48:52.572206', 'step': 19457, 'epoch': 3} {'type': 'loss', 'content': 0.10620446503162384, 'timestamp': '2025-09-10 02:48:52.574997', 'step': 19458, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 3322488817984}, 'timestamp': '2025-09-10 02:48:52.607807', 'step': 19458, 'epoch': 3} {'type': 'loss', 'content': 0.04901837557554245, 'timestamp': '2025-09-10 02:48:52.610574', 'step': 19459, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 2847885091968}, 'timestamp': '2025-09-10 02:48:52.642630', 'step': 19459, 'epoch': 3} {'type': 'loss', 'content': 0.04696240276098251, 'timestamp': '2025-09-10 02:48:52.667290', 'step': 19460, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 2847885091968}, 'timestamp': '2025-09-10 02:48:52.700510', 'step': 19460, 'epoch': 3} {'type': 'loss', 'content': 0.09874830394983292, 'timestamp': '2025-09-10 02:48:52.702972', 'step': 19461, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 176], 'flops': 5220903722048}, 'timestamp': '2025-09-10 02:48:52.735201', 'step': 19461, 'epoch': 3} {'type': 'loss', 'content': 0.06806796044111252, 'timestamp': '2025-09-10 02:48:52.739549', 'step': 19462, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 3322488817984}, 'timestamp': '2025-09-10 02:48:52.771934', 'step': 19462, 'epoch': 3} {'type': 'loss', 'content': 0.16357964277267456, 'timestamp': '2025-09-10 02:48:52.776433', 'step': 19463, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 3322488817984}, 'timestamp': '2025-09-10 02:48:52.807474', 'step': 19463, 'epoch': 3} {'type': 'loss', 'content': 0.052265580743551254, 'timestamp': '2025-09-10 02:48:52.833770', 'step': 19464, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 3322488817984}, 'timestamp': '2025-09-10 02:48:52.882033', 'step': 19464, 'epoch': 3} {'type': 'loss', 'content': 0.13982179760932922, 'timestamp': '2025-09-10 02:48:52.884677', 'step': 19465, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 3322488817984}, 'timestamp': '2025-09-10 02:48:52.916434', 'step': 19465, 'epoch': 3} {'type': 'loss', 'content': 0.036178890615701675, 'timestamp': '2025-09-10 02:48:52.919361', 'step': 19466, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 3322488817984}, 'timestamp': '2025-09-10 02:48:52.951907', 'step': 19466, 'epoch': 3} {'type': 'loss', 'content': 0.0832226350903511, 'timestamp': '2025-09-10 02:48:52.955095', 'step': 19467, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 3322488817984}, 'timestamp': '2025-09-10 02:48:52.985965', 'step': 19467, 'epoch': 3} {'type': 'loss', 'content': 0.09446433931589127, 'timestamp': '2025-09-10 02:48:53.013445', 'step': 19468, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 128], 'flops': 3797092544000}, 'timestamp': '2025-09-10 02:48:53.046132', 'step': 19468, 'epoch': 3} {'type': 'loss', 'content': 0.05072445794939995, 'timestamp': '2025-09-10 02:48:53.048589', 'step': 19469, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 128], 'flops': 3797092544000}, 'timestamp': '2025-09-10 02:48:53.080425', 'step': 19469, 'epoch': 3} {'type': 'loss', 'content': 0.10020672529935837, 'timestamp': '2025-09-10 02:48:53.082639', 'step': 19470, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 2847885091968}, 'timestamp': '2025-09-10 02:48:53.116853', 'step': 19470, 'epoch': 3} {'type': 'loss', 'content': 0.008765027858316898, 'timestamp': '2025-09-10 02:48:53.120150', 'step': 19471, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 128], 'flops': 3797092544000}, 'timestamp': '2025-09-10 02:48:53.152933', 'step': 19471, 'epoch': 3} {'type': 'loss', 'content': 0.034299079328775406, 'timestamp': '2025-09-10 02:48:53.176532', 'step': 19472, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 2847885091968}, 'timestamp': '2025-09-10 02:48:53.206864', 'step': 19472, 'epoch': 3} {'type': 'loss', 'content': 0.05714309960603714, 'timestamp': '2025-09-10 02:48:53.209595', 'step': 19473, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 2847885091968}, 'timestamp': '2025-09-10 02:48:53.240849', 'step': 19473, 'epoch': 3} {'type': 'loss', 'content': 0.06142919883131981, 'timestamp': '2025-09-10 02:48:53.244081', 'step': 19474, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 3322488817984}, 'timestamp': '2025-09-10 02:48:53.274127', 'step': 19474, 'epoch': 3} {'type': 'loss', 'content': 0.08000202476978302, 'timestamp': '2025-09-10 02:48:53.276869', 'step': 19475, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 160], 'flops': 4746299996032}, 'timestamp': '2025-09-10 02:48:53.307314', 'step': 19475, 'epoch': 3} {'type': 'loss', 'content': 0.039156991988420486, 'timestamp': '2025-09-10 02:48:53.330839', 'step': 19476, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 3322488817984}, 'timestamp': '2025-09-10 02:48:53.361609', 'step': 19476, 'epoch': 3} {'type': 'loss', 'content': 0.1060190200805664, 'timestamp': '2025-09-10 02:48:53.363909', 'step': 19477, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 144], 'flops': 4271696270016}, 'timestamp': '2025-09-10 02:48:53.394059', 'step': 19477, 'epoch': 3} {'type': 'loss', 'content': 0.055514562875032425, 'timestamp': '2025-09-10 02:48:53.396560', 'step': 19478, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 144], 'flops': 4271696270016}, 'timestamp': '2025-09-10 02:48:53.427029', 'step': 19478, 'epoch': 3} {'type': 'loss', 'content': 0.045830633491277695, 'timestamp': '2025-09-10 02:48:53.429481', 'step': 19479, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 2847885091968}, 'timestamp': '2025-09-10 02:48:53.481717', 'step': 19479, 'epoch': 3} {'type': 'loss', 'content': 0.07283750921487808, 'timestamp': '2025-09-10 02:48:53.505435', 'step': 19480, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 3322488817984}, 'timestamp': '2025-09-10 02:48:53.536043', 'step': 19480, 'epoch': 3} {'type': 'loss', 'content': 0.057848285883665085, 'timestamp': '2025-09-10 02:48:53.538685', 'step': 19481, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 3322488817984}, 'timestamp': '2025-09-10 02:48:53.569711', 'step': 19481, 'epoch': 3} {'type': 'loss', 'content': 0.031688392162323, 'timestamp': '2025-09-10 02:48:53.572389', 'step': 19482, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 3322488817984}, 'timestamp': '2025-09-10 02:48:53.602454', 'step': 19482, 'epoch': 3} {'type': 'loss', 'content': 0.06168514862656593, 'timestamp': '2025-09-10 02:48:53.605338', 'step': 19483, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 2847885091968}, 'timestamp': '2025-09-10 02:48:53.635417', 'step': 19483, 'epoch': 3} {'type': 'loss', 'content': 0.11539874225854874, 'timestamp': '2025-09-10 02:48:53.660216', 'step': 19484, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 2847885091968}, 'timestamp': '2025-09-10 02:48:53.691339', 'step': 19484, 'epoch': 3} {'type': 'loss', 'content': 0.07666373997926712, 'timestamp': '2025-09-10 02:48:53.693598', 'step': 19485, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 3322488817984}, 'timestamp': '2025-09-10 02:48:53.725684', 'step': 19485, 'epoch': 3} {'type': 'loss', 'content': 0.03490554541349411, 'timestamp': '2025-09-10 02:48:53.728369', 'step': 19486, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 2847885091968}, 'timestamp': '2025-09-10 02:48:53.761337', 'step': 19486, 'epoch': 3} {'type': 'loss', 'content': 0.07544160634279251, 'timestamp': '2025-09-10 02:48:53.764535', 'step': 19487, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 2847885091968}, 'timestamp': '2025-09-10 02:48:53.796190', 'step': 19487, 'epoch': 3} {'type': 'loss', 'content': 0.02907346934080124, 'timestamp': '2025-09-10 02:48:53.821377', 'step': 19488, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 2847885091968}, 'timestamp': '2025-09-10 02:48:53.852270', 'step': 19488, 'epoch': 3} {'type': 'loss', 'content': 0.07415493577718735, 'timestamp': '2025-09-10 02:48:53.857628', 'step': 19489, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 3322488817984}, 'timestamp': '2025-09-10 02:48:53.889294', 'step': 19489, 'epoch': 3} {'type': 'loss', 'content': 0.0717320665717125, 'timestamp': '2025-09-10 02:48:53.891563', 'step': 19490, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 2847885091968}, 'timestamp': '2025-09-10 02:48:53.923094', 'step': 19490, 'epoch': 3} {'type': 'loss', 'content': 0.028825126588344574, 'timestamp': '2025-09-10 02:48:53.925898', 'step': 19491, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 3322488817984}, 'timestamp': '2025-09-10 02:48:53.956169', 'step': 19491, 'epoch': 3} {'type': 'loss', 'content': 0.05258350446820259, 'timestamp': '2025-09-10 02:48:53.979884', 'step': 19492, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 144], 'flops': 4271696270016}, 'timestamp': '2025-09-10 02:48:54.010268', 'step': 19492, 'epoch': 3} {'type': 'loss', 'content': 0.1281042844057083, 'timestamp': '2025-09-10 02:48:54.012650', 'step': 19493, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 128], 'flops': 3797092544000}, 'timestamp': '2025-09-10 02:48:54.043149', 'step': 19493, 'epoch': 3} {'type': 'loss', 'content': 0.06051363795995712, 'timestamp': '2025-09-10 02:48:54.045767', 'step': 19494, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 2847885091968}, 'timestamp': '2025-09-10 02:48:54.076723', 'step': 19494, 'epoch': 3} {'type': 'loss', 'content': 0.07839889824390411, 'timestamp': '2025-09-10 02:48:54.080382', 'step': 19495, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 2847885091968}, 'timestamp': '2025-09-10 02:48:54.112560', 'step': 19495, 'epoch': 3} {'type': 'loss', 'content': 0.017401285469532013, 'timestamp': '2025-09-10 02:48:54.136274', 'step': 19496, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 80], 'flops': 2373281365952}, 'timestamp': '2025-09-10 02:48:54.168525', 'step': 19496, 'epoch': 3} {'type': 'loss', 'content': 0.04483303427696228, 'timestamp': '2025-09-10 02:48:54.170911', 'step': 19497, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 3322488817984}, 'timestamp': '2025-09-10 02:48:54.201221', 'step': 19497, 'epoch': 3} {'type': 'loss', 'content': 0.05864956974983215, 'timestamp': '2025-09-10 02:48:54.203477', 'step': 19498, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 144], 'flops': 4271696270016}, 'timestamp': '2025-09-10 02:48:54.235049', 'step': 19498, 'epoch': 3} {'type': 'loss', 'content': 0.04690442234277725, 'timestamp': '2025-09-10 02:48:54.238860', 'step': 19499, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 2847885091968}, 'timestamp': '2025-09-10 02:48:54.270970', 'step': 19499, 'epoch': 3} {'type': 'loss', 'content': 0.06672066450119019, 'timestamp': '2025-09-10 02:48:54.294598', 'step': 19500, 'epoch': 3} {'type': 'info', 'content': 'Checkpoint saved at step 19500', 'timestamp': '2025-09-10 02:48:59.104900', 'step': 19500, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 3322488817984}, 'timestamp': '2025-09-10 02:48:59.138427', 'step': 19500, 'epoch': 3} {'type': 'loss', 'content': 0.0369514562189579, 'timestamp': '2025-09-10 02:48:59.140762', 'step': 19501, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 144], 'flops': 4271696270016}, 'timestamp': '2025-09-10 02:48:59.173994', 'step': 19501, 'epoch': 3} {'type': 'loss', 'content': 0.10207143425941467, 'timestamp': '2025-09-10 02:48:59.176612', 'step': 19502, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 144], 'flops': 4271696270016}, 'timestamp': '2025-09-10 02:48:59.209751', 'step': 19502, 'epoch': 3} {'type': 'loss', 'content': 0.10048921406269073, 'timestamp': '2025-09-10 02:48:59.212576', 'step': 19503, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 128], 'flops': 3797092544000}, 'timestamp': '2025-09-10 02:48:59.244787', 'step': 19503, 'epoch': 3} {'type': 'loss', 'content': 0.04160097986459732, 'timestamp': '2025-09-10 02:48:59.268641', 'step': 19504, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 2847885091968}, 'timestamp': '2025-09-10 02:48:59.300436', 'step': 19504, 'epoch': 3} {'type': 'loss', 'content': 0.08027023077011108, 'timestamp': '2025-09-10 02:48:59.302875', 'step': 19505, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 2847885091968}, 'timestamp': '2025-09-10 02:48:59.335658', 'step': 19505, 'epoch': 3} {'type': 'loss', 'content': 0.04958778992295265, 'timestamp': '2025-09-10 02:48:59.350334', 'step': 19506, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 2847885091968}, 'timestamp': '2025-09-10 02:48:59.382774', 'step': 19506, 'epoch': 3} {'type': 'loss', 'content': 0.036006852984428406, 'timestamp': '2025-09-10 02:48:59.385184', 'step': 19507, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 3322488817984}, 'timestamp': '2025-09-10 02:48:59.415672', 'step': 19507, 'epoch': 3} {'type': 'loss', 'content': 0.06640513986349106, 'timestamp': '2025-09-10 02:48:59.439471', 'step': 19508, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 144], 'flops': 4271696270016}, 'timestamp': '2025-09-10 02:48:59.470077', 'step': 19508, 'epoch': 3} {'type': 'loss', 'content': 0.07293763011693954, 'timestamp': '2025-09-10 02:48:59.472402', 'step': 19509, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 128], 'flops': 3797092544000}, 'timestamp': '2025-09-10 02:48:59.502684', 'step': 19509, 'epoch': 3} {'type': 'loss', 'content': 0.10415046662092209, 'timestamp': '2025-09-10 02:48:59.504986', 'step': 19510, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 2847885091968}, 'timestamp': '2025-09-10 02:48:59.535881', 'step': 19510, 'epoch': 3} {'type': 'loss', 'content': 0.018357254564762115, 'timestamp': '2025-09-10 02:48:59.538450', 'step': 19511, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 176], 'flops': 5220903722048}, 'timestamp': '2025-09-10 02:48:59.569669', 'step': 19511, 'epoch': 3} {'type': 'loss', 'content': 0.03115842677652836, 'timestamp': '2025-09-10 02:48:59.594740', 'step': 19512, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 3322488817984}, 'timestamp': '2025-09-10 02:48:59.626672', 'step': 19512, 'epoch': 3} {'type': 'loss', 'content': 0.040250204503536224, 'timestamp': '2025-09-10 02:48:59.632136', 'step': 19513, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 128], 'flops': 3797092544000}, 'timestamp': '2025-09-10 02:48:59.670069', 'step': 19513, 'epoch': 3} {'type': 'loss', 'content': 0.12631376087665558, 'timestamp': '2025-09-10 02:48:59.672244', 'step': 19514, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 80], 'flops': 2373281365952}, 'timestamp': '2025-09-10 02:48:59.703422', 'step': 19514, 'epoch': 3} {'type': 'loss', 'content': 0.06079782918095589, 'timestamp': '2025-09-10 02:48:59.705959', 'step': 19515, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 128], 'flops': 3797092544000}, 'timestamp': '2025-09-10 02:48:59.737946', 'step': 19515, 'epoch': 3} {'type': 'loss', 'content': 0.04016827046871185, 'timestamp': '2025-09-10 02:48:59.761590', 'step': 19516, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 128], 'flops': 3797092544000}, 'timestamp': '2025-09-10 02:48:59.793515', 'step': 19516, 'epoch': 3} {'type': 'loss', 'content': 0.03039097972214222, 'timestamp': '2025-09-10 02:48:59.795713', 'step': 19517, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 128], 'flops': 3797092544000}, 'timestamp': '2025-09-10 02:48:59.825945', 'step': 19517, 'epoch': 3} {'type': 'loss', 'content': 0.04508878290653229, 'timestamp': '2025-09-10 02:48:59.829143', 'step': 19518, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 144], 'flops': 4271696270016}, 'timestamp': '2025-09-10 02:48:59.859969', 'step': 19518, 'epoch': 3} {'type': 'loss', 'content': 0.07880164682865143, 'timestamp': '2025-09-10 02:48:59.862442', 'step': 19519, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 128], 'flops': 3797092544000}, 'timestamp': '2025-09-10 02:48:59.893391', 'step': 19519, 'epoch': 3} {'type': 'loss', 'content': 0.13523055613040924, 'timestamp': '2025-09-10 02:48:59.917922', 'step': 19520, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 3322488817984}, 'timestamp': '2025-09-10 02:48:59.948388', 'step': 19520, 'epoch': 3} {'type': 'loss', 'content': 0.06941626965999603, 'timestamp': '2025-09-10 02:48:59.951534', 'step': 19521, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 2847885091968}, 'timestamp': '2025-09-10 02:48:59.981995', 'step': 19521, 'epoch': 3} {'type': 'loss', 'content': 0.04766431823372841, 'timestamp': '2025-09-10 02:48:59.984278', 'step': 19522, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 144], 'flops': 4271696270016}, 'timestamp': '2025-09-10 02:49:00.014783', 'step': 19522, 'epoch': 3} {'type': 'loss', 'content': 0.12459634244441986, 'timestamp': '2025-09-10 02:49:00.018249', 'step': 19523, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 2847885091968}, 'timestamp': '2025-09-10 02:49:00.048573', 'step': 19523, 'epoch': 3} {'type': 'loss', 'content': 0.06366956979036331, 'timestamp': '2025-09-10 02:49:00.072374', 'step': 19524, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 3322488817984}, 'timestamp': '2025-09-10 02:49:00.103578', 'step': 19524, 'epoch': 3} {'type': 'loss', 'content': 0.04827484115958214, 'timestamp': '2025-09-10 02:49:00.106202', 'step': 19525, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 3322488817984}, 'timestamp': '2025-09-10 02:49:00.136967', 'step': 19525, 'epoch': 3} {'type': 'loss', 'content': 0.05659735947847366, 'timestamp': '2025-09-10 02:49:00.139111', 'step': 19526, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 2847885091968}, 'timestamp': '2025-09-10 02:49:00.168898', 'step': 19526, 'epoch': 3} {'type': 'loss', 'content': 0.0478762648999691, 'timestamp': '2025-09-10 02:49:00.172512', 'step': 19527, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 3322488817984}, 'timestamp': '2025-09-10 02:49:00.205076', 'step': 19527, 'epoch': 3} {'type': 'loss', 'content': 0.04471764340996742, 'timestamp': '2025-09-10 02:49:00.228494', 'step': 19528, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 3322488817984}, 'timestamp': '2025-09-10 02:49:00.259535', 'step': 19528, 'epoch': 3} {'type': 'loss', 'content': 0.03597700223326683, 'timestamp': '2025-09-10 02:49:00.261581', 'step': 19529, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 2847885091968}, 'timestamp': '2025-09-10 02:49:00.292349', 'step': 19529, 'epoch': 3} {'type': 'loss', 'content': 0.07318930327892303, 'timestamp': '2025-09-10 02:49:00.294174', 'step': 19530, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 3322488817984}, 'timestamp': '2025-09-10 02:49:00.324464', 'step': 19530, 'epoch': 3} {'type': 'loss', 'content': 0.05735459551215172, 'timestamp': '2025-09-10 02:49:00.326698', 'step': 19531, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 3322488817984}, 'timestamp': '2025-09-10 02:49:00.357635', 'step': 19531, 'epoch': 3} {'type': 'loss', 'content': 0.10543318092823029, 'timestamp': '2025-09-10 02:49:00.381154', 'step': 19532, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 3322488817984}, 'timestamp': '2025-09-10 02:49:00.415573', 'step': 19532, 'epoch': 3} {'type': 'loss', 'content': 0.13786469399929047, 'timestamp': '2025-09-10 02:49:00.418101', 'step': 19533, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 3322488817984}, 'timestamp': '2025-09-10 02:49:00.450984', 'step': 19533, 'epoch': 3} {'type': 'loss', 'content': 0.05782797560095787, 'timestamp': '2025-09-10 02:49:00.453231', 'step': 19534, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 128], 'flops': 3797092544000}, 'timestamp': '2025-09-10 02:49:00.483115', 'step': 19534, 'epoch': 3} {'type': 'loss', 'content': 0.05830361321568489, 'timestamp': '2025-09-10 02:49:00.485228', 'step': 19535, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 2847885091968}, 'timestamp': '2025-09-10 02:49:00.515499', 'step': 19535, 'epoch': 3} {'type': 'loss', 'content': 0.014225533232092857, 'timestamp': '2025-09-10 02:49:00.538947', 'step': 19536, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 128], 'flops': 3797092544000}, 'timestamp': '2025-09-10 02:49:00.569417', 'step': 19536, 'epoch': 3} {'type': 'loss', 'content': 0.03450492024421692, 'timestamp': '2025-09-10 02:49:00.571753', 'step': 19537, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 80], 'flops': 2373281365952}, 'timestamp': '2025-09-10 02:49:00.602147', 'step': 19537, 'epoch': 3} {'type': 'loss', 'content': 0.01976933144032955, 'timestamp': '2025-09-10 02:49:00.604223', 'step': 19538, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 128], 'flops': 3797092544000}, 'timestamp': '2025-09-10 02:49:00.634203', 'step': 19538, 'epoch': 3} {'type': 'loss', 'content': 0.03828266263008118, 'timestamp': '2025-09-10 02:49:00.636495', 'step': 19539, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 3322488817984}, 'timestamp': '2025-09-10 02:49:00.666935', 'step': 19539, 'epoch': 3} {'type': 'loss', 'content': 0.13897670805454254, 'timestamp': '2025-09-10 02:49:00.690244', 'step': 19540, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 3322488817984}, 'timestamp': '2025-09-10 02:49:00.720982', 'step': 19540, 'epoch': 3} {'type': 'loss', 'content': 0.045906949788331985, 'timestamp': '2025-09-10 02:49:00.722921', 'step': 19541, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 80], 'flops': 2373281365952}, 'timestamp': '2025-09-10 02:49:00.754980', 'step': 19541, 'epoch': 3} {'type': 'loss', 'content': 0.04830881953239441, 'timestamp': '2025-09-10 02:49:00.757199', 'step': 19542, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 3322488817984}, 'timestamp': '2025-09-10 02:49:00.789050', 'step': 19542, 'epoch': 3} {'type': 'loss', 'content': 0.04319331794977188, 'timestamp': '2025-09-10 02:49:00.791257', 'step': 19543, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 2847885091968}, 'timestamp': '2025-09-10 02:49:00.822128', 'step': 19543, 'epoch': 3} {'type': 'loss', 'content': 0.032056648284196854, 'timestamp': '2025-09-10 02:49:00.845549', 'step': 19544, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 128], 'flops': 3797092544000}, 'timestamp': '2025-09-10 02:49:00.876076', 'step': 19544, 'epoch': 3} {'type': 'loss', 'content': 0.14643502235412598, 'timestamp': '2025-09-10 02:49:00.879672', 'step': 19545, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 2847885091968}, 'timestamp': '2025-09-10 02:49:00.912482', 'step': 19545, 'epoch': 3} {'type': 'loss', 'content': 0.1477905958890915, 'timestamp': '2025-09-10 02:49:00.914486', 'step': 19546, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 80], 'flops': 2373281365952}, 'timestamp': '2025-09-10 02:49:00.945358', 'step': 19546, 'epoch': 3} {'type': 'loss', 'content': 0.037493009120225906, 'timestamp': '2025-09-10 02:49:00.949046', 'step': 19547, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 3322488817984}, 'timestamp': '2025-09-10 02:49:00.987514', 'step': 19547, 'epoch': 3} {'type': 'loss', 'content': 0.016129711642861366, 'timestamp': '2025-09-10 02:49:01.011147', 'step': 19548, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 128], 'flops': 3797092544000}, 'timestamp': '2025-09-10 02:49:01.041635', 'step': 19548, 'epoch': 3} {'type': 'loss', 'content': 0.07547245919704437, 'timestamp': '2025-09-10 02:49:01.044742', 'step': 19549, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 2847885091968}, 'timestamp': '2025-09-10 02:49:01.076008', 'step': 19549, 'epoch': 3} {'type': 'loss', 'content': 0.07904595136642456, 'timestamp': '2025-09-10 02:49:01.078150', 'step': 19550, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 128], 'flops': 3797092544000}, 'timestamp': '2025-09-10 02:49:01.108847', 'step': 19550, 'epoch': 3} {'type': 'loss', 'content': 0.03251497447490692, 'timestamp': '2025-09-10 02:49:01.111095', 'step': 19551, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 128], 'flops': 3797092544000}, 'timestamp': '2025-09-10 02:49:01.141612', 'step': 19551, 'epoch': 3} {'type': 'loss', 'content': 0.07944023609161377, 'timestamp': '2025-09-10 02:49:01.165009', 'step': 19552, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 3322488817984}, 'timestamp': '2025-09-10 02:49:01.195549', 'step': 19552, 'epoch': 3} {'type': 'loss', 'content': 0.07933249324560165, 'timestamp': '2025-09-10 02:49:01.197828', 'step': 19553, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 128], 'flops': 3797092544000}, 'timestamp': '2025-09-10 02:49:01.228727', 'step': 19553, 'epoch': 3} {'type': 'loss', 'content': 0.02896343357861042, 'timestamp': '2025-09-10 02:49:01.233198', 'step': 19554, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 144], 'flops': 4271696270016}, 'timestamp': '2025-09-10 02:49:01.264017', 'step': 19554, 'epoch': 3} {'type': 'loss', 'content': 0.033347662538290024, 'timestamp': '2025-09-10 02:49:01.266500', 'step': 19555, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 3322488817984}, 'timestamp': '2025-09-10 02:49:01.296718', 'step': 19555, 'epoch': 3} {'type': 'loss', 'content': 0.05351276695728302, 'timestamp': '2025-09-10 02:49:01.321909', 'step': 19556, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 2847885091968}, 'timestamp': '2025-09-10 02:49:01.354429', 'step': 19556, 'epoch': 3} {'type': 'loss', 'content': 0.11104108393192291, 'timestamp': '2025-09-10 02:49:01.356569', 'step': 19557, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 128], 'flops': 3797092544000}, 'timestamp': '2025-09-10 02:49:01.387031', 'step': 19557, 'epoch': 3} {'type': 'loss', 'content': 0.10093220323324203, 'timestamp': '2025-09-10 02:49:01.389488', 'step': 19558, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 128], 'flops': 3797092544000}, 'timestamp': '2025-09-10 02:49:01.421415', 'step': 19558, 'epoch': 3} {'type': 'loss', 'content': 0.1126243993639946, 'timestamp': '2025-09-10 02:49:01.423756', 'step': 19559, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 3322488817984}, 'timestamp': '2025-09-10 02:49:01.455665', 'step': 19559, 'epoch': 3} {'type': 'loss', 'content': 0.13777655363082886, 'timestamp': '2025-09-10 02:49:01.479009', 'step': 19560, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 128], 'flops': 3797092544000}, 'timestamp': '2025-09-10 02:49:01.510101', 'step': 19560, 'epoch': 3} {'type': 'loss', 'content': 0.0447554737329483, 'timestamp': '2025-09-10 02:49:01.512282', 'step': 19561, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 160], 'flops': 4746299996032}, 'timestamp': '2025-09-10 02:49:01.543804', 'step': 19561, 'epoch': 3} {'type': 'loss', 'content': 0.02197219803929329, 'timestamp': '2025-09-10 02:49:01.546671', 'step': 19562, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 128], 'flops': 3797092544000}, 'timestamp': '2025-09-10 02:49:01.579579', 'step': 19562, 'epoch': 3} {'type': 'loss', 'content': 0.04353650286793709, 'timestamp': '2025-09-10 02:49:01.582122', 'step': 19563, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 2847885091968}, 'timestamp': '2025-09-10 02:49:01.613660', 'step': 19563, 'epoch': 3} {'type': 'loss', 'content': 0.040846165269613266, 'timestamp': '2025-09-10 02:49:01.637743', 'step': 19564, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 3322488817984}, 'timestamp': '2025-09-10 02:49:01.670082', 'step': 19564, 'epoch': 3} {'type': 'loss', 'content': 0.08531031012535095, 'timestamp': '2025-09-10 02:49:01.672711', 'step': 19565, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 2847885091968}, 'timestamp': '2025-09-10 02:49:01.704042', 'step': 19565, 'epoch': 3} {'type': 'loss', 'content': 0.03256424143910408, 'timestamp': '2025-09-10 02:49:01.706474', 'step': 19566, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 128], 'flops': 3797092544000}, 'timestamp': '2025-09-10 02:49:01.737968', 'step': 19566, 'epoch': 3} {'type': 'loss', 'content': 0.13670377433300018, 'timestamp': '2025-09-10 02:49:01.742891', 'step': 19567, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 144], 'flops': 4271696270016}, 'timestamp': '2025-09-10 02:49:01.776074', 'step': 19567, 'epoch': 3} {'type': 'loss', 'content': 0.05515715479850769, 'timestamp': '2025-09-10 02:49:01.799584', 'step': 19568, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 144], 'flops': 4271696270016}, 'timestamp': '2025-09-10 02:49:01.831473', 'step': 19568, 'epoch': 3} {'type': 'loss', 'content': 0.010464225895702839, 'timestamp': '2025-09-10 02:49:01.836416', 'step': 19569, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 2847885091968}, 'timestamp': '2025-09-10 02:49:01.878567', 'step': 19569, 'epoch': 3} {'type': 'loss', 'content': 0.06381110101938248, 'timestamp': '2025-09-10 02:49:01.881276', 'step': 19570, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 2847885091968}, 'timestamp': '2025-09-10 02:49:01.918021', 'step': 19570, 'epoch': 3} {'type': 'loss', 'content': 0.1157960295677185, 'timestamp': '2025-09-10 02:49:01.921623', 'step': 19571, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 2847885091968}, 'timestamp': '2025-09-10 02:49:01.958140', 'step': 19571, 'epoch': 3} {'type': 'loss', 'content': 0.023198796436190605, 'timestamp': '2025-09-10 02:49:01.984158', 'step': 19572, 'epoch': 3} {'type': 'flops', 'content': [{'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1582003754624}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1898404492032}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1898404492032}, {'type': 'perplexity', 'in_batch_dim': [8, 144], 'batch_size': 8, 'flops': 2847606704256}, {'type': 'perplexity', 'in_batch_dim': [8, 64], 'batch_size': 8, 'flops': 1265603017216}, {'type': 'perplexity', 'in_batch_dim': [8, 112], 'batch_size': 8, 'flops': 2214805229440}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1898404492032}, {'type': 'perplexity', 'in_batch_dim': [8, 112], 'batch_size': 8, 'flops': 2214805229440}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1898404492032}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1898404492032}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1898404492032}, {'type': 'perplexity', 'in_batch_dim': [8, 112], 'batch_size': 8, 'flops': 2214805229440}, {'type': 'perplexity', 'in_batch_dim': [8, 112], 'batch_size': 8, 'flops': 2214805229440}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1582003754624}, {'type': 'perplexity', 'in_batch_dim': [8, 144], 'batch_size': 8, 'flops': 2847606704256}, {'type': 'perplexity', 'in_batch_dim': [8, 112], 'batch_size': 8, 'flops': 2214805229440}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1582003754624}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1898404492032}, {'type': 'perplexity', 'in_batch_dim': [8, 64], 'batch_size': 8, 'flops': 1265603017216}, {'type': 'perplexity', 'in_batch_dim': [8, 64], 'batch_size': 8, 'flops': 1265603017216}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1898404492032}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1582003754624}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1582003754624}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1582003754624}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1582003754624}, {'type': 'perplexity', 'in_batch_dim': [8, 64], 'batch_size': 8, 'flops': 1265603017216}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1898404492032}, {'type': 'perplexity', 'in_batch_dim': [8, 64], 'batch_size': 8, 'flops': 1265603017216}, {'type': 'perplexity', 'in_batch_dim': [8, 64], 'batch_size': 8, 'flops': 1265603017216}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1582003754624}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1582003754624}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1582003754624}, {'type': 'perplexity', 'in_batch_dim': [8, 112], 'batch_size': 8, 'flops': 2214805229440}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1582003754624}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1582003754624}, {'type': 'perplexity', 'in_batch_dim': [8, 160], 'batch_size': 8, 'flops': 3164007441664}, {'type': 'perplexity', 'in_batch_dim': [8, 64], 'batch_size': 8, 'flops': 1265603017216}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1898404492032}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1898404492032}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1898404492032}, {'type': 'perplexity', 'in_batch_dim': [8, 64], 'batch_size': 8, 'flops': 1265603017216}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1582003754624}, {'type': 'perplexity', 'in_batch_dim': [8, 64], 'batch_size': 8, 'flops': 1265603017216}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1582003754624}, {'type': 'perplexity', 'in_batch_dim': [8, 64], 'batch_size': 8, 'flops': 1265603017216}, {'type': 'perplexity', 'in_batch_dim': [8, 112], 'batch_size': 8, 'flops': 2214805229440}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1582003754624}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1898404492032}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1898404492032}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1898404492032}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1898404492032}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1898404492032}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1898404492032}, {'type': 'perplexity', 'in_batch_dim': [8, 64], 'batch_size': 8, 'flops': 1265603017216}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1898404492032}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1582003754624}, {'type': 'perplexity', 'in_batch_dim': [8, 64], 'batch_size': 8, 'flops': 1265603017216}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1898404492032}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1898404492032}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1898404492032}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1898404492032}, {'type': 'perplexity', 'in_batch_dim': [8, 128], 'batch_size': 8, 'flops': 2531205966848}, {'type': 'perplexity', 'in_batch_dim': [8, 112], 'batch_size': 8, 'flops': 2214805229440}, {'type': 'perplexity', 'in_batch_dim': [8, 64], 'batch_size': 8, 'flops': 1265603017216}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1582003754624}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1898404492032}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1582003754624}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1898404492032}, {'type': 'perplexity', 'in_batch_dim': [8, 64], 'batch_size': 8, 'flops': 1265603017216}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1898404492032}, {'type': 'perplexity', 'in_batch_dim': [8, 112], 'batch_size': 8, 'flops': 2214805229440}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1898404492032}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1582003754624}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1582003754624}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1582003754624}, {'type': 'perplexity', 'in_batch_dim': [8, 64], 'batch_size': 8, 'flops': 1265603017216}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1582003754624}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1898404492032}, {'type': 'perplexity', 'in_batch_dim': [8, 64], 'batch_size': 8, 'flops': 1265603017216}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1582003754624}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1898404492032}, {'type': 'perplexity', 'in_batch_dim': [8, 64], 'batch_size': 8, 'flops': 1265603017216}, {'type': 'perplexity', 'in_batch_dim': [8, 112], 'batch_size': 8, 'flops': 2214805229440}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1582003754624}, {'type': 'perplexity', 'in_batch_dim': [8, 144], 'batch_size': 8, 'flops': 2847606704256}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1582003754624}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1582003754624}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1582003754624}, {'type': 'perplexity', 'in_batch_dim': [8, 64], 'batch_size': 8, 'flops': 1265603017216}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1898404492032}, {'type': 'perplexity', 'in_batch_dim': [8, 112], 'batch_size': 8, 'flops': 2214805229440}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1898404492032}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1582003754624}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1582003754624}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1582003754624}, {'type': 'perplexity', 'in_batch_dim': [8, 112], 'batch_size': 8, 'flops': 2214805229440}, {'type': 'perplexity', 'in_batch_dim': [8, 64], 'batch_size': 8, 'flops': 1265603017216}, {'type': 'perplexity', 'in_batch_dim': [8, 112], 'batch_size': 8, 'flops': 2214805229440}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1582003754624}, {'type': 'perplexity', 'in_batch_dim': [8, 64], 'batch_size': 8, 'flops': 1265603017216}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1582003754624}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1582003754624}, {'type': 'perplexity', 'in_batch_dim': [8, 64], 'batch_size': 8, 'flops': 1265603017216}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1582003754624}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1582003754624}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1582003754624}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1898404492032}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1582003754624}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1582003754624}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1898404492032}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1898404492032}, {'type': 'perplexity', 'in_batch_dim': [8, 112], 'batch_size': 8, 'flops': 2214805229440}, {'type': 'perplexity', 'in_batch_dim': [8, 64], 'batch_size': 8, 'flops': 1265603017216}, {'type': 'perplexity', 'in_batch_dim': [8, 112], 'batch_size': 8, 'flops': 2214805229440}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1898404492032}, {'type': 'perplexity', 'in_batch_dim': [8, 112], 'batch_size': 8, 'flops': 2214805229440}, {'type': 'perplexity', 'in_batch_dim': [8, 128], 'batch_size': 8, 'flops': 2531205966848}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1582003754624}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1898404492032}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1898404492032}, {'type': 'perplexity', 'in_batch_dim': [8, 192], 'batch_size': 8, 'flops': 3796808916480}, {'type': 'perplexity', 'in_batch_dim': [8, 64], 'batch_size': 8, 'flops': 1265603017216}, {'type': 'perplexity', 'in_batch_dim': [8, 144], 'batch_size': 8, 'flops': 2847606704256}, {'type': 'perplexity', 'in_batch_dim': [8, 64], 'batch_size': 8, 'flops': 1265603017216}, {'type': 'perplexity', 'in_batch_dim': [8, 144], 'batch_size': 8, 'flops': 2847606704256}, {'type': 'perplexity', 'in_batch_dim': [8, 64], 'batch_size': 8, 'flops': 1265603017216}, {'type': 'perplexity', 'in_batch_dim': [8, 64], 'batch_size': 8, 'flops': 1265603017216}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1898404492032}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1898404492032}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1582003754624}, {'type': 'perplexity', 'in_batch_dim': [8, 128], 'batch_size': 8, 'flops': 2531205966848}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1898404492032}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1898404492032}, {'type': 'perplexity', 'in_batch_dim': [8, 112], 'batch_size': 8, 'flops': 2214805229440}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1582003754624}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1582003754624}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1898404492032}, {'type': 'perplexity', 'in_batch_dim': [8, 64], 'batch_size': 8, 'flops': 1265603017216}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1898404492032}, {'type': 'perplexity', 'in_batch_dim': [8, 112], 'batch_size': 8, 'flops': 2214805229440}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1898404492032}, {'type': 'perplexity', 'in_batch_dim': [8, 64], 'batch_size': 8, 'flops': 1265603017216}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1582003754624}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1898404492032}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1898404492032}, {'type': 'perplexity', 'in_batch_dim': [8, 64], 'batch_size': 8, 'flops': 1265603017216}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1898404492032}, {'type': 'perplexity', 'in_batch_dim': [8, 64], 'batch_size': 8, 'flops': 1265603017216}, {'type': 'perplexity', 'in_batch_dim': [8, 112], 'batch_size': 8, 'flops': 2214805229440}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1582003754624}, {'type': 'perplexity', 'in_batch_dim': [8, 128], 'batch_size': 8, 'flops': 2531205966848}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1582003754624}, {'type': 'perplexity', 'in_batch_dim': [8, 112], 'batch_size': 8, 'flops': 2214805229440}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1898404492032}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1582003754624}, {'type': 'perplexity', 'in_batch_dim': [8, 112], 'batch_size': 8, 'flops': 2214805229440}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1582003754624}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1898404492032}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1898404492032}, {'type': 'perplexity', 'in_batch_dim': [8, 128], 'batch_size': 8, 'flops': 2531205966848}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1898404492032}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1898404492032}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1582003754624}, {'type': 'perplexity', 'in_batch_dim': [8, 112], 'batch_size': 8, 'flops': 2214805229440}, {'type': 'perplexity', 'in_batch_dim': [8, 128], 'batch_size': 8, 'flops': 2531205966848}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1898404492032}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1582003754624}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1582003754624}, {'type': 'perplexity', 'in_batch_dim': [8, 128], 'batch_size': 8, 'flops': 2531205966848}, {'type': 'perplexity', 'in_batch_dim': [8, 112], 'batch_size': 8, 'flops': 2214805229440}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1582003754624}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1582003754624}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1898404492032}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1582003754624}, {'type': 'perplexity', 'in_batch_dim': [8, 64], 'batch_size': 8, 'flops': 1265603017216}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1582003754624}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1582003754624}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1582003754624}, {'type': 'perplexity', 'in_batch_dim': [8, 112], 'batch_size': 8, 'flops': 2214805229440}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1582003754624}, {'type': 'perplexity', 'in_batch_dim': [8, 64], 'batch_size': 8, 'flops': 1265603017216}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1582003754624}, {'type': 'perplexity', 'in_batch_dim': [8, 128], 'batch_size': 8, 'flops': 2531205966848}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1898404492032}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1898404492032}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1582003754624}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1898404492032}, {'type': 'perplexity', 'in_batch_dim': [8, 64], 'batch_size': 8, 'flops': 1265603017216}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1898404492032}, {'type': 'perplexity', 'in_batch_dim': [8, 112], 'batch_size': 8, 'flops': 2214805229440}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1898404492032}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1582003754624}, {'type': 'perplexity', 'in_batch_dim': [8, 112], 'batch_size': 8, 'flops': 2214805229440}, {'type': 'perplexity', 'in_batch_dim': [8, 112], 'batch_size': 8, 'flops': 2214805229440}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1898404492032}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1898404492032}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1898404492032}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1898404492032}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1582003754624}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1582003754624}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1582003754624}, {'type': 'perplexity', 'in_batch_dim': [8, 112], 'batch_size': 8, 'flops': 2214805229440}, {'type': 'perplexity', 'in_batch_dim': [8, 64], 'batch_size': 8, 'flops': 1265603017216}, {'type': 'perplexity', 'in_batch_dim': [8, 144], 'batch_size': 8, 'flops': 2847606704256}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1898404492032}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1898404492032}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1898404492032}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1898404492032}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1898404492032}, {'type': 'perplexity', 'in_batch_dim': [8, 112], 'batch_size': 8, 'flops': 2214805229440}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1582003754624}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1582003754624}, {'type': 'perplexity', 'in_batch_dim': [8, 64], 'batch_size': 8, 'flops': 1265603017216}, {'type': 'perplexity', 'in_batch_dim': [8, 112], 'batch_size': 8, 'flops': 2214805229440}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1582003754624}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1898404492032}, {'type': 'perplexity', 'in_batch_dim': [8, 64], 'batch_size': 8, 'flops': 1265603017216}, {'type': 'perplexity', 'in_batch_dim': [8, 112], 'batch_size': 8, 'flops': 2214805229440}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1898404492032}, {'type': 'perplexity', 'in_batch_dim': [8, 64], 'batch_size': 8, 'flops': 1265603017216}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1582003754624}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1582003754624}, {'type': 'perplexity', 'in_batch_dim': [8, 64], 'batch_size': 8, 'flops': 1265603017216}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1898404492032}, {'type': 'perplexity', 'in_batch_dim': [8, 64], 'batch_size': 8, 'flops': 1265603017216}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1582003754624}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1898404492032}, {'type': 'perplexity', 'in_batch_dim': [8, 112], 'batch_size': 8, 'flops': 2214805229440}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1582003754624}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1898404492032}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1582003754624}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1898404492032}, {'type': 'perplexity', 'in_batch_dim': [8, 112], 'batch_size': 8, 'flops': 2214805229440}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1898404492032}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1898404492032}, {'type': 'perplexity', 'in_batch_dim': [8, 64], 'batch_size': 8, 'flops': 1265603017216}, {'type': 'perplexity', 'in_batch_dim': [8, 64], 'batch_size': 8, 'flops': 1265603017216}, {'type': 'perplexity', 'in_batch_dim': [8, 128], 'batch_size': 8, 'flops': 2531205966848}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1582003754624}, {'type': 'perplexity', 'in_batch_dim': [8, 64], 'batch_size': 8, 'flops': 1265603017216}, {'type': 'perplexity', 'in_batch_dim': [8, 112], 'batch_size': 8, 'flops': 2214805229440}, {'type': 'perplexity', 'in_batch_dim': [8, 112], 'batch_size': 8, 'flops': 2214805229440}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1898404492032}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1582003754624}, {'type': 'perplexity', 'in_batch_dim': [8, 128], 'batch_size': 8, 'flops': 2531205966848}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1898404492032}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1582003754624}, {'type': 'perplexity', 'in_batch_dim': [8, 112], 'batch_size': 8, 'flops': 2214805229440}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1898404492032}, {'type': 'perplexity', 'in_batch_dim': [8, 64], 'batch_size': 8, 'flops': 1265603017216}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1582003754624}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1898404492032}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1898404492032}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1582003754624}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1582003754624}, {'type': 'perplexity', 'in_batch_dim': [8, 64], 'batch_size': 8, 'flops': 1265603017216}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1898404492032}, {'type': 'perplexity', 'in_batch_dim': [8, 112], 'batch_size': 8, 'flops': 2214805229440}, {'type': 'perplexity', 'in_batch_dim': [8, 64], 'batch_size': 8, 'flops': 1265603017216}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1898404492032}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1582003754624}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1582003754624}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1898404492032}, {'type': 'perplexity', 'in_batch_dim': [8, 112], 'batch_size': 8, 'flops': 2214805229440}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1898404492032}, {'type': 'perplexity', 'in_batch_dim': [8, 112], 'batch_size': 8, 'flops': 2214805229440}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1582003754624}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1582003754624}, {'type': 'perplexity', 'in_batch_dim': [8, 64], 'batch_size': 8, 'flops': 1265603017216}, {'type': 'perplexity', 'in_batch_dim': [8, 112], 'batch_size': 8, 'flops': 2214805229440}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1582003754624}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1898404492032}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1582003754624}, {'type': 'perplexity', 'in_batch_dim': [8, 112], 'batch_size': 8, 'flops': 2214805229440}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1898404492032}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1898404492032}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1582003754624}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1898404492032}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1582003754624}, {'type': 'perplexity', 'in_batch_dim': [8, 112], 'batch_size': 8, 'flops': 2214805229440}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1582003754624}, {'type': 'perplexity', 'in_batch_dim': [8, 64], 'batch_size': 8, 'flops': 1265603017216}, {'type': 'perplexity', 'in_batch_dim': [8, 64], 'batch_size': 8, 'flops': 1265603017216}, {'type': 'perplexity', 'in_batch_dim': [8, 48], 'batch_size': 8, 'flops': 949202279808}, {'type': 'perplexity', 'in_batch_dim': [8, 112], 'batch_size': 8, 'flops': 2214805229440}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1582003754624}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1898404492032}, {'type': 'perplexity', 'in_batch_dim': [8, 128], 'batch_size': 8, 'flops': 2531205966848}, {'type': 'perplexity', 'in_batch_dim': [8, 112], 'batch_size': 8, 'flops': 2214805229440}, {'type': 'perplexity', 'in_batch_dim': [8, 128], 'batch_size': 8, 'flops': 2531205966848}, {'type': 'perplexity', 'in_batch_dim': [8, 112], 'batch_size': 8, 'flops': 2214805229440}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1582003754624}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1898404492032}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1582003754624}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1582003754624}, {'type': 'perplexity', 'in_batch_dim': [8, 112], 'batch_size': 8, 'flops': 2214805229440}, {'type': 'perplexity', 'in_batch_dim': [8, 112], 'batch_size': 8, 'flops': 2214805229440}, {'type': 'perplexity', 'in_batch_dim': [8, 64], 'batch_size': 8, 'flops': 1265603017216}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1898404492032}, {'type': 'perplexity', 'in_batch_dim': [8, 112], 'batch_size': 8, 'flops': 2214805229440}, {'type': 'perplexity', 'in_batch_dim': [8, 64], 'batch_size': 8, 'flops': 1265603017216}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1898404492032}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1582003754624}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1898404492032}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1898404492032}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1898404492032}, {'type': 'perplexity', 'in_batch_dim': [8, 48], 'batch_size': 8, 'flops': 949202279808}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1898404492032}, {'type': 'perplexity', 'in_batch_dim': [8, 112], 'batch_size': 8, 'flops': 2214805229440}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1582003754624}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1898404492032}, {'type': 'perplexity', 'in_batch_dim': [8, 64], 'batch_size': 8, 'flops': 1265603017216}, {'type': 'perplexity', 'in_batch_dim': [8, 112], 'batch_size': 8, 'flops': 2214805229440}, {'type': 'perplexity', 'in_batch_dim': [8, 64], 'batch_size': 8, 'flops': 1265603017216}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1582003754624}, {'type': 'perplexity', 'in_batch_dim': [8, 64], 'batch_size': 8, 'flops': 1265603017216}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1582003754624}, {'type': 'perplexity', 'in_batch_dim': [8, 112], 'batch_size': 8, 'flops': 2214805229440}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1582003754624}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1582003754624}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1582003754624}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1582003754624}, {'type': 'perplexity', 'in_batch_dim': [8, 112], 'batch_size': 8, 'flops': 2214805229440}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1582003754624}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1898404492032}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1898404492032}, {'type': 'perplexity', 'in_batch_dim': [8, 112], 'batch_size': 8, 'flops': 2214805229440}, {'type': 'perplexity', 'in_batch_dim': [8, 144], 'batch_size': 8, 'flops': 2847606704256}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1582003754624}, {'type': 'perplexity', 'in_batch_dim': [8, 64], 'batch_size': 8, 'flops': 1265603017216}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1898404492032}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1898404492032}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1582003754624}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1898404492032}, {'type': 'perplexity', 'in_batch_dim': [8, 128], 'batch_size': 8, 'flops': 2531205966848}, {'type': 'perplexity', 'in_batch_dim': [8, 112], 'batch_size': 8, 'flops': 2214805229440}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1898404492032}, {'type': 'perplexity', 'in_batch_dim': [8, 64], 'batch_size': 8, 'flops': 1265603017216}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1898404492032}, {'type': 'perplexity', 'in_batch_dim': [8, 160], 'batch_size': 8, 'flops': 3164007441664}, {'type': 'perplexity', 'in_batch_dim': [8, 64], 'batch_size': 8, 'flops': 1265603017216}, {'type': 'perplexity', 'in_batch_dim': [8, 112], 'batch_size': 8, 'flops': 2214805229440}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1898404492032}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1898404492032}, {'type': 'perplexity', 'in_batch_dim': [8, 144], 'batch_size': 8, 'flops': 2847606704256}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1582003754624}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1898404492032}, {'type': 'perplexity', 'in_batch_dim': [8, 64], 'batch_size': 8, 'flops': 1265603017216}, {'type': 'perplexity', 'in_batch_dim': [8, 64], 'batch_size': 8, 'flops': 1265603017216}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1898404492032}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1898404492032}, {'type': 'perplexity', 'in_batch_dim': [8, 112], 'batch_size': 8, 'flops': 2214805229440}, {'type': 'perplexity', 'in_batch_dim': [8, 112], 'batch_size': 8, 'flops': 2214805229440}, {'type': 'perplexity', 'in_batch_dim': [8, 128], 'batch_size': 8, 'flops': 2531205966848}, {'type': 'perplexity', 'in_batch_dim': [8, 112], 'batch_size': 8, 'flops': 2214805229440}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1582003754624}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1582003754624}, {'type': 'perplexity', 'in_batch_dim': [8, 112], 'batch_size': 8, 'flops': 2214805229440}, {'type': 'perplexity', 'in_batch_dim': [8, 128], 'batch_size': 8, 'flops': 2531205966848}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1582003754624}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1582003754624}, {'type': 'perplexity', 'in_batch_dim': [8, 64], 'batch_size': 8, 'flops': 1265603017216}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1898404492032}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1582003754624}, {'type': 'perplexity', 'in_batch_dim': [8, 112], 'batch_size': 8, 'flops': 2214805229440}, {'type': 'perplexity', 'in_batch_dim': [8, 64], 'batch_size': 8, 'flops': 1265603017216}, {'type': 'perplexity', 'in_batch_dim': [8, 64], 'batch_size': 8, 'flops': 1265603017216}, {'type': 'perplexity', 'in_batch_dim': [8, 128], 'batch_size': 8, 'flops': 2531205966848}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1898404492032}, {'type': 'perplexity', 'in_batch_dim': [8, 112], 'batch_size': 8, 'flops': 2214805229440}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1582003754624}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1582003754624}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1582003754624}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1582003754624}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1582003754624}, {'type': 'perplexity', 'in_batch_dim': [8, 112], 'batch_size': 8, 'flops': 2214805229440}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1898404492032}, {'type': 'perplexity', 'in_batch_dim': [8, 112], 'batch_size': 8, 'flops': 2214805229440}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1898404492032}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1582003754624}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1582003754624}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1898404492032}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1898404492032}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1582003754624}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1582003754624}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1582003754624}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1582003754624}, {'type': 'perplexity', 'in_batch_dim': [8, 48], 'batch_size': 8, 'flops': 949202279808}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1582003754624}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1898404492032}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1582003754624}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1898404492032}, {'type': 'perplexity', 'in_batch_dim': [8, 112], 'batch_size': 8, 'flops': 2214805229440}, {'type': 'perplexity', 'in_batch_dim': [8, 128], 'batch_size': 8, 'flops': 2531205966848}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1898404492032}, {'type': 'perplexity', 'in_batch_dim': [8, 64], 'batch_size': 8, 'flops': 1265603017216}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1898404492032}, {'type': 'perplexity', 'in_batch_dim': [8, 64], 'batch_size': 8, 'flops': 1265603017216}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1582003754624}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1582003754624}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1582003754624}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1582003754624}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1898404492032}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1582003754624}, {'type': 'perplexity', 'in_batch_dim': [8, 112], 'batch_size': 8, 'flops': 2214805229440}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1898404492032}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1582003754624}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1898404492032}, {'type': 'perplexity', 'in_batch_dim': [8, 64], 'batch_size': 8, 'flops': 1265603017216}, {'type': 'perplexity', 'in_batch_dim': [8, 64], 'batch_size': 8, 'flops': 1265603017216}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1898404492032}, {'type': 'perplexity', 'in_batch_dim': [8, 64], 'batch_size': 8, 'flops': 1265603017216}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1582003754624}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1582003754624}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1582003754624}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1582003754624}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1582003754624}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1582003754624}, {'type': 'perplexity', 'in_batch_dim': [8, 64], 'batch_size': 8, 'flops': 1265603017216}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1582003754624}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1582003754624}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1898404492032}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1898404492032}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1898404492032}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1898404492032}, {'type': 'perplexity', 'in_batch_dim': [8, 128], 'batch_size': 8, 'flops': 2531205966848}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1582003754624}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1898404492032}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1898404492032}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1898404492032}, {'type': 'perplexity', 'in_batch_dim': [8, 64], 'batch_size': 8, 'flops': 1265603017216}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1898404492032}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1898404492032}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1582003754624}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1898404492032}, {'type': 'perplexity', 'in_batch_dim': [8, 112], 'batch_size': 8, 'flops': 2214805229440}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1898404492032}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1582003754624}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1898404492032}, {'type': 'perplexity', 'in_batch_dim': [8, 64], 'batch_size': 8, 'flops': 1265603017216}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1898404492032}, {'type': 'perplexity', 'in_batch_dim': [8, 112], 'batch_size': 8, 'flops': 2214805229440}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1898404492032}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1898404492032}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1582003754624}, {'type': 'perplexity', 'in_batch_dim': [8, 64], 'batch_size': 8, 'flops': 1265603017216}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1582003754624}, {'type': 'perplexity', 'in_batch_dim': [8, 128], 'batch_size': 8, 'flops': 2531205966848}, {'type': 'perplexity', 'in_batch_dim': [8, 64], 'batch_size': 8, 'flops': 1265603017216}, {'type': 'perplexity', 'in_batch_dim': [8, 128], 'batch_size': 8, 'flops': 2531205966848}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1898404492032}, {'type': 'perplexity', 'in_batch_dim': [8, 112], 'batch_size': 8, 'flops': 2214805229440}, {'type': 'perplexity', 'in_batch_dim': [8, 112], 'batch_size': 8, 'flops': 2214805229440}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1898404492032}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1898404492032}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1582003754624}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1582003754624}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1582003754624}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1898404492032}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1582003754624}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1582003754624}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1898404492032}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1582003754624}, {'type': 'perplexity', 'in_batch_dim': [8, 64], 'batch_size': 8, 'flops': 1265603017216}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1898404492032}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1898404492032}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1898404492032}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1898404492032}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1582003754624}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1898404492032}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1582003754624}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1582003754624}, {'type': 'perplexity', 'in_batch_dim': [8, 112], 'batch_size': 8, 'flops': 2214805229440}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1582003754624}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1898404492032}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1582003754624}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1582003754624}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1582003754624}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1898404492032}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1582003754624}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1898404492032}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1582003754624}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1582003754624}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1898404492032}, {'type': 'perplexity', 'in_batch_dim': [8, 112], 'batch_size': 8, 'flops': 2214805229440}, {'type': 'perplexity', 'in_batch_dim': [8, 128], 'batch_size': 8, 'flops': 2531205966848}, {'type': 'perplexity', 'in_batch_dim': [8, 112], 'batch_size': 8, 'flops': 2214805229440}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1582003754624}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1898404492032}, {'type': 'perplexity', 'in_batch_dim': [8, 64], 'batch_size': 8, 'flops': 1265603017216}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1898404492032}, {'type': 'perplexity', 'in_batch_dim': [8, 64], 'batch_size': 8, 'flops': 1265603017216}, {'type': 'perplexity', 'in_batch_dim': [8, 144], 'batch_size': 8, 'flops': 2847606704256}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1582003754624}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1898404492032}, {'type': 'perplexity', 'in_batch_dim': [8, 112], 'batch_size': 8, 'flops': 2214805229440}, {'type': 'perplexity', 'in_batch_dim': [8, 144], 'batch_size': 8, 'flops': 2847606704256}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1582003754624}, {'type': 'perplexity', 'in_batch_dim': [8, 112], 'batch_size': 8, 'flops': 2214805229440}, {'type': 'perplexity', 'in_batch_dim': [8, 64], 'batch_size': 8, 'flops': 1265603017216}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1582003754624}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1898404492032}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1898404492032}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1582003754624}, {'type': 'perplexity', 'in_batch_dim': [8, 112], 'batch_size': 8, 'flops': 2214805229440}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1582003754624}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1582003754624}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1582003754624}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1582003754624}, {'type': 'perplexity', 'in_batch_dim': [8, 112], 'batch_size': 8, 'flops': 2214805229440}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1898404492032}, {'type': 'perplexity', 'in_batch_dim': [8, 128], 'batch_size': 8, 'flops': 2531205966848}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1582003754624}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1582003754624}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1898404492032}, {'type': 'perplexity', 'in_batch_dim': [8, 64], 'batch_size': 8, 'flops': 1265603017216}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1898404492032}, {'type': 'perplexity', 'in_batch_dim': [8, 64], 'batch_size': 8, 'flops': 1265603017216}, {'type': 'perplexity', 'in_batch_dim': [8, 112], 'batch_size': 8, 'flops': 2214805229440}, {'type': 'perplexity', 'in_batch_dim': [8, 112], 'batch_size': 8, 'flops': 2214805229440}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1582003754624}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1898404492032}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1582003754624}, {'type': 'perplexity', 'in_batch_dim': [8, 64], 'batch_size': 8, 'flops': 1265603017216}, {'type': 'perplexity', 'in_batch_dim': [8, 64], 'batch_size': 8, 'flops': 1265603017216}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1898404492032}, {'type': 'perplexity', 'in_batch_dim': [8, 64], 'batch_size': 8, 'flops': 1265603017216}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1898404492032}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1898404492032}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1582003754624}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1898404492032}, {'type': 'perplexity', 'in_batch_dim': [8, 144], 'batch_size': 8, 'flops': 2847606704256}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1898404492032}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1582003754624}, {'type': 'perplexity', 'in_batch_dim': [8, 128], 'batch_size': 8, 'flops': 2531205966848}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1898404492032}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1898404492032}, {'type': 'perplexity', 'in_batch_dim': [8, 112], 'batch_size': 8, 'flops': 2214805229440}, {'type': 'perplexity', 'in_batch_dim': [8, 112], 'batch_size': 8, 'flops': 2214805229440}, {'type': 'perplexity', 'in_batch_dim': [8, 64], 'batch_size': 8, 'flops': 1265603017216}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1582003754624}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1898404492032}, {'type': 'perplexity', 'in_batch_dim': [8, 128], 'batch_size': 8, 'flops': 2531205966848}, {'type': 'perplexity', 'in_batch_dim': [8, 144], 'batch_size': 8, 'flops': 2847606704256}, {'type': 'perplexity', 'in_batch_dim': [8, 112], 'batch_size': 8, 'flops': 2214805229440}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1582003754624}, {'type': 'perplexity', 'in_batch_dim': [8, 64], 'batch_size': 8, 'flops': 1265603017216}, {'type': 'perplexity', 'in_batch_dim': [8, 64], 'batch_size': 8, 'flops': 1265603017216}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1898404492032}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1582003754624}, {'type': 'perplexity', 'in_batch_dim': [8, 64], 'batch_size': 8, 'flops': 1265603017216}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1898404492032}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1582003754624}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1898404492032}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1898404492032}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1582003754624}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1582003754624}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1582003754624}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1582003754624}, {'type': 'perplexity', 'in_batch_dim': [3, 48], 'batch_size': 8, 'flops': 949202279808}], 'timestamp': '2025-09-10 02:49:09.981047', 'step': 19572, 'epoch': 3} {'type': 'pplx', 'content': 11464.219968959926, 'timestamp': '2025-09-10 02:49:09.983899', 'step': 19572, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 128], 'flops': 3797092544000}, 'timestamp': '2025-09-10 02:49:10.013106', 'step': 19572, 'epoch': 3} {'type': 'loss', 'content': 0.08115195482969284, 'timestamp': '2025-09-10 02:49:10.015430', 'step': 19573, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 3322488817984}, 'timestamp': '2025-09-10 02:49:10.047666', 'step': 19573, 'epoch': 3} {'type': 'loss', 'content': 0.09747249633073807, 'timestamp': '2025-09-10 02:49:10.050349', 'step': 19574, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 128], 'flops': 3797092544000}, 'timestamp': '2025-09-10 02:49:10.081779', 'step': 19574, 'epoch': 3} {'type': 'loss', 'content': 0.08489041775465012, 'timestamp': '2025-09-10 02:49:10.084242', 'step': 19575, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 128], 'flops': 3797092544000}, 'timestamp': '2025-09-10 02:49:10.114967', 'step': 19575, 'epoch': 3} {'type': 'loss', 'content': 0.026129351928830147, 'timestamp': '2025-09-10 02:49:10.138271', 'step': 19576, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 3322488817984}, 'timestamp': '2025-09-10 02:49:10.168316', 'step': 19576, 'epoch': 3} {'type': 'loss', 'content': 0.042514901608228683, 'timestamp': '2025-09-10 02:49:10.170823', 'step': 19577, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 3322488817984}, 'timestamp': '2025-09-10 02:49:10.203022', 'step': 19577, 'epoch': 3} {'type': 'loss', 'content': 0.09730648994445801, 'timestamp': '2025-09-10 02:49:10.206116', 'step': 19578, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 144], 'flops': 4271696270016}, 'timestamp': '2025-09-10 02:49:10.238658', 'step': 19578, 'epoch': 3} {'type': 'loss', 'content': 0.04031019285321236, 'timestamp': '2025-09-10 02:49:10.241396', 'step': 19579, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 128], 'flops': 3797092544000}, 'timestamp': '2025-09-10 02:49:10.272878', 'step': 19579, 'epoch': 3} {'type': 'loss', 'content': 0.12339210510253906, 'timestamp': '2025-09-10 02:49:10.298651', 'step': 19580, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 3322488817984}, 'timestamp': '2025-09-10 02:49:10.328776', 'step': 19580, 'epoch': 3} {'type': 'loss', 'content': 0.02710595354437828, 'timestamp': '2025-09-10 02:49:10.331157', 'step': 19581, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 3322488817984}, 'timestamp': '2025-09-10 02:49:10.360240', 'step': 19581, 'epoch': 3} {'type': 'loss', 'content': 0.051446519792079926, 'timestamp': '2025-09-10 02:49:10.362351', 'step': 19582, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 128], 'flops': 3797092544000}, 'timestamp': '2025-09-10 02:49:10.392101', 'step': 19582, 'epoch': 3} {'type': 'loss', 'content': 0.10007069259881973, 'timestamp': '2025-09-10 02:49:10.393630', 'step': 19583, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 3322488817984}, 'timestamp': '2025-09-10 02:49:10.421852', 'step': 19583, 'epoch': 3} {'type': 'loss', 'content': 0.05128582566976547, 'timestamp': '2025-09-10 02:49:10.444703', 'step': 19584, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 3322488817984}, 'timestamp': '2025-09-10 02:49:10.473550', 'step': 19584, 'epoch': 3} {'type': 'loss', 'content': 0.031552527099847794, 'timestamp': '2025-09-10 02:49:10.476437', 'step': 19585, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 3322488817984}, 'timestamp': '2025-09-10 02:49:10.507265', 'step': 19585, 'epoch': 3} {'type': 'loss', 'content': 0.05272895097732544, 'timestamp': '2025-09-10 02:49:10.509305', 'step': 19586, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 144], 'flops': 4271696270016}, 'timestamp': '2025-09-10 02:49:10.538429', 'step': 19586, 'epoch': 3} {'type': 'loss', 'content': 0.028617143630981445, 'timestamp': '2025-09-10 02:49:10.541141', 'step': 19587, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 3322488817984}, 'timestamp': '2025-09-10 02:49:10.570002', 'step': 19587, 'epoch': 3} {'type': 'loss', 'content': 0.0658135712146759, 'timestamp': '2025-09-10 02:49:10.594917', 'step': 19588, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 128], 'flops': 3797092544000}, 'timestamp': '2025-09-10 02:49:10.624566', 'step': 19588, 'epoch': 3} {'type': 'loss', 'content': 0.07484565675258636, 'timestamp': '2025-09-10 02:49:10.626280', 'step': 19589, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 144], 'flops': 4271696270016}, 'timestamp': '2025-09-10 02:49:10.655279', 'step': 19589, 'epoch': 3} {'type': 'loss', 'content': 0.05934762582182884, 'timestamp': '2025-09-10 02:49:10.657959', 'step': 19590, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 3322488817984}, 'timestamp': '2025-09-10 02:49:10.688704', 'step': 19590, 'epoch': 3} {'type': 'loss', 'content': 0.06926115602254868, 'timestamp': '2025-09-10 02:49:10.690794', 'step': 19591, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 3322488817984}, 'timestamp': '2025-09-10 02:49:10.720238', 'step': 19591, 'epoch': 3} {'type': 'loss', 'content': 0.1498461365699768, 'timestamp': '2025-09-10 02:49:10.743103', 'step': 19592, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 144], 'flops': 4271696270016}, 'timestamp': '2025-09-10 02:49:10.771438', 'step': 19592, 'epoch': 3} {'type': 'loss', 'content': 0.04377053305506706, 'timestamp': '2025-09-10 02:49:10.773389', 'step': 19593, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 128], 'flops': 3797092544000}, 'timestamp': '2025-09-10 02:49:10.804080', 'step': 19593, 'epoch': 3} {'type': 'loss', 'content': 0.06257539987564087, 'timestamp': '2025-09-10 02:49:10.805704', 'step': 19594, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 144], 'flops': 4271696270016}, 'timestamp': '2025-09-10 02:49:10.835429', 'step': 19594, 'epoch': 3} {'type': 'loss', 'content': 0.12387106567621231, 'timestamp': '2025-09-10 02:49:10.838140', 'step': 19595, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 3322488817984}, 'timestamp': '2025-09-10 02:49:10.867163', 'step': 19595, 'epoch': 3} {'type': 'loss', 'content': 0.0769641250371933, 'timestamp': '2025-09-10 02:49:10.894563', 'step': 19596, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 2847885091968}, 'timestamp': '2025-09-10 02:49:10.930764', 'step': 19596, 'epoch': 3} {'type': 'loss', 'content': 0.12994582951068878, 'timestamp': '2025-09-10 02:49:10.935273', 'step': 19597, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 3322488817984}, 'timestamp': '2025-09-10 02:49:10.968271', 'step': 19597, 'epoch': 3} {'type': 'loss', 'content': 0.07285861670970917, 'timestamp': '2025-09-10 02:49:10.971010', 'step': 19598, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 3322488817984}, 'timestamp': '2025-09-10 02:49:11.007845', 'step': 19598, 'epoch': 3} {'type': 'loss', 'content': 0.1462104171514511, 'timestamp': '2025-09-10 02:49:11.010747', 'step': 19599, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 128], 'flops': 3797092544000}, 'timestamp': '2025-09-10 02:49:11.048674', 'step': 19599, 'epoch': 3} {'type': 'loss', 'content': 0.06535067409276962, 'timestamp': '2025-09-10 02:49:11.074132', 'step': 19600, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 128], 'flops': 3797092544000}, 'timestamp': '2025-09-10 02:49:11.111854', 'step': 19600, 'epoch': 3} {'type': 'loss', 'content': 0.026751644909381866, 'timestamp': '2025-09-10 02:49:11.114741', 'step': 19601, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 3322488817984}, 'timestamp': '2025-09-10 02:49:11.147476', 'step': 19601, 'epoch': 3} {'type': 'loss', 'content': 0.09035375714302063, 'timestamp': '2025-09-10 02:49:11.150926', 'step': 19602, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 2847885091968}, 'timestamp': '2025-09-10 02:49:11.181889', 'step': 19602, 'epoch': 3} {'type': 'loss', 'content': 0.006846393458545208, 'timestamp': '2025-09-10 02:49:11.183923', 'step': 19603, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 2847885091968}, 'timestamp': '2025-09-10 02:49:11.212636', 'step': 19603, 'epoch': 3} {'type': 'loss', 'content': 0.07822258025407791, 'timestamp': '2025-09-10 02:49:11.235946', 'step': 19604, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 128], 'flops': 3797092544000}, 'timestamp': '2025-09-10 02:49:11.264351', 'step': 19604, 'epoch': 3} {'type': 'loss', 'content': 0.042037371546030045, 'timestamp': '2025-09-10 02:49:11.266392', 'step': 19605, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 3322488817984}, 'timestamp': '2025-09-10 02:49:11.294806', 'step': 19605, 'epoch': 3} {'type': 'loss', 'content': 0.08817719668149948, 'timestamp': '2025-09-10 02:49:11.296691', 'step': 19606, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 144], 'flops': 4271696270016}, 'timestamp': '2025-09-10 02:49:11.325554', 'step': 19606, 'epoch': 3} {'type': 'loss', 'content': 0.03787744417786598, 'timestamp': '2025-09-10 02:49:11.328273', 'step': 19607, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 3322488817984}, 'timestamp': '2025-09-10 02:49:11.356678', 'step': 19607, 'epoch': 3} {'type': 'loss', 'content': 0.12229888886213303, 'timestamp': '2025-09-10 02:49:11.380324', 'step': 19608, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 3322488817984}, 'timestamp': '2025-09-10 02:49:11.425642', 'step': 19608, 'epoch': 3} {'type': 'loss', 'content': 0.03141029551625252, 'timestamp': '2025-09-10 02:49:11.429006', 'step': 19609, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 144], 'flops': 4271696270016}, 'timestamp': '2025-09-10 02:49:11.465385', 'step': 19609, 'epoch': 3} {'type': 'loss', 'content': 0.025731517001986504, 'timestamp': '2025-09-10 02:49:11.468051', 'step': 19610, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 2847885091968}, 'timestamp': '2025-09-10 02:49:11.497826', 'step': 19610, 'epoch': 3} {'type': 'loss', 'content': 0.08401501178741455, 'timestamp': '2025-09-10 02:49:11.500278', 'step': 19611, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 2847885091968}, 'timestamp': '2025-09-10 02:49:11.530173', 'step': 19611, 'epoch': 3} {'type': 'loss', 'content': 0.029923787340521812, 'timestamp': '2025-09-10 02:49:11.554850', 'step': 19612, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 2847885091968}, 'timestamp': '2025-09-10 02:49:11.585352', 'step': 19612, 'epoch': 3} {'type': 'loss', 'content': 0.028950819745659828, 'timestamp': '2025-09-10 02:49:11.587946', 'step': 19613, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 3322488817984}, 'timestamp': '2025-09-10 02:49:11.618784', 'step': 19613, 'epoch': 3} {'type': 'loss', 'content': 0.10676499456167221, 'timestamp': '2025-09-10 02:49:11.621193', 'step': 19614, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 3322488817984}, 'timestamp': '2025-09-10 02:49:11.651010', 'step': 19614, 'epoch': 3} {'type': 'loss', 'content': 0.03593619912862778, 'timestamp': '2025-09-10 02:49:11.653373', 'step': 19615, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 160], 'flops': 4746299996032}, 'timestamp': '2025-09-10 02:49:11.683053', 'step': 19615, 'epoch': 3} {'type': 'loss', 'content': 0.07306338846683502, 'timestamp': '2025-09-10 02:49:11.706841', 'step': 19616, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 128], 'flops': 3797092544000}, 'timestamp': '2025-09-10 02:49:11.736639', 'step': 19616, 'epoch': 3} {'type': 'loss', 'content': 0.034714724868535995, 'timestamp': '2025-09-10 02:49:11.740285', 'step': 19617, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 80], 'flops': 2373281365952}, 'timestamp': '2025-09-10 02:49:11.769550', 'step': 19617, 'epoch': 3} {'type': 'loss', 'content': 0.10872650891542435, 'timestamp': '2025-09-10 02:49:11.771926', 'step': 19618, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 2847885091968}, 'timestamp': '2025-09-10 02:49:11.801787', 'step': 19618, 'epoch': 3} {'type': 'loss', 'content': 0.0819537341594696, 'timestamp': '2025-09-10 02:49:11.804278', 'step': 19619, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 3322488817984}, 'timestamp': '2025-09-10 02:49:11.834042', 'step': 19619, 'epoch': 3} {'type': 'loss', 'content': 0.15275628864765167, 'timestamp': '2025-09-10 02:49:11.857557', 'step': 19620, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 2847885091968}, 'timestamp': '2025-09-10 02:49:11.888199', 'step': 19620, 'epoch': 3} {'type': 'loss', 'content': 0.07627778500318527, 'timestamp': '2025-09-10 02:49:11.890614', 'step': 19621, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 3322488817984}, 'timestamp': '2025-09-10 02:49:11.921063', 'step': 19621, 'epoch': 3} {'type': 'loss', 'content': 0.045056845992803574, 'timestamp': '2025-09-10 02:49:11.923085', 'step': 19622, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 128], 'flops': 3797092544000}, 'timestamp': '2025-09-10 02:49:11.951096', 'step': 19622, 'epoch': 3} {'type': 'loss', 'content': 0.06699313223361969, 'timestamp': '2025-09-10 02:49:11.953523', 'step': 19623, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 128], 'flops': 3797092544000}, 'timestamp': '2025-09-10 02:49:11.982571', 'step': 19623, 'epoch': 3} {'type': 'loss', 'content': 0.05833971127867699, 'timestamp': '2025-09-10 02:49:12.006359', 'step': 19624, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 3322488817984}, 'timestamp': '2025-09-10 02:49:12.036390', 'step': 19624, 'epoch': 3} {'type': 'loss', 'content': 0.06787977367639542, 'timestamp': '2025-09-10 02:49:12.038358', 'step': 19625, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 128], 'flops': 3797092544000}, 'timestamp': '2025-09-10 02:49:12.068126', 'step': 19625, 'epoch': 3} {'type': 'loss', 'content': 0.060198090970516205, 'timestamp': '2025-09-10 02:49:12.071579', 'step': 19626, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 3322488817984}, 'timestamp': '2025-09-10 02:49:12.104786', 'step': 19626, 'epoch': 3} {'type': 'loss', 'content': 0.022636812180280685, 'timestamp': '2025-09-10 02:49:12.107079', 'step': 19627, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 2847885091968}, 'timestamp': '2025-09-10 02:49:12.136274', 'step': 19627, 'epoch': 3} {'type': 'loss', 'content': 0.07170768827199936, 'timestamp': '2025-09-10 02:49:12.160734', 'step': 19628, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 160], 'flops': 4746299996032}, 'timestamp': '2025-09-10 02:49:12.191671', 'step': 19628, 'epoch': 3} {'type': 'loss', 'content': 0.019430844113230705, 'timestamp': '2025-09-10 02:49:12.194083', 'step': 19629, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 3322488817984}, 'timestamp': '2025-09-10 02:49:12.223599', 'step': 19629, 'epoch': 3} {'type': 'loss', 'content': 0.054274704307317734, 'timestamp': '2025-09-10 02:49:12.225570', 'step': 19630, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 128], 'flops': 3797092544000}, 'timestamp': '2025-09-10 02:49:12.254605', 'step': 19630, 'epoch': 3} {'type': 'loss', 'content': 0.11103339493274689, 'timestamp': '2025-09-10 02:49:12.256972', 'step': 19631, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 3322488817984}, 'timestamp': '2025-09-10 02:49:12.286865', 'step': 19631, 'epoch': 3} {'type': 'loss', 'content': 0.08329828083515167, 'timestamp': '2025-09-10 02:49:12.310280', 'step': 19632, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 3322488817984}, 'timestamp': '2025-09-10 02:49:12.339890', 'step': 19632, 'epoch': 3} {'type': 'loss', 'content': 0.11235412210226059, 'timestamp': '2025-09-10 02:49:12.342099', 'step': 19633, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 128], 'flops': 3797092544000}, 'timestamp': '2025-09-10 02:49:12.371418', 'step': 19633, 'epoch': 3} {'type': 'loss', 'content': 0.22416125237941742, 'timestamp': '2025-09-10 02:49:12.373716', 'step': 19634, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 128], 'flops': 3797092544000}, 'timestamp': '2025-09-10 02:49:12.404611', 'step': 19634, 'epoch': 3} {'type': 'loss', 'content': 0.1403542309999466, 'timestamp': '2025-09-10 02:49:12.407212', 'step': 19635, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 3322488817984}, 'timestamp': '2025-09-10 02:49:12.436931', 'step': 19635, 'epoch': 3} {'type': 'loss', 'content': 0.1383066028356552, 'timestamp': '2025-09-10 02:49:12.460335', 'step': 19636, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 3322488817984}, 'timestamp': '2025-09-10 02:49:12.490433', 'step': 19636, 'epoch': 3} {'type': 'loss', 'content': 0.09055580198764801, 'timestamp': '2025-09-10 02:49:12.492685', 'step': 19637, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 3322488817984}, 'timestamp': '2025-09-10 02:49:12.522536', 'step': 19637, 'epoch': 3} {'type': 'loss', 'content': 0.038415972143411636, 'timestamp': '2025-09-10 02:49:12.524625', 'step': 19638, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 144], 'flops': 4271696270016}, 'timestamp': '2025-09-10 02:49:12.553852', 'step': 19638, 'epoch': 3} {'type': 'loss', 'content': 0.08838550746440887, 'timestamp': '2025-09-10 02:49:12.556415', 'step': 19639, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 2847885091968}, 'timestamp': '2025-09-10 02:49:12.585685', 'step': 19639, 'epoch': 3} {'type': 'loss', 'content': 0.02193053625524044, 'timestamp': '2025-09-10 02:49:12.609559', 'step': 19640, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 3322488817984}, 'timestamp': '2025-09-10 02:49:12.639923', 'step': 19640, 'epoch': 3} {'type': 'loss', 'content': 0.09883376955986023, 'timestamp': '2025-09-10 02:49:12.642262', 'step': 19641, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 144], 'flops': 4271696270016}, 'timestamp': '2025-09-10 02:49:12.671950', 'step': 19641, 'epoch': 3} {'type': 'loss', 'content': 0.05152449011802673, 'timestamp': '2025-09-10 02:49:12.674642', 'step': 19642, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 3322488817984}, 'timestamp': '2025-09-10 02:49:12.706202', 'step': 19642, 'epoch': 3} {'type': 'loss', 'content': 0.05022847652435303, 'timestamp': '2025-09-10 02:49:12.708713', 'step': 19643, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 2847885091968}, 'timestamp': '2025-09-10 02:49:12.738516', 'step': 19643, 'epoch': 3} {'type': 'loss', 'content': 0.01850401982665062, 'timestamp': '2025-09-10 02:49:12.761709', 'step': 19644, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 3322488817984}, 'timestamp': '2025-09-10 02:49:12.791390', 'step': 19644, 'epoch': 3} {'type': 'loss', 'content': 0.03640143945813179, 'timestamp': '2025-09-10 02:49:12.793524', 'step': 19645, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 2847885091968}, 'timestamp': '2025-09-10 02:49:12.822162', 'step': 19645, 'epoch': 3} {'type': 'loss', 'content': 0.15499013662338257, 'timestamp': '2025-09-10 02:49:12.824979', 'step': 19646, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 3322488817984}, 'timestamp': '2025-09-10 02:49:12.854974', 'step': 19646, 'epoch': 3} {'type': 'loss', 'content': 0.11252008378505707, 'timestamp': '2025-09-10 02:49:12.857274', 'step': 19647, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 80], 'flops': 2373281365952}, 'timestamp': '2025-09-10 02:49:12.886721', 'step': 19647, 'epoch': 3} {'type': 'loss', 'content': 0.16980379819869995, 'timestamp': '2025-09-10 02:49:12.910181', 'step': 19648, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 2847885091968}, 'timestamp': '2025-09-10 02:49:12.939406', 'step': 19648, 'epoch': 3} {'type': 'loss', 'content': 0.03792038559913635, 'timestamp': '2025-09-10 02:49:12.941724', 'step': 19649, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 2847885091968}, 'timestamp': '2025-09-10 02:49:12.971266', 'step': 19649, 'epoch': 3} {'type': 'loss', 'content': 0.08779694885015488, 'timestamp': '2025-09-10 02:49:12.974345', 'step': 19650, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 2847885091968}, 'timestamp': '2025-09-10 02:49:13.020876', 'step': 19650, 'epoch': 3} {'type': 'loss', 'content': 0.10002261400222778, 'timestamp': '2025-09-10 02:49:13.023261', 'step': 19651, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 3322488817984}, 'timestamp': '2025-09-10 02:49:13.052816', 'step': 19651, 'epoch': 3} {'type': 'loss', 'content': 0.029407992959022522, 'timestamp': '2025-09-10 02:49:13.076495', 'step': 19652, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 3322488817984}, 'timestamp': '2025-09-10 02:49:13.106785', 'step': 19652, 'epoch': 3} {'type': 'loss', 'content': 0.13131725788116455, 'timestamp': '2025-09-10 02:49:13.108940', 'step': 19653, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 2847885091968}, 'timestamp': '2025-09-10 02:49:13.138665', 'step': 19653, 'epoch': 3} {'type': 'loss', 'content': 0.05598960444331169, 'timestamp': '2025-09-10 02:49:13.140761', 'step': 19654, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 80], 'flops': 2373281365952}, 'timestamp': '2025-09-10 02:49:13.172112', 'step': 19654, 'epoch': 3} {'type': 'loss', 'content': 0.1621546745300293, 'timestamp': '2025-09-10 02:49:13.174682', 'step': 19655, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 2847885091968}, 'timestamp': '2025-09-10 02:49:13.205026', 'step': 19655, 'epoch': 3} {'type': 'loss', 'content': 0.06407657265663147, 'timestamp': '2025-09-10 02:49:13.228723', 'step': 19656, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 2847885091968}, 'timestamp': '2025-09-10 02:49:13.258764', 'step': 19656, 'epoch': 3} {'type': 'loss', 'content': 0.03195677697658539, 'timestamp': '2025-09-10 02:49:13.261063', 'step': 19657, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 128], 'flops': 3797092544000}, 'timestamp': '2025-09-10 02:49:13.290626', 'step': 19657, 'epoch': 3} {'type': 'loss', 'content': 0.042496681213378906, 'timestamp': '2025-09-10 02:49:13.292849', 'step': 19658, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 2847885091968}, 'timestamp': '2025-09-10 02:49:13.321872', 'step': 19658, 'epoch': 3} {'type': 'loss', 'content': 0.02438659779727459, 'timestamp': '2025-09-10 02:49:13.323913', 'step': 19659, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 3322488817984}, 'timestamp': '2025-09-10 02:49:13.352208', 'step': 19659, 'epoch': 3} {'type': 'loss', 'content': 0.06118510663509369, 'timestamp': '2025-09-10 02:49:13.375701', 'step': 19660, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 3322488817984}, 'timestamp': '2025-09-10 02:49:13.405243', 'step': 19660, 'epoch': 3} {'type': 'loss', 'content': 0.10916073620319366, 'timestamp': '2025-09-10 02:49:13.407561', 'step': 19661, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 2847885091968}, 'timestamp': '2025-09-10 02:49:13.437329', 'step': 19661, 'epoch': 3} {'type': 'loss', 'content': 0.027611330151557922, 'timestamp': '2025-09-10 02:49:13.439388', 'step': 19662, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 2847885091968}, 'timestamp': '2025-09-10 02:49:13.467808', 'step': 19662, 'epoch': 3} {'type': 'loss', 'content': 0.0900806337594986, 'timestamp': '2025-09-10 02:49:13.470074', 'step': 19663, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 80], 'flops': 2373281365952}, 'timestamp': '2025-09-10 02:49:13.499482', 'step': 19663, 'epoch': 3} {'type': 'loss', 'content': 0.008971878327429295, 'timestamp': '2025-09-10 02:49:13.523277', 'step': 19664, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 80], 'flops': 2373281365952}, 'timestamp': '2025-09-10 02:49:13.552206', 'step': 19664, 'epoch': 3} {'type': 'loss', 'content': 0.060434333980083466, 'timestamp': '2025-09-10 02:49:13.554277', 'step': 19665, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 2847885091968}, 'timestamp': '2025-09-10 02:49:13.583165', 'step': 19665, 'epoch': 3} {'type': 'loss', 'content': 0.11179287731647491, 'timestamp': '2025-09-10 02:49:13.585860', 'step': 19666, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 2847885091968}, 'timestamp': '2025-09-10 02:49:13.617588', 'step': 19666, 'epoch': 3} {'type': 'loss', 'content': 0.0671834945678711, 'timestamp': '2025-09-10 02:49:13.619759', 'step': 19667, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 3322488817984}, 'timestamp': '2025-09-10 02:49:13.648984', 'step': 19667, 'epoch': 3} {'type': 'loss', 'content': 0.03733702376484871, 'timestamp': '2025-09-10 02:49:13.672513', 'step': 19668, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 144], 'flops': 4271696270016}, 'timestamp': '2025-09-10 02:49:13.702663', 'step': 19668, 'epoch': 3} {'type': 'loss', 'content': 0.06225065886974335, 'timestamp': '2025-09-10 02:49:13.705104', 'step': 19669, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 2847885091968}, 'timestamp': '2025-09-10 02:49:13.735050', 'step': 19669, 'epoch': 3} {'type': 'loss', 'content': 0.012208675034344196, 'timestamp': '2025-09-10 02:49:13.737532', 'step': 19670, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 128], 'flops': 3797092544000}, 'timestamp': '2025-09-10 02:49:13.766204', 'step': 19670, 'epoch': 3} {'type': 'loss', 'content': 0.13043932616710663, 'timestamp': '2025-09-10 02:49:13.768800', 'step': 19671, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 80], 'flops': 2373281365952}, 'timestamp': '2025-09-10 02:49:13.798900', 'step': 19671, 'epoch': 3} {'type': 'loss', 'content': 0.1340387910604477, 'timestamp': '2025-09-10 02:49:13.822429', 'step': 19672, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 3322488817984}, 'timestamp': '2025-09-10 02:49:13.866040', 'step': 19672, 'epoch': 3} {'type': 'loss', 'content': 0.1334572285413742, 'timestamp': '2025-09-10 02:49:13.869182', 'step': 19673, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 3322488817984}, 'timestamp': '2025-09-10 02:49:13.899098', 'step': 19673, 'epoch': 3} {'type': 'loss', 'content': 0.08864980190992355, 'timestamp': '2025-09-10 02:49:13.901478', 'step': 19674, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 3322488817984}, 'timestamp': '2025-09-10 02:49:13.931081', 'step': 19674, 'epoch': 3} {'type': 'loss', 'content': 0.10939929634332657, 'timestamp': '2025-09-10 02:49:13.933321', 'step': 19675, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 3322488817984}, 'timestamp': '2025-09-10 02:49:13.962182', 'step': 19675, 'epoch': 3} {'type': 'loss', 'content': 0.07696477323770523, 'timestamp': '2025-09-10 02:49:13.985541', 'step': 19676, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 3322488817984}, 'timestamp': '2025-09-10 02:49:14.015254', 'step': 19676, 'epoch': 3} {'type': 'loss', 'content': 0.0716254934668541, 'timestamp': '2025-09-10 02:49:14.018685', 'step': 19677, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 128], 'flops': 3797092544000}, 'timestamp': '2025-09-10 02:49:14.051481', 'step': 19677, 'epoch': 3} {'type': 'loss', 'content': 0.05872470885515213, 'timestamp': '2025-09-10 02:49:14.054392', 'step': 19678, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 80], 'flops': 2373281365952}, 'timestamp': '2025-09-10 02:49:14.084258', 'step': 19678, 'epoch': 3} {'type': 'loss', 'content': 0.041990794241428375, 'timestamp': '2025-09-10 02:49:14.086509', 'step': 19679, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 3322488817984}, 'timestamp': '2025-09-10 02:49:14.116269', 'step': 19679, 'epoch': 3} {'type': 'loss', 'content': 0.09520267695188522, 'timestamp': '2025-09-10 02:49:14.139575', 'step': 19680, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 2847885091968}, 'timestamp': '2025-09-10 02:49:14.169663', 'step': 19680, 'epoch': 3} {'type': 'loss', 'content': 0.10171936452388763, 'timestamp': '2025-09-10 02:49:14.171856', 'step': 19681, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 2847885091968}, 'timestamp': '2025-09-10 02:49:14.201066', 'step': 19681, 'epoch': 3} {'type': 'loss', 'content': 0.007707738783210516, 'timestamp': '2025-09-10 02:49:14.203409', 'step': 19682, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 2847885091968}, 'timestamp': '2025-09-10 02:49:14.232672', 'step': 19682, 'epoch': 3} {'type': 'loss', 'content': 0.013988726772367954, 'timestamp': '2025-09-10 02:49:14.234523', 'step': 19683, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 144], 'flops': 4271696270016}, 'timestamp': '2025-09-10 02:49:14.264657', 'step': 19683, 'epoch': 3} {'type': 'loss', 'content': 0.06949158757925034, 'timestamp': '2025-09-10 02:49:14.288141', 'step': 19684, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 2847885091968}, 'timestamp': '2025-09-10 02:49:14.317801', 'step': 19684, 'epoch': 3} {'type': 'loss', 'content': 0.061229441314935684, 'timestamp': '2025-09-10 02:49:14.320258', 'step': 19685, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 144], 'flops': 4271696270016}, 'timestamp': '2025-09-10 02:49:14.349629', 'step': 19685, 'epoch': 3} {'type': 'loss', 'content': 0.11608397215604782, 'timestamp': '2025-09-10 02:49:14.352202', 'step': 19686, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 2847885091968}, 'timestamp': '2025-09-10 02:49:14.381905', 'step': 19686, 'epoch': 3} {'type': 'loss', 'content': 0.019550155848264694, 'timestamp': '2025-09-10 02:49:14.384345', 'step': 19687, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 144], 'flops': 4271696270016}, 'timestamp': '2025-09-10 02:49:14.413927', 'step': 19687, 'epoch': 3} {'type': 'loss', 'content': 0.09340044856071472, 'timestamp': '2025-09-10 02:49:14.437469', 'step': 19688, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 3322488817984}, 'timestamp': '2025-09-10 02:49:14.467515', 'step': 19688, 'epoch': 3} {'type': 'loss', 'content': 0.07370664924383163, 'timestamp': '2025-09-10 02:49:14.469729', 'step': 19689, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 3322488817984}, 'timestamp': '2025-09-10 02:49:14.499580', 'step': 19689, 'epoch': 3} {'type': 'loss', 'content': 0.10090017318725586, 'timestamp': '2025-09-10 02:49:14.501719', 'step': 19690, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 2847885091968}, 'timestamp': '2025-09-10 02:49:14.532553', 'step': 19690, 'epoch': 3} {'type': 'loss', 'content': 0.02167741395533085, 'timestamp': '2025-09-10 02:49:14.534779', 'step': 19691, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 2847885091968}, 'timestamp': '2025-09-10 02:49:14.564296', 'step': 19691, 'epoch': 3} {'type': 'loss', 'content': 0.044904135167598724, 'timestamp': '2025-09-10 02:49:14.587964', 'step': 19692, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 2847885091968}, 'timestamp': '2025-09-10 02:49:14.617808', 'step': 19692, 'epoch': 3} {'type': 'loss', 'content': 0.1130337342619896, 'timestamp': '2025-09-10 02:49:14.620594', 'step': 19693, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 128], 'flops': 3797092544000}, 'timestamp': '2025-09-10 02:49:14.650094', 'step': 19693, 'epoch': 3} {'type': 'loss', 'content': 0.10498671978712082, 'timestamp': '2025-09-10 02:49:14.652354', 'step': 19694, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 128], 'flops': 3797092544000}, 'timestamp': '2025-09-10 02:49:14.681432', 'step': 19694, 'epoch': 3} {'type': 'loss', 'content': 0.10253194719552994, 'timestamp': '2025-09-10 02:49:14.683778', 'step': 19695, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 2847885091968}, 'timestamp': '2025-09-10 02:49:14.713920', 'step': 19695, 'epoch': 3} {'type': 'loss', 'content': 0.024207662791013718, 'timestamp': '2025-09-10 02:49:14.737055', 'step': 19696, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 128], 'flops': 3797092544000}, 'timestamp': '2025-09-10 02:49:14.766297', 'step': 19696, 'epoch': 3} {'type': 'loss', 'content': 0.08166859298944473, 'timestamp': '2025-09-10 02:49:14.768753', 'step': 19697, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 3322488817984}, 'timestamp': '2025-09-10 02:49:14.797879', 'step': 19697, 'epoch': 3} {'type': 'loss', 'content': 0.03277308866381645, 'timestamp': '2025-09-10 02:49:14.800497', 'step': 19698, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 2847885091968}, 'timestamp': '2025-09-10 02:49:14.832040', 'step': 19698, 'epoch': 3} {'type': 'loss', 'content': 0.08189120888710022, 'timestamp': '2025-09-10 02:49:14.834468', 'step': 19699, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 3322488817984}, 'timestamp': '2025-09-10 02:49:14.864363', 'step': 19699, 'epoch': 3} {'type': 'loss', 'content': 0.010527731850743294, 'timestamp': '2025-09-10 02:49:14.888755', 'step': 19700, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 80], 'flops': 2373281365952}, 'timestamp': '2025-09-10 02:49:14.918324', 'step': 19700, 'epoch': 3} {'type': 'loss', 'content': 0.11558648943901062, 'timestamp': '2025-09-10 02:49:14.920334', 'step': 19701, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 3322488817984}, 'timestamp': '2025-09-10 02:49:14.949089', 'step': 19701, 'epoch': 3} {'type': 'loss', 'content': 0.11090834438800812, 'timestamp': '2025-09-10 02:49:14.951246', 'step': 19702, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 3322488817984}, 'timestamp': '2025-09-10 02:49:14.980857', 'step': 19702, 'epoch': 3} {'type': 'loss', 'content': 0.0434185154736042, 'timestamp': '2025-09-10 02:49:14.988321', 'step': 19703, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 2847885091968}, 'timestamp': '2025-09-10 02:49:15.034773', 'step': 19703, 'epoch': 3} {'type': 'loss', 'content': 0.1431131809949875, 'timestamp': '2025-09-10 02:49:15.061771', 'step': 19704, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 3322488817984}, 'timestamp': '2025-09-10 02:49:15.092059', 'step': 19704, 'epoch': 3} {'type': 'loss', 'content': 0.0682033821940422, 'timestamp': '2025-09-10 02:49:15.094080', 'step': 19705, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 2847885091968}, 'timestamp': '2025-09-10 02:49:15.123333', 'step': 19705, 'epoch': 3} {'type': 'loss', 'content': 0.028994526714086533, 'timestamp': '2025-09-10 02:49:15.125801', 'step': 19706, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 2847885091968}, 'timestamp': '2025-09-10 02:49:15.156215', 'step': 19706, 'epoch': 3} {'type': 'loss', 'content': 0.013721567578613758, 'timestamp': '2025-09-10 02:49:15.158330', 'step': 19707, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 2847885091968}, 'timestamp': '2025-09-10 02:49:15.187789', 'step': 19707, 'epoch': 3} {'type': 'loss', 'content': 0.020428873598575592, 'timestamp': '2025-09-10 02:49:15.211314', 'step': 19708, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 128], 'flops': 3797092544000}, 'timestamp': '2025-09-10 02:49:15.242378', 'step': 19708, 'epoch': 3} {'type': 'loss', 'content': 0.03979748860001564, 'timestamp': '2025-09-10 02:49:15.244575', 'step': 19709, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 2847885091968}, 'timestamp': '2025-09-10 02:49:15.273999', 'step': 19709, 'epoch': 3} {'type': 'loss', 'content': 0.06835373491048813, 'timestamp': '2025-09-10 02:49:15.276686', 'step': 19710, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 2847885091968}, 'timestamp': '2025-09-10 02:49:15.306734', 'step': 19710, 'epoch': 3} {'type': 'loss', 'content': 0.052327658981084824, 'timestamp': '2025-09-10 02:49:15.309022', 'step': 19711, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 144], 'flops': 4271696270016}, 'timestamp': '2025-09-10 02:49:15.340284', 'step': 19711, 'epoch': 3} {'type': 'loss', 'content': 0.1329582780599594, 'timestamp': '2025-09-10 02:49:15.363579', 'step': 19712, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 2847885091968}, 'timestamp': '2025-09-10 02:49:15.393445', 'step': 19712, 'epoch': 3} {'type': 'loss', 'content': 0.09695082902908325, 'timestamp': '2025-09-10 02:49:15.395874', 'step': 19713, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 3322488817984}, 'timestamp': '2025-09-10 02:49:15.426742', 'step': 19713, 'epoch': 3} {'type': 'loss', 'content': 0.06110471487045288, 'timestamp': '2025-09-10 02:49:15.428960', 'step': 19714, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 144], 'flops': 4271696270016}, 'timestamp': '2025-09-10 02:49:15.459997', 'step': 19714, 'epoch': 3} {'type': 'loss', 'content': 0.04053730145096779, 'timestamp': '2025-09-10 02:49:15.462627', 'step': 19715, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 3322488817984}, 'timestamp': '2025-09-10 02:49:15.492488', 'step': 19715, 'epoch': 3} {'type': 'loss', 'content': 0.06082834675908089, 'timestamp': '2025-09-10 02:49:15.516302', 'step': 19716, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 80], 'flops': 2373281365952}, 'timestamp': '2025-09-10 02:49:15.547415', 'step': 19716, 'epoch': 3} {'type': 'loss', 'content': 0.1167304515838623, 'timestamp': '2025-09-10 02:49:15.549652', 'step': 19717, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 3322488817984}, 'timestamp': '2025-09-10 02:49:15.579657', 'step': 19717, 'epoch': 3} {'type': 'loss', 'content': 0.09930089116096497, 'timestamp': '2025-09-10 02:49:15.581856', 'step': 19718, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 2847885091968}, 'timestamp': '2025-09-10 02:49:15.611280', 'step': 19718, 'epoch': 3} {'type': 'loss', 'content': 0.050712183117866516, 'timestamp': '2025-09-10 02:49:15.613534', 'step': 19719, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 2847885091968}, 'timestamp': '2025-09-10 02:49:15.644436', 'step': 19719, 'epoch': 3} {'type': 'loss', 'content': 0.06755731254816055, 'timestamp': '2025-09-10 02:49:15.667591', 'step': 19720, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 128], 'flops': 3797092544000}, 'timestamp': '2025-09-10 02:49:15.697729', 'step': 19720, 'epoch': 3} {'type': 'loss', 'content': 0.03507399559020996, 'timestamp': '2025-09-10 02:49:15.699741', 'step': 19721, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 3322488817984}, 'timestamp': '2025-09-10 02:49:15.729715', 'step': 19721, 'epoch': 3} {'type': 'loss', 'content': 0.08936970680952072, 'timestamp': '2025-09-10 02:49:15.732235', 'step': 19722, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 3322488817984}, 'timestamp': '2025-09-10 02:49:15.761334', 'step': 19722, 'epoch': 3} {'type': 'loss', 'content': 0.08640116453170776, 'timestamp': '2025-09-10 02:49:15.763628', 'step': 19723, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 128], 'flops': 3797092544000}, 'timestamp': '2025-09-10 02:49:15.793105', 'step': 19723, 'epoch': 3} {'type': 'loss', 'content': 0.08784624934196472, 'timestamp': '2025-09-10 02:49:15.816686', 'step': 19724, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 3322488817984}, 'timestamp': '2025-09-10 02:49:15.846985', 'step': 19724, 'epoch': 3} {'type': 'loss', 'content': 0.16073845326900482, 'timestamp': '2025-09-10 02:49:15.849136', 'step': 19725, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 2847885091968}, 'timestamp': '2025-09-10 02:49:15.878029', 'step': 19725, 'epoch': 3} {'type': 'loss', 'content': 0.07606334984302521, 'timestamp': '2025-09-10 02:49:15.880722', 'step': 19726, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 2847885091968}, 'timestamp': '2025-09-10 02:49:15.912628', 'step': 19726, 'epoch': 3} {'type': 'loss', 'content': 0.0283235851675272, 'timestamp': '2025-09-10 02:49:15.914842', 'step': 19727, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 128], 'flops': 3797092544000}, 'timestamp': '2025-09-10 02:49:15.944062', 'step': 19727, 'epoch': 3} {'type': 'loss', 'content': 0.061871759593486786, 'timestamp': '2025-09-10 02:49:15.967814', 'step': 19728, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 128], 'flops': 3797092544000}, 'timestamp': '2025-09-10 02:49:15.997155', 'step': 19728, 'epoch': 3} {'type': 'loss', 'content': 0.05361245945096016, 'timestamp': '2025-09-10 02:49:15.999457', 'step': 19729, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 160], 'flops': 4746299996032}, 'timestamp': '2025-09-10 02:49:16.029365', 'step': 19729, 'epoch': 3} {'type': 'loss', 'content': 0.12699517607688904, 'timestamp': '2025-09-10 02:49:16.032573', 'step': 19730, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 2847885091968}, 'timestamp': '2025-09-10 02:49:16.062327', 'step': 19730, 'epoch': 3} {'type': 'loss', 'content': 0.04315333440899849, 'timestamp': '2025-09-10 02:49:16.064391', 'step': 19731, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 3322488817984}, 'timestamp': '2025-09-10 02:49:16.094802', 'step': 19731, 'epoch': 3} {'type': 'loss', 'content': 0.0896289199590683, 'timestamp': '2025-09-10 02:49:16.118575', 'step': 19732, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 3322488817984}, 'timestamp': '2025-09-10 02:49:16.149565', 'step': 19732, 'epoch': 3} {'type': 'loss', 'content': 0.0640653595328331, 'timestamp': '2025-09-10 02:49:16.151753', 'step': 19733, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 2847885091968}, 'timestamp': '2025-09-10 02:49:16.181591', 'step': 19733, 'epoch': 3} {'type': 'loss', 'content': 0.06293369829654694, 'timestamp': '2025-09-10 02:49:16.183881', 'step': 19734, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 2847885091968}, 'timestamp': '2025-09-10 02:49:16.214309', 'step': 19734, 'epoch': 3} {'type': 'loss', 'content': 0.05013451352715492, 'timestamp': '2025-09-10 02:49:16.216515', 'step': 19735, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 3322488817984}, 'timestamp': '2025-09-10 02:49:16.246309', 'step': 19735, 'epoch': 3} {'type': 'loss', 'content': 0.05075351148843765, 'timestamp': '2025-09-10 02:49:16.269443', 'step': 19736, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 3322488817984}, 'timestamp': '2025-09-10 02:49:16.299515', 'step': 19736, 'epoch': 3} {'type': 'loss', 'content': 0.06377297639846802, 'timestamp': '2025-09-10 02:49:16.301806', 'step': 19737, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 2847885091968}, 'timestamp': '2025-09-10 02:49:16.330986', 'step': 19737, 'epoch': 3} {'type': 'loss', 'content': 0.05903434008359909, 'timestamp': '2025-09-10 02:49:16.333189', 'step': 19738, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 2847885091968}, 'timestamp': '2025-09-10 02:49:16.362339', 'step': 19738, 'epoch': 3} {'type': 'loss', 'content': 0.03779975324869156, 'timestamp': '2025-09-10 02:49:16.364695', 'step': 19739, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 144], 'flops': 4271696270016}, 'timestamp': '2025-09-10 02:49:16.394289', 'step': 19739, 'epoch': 3} {'type': 'loss', 'content': 0.040118258446455, 'timestamp': '2025-09-10 02:49:16.420203', 'step': 19740, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 2847885091968}, 'timestamp': '2025-09-10 02:49:16.452746', 'step': 19740, 'epoch': 3} {'type': 'loss', 'content': 0.02564530447125435, 'timestamp': '2025-09-10 02:49:16.455927', 'step': 19741, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 3322488817984}, 'timestamp': '2025-09-10 02:49:16.490162', 'step': 19741, 'epoch': 3} {'type': 'loss', 'content': 0.12971030175685883, 'timestamp': '2025-09-10 02:49:16.495885', 'step': 19742, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 80], 'flops': 2373281365952}, 'timestamp': '2025-09-10 02:49:16.534402', 'step': 19742, 'epoch': 3} {'type': 'loss', 'content': 0.002314950106665492, 'timestamp': '2025-09-10 02:49:16.538977', 'step': 19743, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 128], 'flops': 3797092544000}, 'timestamp': '2025-09-10 02:49:16.577487', 'step': 19743, 'epoch': 3} {'type': 'loss', 'content': 0.026587095111608505, 'timestamp': '2025-09-10 02:49:16.604066', 'step': 19744, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 3322488817984}, 'timestamp': '2025-09-10 02:49:16.638604', 'step': 19744, 'epoch': 3} {'type': 'loss', 'content': 0.04169060289859772, 'timestamp': '2025-09-10 02:49:16.640755', 'step': 19745, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 3322488817984}, 'timestamp': '2025-09-10 02:49:16.670553', 'step': 19745, 'epoch': 3} {'type': 'loss', 'content': 0.03897371515631676, 'timestamp': '2025-09-10 02:49:16.672822', 'step': 19746, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 2847885091968}, 'timestamp': '2025-09-10 02:49:16.702759', 'step': 19746, 'epoch': 3} {'type': 'loss', 'content': 0.04742869362235069, 'timestamp': '2025-09-10 02:49:16.705328', 'step': 19747, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 2847885091968}, 'timestamp': '2025-09-10 02:49:16.735061', 'step': 19747, 'epoch': 3} {'type': 'loss', 'content': 0.024589814245700836, 'timestamp': '2025-09-10 02:49:16.758569', 'step': 19748, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 128], 'flops': 3797092544000}, 'timestamp': '2025-09-10 02:49:16.788179', 'step': 19748, 'epoch': 3} {'type': 'loss', 'content': 0.041032493114471436, 'timestamp': '2025-09-10 02:49:16.790591', 'step': 19749, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 3322488817984}, 'timestamp': '2025-09-10 02:49:16.820612', 'step': 19749, 'epoch': 3} {'type': 'loss', 'content': 0.047318343073129654, 'timestamp': '2025-09-10 02:49:16.822808', 'step': 19750, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 128], 'flops': 3797092544000}, 'timestamp': '2025-09-10 02:49:16.851760', 'step': 19750, 'epoch': 3} {'type': 'loss', 'content': 0.08452698588371277, 'timestamp': '2025-09-10 02:49:16.853728', 'step': 19751, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 3322488817984}, 'timestamp': '2025-09-10 02:49:16.883347', 'step': 19751, 'epoch': 3} {'type': 'loss', 'content': 0.04308725520968437, 'timestamp': '2025-09-10 02:49:16.906591', 'step': 19752, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 80], 'flops': 2373281365952}, 'timestamp': '2025-09-10 02:49:16.936045', 'step': 19752, 'epoch': 3} {'type': 'loss', 'content': 0.02294820360839367, 'timestamp': '2025-09-10 02:49:16.938223', 'step': 19753, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 128], 'flops': 3797092544000}, 'timestamp': '2025-09-10 02:49:16.967811', 'step': 19753, 'epoch': 3} {'type': 'loss', 'content': 0.06512579321861267, 'timestamp': '2025-09-10 02:49:16.969758', 'step': 19754, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 128], 'flops': 3797092544000}, 'timestamp': '2025-09-10 02:49:16.998947', 'step': 19754, 'epoch': 3} {'type': 'loss', 'content': 0.034722525626420975, 'timestamp': '2025-09-10 02:49:17.001334', 'step': 19755, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 2847885091968}, 'timestamp': '2025-09-10 02:49:17.030272', 'step': 19755, 'epoch': 3} {'type': 'loss', 'content': 0.08281964808702469, 'timestamp': '2025-09-10 02:49:17.053252', 'step': 19756, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 3322488817984}, 'timestamp': '2025-09-10 02:49:17.082530', 'step': 19756, 'epoch': 3} {'type': 'loss', 'content': 0.014007577672600746, 'timestamp': '2025-09-10 02:49:17.084449', 'step': 19757, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 2847885091968}, 'timestamp': '2025-09-10 02:49:17.113997', 'step': 19757, 'epoch': 3} {'type': 'loss', 'content': 0.03620796278119087, 'timestamp': '2025-09-10 02:49:17.116321', 'step': 19758, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 3322488817984}, 'timestamp': '2025-09-10 02:49:17.147222', 'step': 19758, 'epoch': 3} {'type': 'loss', 'content': 0.12213415652513504, 'timestamp': '2025-09-10 02:49:17.149472', 'step': 19759, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 3322488817984}, 'timestamp': '2025-09-10 02:49:17.178046', 'step': 19759, 'epoch': 3} {'type': 'loss', 'content': 0.04581019654870033, 'timestamp': '2025-09-10 02:49:17.201608', 'step': 19760, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 144], 'flops': 4271696270016}, 'timestamp': '2025-09-10 02:49:17.231469', 'step': 19760, 'epoch': 3} {'type': 'loss', 'content': 0.09461773931980133, 'timestamp': '2025-09-10 02:49:17.233613', 'step': 19761, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 128], 'flops': 3797092544000}, 'timestamp': '2025-09-10 02:49:17.264619', 'step': 19761, 'epoch': 3} {'type': 'loss', 'content': 0.04914592206478119, 'timestamp': '2025-09-10 02:49:17.266807', 'step': 19762, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 144], 'flops': 4271696270016}, 'timestamp': '2025-09-10 02:49:17.296288', 'step': 19762, 'epoch': 3} {'type': 'loss', 'content': 0.08676954358816147, 'timestamp': '2025-09-10 02:49:17.299074', 'step': 19763, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 128], 'flops': 3797092544000}, 'timestamp': '2025-09-10 02:49:17.328746', 'step': 19763, 'epoch': 3} {'type': 'loss', 'content': 0.046389415860176086, 'timestamp': '2025-09-10 02:49:17.352293', 'step': 19764, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 128], 'flops': 3797092544000}, 'timestamp': '2025-09-10 02:49:17.381537', 'step': 19764, 'epoch': 3} {'type': 'loss', 'content': 0.05866044759750366, 'timestamp': '2025-09-10 02:49:17.383851', 'step': 19765, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 128], 'flops': 3797092544000}, 'timestamp': '2025-09-10 02:49:17.413505', 'step': 19765, 'epoch': 3} {'type': 'loss', 'content': 0.03512856364250183, 'timestamp': '2025-09-10 02:49:17.415713', 'step': 19766, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 3322488817984}, 'timestamp': '2025-09-10 02:49:17.445803', 'step': 19766, 'epoch': 3} {'type': 'loss', 'content': 0.07163117825984955, 'timestamp': '2025-09-10 02:49:17.447983', 'step': 19767, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 128], 'flops': 3797092544000}, 'timestamp': '2025-09-10 02:49:17.477128', 'step': 19767, 'epoch': 3} {'type': 'loss', 'content': 0.054878391325473785, 'timestamp': '2025-09-10 02:49:17.500726', 'step': 19768, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 3322488817984}, 'timestamp': '2025-09-10 02:49:17.532544', 'step': 19768, 'epoch': 3} {'type': 'loss', 'content': 0.09997022151947021, 'timestamp': '2025-09-10 02:49:17.535224', 'step': 19769, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 2847885091968}, 'timestamp': '2025-09-10 02:49:17.568761', 'step': 19769, 'epoch': 3} {'type': 'loss', 'content': 0.07765404134988785, 'timestamp': '2025-09-10 02:49:17.571871', 'step': 19770, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 2847885091968}, 'timestamp': '2025-09-10 02:49:17.603664', 'step': 19770, 'epoch': 3} {'type': 'loss', 'content': 0.08754383027553558, 'timestamp': '2025-09-10 02:49:17.606489', 'step': 19771, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 144], 'flops': 4271696270016}, 'timestamp': '2025-09-10 02:49:17.637954', 'step': 19771, 'epoch': 3} {'type': 'loss', 'content': 0.0791444256901741, 'timestamp': '2025-09-10 02:49:17.662041', 'step': 19772, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 3322488817984}, 'timestamp': '2025-09-10 02:49:17.692581', 'step': 19772, 'epoch': 3} {'type': 'loss', 'content': 0.03672162443399429, 'timestamp': '2025-09-10 02:49:17.694486', 'step': 19773, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 2847885091968}, 'timestamp': '2025-09-10 02:49:17.723738', 'step': 19773, 'epoch': 3} {'type': 'loss', 'content': 0.03400372713804245, 'timestamp': '2025-09-10 02:49:17.726268', 'step': 19774, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 2847885091968}, 'timestamp': '2025-09-10 02:49:17.756047', 'step': 19774, 'epoch': 3} {'type': 'loss', 'content': 0.08292749524116516, 'timestamp': '2025-09-10 02:49:17.758270', 'step': 19775, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 144], 'flops': 4271696270016}, 'timestamp': '2025-09-10 02:49:17.787788', 'step': 19775, 'epoch': 3} {'type': 'loss', 'content': 0.10810420662164688, 'timestamp': '2025-09-10 02:49:17.811313', 'step': 19776, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 80], 'flops': 2373281365952}, 'timestamp': '2025-09-10 02:49:17.840353', 'step': 19776, 'epoch': 3} {'type': 'loss', 'content': 0.09180329740047455, 'timestamp': '2025-09-10 02:49:17.842726', 'step': 19777, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 128], 'flops': 3797092544000}, 'timestamp': '2025-09-10 02:49:17.873315', 'step': 19777, 'epoch': 3} {'type': 'loss', 'content': 0.11601237952709198, 'timestamp': '2025-09-10 02:49:17.875533', 'step': 19778, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 2847885091968}, 'timestamp': '2025-09-10 02:49:17.906620', 'step': 19778, 'epoch': 3} {'type': 'loss', 'content': 0.08585314452648163, 'timestamp': '2025-09-10 02:49:17.910950', 'step': 19779, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 3322488817984}, 'timestamp': '2025-09-10 02:49:17.949177', 'step': 19779, 'epoch': 3} {'type': 'loss', 'content': 0.054572202265262604, 'timestamp': '2025-09-10 02:49:17.973921', 'step': 19780, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 2847885091968}, 'timestamp': '2025-09-10 02:49:18.003797', 'step': 19780, 'epoch': 3} {'type': 'loss', 'content': 0.06870162487030029, 'timestamp': '2025-09-10 02:49:18.009325', 'step': 19781, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 3322488817984}, 'timestamp': '2025-09-10 02:49:18.047028', 'step': 19781, 'epoch': 3} {'type': 'loss', 'content': 0.06601998209953308, 'timestamp': '2025-09-10 02:49:18.049258', 'step': 19782, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 144], 'flops': 4271696270016}, 'timestamp': '2025-09-10 02:49:18.079814', 'step': 19782, 'epoch': 3} {'type': 'loss', 'content': 0.06037481874227524, 'timestamp': '2025-09-10 02:49:18.082848', 'step': 19783, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 128], 'flops': 3797092544000}, 'timestamp': '2025-09-10 02:49:18.112619', 'step': 19783, 'epoch': 3} {'type': 'loss', 'content': 0.017432155087590218, 'timestamp': '2025-09-10 02:49:18.136257', 'step': 19784, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 128], 'flops': 3797092544000}, 'timestamp': '2025-09-10 02:49:18.166784', 'step': 19784, 'epoch': 3} {'type': 'loss', 'content': 0.04082855209708214, 'timestamp': '2025-09-10 02:49:18.169284', 'step': 19785, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 176], 'flops': 5220903722048}, 'timestamp': '2025-09-10 02:49:18.202978', 'step': 19785, 'epoch': 3} {'type': 'loss', 'content': 0.09102980047464371, 'timestamp': '2025-09-10 02:49:18.207605', 'step': 19786, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 2847885091968}, 'timestamp': '2025-09-10 02:49:18.237324', 'step': 19786, 'epoch': 3} {'type': 'loss', 'content': 0.060861725360155106, 'timestamp': '2025-09-10 02:49:18.239584', 'step': 19787, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 128], 'flops': 3797092544000}, 'timestamp': '2025-09-10 02:49:18.268984', 'step': 19787, 'epoch': 3} {'type': 'loss', 'content': 0.08224735409021378, 'timestamp': '2025-09-10 02:49:18.298621', 'step': 19788, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 3322488817984}, 'timestamp': '2025-09-10 02:49:18.329063', 'step': 19788, 'epoch': 3} {'type': 'loss', 'content': 0.08155746757984161, 'timestamp': '2025-09-10 02:49:18.331205', 'step': 19789, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 3322488817984}, 'timestamp': '2025-09-10 02:49:18.360585', 'step': 19789, 'epoch': 3} {'type': 'loss', 'content': 0.07170707732439041, 'timestamp': '2025-09-10 02:49:18.362679', 'step': 19790, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 3322488817984}, 'timestamp': '2025-09-10 02:49:18.391893', 'step': 19790, 'epoch': 3} {'type': 'loss', 'content': 0.08269736170768738, 'timestamp': '2025-09-10 02:49:18.394459', 'step': 19791, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 2847885091968}, 'timestamp': '2025-09-10 02:49:18.424429', 'step': 19791, 'epoch': 3} {'type': 'loss', 'content': 0.046209003776311874, 'timestamp': '2025-09-10 02:49:18.447973', 'step': 19792, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 2847885091968}, 'timestamp': '2025-09-10 02:49:18.477152', 'step': 19792, 'epoch': 3} {'type': 'loss', 'content': 0.03879327327013016, 'timestamp': '2025-09-10 02:49:18.479536', 'step': 19793, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 2847885091968}, 'timestamp': '2025-09-10 02:49:18.509122', 'step': 19793, 'epoch': 3} {'type': 'loss', 'content': 0.0836666151881218, 'timestamp': '2025-09-10 02:49:18.511362', 'step': 19794, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 2847885091968}, 'timestamp': '2025-09-10 02:49:18.541077', 'step': 19794, 'epoch': 3} {'type': 'loss', 'content': 0.07717368006706238, 'timestamp': '2025-09-10 02:49:18.543493', 'step': 19795, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 3322488817984}, 'timestamp': '2025-09-10 02:49:18.573203', 'step': 19795, 'epoch': 3} {'type': 'loss', 'content': 0.06468605250120163, 'timestamp': '2025-09-10 02:49:18.596683', 'step': 19796, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 128], 'flops': 3797092544000}, 'timestamp': '2025-09-10 02:49:18.626315', 'step': 19796, 'epoch': 3} {'type': 'loss', 'content': 0.058638572692871094, 'timestamp': '2025-09-10 02:49:18.628591', 'step': 19797, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 144], 'flops': 4271696270016}, 'timestamp': '2025-09-10 02:49:18.658205', 'step': 19797, 'epoch': 3} {'type': 'loss', 'content': 0.038332678377628326, 'timestamp': '2025-09-10 02:49:18.660852', 'step': 19798, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 2847885091968}, 'timestamp': '2025-09-10 02:49:18.690400', 'step': 19798, 'epoch': 3} {'type': 'loss', 'content': 0.10473784804344177, 'timestamp': '2025-09-10 02:49:18.692586', 'step': 19799, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 2847885091968}, 'timestamp': '2025-09-10 02:49:18.722136', 'step': 19799, 'epoch': 3} {'type': 'loss', 'content': 0.05616646632552147, 'timestamp': '2025-09-10 02:49:18.745816', 'step': 19800, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 2847885091968}, 'timestamp': '2025-09-10 02:49:18.775904', 'step': 19800, 'epoch': 3} {'type': 'loss', 'content': 0.0993795245885849, 'timestamp': '2025-09-10 02:49:18.778022', 'step': 19801, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 2847885091968}, 'timestamp': '2025-09-10 02:49:18.807200', 'step': 19801, 'epoch': 3} {'type': 'loss', 'content': 0.045206233859062195, 'timestamp': '2025-09-10 02:49:18.809226', 'step': 19802, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 144], 'flops': 4271696270016}, 'timestamp': '2025-09-10 02:49:18.838270', 'step': 19802, 'epoch': 3} {'type': 'loss', 'content': 0.016140611842274666, 'timestamp': '2025-09-10 02:49:18.840929', 'step': 19803, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 128], 'flops': 3797092544000}, 'timestamp': '2025-09-10 02:49:18.870176', 'step': 19803, 'epoch': 3} {'type': 'loss', 'content': 0.05909300968050957, 'timestamp': '2025-09-10 02:49:18.893804', 'step': 19804, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 128], 'flops': 3797092544000}, 'timestamp': '2025-09-10 02:49:18.923268', 'step': 19804, 'epoch': 3} {'type': 'loss', 'content': 0.059517715126276016, 'timestamp': '2025-09-10 02:49:18.925238', 'step': 19805, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 3322488817984}, 'timestamp': '2025-09-10 02:49:18.954162', 'step': 19805, 'epoch': 3} {'type': 'loss', 'content': 0.08263497799634933, 'timestamp': '2025-09-10 02:49:18.956353', 'step': 19806, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 3322488817984}, 'timestamp': '2025-09-10 02:49:18.987159', 'step': 19806, 'epoch': 3} {'type': 'loss', 'content': 0.08158083260059357, 'timestamp': '2025-09-10 02:49:18.989274', 'step': 19807, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 3322488817984}, 'timestamp': '2025-09-10 02:49:19.020571', 'step': 19807, 'epoch': 3} {'type': 'loss', 'content': 0.0431164987385273, 'timestamp': '2025-09-10 02:49:19.043841', 'step': 19808, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 3322488817984}, 'timestamp': '2025-09-10 02:49:19.073299', 'step': 19808, 'epoch': 3} {'type': 'loss', 'content': 0.09458321332931519, 'timestamp': '2025-09-10 02:49:19.075558', 'step': 19809, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 2847885091968}, 'timestamp': '2025-09-10 02:49:19.106043', 'step': 19809, 'epoch': 3} {'type': 'loss', 'content': 0.052662067115306854, 'timestamp': '2025-09-10 02:49:19.108211', 'step': 19810, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 128], 'flops': 3797092544000}, 'timestamp': '2025-09-10 02:49:19.137537', 'step': 19810, 'epoch': 3} {'type': 'loss', 'content': 0.09196478128433228, 'timestamp': '2025-09-10 02:49:19.139677', 'step': 19811, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 3322488817984}, 'timestamp': '2025-09-10 02:49:19.169229', 'step': 19811, 'epoch': 3} {'type': 'loss', 'content': 0.03344133123755455, 'timestamp': '2025-09-10 02:49:19.193046', 'step': 19812, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 2847885091968}, 'timestamp': '2025-09-10 02:49:19.223957', 'step': 19812, 'epoch': 3} {'type': 'loss', 'content': 0.11673422902822495, 'timestamp': '2025-09-10 02:49:19.226246', 'step': 19813, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 2847885091968}, 'timestamp': '2025-09-10 02:49:19.255688', 'step': 19813, 'epoch': 3} {'type': 'loss', 'content': 0.0965500921010971, 'timestamp': '2025-09-10 02:49:19.258121', 'step': 19814, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 144], 'flops': 4271696270016}, 'timestamp': '2025-09-10 02:49:19.287368', 'step': 19814, 'epoch': 3} {'type': 'loss', 'content': 0.09075663983821869, 'timestamp': '2025-09-10 02:49:19.290084', 'step': 19815, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 3322488817984}, 'timestamp': '2025-09-10 02:49:19.320906', 'step': 19815, 'epoch': 3} {'type': 'loss', 'content': 0.03761553391814232, 'timestamp': '2025-09-10 02:49:19.344475', 'step': 19816, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 80], 'flops': 2373281365952}, 'timestamp': '2025-09-10 02:49:19.374298', 'step': 19816, 'epoch': 3} {'type': 'loss', 'content': 0.08967021852731705, 'timestamp': '2025-09-10 02:49:19.376464', 'step': 19817, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 3322488817984}, 'timestamp': '2025-09-10 02:49:19.406000', 'step': 19817, 'epoch': 3} {'type': 'loss', 'content': 0.04883895441889763, 'timestamp': '2025-09-10 02:49:19.408138', 'step': 19818, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 2847885091968}, 'timestamp': '2025-09-10 02:49:19.440312', 'step': 19818, 'epoch': 3} {'type': 'loss', 'content': 0.10455489903688431, 'timestamp': '2025-09-10 02:49:19.442367', 'step': 19819, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 144], 'flops': 4271696270016}, 'timestamp': '2025-09-10 02:49:19.472003', 'step': 19819, 'epoch': 3} {'type': 'loss', 'content': 0.11727003008127213, 'timestamp': '2025-09-10 02:49:19.495359', 'step': 19820, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 2847885091968}, 'timestamp': '2025-09-10 02:49:19.526496', 'step': 19820, 'epoch': 3} {'type': 'loss', 'content': 0.05706487223505974, 'timestamp': '2025-09-10 02:49:19.528498', 'step': 19821, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 144], 'flops': 4271696270016}, 'timestamp': '2025-09-10 02:49:19.558514', 'step': 19821, 'epoch': 3} {'type': 'loss', 'content': 0.07488545775413513, 'timestamp': '2025-09-10 02:49:19.560768', 'step': 19822, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 2847885091968}, 'timestamp': '2025-09-10 02:49:19.590573', 'step': 19822, 'epoch': 3} {'type': 'loss', 'content': 0.0266127809882164, 'timestamp': '2025-09-10 02:49:19.592956', 'step': 19823, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 3322488817984}, 'timestamp': '2025-09-10 02:49:19.623604', 'step': 19823, 'epoch': 3} {'type': 'loss', 'content': 0.020965872332453728, 'timestamp': '2025-09-10 02:49:19.646896', 'step': 19824, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 3322488817984}, 'timestamp': '2025-09-10 02:49:19.677121', 'step': 19824, 'epoch': 3} {'type': 'loss', 'content': 0.025493063032627106, 'timestamp': '2025-09-10 02:49:19.679258', 'step': 19825, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 128], 'flops': 3797092544000}, 'timestamp': '2025-09-10 02:49:19.708732', 'step': 19825, 'epoch': 3} {'type': 'loss', 'content': 0.05672833323478699, 'timestamp': '2025-09-10 02:49:19.710910', 'step': 19826, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 3322488817984}, 'timestamp': '2025-09-10 02:49:19.741056', 'step': 19826, 'epoch': 3} {'type': 'loss', 'content': 0.07572688162326813, 'timestamp': '2025-09-10 02:49:19.742991', 'step': 19827, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 2847885091968}, 'timestamp': '2025-09-10 02:49:19.772480', 'step': 19827, 'epoch': 3} {'type': 'loss', 'content': 0.06980917602777481, 'timestamp': '2025-09-10 02:49:19.795981', 'step': 19828, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 128], 'flops': 3797092544000}, 'timestamp': '2025-09-10 02:49:19.826896', 'step': 19828, 'epoch': 3} {'type': 'loss', 'content': 0.03725380077958107, 'timestamp': '2025-09-10 02:49:19.829229', 'step': 19829, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 128], 'flops': 3797092544000}, 'timestamp': '2025-09-10 02:49:19.859350', 'step': 19829, 'epoch': 3} {'type': 'loss', 'content': 0.06973392516374588, 'timestamp': '2025-09-10 02:49:19.861494', 'step': 19830, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 3322488817984}, 'timestamp': '2025-09-10 02:49:19.891529', 'step': 19830, 'epoch': 3} {'type': 'loss', 'content': 0.08744698017835617, 'timestamp': '2025-09-10 02:49:19.893786', 'step': 19831, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 3322488817984}, 'timestamp': '2025-09-10 02:49:19.925400', 'step': 19831, 'epoch': 3} {'type': 'loss', 'content': 0.0766029953956604, 'timestamp': '2025-09-10 02:49:19.948873', 'step': 19832, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 2847885091968}, 'timestamp': '2025-09-10 02:49:19.979063', 'step': 19832, 'epoch': 3} {'type': 'loss', 'content': 0.036921426653862, 'timestamp': '2025-09-10 02:49:19.981096', 'step': 19833, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 3322488817984}, 'timestamp': '2025-09-10 02:49:20.010489', 'step': 19833, 'epoch': 3} {'type': 'loss', 'content': 0.1955384463071823, 'timestamp': '2025-09-10 02:49:20.013051', 'step': 19834, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 128], 'flops': 3797092544000}, 'timestamp': '2025-09-10 02:49:20.047399', 'step': 19834, 'epoch': 3} {'type': 'loss', 'content': 0.07399065047502518, 'timestamp': '2025-09-10 02:49:20.050076', 'step': 19835, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 3322488817984}, 'timestamp': '2025-09-10 02:49:20.080204', 'step': 19835, 'epoch': 3} {'type': 'loss', 'content': 0.03837354853749275, 'timestamp': '2025-09-10 02:49:20.103564', 'step': 19836, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 3322488817984}, 'timestamp': '2025-09-10 02:49:20.133730', 'step': 19836, 'epoch': 3} {'type': 'loss', 'content': 0.06353334337472916, 'timestamp': '2025-09-10 02:49:20.135868', 'step': 19837, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 3322488817984}, 'timestamp': '2025-09-10 02:49:20.165415', 'step': 19837, 'epoch': 3} {'type': 'loss', 'content': 0.03746449574828148, 'timestamp': '2025-09-10 02:49:20.167827', 'step': 19838, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 128], 'flops': 3797092544000}, 'timestamp': '2025-09-10 02:49:20.197800', 'step': 19838, 'epoch': 3} {'type': 'loss', 'content': 0.044384151697158813, 'timestamp': '2025-09-10 02:49:20.199864', 'step': 19839, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 3322488817984}, 'timestamp': '2025-09-10 02:49:20.229206', 'step': 19839, 'epoch': 3} {'type': 'loss', 'content': 0.08684911578893661, 'timestamp': '2025-09-10 02:49:20.252757', 'step': 19840, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 3322488817984}, 'timestamp': '2025-09-10 02:49:20.281558', 'step': 19840, 'epoch': 3} {'type': 'loss', 'content': 0.13323253393173218, 'timestamp': '2025-09-10 02:49:20.283684', 'step': 19841, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 2847885091968}, 'timestamp': '2025-09-10 02:49:20.313506', 'step': 19841, 'epoch': 3} {'type': 'loss', 'content': 0.03231466934084892, 'timestamp': '2025-09-10 02:49:20.315570', 'step': 19842, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 2847885091968}, 'timestamp': '2025-09-10 02:49:20.346589', 'step': 19842, 'epoch': 3} {'type': 'loss', 'content': 0.07087468355894089, 'timestamp': '2025-09-10 02:49:20.348651', 'step': 19843, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 128], 'flops': 3797092544000}, 'timestamp': '2025-09-10 02:49:20.378330', 'step': 19843, 'epoch': 3} {'type': 'loss', 'content': 0.04768165946006775, 'timestamp': '2025-09-10 02:49:20.403339', 'step': 19844, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 128], 'flops': 3797092544000}, 'timestamp': '2025-09-10 02:49:20.433322', 'step': 19844, 'epoch': 3} {'type': 'loss', 'content': 0.1089768186211586, 'timestamp': '2025-09-10 02:49:20.435734', 'step': 19845, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 2847885091968}, 'timestamp': '2025-09-10 02:49:20.465967', 'step': 19845, 'epoch': 3} {'type': 'loss', 'content': 0.08314327150583267, 'timestamp': '2025-09-10 02:49:20.468090', 'step': 19846, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 144], 'flops': 4271696270016}, 'timestamp': '2025-09-10 02:49:20.497675', 'step': 19846, 'epoch': 3} {'type': 'loss', 'content': 0.14921210706233978, 'timestamp': '2025-09-10 02:49:20.499957', 'step': 19847, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 128], 'flops': 3797092544000}, 'timestamp': '2025-09-10 02:49:20.529602', 'step': 19847, 'epoch': 3} {'type': 'loss', 'content': 0.10012132674455643, 'timestamp': '2025-09-10 02:49:20.553073', 'step': 19848, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 2847885091968}, 'timestamp': '2025-09-10 02:49:20.582953', 'step': 19848, 'epoch': 3} {'type': 'loss', 'content': 0.06277791410684586, 'timestamp': '2025-09-10 02:49:20.585319', 'step': 19849, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 144], 'flops': 4271696270016}, 'timestamp': '2025-09-10 02:49:20.615723', 'step': 19849, 'epoch': 3} {'type': 'loss', 'content': 0.11774443835020065, 'timestamp': '2025-09-10 02:49:20.618103', 'step': 19850, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 128], 'flops': 3797092544000}, 'timestamp': '2025-09-10 02:49:20.648140', 'step': 19850, 'epoch': 3} {'type': 'loss', 'content': 0.06073706969618797, 'timestamp': '2025-09-10 02:49:20.650564', 'step': 19851, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 128], 'flops': 3797092544000}, 'timestamp': '2025-09-10 02:49:20.681736', 'step': 19851, 'epoch': 3} {'type': 'loss', 'content': 0.0855444073677063, 'timestamp': '2025-09-10 02:49:20.706480', 'step': 19852, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 2847885091968}, 'timestamp': '2025-09-10 02:49:20.736545', 'step': 19852, 'epoch': 3} {'type': 'loss', 'content': 0.02422117069363594, 'timestamp': '2025-09-10 02:49:20.738698', 'step': 19853, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 2847885091968}, 'timestamp': '2025-09-10 02:49:20.767999', 'step': 19853, 'epoch': 3} {'type': 'loss', 'content': 0.16709615290164948, 'timestamp': '2025-09-10 02:49:20.770307', 'step': 19854, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 3322488817984}, 'timestamp': '2025-09-10 02:49:20.800282', 'step': 19854, 'epoch': 3} {'type': 'loss', 'content': 0.06470037996768951, 'timestamp': '2025-09-10 02:49:20.802581', 'step': 19855, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 3322488817984}, 'timestamp': '2025-09-10 02:49:20.832506', 'step': 19855, 'epoch': 3} {'type': 'loss', 'content': 0.06480590999126434, 'timestamp': '2025-09-10 02:49:20.856332', 'step': 19856, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 3322488817984}, 'timestamp': '2025-09-10 02:49:20.886057', 'step': 19856, 'epoch': 3} {'type': 'loss', 'content': 0.09839271008968353, 'timestamp': '2025-09-10 02:49:20.888662', 'step': 19857, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 2847885091968}, 'timestamp': '2025-09-10 02:49:20.920021', 'step': 19857, 'epoch': 3} {'type': 'loss', 'content': 0.09194837510585785, 'timestamp': '2025-09-10 02:49:20.922286', 'step': 19858, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 128], 'flops': 3797092544000}, 'timestamp': '2025-09-10 02:49:20.952405', 'step': 19858, 'epoch': 3} {'type': 'loss', 'content': 0.10111834108829498, 'timestamp': '2025-09-10 02:49:20.954575', 'step': 19859, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 128], 'flops': 3797092544000}, 'timestamp': '2025-09-10 02:49:20.983587', 'step': 19859, 'epoch': 3} {'type': 'loss', 'content': 0.05778970569372177, 'timestamp': '2025-09-10 02:49:21.006989', 'step': 19860, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 2847885091968}, 'timestamp': '2025-09-10 02:49:21.036885', 'step': 19860, 'epoch': 3} {'type': 'loss', 'content': 0.06286345422267914, 'timestamp': '2025-09-10 02:49:21.039330', 'step': 19861, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 3322488817984}, 'timestamp': '2025-09-10 02:49:21.069472', 'step': 19861, 'epoch': 3} {'type': 'loss', 'content': 0.09228694438934326, 'timestamp': '2025-09-10 02:49:21.071688', 'step': 19862, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 2847885091968}, 'timestamp': '2025-09-10 02:49:21.101284', 'step': 19862, 'epoch': 3} {'type': 'loss', 'content': 0.027734851464629173, 'timestamp': '2025-09-10 02:49:21.103991', 'step': 19863, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 128], 'flops': 3797092544000}, 'timestamp': '2025-09-10 02:49:21.133640', 'step': 19863, 'epoch': 3} {'type': 'loss', 'content': 0.18889524042606354, 'timestamp': '2025-09-10 02:49:21.157478', 'step': 19864, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 2847885091968}, 'timestamp': '2025-09-10 02:49:21.187946', 'step': 19864, 'epoch': 3} {'type': 'loss', 'content': 0.11026248335838318, 'timestamp': '2025-09-10 02:49:21.191570', 'step': 19865, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 160], 'flops': 4746299996032}, 'timestamp': '2025-09-10 02:49:21.222128', 'step': 19865, 'epoch': 3} {'type': 'loss', 'content': 0.11285043507814407, 'timestamp': '2025-09-10 02:49:21.225013', 'step': 19866, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 144], 'flops': 4271696270016}, 'timestamp': '2025-09-10 02:49:21.255096', 'step': 19866, 'epoch': 3} {'type': 'loss', 'content': 0.14659006893634796, 'timestamp': '2025-09-10 02:49:21.257487', 'step': 19867, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 128], 'flops': 3797092544000}, 'timestamp': '2025-09-10 02:49:21.286520', 'step': 19867, 'epoch': 3} {'type': 'loss', 'content': 0.05254398286342621, 'timestamp': '2025-09-10 02:49:21.309998', 'step': 19868, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 3322488817984}, 'timestamp': '2025-09-10 02:49:21.339670', 'step': 19868, 'epoch': 3} {'type': 'loss', 'content': 0.09335913509130478, 'timestamp': '2025-09-10 02:49:21.341968', 'step': 19869, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 3322488817984}, 'timestamp': '2025-09-10 02:49:21.371879', 'step': 19869, 'epoch': 3} {'type': 'loss', 'content': 0.03015928715467453, 'timestamp': '2025-09-10 02:49:21.374233', 'step': 19870, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 2847885091968}, 'timestamp': '2025-09-10 02:49:21.403846', 'step': 19870, 'epoch': 3} {'type': 'loss', 'content': 0.04230911657214165, 'timestamp': '2025-09-10 02:49:21.405973', 'step': 19871, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 144], 'flops': 4271696270016}, 'timestamp': '2025-09-10 02:49:21.435645', 'step': 19871, 'epoch': 3} {'type': 'loss', 'content': 0.06713742017745972, 'timestamp': '2025-09-10 02:49:21.459218', 'step': 19872, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 128], 'flops': 3797092544000}, 'timestamp': '2025-09-10 02:49:21.490238', 'step': 19872, 'epoch': 3} {'type': 'loss', 'content': 0.12829598784446716, 'timestamp': '2025-09-10 02:49:21.492672', 'step': 19873, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 3322488817984}, 'timestamp': '2025-09-10 02:49:21.524324', 'step': 19873, 'epoch': 3} {'type': 'loss', 'content': 0.07196866720914841, 'timestamp': '2025-09-10 02:49:21.526761', 'step': 19874, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 144], 'flops': 4271696270016}, 'timestamp': '2025-09-10 02:49:21.557127', 'step': 19874, 'epoch': 3} {'type': 'loss', 'content': 0.08149504661560059, 'timestamp': '2025-09-10 02:49:21.559448', 'step': 19875, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 128], 'flops': 3797092544000}, 'timestamp': '2025-09-10 02:49:21.588667', 'step': 19875, 'epoch': 3} {'type': 'loss', 'content': 0.08084134012460709, 'timestamp': '2025-09-10 02:49:21.612020', 'step': 19876, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 160], 'flops': 4746299996032}, 'timestamp': '2025-09-10 02:49:21.642022', 'step': 19876, 'epoch': 3} {'type': 'loss', 'content': 0.07317084819078445, 'timestamp': '2025-09-10 02:49:21.644091', 'step': 19877, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 2847885091968}, 'timestamp': '2025-09-10 02:49:21.674023', 'step': 19877, 'epoch': 3} {'type': 'loss', 'content': 0.05803743377327919, 'timestamp': '2025-09-10 02:49:21.676184', 'step': 19878, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 3322488817984}, 'timestamp': '2025-09-10 02:49:21.707552', 'step': 19878, 'epoch': 3} {'type': 'loss', 'content': 0.07746211439371109, 'timestamp': '2025-09-10 02:49:21.709855', 'step': 19879, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 3322488817984}, 'timestamp': '2025-09-10 02:49:21.739905', 'step': 19879, 'epoch': 3} {'type': 'loss', 'content': 0.074112668633461, 'timestamp': '2025-09-10 02:49:21.763388', 'step': 19880, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 3322488817984}, 'timestamp': '2025-09-10 02:49:21.793202', 'step': 19880, 'epoch': 3} {'type': 'loss', 'content': 0.05602259933948517, 'timestamp': '2025-09-10 02:49:21.804308', 'step': 19881, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 128], 'flops': 3797092544000}, 'timestamp': '2025-09-10 02:49:21.837314', 'step': 19881, 'epoch': 3} {'type': 'loss', 'content': 0.13129132986068726, 'timestamp': '2025-09-10 02:49:21.841598', 'step': 19882, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 2847885091968}, 'timestamp': '2025-09-10 02:49:21.875903', 'step': 19882, 'epoch': 3} {'type': 'loss', 'content': 0.029820645228028297, 'timestamp': '2025-09-10 02:49:21.878074', 'step': 19883, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 128], 'flops': 3797092544000}, 'timestamp': '2025-09-10 02:49:21.908468', 'step': 19883, 'epoch': 3} {'type': 'loss', 'content': 0.05541720241308212, 'timestamp': '2025-09-10 02:49:21.932300', 'step': 19884, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 128], 'flops': 3797092544000}, 'timestamp': '2025-09-10 02:49:21.962690', 'step': 19884, 'epoch': 3} {'type': 'loss', 'content': 0.07966464757919312, 'timestamp': '2025-09-10 02:49:21.964912', 'step': 19885, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 2847885091968}, 'timestamp': '2025-09-10 02:49:21.994178', 'step': 19885, 'epoch': 3} {'type': 'loss', 'content': 0.14402811229228973, 'timestamp': '2025-09-10 02:49:21.996884', 'step': 19886, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 128], 'flops': 3797092544000}, 'timestamp': '2025-09-10 02:49:22.029305', 'step': 19886, 'epoch': 3} {'type': 'loss', 'content': 0.04065227508544922, 'timestamp': '2025-09-10 02:49:22.032106', 'step': 19887, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 2847885091968}, 'timestamp': '2025-09-10 02:49:22.062452', 'step': 19887, 'epoch': 3} {'type': 'loss', 'content': 0.028297169134020805, 'timestamp': '2025-09-10 02:49:22.086013', 'step': 19888, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 80], 'flops': 2373281365952}, 'timestamp': '2025-09-10 02:49:22.115866', 'step': 19888, 'epoch': 3} {'type': 'loss', 'content': 0.17822898924350739, 'timestamp': '2025-09-10 02:49:22.118024', 'step': 19889, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 2847885091968}, 'timestamp': '2025-09-10 02:49:22.148010', 'step': 19889, 'epoch': 3} {'type': 'loss', 'content': 0.1054338812828064, 'timestamp': '2025-09-10 02:49:22.150216', 'step': 19890, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 2847885091968}, 'timestamp': '2025-09-10 02:49:22.180322', 'step': 19890, 'epoch': 3} {'type': 'loss', 'content': 0.04836726561188698, 'timestamp': '2025-09-10 02:49:22.182839', 'step': 19891, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 2847885091968}, 'timestamp': '2025-09-10 02:49:22.213488', 'step': 19891, 'epoch': 3} {'type': 'loss', 'content': 0.00581876328215003, 'timestamp': '2025-09-10 02:49:22.236925', 'step': 19892, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 144], 'flops': 4271696270016}, 'timestamp': '2025-09-10 02:49:22.268272', 'step': 19892, 'epoch': 3} {'type': 'loss', 'content': 0.0736762136220932, 'timestamp': '2025-09-10 02:49:22.270484', 'step': 19893, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 3322488817984}, 'timestamp': '2025-09-10 02:49:22.300481', 'step': 19893, 'epoch': 3} {'type': 'loss', 'content': 0.10576561093330383, 'timestamp': '2025-09-10 02:49:22.305534', 'step': 19894, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 3322488817984}, 'timestamp': '2025-09-10 02:49:22.336502', 'step': 19894, 'epoch': 3} {'type': 'loss', 'content': 0.032285142689943314, 'timestamp': '2025-09-10 02:49:22.338867', 'step': 19895, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 80], 'flops': 2373281365952}, 'timestamp': '2025-09-10 02:49:22.368710', 'step': 19895, 'epoch': 3} {'type': 'loss', 'content': 0.06592331826686859, 'timestamp': '2025-09-10 02:49:22.392089', 'step': 19896, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 3322488817984}, 'timestamp': '2025-09-10 02:49:22.421751', 'step': 19896, 'epoch': 3} {'type': 'loss', 'content': 0.05958343297243118, 'timestamp': '2025-09-10 02:49:22.424010', 'step': 19897, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 160], 'flops': 4746299996032}, 'timestamp': '2025-09-10 02:49:22.455993', 'step': 19897, 'epoch': 3} {'type': 'loss', 'content': 0.0444522388279438, 'timestamp': '2025-09-10 02:49:22.458865', 'step': 19898, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 144], 'flops': 4271696270016}, 'timestamp': '2025-09-10 02:49:22.488727', 'step': 19898, 'epoch': 3} {'type': 'loss', 'content': 0.17742574214935303, 'timestamp': '2025-09-10 02:49:22.491092', 'step': 19899, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 128], 'flops': 3797092544000}, 'timestamp': '2025-09-10 02:49:22.520725', 'step': 19899, 'epoch': 3} {'type': 'loss', 'content': 0.08752327412366867, 'timestamp': '2025-09-10 02:49:22.544105', 'step': 19900, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 128], 'flops': 3797092544000}, 'timestamp': '2025-09-10 02:49:22.574756', 'step': 19900, 'epoch': 3} {'type': 'loss', 'content': 0.0958583801984787, 'timestamp': '2025-09-10 02:49:22.577248', 'step': 19901, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 160], 'flops': 4746299996032}, 'timestamp': '2025-09-10 02:49:22.607187', 'step': 19901, 'epoch': 3} {'type': 'loss', 'content': 0.047852784395217896, 'timestamp': '2025-09-10 02:49:22.611192', 'step': 19902, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 128], 'flops': 3797092544000}, 'timestamp': '2025-09-10 02:49:22.642018', 'step': 19902, 'epoch': 3} {'type': 'loss', 'content': 0.11637432873249054, 'timestamp': '2025-09-10 02:49:22.644094', 'step': 19903, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 3322488817984}, 'timestamp': '2025-09-10 02:49:22.673728', 'step': 19903, 'epoch': 3} {'type': 'loss', 'content': 0.08622311800718307, 'timestamp': '2025-09-10 02:49:22.696964', 'step': 19904, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 128], 'flops': 3797092544000}, 'timestamp': '2025-09-10 02:49:22.728454', 'step': 19904, 'epoch': 3} {'type': 'loss', 'content': 0.0706353485584259, 'timestamp': '2025-09-10 02:49:22.730828', 'step': 19905, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 144], 'flops': 4271696270016}, 'timestamp': '2025-09-10 02:49:22.760733', 'step': 19905, 'epoch': 3} {'type': 'loss', 'content': 0.08679569512605667, 'timestamp': '2025-09-10 02:49:22.762997', 'step': 19906, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 80], 'flops': 2373281365952}, 'timestamp': '2025-09-10 02:49:22.792770', 'step': 19906, 'epoch': 3} {'type': 'loss', 'content': 0.11953271180391312, 'timestamp': '2025-09-10 02:49:22.794933', 'step': 19907, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 128], 'flops': 3797092544000}, 'timestamp': '2025-09-10 02:49:22.824972', 'step': 19907, 'epoch': 3} {'type': 'loss', 'content': 0.08012769371271133, 'timestamp': '2025-09-10 02:49:22.848778', 'step': 19908, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 2847885091968}, 'timestamp': '2025-09-10 02:49:22.878816', 'step': 19908, 'epoch': 3} {'type': 'loss', 'content': 0.04045674949884415, 'timestamp': '2025-09-10 02:49:22.881268', 'step': 19909, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 2847885091968}, 'timestamp': '2025-09-10 02:49:22.912350', 'step': 19909, 'epoch': 3} {'type': 'loss', 'content': 0.04777572304010391, 'timestamp': '2025-09-10 02:49:22.915138', 'step': 19910, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 2847885091968}, 'timestamp': '2025-09-10 02:49:22.944767', 'step': 19910, 'epoch': 3} {'type': 'loss', 'content': 0.11253651976585388, 'timestamp': '2025-09-10 02:49:22.947086', 'step': 19911, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 80], 'flops': 2373281365952}, 'timestamp': '2025-09-10 02:49:22.976746', 'step': 19911, 'epoch': 3} {'type': 'loss', 'content': 0.09074734151363373, 'timestamp': '2025-09-10 02:49:23.000102', 'step': 19912, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 2847885091968}, 'timestamp': '2025-09-10 02:49:23.030561', 'step': 19912, 'epoch': 3} {'type': 'loss', 'content': 0.06065840274095535, 'timestamp': '2025-09-10 02:49:23.037159', 'step': 19913, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 3322488817984}, 'timestamp': '2025-09-10 02:49:23.073986', 'step': 19913, 'epoch': 3} {'type': 'loss', 'content': 0.14111877977848053, 'timestamp': '2025-09-10 02:49:23.076435', 'step': 19914, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 3322488817984}, 'timestamp': '2025-09-10 02:49:23.106301', 'step': 19914, 'epoch': 3} {'type': 'loss', 'content': 0.08668763935565948, 'timestamp': '2025-09-10 02:49:23.110072', 'step': 19915, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 128], 'flops': 3797092544000}, 'timestamp': '2025-09-10 02:49:23.145373', 'step': 19915, 'epoch': 3} {'type': 'loss', 'content': 0.04336659982800484, 'timestamp': '2025-09-10 02:49:23.168919', 'step': 19916, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 2847885091968}, 'timestamp': '2025-09-10 02:49:23.200231', 'step': 19916, 'epoch': 3} {'type': 'loss', 'content': 0.05814008042216301, 'timestamp': '2025-09-10 02:49:23.202591', 'step': 19917, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 144], 'flops': 4271696270016}, 'timestamp': '2025-09-10 02:49:23.233214', 'step': 19917, 'epoch': 3} {'type': 'loss', 'content': 0.05523698776960373, 'timestamp': '2025-09-10 02:49:23.235777', 'step': 19918, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 2847885091968}, 'timestamp': '2025-09-10 02:49:23.266184', 'step': 19918, 'epoch': 3} {'type': 'loss', 'content': 0.09797971695661545, 'timestamp': '2025-09-10 02:49:23.268144', 'step': 19919, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 192], 'flops': 5695507448064}, 'timestamp': '2025-09-10 02:49:23.297865', 'step': 19919, 'epoch': 3} {'type': 'loss', 'content': 0.05237716808915138, 'timestamp': '2025-09-10 02:49:23.323286', 'step': 19920, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 2847885091968}, 'timestamp': '2025-09-10 02:49:23.353085', 'step': 19920, 'epoch': 3} {'type': 'loss', 'content': 0.06797171384096146, 'timestamp': '2025-09-10 02:49:23.355381', 'step': 19921, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 3322488817984}, 'timestamp': '2025-09-10 02:49:23.385163', 'step': 19921, 'epoch': 3} {'type': 'loss', 'content': 0.05834042653441429, 'timestamp': '2025-09-10 02:49:23.387090', 'step': 19922, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 3322488817984}, 'timestamp': '2025-09-10 02:49:23.416454', 'step': 19922, 'epoch': 3} {'type': 'loss', 'content': 0.11838510632514954, 'timestamp': '2025-09-10 02:49:23.420293', 'step': 19923, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 128], 'flops': 3797092544000}, 'timestamp': '2025-09-10 02:49:23.450625', 'step': 19923, 'epoch': 3} {'type': 'loss', 'content': 0.055567461997270584, 'timestamp': '2025-09-10 02:49:23.474390', 'step': 19924, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 128], 'flops': 3797092544000}, 'timestamp': '2025-09-10 02:49:23.504141', 'step': 19924, 'epoch': 3} {'type': 'loss', 'content': 0.025131771340966225, 'timestamp': '2025-09-10 02:49:23.506489', 'step': 19925, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 3322488817984}, 'timestamp': '2025-09-10 02:49:23.536713', 'step': 19925, 'epoch': 3} {'type': 'loss', 'content': 0.03374062106013298, 'timestamp': '2025-09-10 02:49:23.540148', 'step': 19926, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 128], 'flops': 3797092544000}, 'timestamp': '2025-09-10 02:49:23.570552', 'step': 19926, 'epoch': 3} {'type': 'loss', 'content': 0.06476865708827972, 'timestamp': '2025-09-10 02:49:23.572571', 'step': 19927, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 128], 'flops': 3797092544000}, 'timestamp': '2025-09-10 02:49:23.602015', 'step': 19927, 'epoch': 3} {'type': 'loss', 'content': 0.070867158472538, 'timestamp': '2025-09-10 02:49:23.625131', 'step': 19928, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 80], 'flops': 2373281365952}, 'timestamp': '2025-09-10 02:49:23.655310', 'step': 19928, 'epoch': 3} {'type': 'loss', 'content': 0.05417617782950401, 'timestamp': '2025-09-10 02:49:23.657670', 'step': 19929, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 3322488817984}, 'timestamp': '2025-09-10 02:49:23.687596', 'step': 19929, 'epoch': 3} {'type': 'loss', 'content': 0.06149425357580185, 'timestamp': '2025-09-10 02:49:23.690138', 'step': 19930, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 128], 'flops': 3797092544000}, 'timestamp': '2025-09-10 02:49:23.719636', 'step': 19930, 'epoch': 3} {'type': 'loss', 'content': 0.12350253015756607, 'timestamp': '2025-09-10 02:49:23.723499', 'step': 19931, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 3322488817984}, 'timestamp': '2025-09-10 02:49:23.753777', 'step': 19931, 'epoch': 3} {'type': 'loss', 'content': 0.04154141992330551, 'timestamp': '2025-09-10 02:49:23.777265', 'step': 19932, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 128], 'flops': 3797092544000}, 'timestamp': '2025-09-10 02:49:23.807169', 'step': 19932, 'epoch': 3} {'type': 'loss', 'content': 0.09479320794343948, 'timestamp': '2025-09-10 02:49:23.809271', 'step': 19933, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 2847885091968}, 'timestamp': '2025-09-10 02:49:23.838896', 'step': 19933, 'epoch': 3} {'type': 'loss', 'content': 0.07304510474205017, 'timestamp': '2025-09-10 02:49:23.841251', 'step': 19934, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 3322488817984}, 'timestamp': '2025-09-10 02:49:23.870868', 'step': 19934, 'epoch': 3} {'type': 'loss', 'content': 0.029727289453148842, 'timestamp': '2025-09-10 02:49:23.873092', 'step': 19935, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 2847885091968}, 'timestamp': '2025-09-10 02:49:23.901916', 'step': 19935, 'epoch': 3} {'type': 'loss', 'content': 0.053627241402864456, 'timestamp': '2025-09-10 02:49:23.927236', 'step': 19936, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 2847885091968}, 'timestamp': '2025-09-10 02:49:23.962239', 'step': 19936, 'epoch': 3} {'type': 'loss', 'content': 0.12562449276447296, 'timestamp': '2025-09-10 02:49:23.965583', 'step': 19937, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 2847885091968}, 'timestamp': '2025-09-10 02:49:24.002697', 'step': 19937, 'epoch': 3} {'type': 'loss', 'content': 0.08331331610679626, 'timestamp': '2025-09-10 02:49:24.007655', 'step': 19938, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 3322488817984}, 'timestamp': '2025-09-10 02:49:24.044535', 'step': 19938, 'epoch': 3} {'type': 'loss', 'content': 0.07825008779764175, 'timestamp': '2025-09-10 02:49:24.051109', 'step': 19939, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 128], 'flops': 3797092544000}, 'timestamp': '2025-09-10 02:49:24.088575', 'step': 19939, 'epoch': 3} {'type': 'loss', 'content': 0.07393268495798111, 'timestamp': '2025-09-10 02:49:24.113987', 'step': 19940, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 128], 'flops': 3797092544000}, 'timestamp': '2025-09-10 02:49:24.149476', 'step': 19940, 'epoch': 3} {'type': 'loss', 'content': 0.027730682864785194, 'timestamp': '2025-09-10 02:49:24.153461', 'step': 19941, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 2847885091968}, 'timestamp': '2025-09-10 02:49:24.186272', 'step': 19941, 'epoch': 3} {'type': 'loss', 'content': 0.052826788276433945, 'timestamp': '2025-09-10 02:49:24.190221', 'step': 19942, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 3322488817984}, 'timestamp': '2025-09-10 02:49:24.229271', 'step': 19942, 'epoch': 3} {'type': 'loss', 'content': 0.0767814964056015, 'timestamp': '2025-09-10 02:49:24.233674', 'step': 19943, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 128], 'flops': 3797092544000}, 'timestamp': '2025-09-10 02:49:24.268293', 'step': 19943, 'epoch': 3} {'type': 'loss', 'content': 0.07597993314266205, 'timestamp': '2025-09-10 02:49:24.293215', 'step': 19944, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 144], 'flops': 4271696270016}, 'timestamp': '2025-09-10 02:49:24.328409', 'step': 19944, 'epoch': 3} {'type': 'loss', 'content': 0.12577952444553375, 'timestamp': '2025-09-10 02:49:24.332213', 'step': 19945, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 160], 'flops': 4746299996032}, 'timestamp': '2025-09-10 02:49:24.361791', 'step': 19945, 'epoch': 3} {'type': 'loss', 'content': 0.0813533365726471, 'timestamp': '2025-09-10 02:49:24.365821', 'step': 19946, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 3322488817984}, 'timestamp': '2025-09-10 02:49:24.398938', 'step': 19946, 'epoch': 3} {'type': 'loss', 'content': 0.10535375028848648, 'timestamp': '2025-09-10 02:49:24.401279', 'step': 19947, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 128], 'flops': 3797092544000}, 'timestamp': '2025-09-10 02:49:24.431463', 'step': 19947, 'epoch': 3} {'type': 'loss', 'content': 0.048898905515670776, 'timestamp': '2025-09-10 02:49:24.457440', 'step': 19948, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 128], 'flops': 3797092544000}, 'timestamp': '2025-09-10 02:49:24.488270', 'step': 19948, 'epoch': 3} {'type': 'loss', 'content': 0.0479525551199913, 'timestamp': '2025-09-10 02:49:24.490512', 'step': 19949, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 2847885091968}, 'timestamp': '2025-09-10 02:49:24.520094', 'step': 19949, 'epoch': 3} {'type': 'loss', 'content': 0.04576442390680313, 'timestamp': '2025-09-10 02:49:24.522344', 'step': 19950, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 128], 'flops': 3797092544000}, 'timestamp': '2025-09-10 02:49:24.552876', 'step': 19950, 'epoch': 3} {'type': 'loss', 'content': 0.07163157314062119, 'timestamp': '2025-09-10 02:49:24.555173', 'step': 19951, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 3322488817984}, 'timestamp': '2025-09-10 02:49:24.585769', 'step': 19951, 'epoch': 3} {'type': 'loss', 'content': 0.09309028834104538, 'timestamp': '2025-09-10 02:49:24.609099', 'step': 19952, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 128], 'flops': 3797092544000}, 'timestamp': '2025-09-10 02:49:24.639481', 'step': 19952, 'epoch': 3} {'type': 'loss', 'content': 0.07116194069385529, 'timestamp': '2025-09-10 02:49:24.642071', 'step': 19953, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 2847885091968}, 'timestamp': '2025-09-10 02:49:24.671820', 'step': 19953, 'epoch': 3} {'type': 'loss', 'content': 0.06716546416282654, 'timestamp': '2025-09-10 02:49:24.673888', 'step': 19954, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 3322488817984}, 'timestamp': '2025-09-10 02:49:24.706313', 'step': 19954, 'epoch': 3} {'type': 'loss', 'content': 0.056164052337408066, 'timestamp': '2025-09-10 02:49:24.709353', 'step': 19955, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 3322488817984}, 'timestamp': '2025-09-10 02:49:24.740131', 'step': 19955, 'epoch': 3} {'type': 'loss', 'content': 0.05783569812774658, 'timestamp': '2025-09-10 02:49:24.763964', 'step': 19956, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 144], 'flops': 4271696270016}, 'timestamp': '2025-09-10 02:49:24.793508', 'step': 19956, 'epoch': 3} {'type': 'loss', 'content': 0.08262266218662262, 'timestamp': '2025-09-10 02:49:24.795790', 'step': 19957, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 128], 'flops': 3797092544000}, 'timestamp': '2025-09-10 02:49:24.824844', 'step': 19957, 'epoch': 3} {'type': 'loss', 'content': 0.06286142021417618, 'timestamp': '2025-09-10 02:49:24.827278', 'step': 19958, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 3322488817984}, 'timestamp': '2025-09-10 02:49:24.857029', 'step': 19958, 'epoch': 3} {'type': 'loss', 'content': 0.04943298175930977, 'timestamp': '2025-09-10 02:49:24.859346', 'step': 19959, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 3322488817984}, 'timestamp': '2025-09-10 02:49:24.889774', 'step': 19959, 'epoch': 3} {'type': 'loss', 'content': 0.12073180079460144, 'timestamp': '2025-09-10 02:49:24.913334', 'step': 19960, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 3322488817984}, 'timestamp': '2025-09-10 02:49:24.942993', 'step': 19960, 'epoch': 3} {'type': 'loss', 'content': 0.036455873399972916, 'timestamp': '2025-09-10 02:49:24.945480', 'step': 19961, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 128], 'flops': 3797092544000}, 'timestamp': '2025-09-10 02:49:24.975168', 'step': 19961, 'epoch': 3} {'type': 'loss', 'content': 0.09497196227312088, 'timestamp': '2025-09-10 02:49:24.976958', 'step': 19962, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 3322488817984}, 'timestamp': '2025-09-10 02:49:25.005773', 'step': 19962, 'epoch': 3} {'type': 'loss', 'content': 0.07912541180849075, 'timestamp': '2025-09-10 02:49:25.007999', 'step': 19963, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 3322488817984}, 'timestamp': '2025-09-10 02:49:25.038199', 'step': 19963, 'epoch': 3} {'type': 'loss', 'content': 0.04367996007204056, 'timestamp': '2025-09-10 02:49:25.062641', 'step': 19964, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 2847885091968}, 'timestamp': '2025-09-10 02:49:25.093416', 'step': 19964, 'epoch': 3} {'type': 'loss', 'content': 0.051627106964588165, 'timestamp': '2025-09-10 02:49:25.096432', 'step': 19965, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 160], 'flops': 4746299996032}, 'timestamp': '2025-09-10 02:49:25.129785', 'step': 19965, 'epoch': 3} {'type': 'loss', 'content': 0.04665721207857132, 'timestamp': '2025-09-10 02:49:25.132811', 'step': 19966, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 2847885091968}, 'timestamp': '2025-09-10 02:49:25.167015', 'step': 19966, 'epoch': 3} {'type': 'loss', 'content': 0.07206111401319504, 'timestamp': '2025-09-10 02:49:25.169705', 'step': 19967, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 2847885091968}, 'timestamp': '2025-09-10 02:49:25.203743', 'step': 19967, 'epoch': 3} {'type': 'loss', 'content': 0.09404053539037704, 'timestamp': '2025-09-10 02:49:25.227301', 'step': 19968, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 176], 'flops': 5220903722048}, 'timestamp': '2025-09-10 02:49:25.258460', 'step': 19968, 'epoch': 3} {'type': 'loss', 'content': 0.04424720257520676, 'timestamp': '2025-09-10 02:49:25.260901', 'step': 19969, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 2847885091968}, 'timestamp': '2025-09-10 02:49:25.290496', 'step': 19969, 'epoch': 3} {'type': 'loss', 'content': 0.05537774786353111, 'timestamp': '2025-09-10 02:49:25.292554', 'step': 19970, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 3322488817984}, 'timestamp': '2025-09-10 02:49:25.322198', 'step': 19970, 'epoch': 3} {'type': 'loss', 'content': 0.028222592547535896, 'timestamp': '2025-09-10 02:49:25.324550', 'step': 19971, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 3322488817984}, 'timestamp': '2025-09-10 02:49:25.354319', 'step': 19971, 'epoch': 3} {'type': 'loss', 'content': 0.09232289344072342, 'timestamp': '2025-09-10 02:49:25.377586', 'step': 19972, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 3322488817984}, 'timestamp': '2025-09-10 02:49:25.407174', 'step': 19972, 'epoch': 3} {'type': 'loss', 'content': 0.04664158076047897, 'timestamp': '2025-09-10 02:49:25.409494', 'step': 19973, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 128], 'flops': 3797092544000}, 'timestamp': '2025-09-10 02:49:25.440876', 'step': 19973, 'epoch': 3} {'type': 'loss', 'content': 0.04131219908595085, 'timestamp': '2025-09-10 02:49:25.443161', 'step': 19974, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 144], 'flops': 4271696270016}, 'timestamp': '2025-09-10 02:49:25.474595', 'step': 19974, 'epoch': 3} {'type': 'loss', 'content': 0.06901230663061142, 'timestamp': '2025-09-10 02:49:25.477432', 'step': 19975, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 2847885091968}, 'timestamp': '2025-09-10 02:49:25.507410', 'step': 19975, 'epoch': 3} {'type': 'loss', 'content': 0.025172501802444458, 'timestamp': '2025-09-10 02:49:25.530792', 'step': 19976, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 80], 'flops': 2373281365952}, 'timestamp': '2025-09-10 02:49:25.560553', 'step': 19976, 'epoch': 3} {'type': 'loss', 'content': 0.13597576320171356, 'timestamp': '2025-09-10 02:49:25.562838', 'step': 19977, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 2847885091968}, 'timestamp': '2025-09-10 02:49:25.592230', 'step': 19977, 'epoch': 3} {'type': 'loss', 'content': 0.06256718188524246, 'timestamp': '2025-09-10 02:49:25.594616', 'step': 19978, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 160], 'flops': 4746299996032}, 'timestamp': '2025-09-10 02:49:25.624596', 'step': 19978, 'epoch': 3} {'type': 'loss', 'content': 0.04327788203954697, 'timestamp': '2025-09-10 02:49:25.627817', 'step': 19979, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 128], 'flops': 3797092544000}, 'timestamp': '2025-09-10 02:49:25.657152', 'step': 19979, 'epoch': 3} {'type': 'loss', 'content': 0.038586363196372986, 'timestamp': '2025-09-10 02:49:25.680548', 'step': 19980, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 3322488817984}, 'timestamp': '2025-09-10 02:49:25.710726', 'step': 19980, 'epoch': 3} {'type': 'loss', 'content': 0.04283769056200981, 'timestamp': '2025-09-10 02:49:25.713188', 'step': 19981, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 2847885091968}, 'timestamp': '2025-09-10 02:49:25.743449', 'step': 19981, 'epoch': 3} {'type': 'loss', 'content': 0.02691325731575489, 'timestamp': '2025-09-10 02:49:25.745443', 'step': 19982, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 2847885091968}, 'timestamp': '2025-09-10 02:49:25.776332', 'step': 19982, 'epoch': 3} {'type': 'loss', 'content': 0.05760972201824188, 'timestamp': '2025-09-10 02:49:25.778553', 'step': 19983, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 2847885091968}, 'timestamp': '2025-09-10 02:49:25.807700', 'step': 19983, 'epoch': 3} {'type': 'loss', 'content': 0.10353472083806992, 'timestamp': '2025-09-10 02:49:25.831083', 'step': 19984, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 2847885091968}, 'timestamp': '2025-09-10 02:49:25.861117', 'step': 19984, 'epoch': 3} {'type': 'loss', 'content': 0.06834674626588821, 'timestamp': '2025-09-10 02:49:25.863820', 'step': 19985, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 3322488817984}, 'timestamp': '2025-09-10 02:49:25.893540', 'step': 19985, 'epoch': 3} {'type': 'loss', 'content': 0.07036803662776947, 'timestamp': '2025-09-10 02:49:25.895677', 'step': 19986, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 2847885091968}, 'timestamp': '2025-09-10 02:49:25.924705', 'step': 19986, 'epoch': 3} {'type': 'loss', 'content': 0.06335864961147308, 'timestamp': '2025-09-10 02:49:25.928535', 'step': 19987, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 3322488817984}, 'timestamp': '2025-09-10 02:49:25.968837', 'step': 19987, 'epoch': 3} {'type': 'loss', 'content': 0.007778830360621214, 'timestamp': '2025-09-10 02:49:25.992839', 'step': 19988, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 160], 'flops': 4746299996032}, 'timestamp': '2025-09-10 02:49:26.023839', 'step': 19988, 'epoch': 3} {'type': 'loss', 'content': 0.04631653428077698, 'timestamp': '2025-09-10 02:49:26.026260', 'step': 19989, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 3322488817984}, 'timestamp': '2025-09-10 02:49:26.056159', 'step': 19989, 'epoch': 3} {'type': 'loss', 'content': 0.06349816918373108, 'timestamp': '2025-09-10 02:49:26.058685', 'step': 19990, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 2847885091968}, 'timestamp': '2025-09-10 02:49:26.091081', 'step': 19990, 'epoch': 3} {'type': 'loss', 'content': 0.07185153663158417, 'timestamp': '2025-09-10 02:49:26.093755', 'step': 19991, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 3322488817984}, 'timestamp': '2025-09-10 02:49:26.123244', 'step': 19991, 'epoch': 3} {'type': 'loss', 'content': 0.027501534670591354, 'timestamp': '2025-09-10 02:49:26.147067', 'step': 19992, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 2847885091968}, 'timestamp': '2025-09-10 02:49:26.181825', 'step': 19992, 'epoch': 3} {'type': 'loss', 'content': 0.047644391655921936, 'timestamp': '2025-09-10 02:49:26.189144', 'step': 19993, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 128], 'flops': 3797092544000}, 'timestamp': '2025-09-10 02:49:26.228343', 'step': 19993, 'epoch': 3} {'type': 'loss', 'content': 0.07230740785598755, 'timestamp': '2025-09-10 02:49:26.230854', 'step': 19994, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 2847885091968}, 'timestamp': '2025-09-10 02:49:26.262291', 'step': 19994, 'epoch': 3} {'type': 'loss', 'content': 0.04048525169491768, 'timestamp': '2025-09-10 02:49:26.265511', 'step': 19995, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 2847885091968}, 'timestamp': '2025-09-10 02:49:26.296379', 'step': 19995, 'epoch': 3} {'type': 'loss', 'content': 0.0643431767821312, 'timestamp': '2025-09-10 02:49:26.319850', 'step': 19996, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 2847885091968}, 'timestamp': '2025-09-10 02:49:26.351471', 'step': 19996, 'epoch': 3} {'type': 'loss', 'content': 0.030651908367872238, 'timestamp': '2025-09-10 02:49:26.354393', 'step': 19997, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 3322488817984}, 'timestamp': '2025-09-10 02:49:26.387829', 'step': 19997, 'epoch': 3} {'type': 'loss', 'content': 0.0884058028459549, 'timestamp': '2025-09-10 02:49:26.390435', 'step': 19998, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 144], 'flops': 4271696270016}, 'timestamp': '2025-09-10 02:49:26.421657', 'step': 19998, 'epoch': 3} {'type': 'loss', 'content': 0.08221792429685593, 'timestamp': '2025-09-10 02:49:26.424471', 'step': 19999, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 3322488817984}, 'timestamp': '2025-09-10 02:49:26.455107', 'step': 19999, 'epoch': 3} {'type': 'loss', 'content': 0.015586520545184612, 'timestamp': '2025-09-10 02:49:26.480876', 'step': 20000, 'epoch': 3} {'type': 'info', 'content': 'Checkpoint saved at step 20000', 'timestamp': '2025-09-10 02:49:31.266891', 'step': 20000, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 128], 'flops': 3797092544000}, 'timestamp': '2025-09-10 02:49:31.311630', 'step': 20000, 'epoch': 3} {'type': 'loss', 'content': 0.05402610823512077, 'timestamp': '2025-09-10 02:49:31.314416', 'step': 20001, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 2847885091968}, 'timestamp': '2025-09-10 02:49:31.346297', 'step': 20001, 'epoch': 3} {'type': 'loss', 'content': 0.1380055993795395, 'timestamp': '2025-09-10 02:49:31.348683', 'step': 20002, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 144], 'flops': 4271696270016}, 'timestamp': '2025-09-10 02:49:31.379231', 'step': 20002, 'epoch': 3} {'type': 'loss', 'content': 0.055430904030799866, 'timestamp': '2025-09-10 02:49:31.381808', 'step': 20003, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 3322488817984}, 'timestamp': '2025-09-10 02:49:31.411546', 'step': 20003, 'epoch': 3} {'type': 'loss', 'content': 0.04132416099309921, 'timestamp': '2025-09-10 02:49:31.435625', 'step': 20004, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 3322488817984}, 'timestamp': '2025-09-10 02:49:31.465539', 'step': 20004, 'epoch': 3} {'type': 'loss', 'content': 0.0789768174290657, 'timestamp': '2025-09-10 02:49:31.467732', 'step': 20005, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 2847885091968}, 'timestamp': '2025-09-10 02:49:31.497054', 'step': 20005, 'epoch': 3} {'type': 'loss', 'content': 0.049833230674266815, 'timestamp': '2025-09-10 02:49:31.499597', 'step': 20006, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 128], 'flops': 3797092544000}, 'timestamp': '2025-09-10 02:49:31.529712', 'step': 20006, 'epoch': 3} {'type': 'loss', 'content': 0.11329387128353119, 'timestamp': '2025-09-10 02:49:31.532143', 'step': 20007, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 2847885091968}, 'timestamp': '2025-09-10 02:49:31.562027', 'step': 20007, 'epoch': 3} {'type': 'loss', 'content': 0.04512343183159828, 'timestamp': '2025-09-10 02:49:31.586089', 'step': 20008, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 128], 'flops': 3797092544000}, 'timestamp': '2025-09-10 02:49:31.615675', 'step': 20008, 'epoch': 3} {'type': 'loss', 'content': 0.055567190051078796, 'timestamp': '2025-09-10 02:49:31.617863', 'step': 20009, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 3322488817984}, 'timestamp': '2025-09-10 02:49:31.649423', 'step': 20009, 'epoch': 3} {'type': 'loss', 'content': 0.13568010926246643, 'timestamp': '2025-09-10 02:49:31.651599', 'step': 20010, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 3322488817984}, 'timestamp': '2025-09-10 02:49:31.681209', 'step': 20010, 'epoch': 3} {'type': 'loss', 'content': 0.08690398931503296, 'timestamp': '2025-09-10 02:49:31.683289', 'step': 20011, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 128], 'flops': 3797092544000}, 'timestamp': '2025-09-10 02:49:31.712879', 'step': 20011, 'epoch': 3} {'type': 'loss', 'content': 0.08105476945638657, 'timestamp': '2025-09-10 02:49:31.736776', 'step': 20012, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 2847885091968}, 'timestamp': '2025-09-10 02:49:31.769744', 'step': 20012, 'epoch': 3} {'type': 'loss', 'content': 0.012821419164538383, 'timestamp': '2025-09-10 02:49:31.771904', 'step': 20013, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 2847885091968}, 'timestamp': '2025-09-10 02:49:31.801454', 'step': 20013, 'epoch': 3} {'type': 'loss', 'content': 0.11019690334796906, 'timestamp': '2025-09-10 02:49:31.803668', 'step': 20014, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 2847885091968}, 'timestamp': '2025-09-10 02:49:31.833030', 'step': 20014, 'epoch': 3} {'type': 'loss', 'content': 0.16206274926662445, 'timestamp': '2025-09-10 02:49:31.835257', 'step': 20015, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 2847885091968}, 'timestamp': '2025-09-10 02:49:31.865160', 'step': 20015, 'epoch': 3} {'type': 'loss', 'content': 0.11448049545288086, 'timestamp': '2025-09-10 02:49:31.888857', 'step': 20016, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 2847885091968}, 'timestamp': '2025-09-10 02:49:31.918770', 'step': 20016, 'epoch': 3} {'type': 'loss', 'content': 0.04955296590924263, 'timestamp': '2025-09-10 02:49:31.921071', 'step': 20017, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 3322488817984}, 'timestamp': '2025-09-10 02:49:31.954173', 'step': 20017, 'epoch': 3} {'type': 'loss', 'content': 0.07947923988103867, 'timestamp': '2025-09-10 02:49:31.956311', 'step': 20018, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 2847885091968}, 'timestamp': '2025-09-10 02:49:31.986213', 'step': 20018, 'epoch': 3} {'type': 'loss', 'content': 0.1106756329536438, 'timestamp': '2025-09-10 02:49:31.988322', 'step': 20019, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 144], 'flops': 4271696270016}, 'timestamp': '2025-09-10 02:49:32.017702', 'step': 20019, 'epoch': 3} {'type': 'loss', 'content': 0.09056723862886429, 'timestamp': '2025-09-10 02:49:32.041391', 'step': 20020, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 144], 'flops': 4271696270016}, 'timestamp': '2025-09-10 02:49:32.071508', 'step': 20020, 'epoch': 3} {'type': 'loss', 'content': 0.020890219137072563, 'timestamp': '2025-09-10 02:49:32.074115', 'step': 20021, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 2847885091968}, 'timestamp': '2025-09-10 02:49:32.103893', 'step': 20021, 'epoch': 3} {'type': 'loss', 'content': 0.08731155097484589, 'timestamp': '2025-09-10 02:49:32.105931', 'step': 20022, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 3322488817984}, 'timestamp': '2025-09-10 02:49:32.135287', 'step': 20022, 'epoch': 3} {'type': 'loss', 'content': 0.14649614691734314, 'timestamp': '2025-09-10 02:49:32.137311', 'step': 20023, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 3322488817984}, 'timestamp': '2025-09-10 02:49:32.166857', 'step': 20023, 'epoch': 3} {'type': 'loss', 'content': 0.10454507917165756, 'timestamp': '2025-09-10 02:49:32.190385', 'step': 20024, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 2847885091968}, 'timestamp': '2025-09-10 02:49:32.219724', 'step': 20024, 'epoch': 3} {'type': 'loss', 'content': 0.07102809101343155, 'timestamp': '2025-09-10 02:49:32.222146', 'step': 20025, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 3322488817984}, 'timestamp': '2025-09-10 02:49:32.251415', 'step': 20025, 'epoch': 3} {'type': 'loss', 'content': 0.016403893008828163, 'timestamp': '2025-09-10 02:49:32.253512', 'step': 20026, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 128], 'flops': 3797092544000}, 'timestamp': '2025-09-10 02:49:32.283039', 'step': 20026, 'epoch': 3} {'type': 'loss', 'content': 0.0601663663983345, 'timestamp': '2025-09-10 02:49:32.285379', 'step': 20027, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 2847885091968}, 'timestamp': '2025-09-10 02:49:32.315132', 'step': 20027, 'epoch': 3} {'type': 'loss', 'content': 0.052371442317962646, 'timestamp': '2025-09-10 02:49:32.338431', 'step': 20028, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 3322488817984}, 'timestamp': '2025-09-10 02:49:32.367758', 'step': 20028, 'epoch': 3} {'type': 'loss', 'content': 0.11969327926635742, 'timestamp': '2025-09-10 02:49:32.370519', 'step': 20029, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 2847885091968}, 'timestamp': '2025-09-10 02:49:32.401736', 'step': 20029, 'epoch': 3} {'type': 'loss', 'content': 0.13858072459697723, 'timestamp': '2025-09-10 02:49:32.404297', 'step': 20030, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 3322488817984}, 'timestamp': '2025-09-10 02:49:32.434223', 'step': 20030, 'epoch': 3} {'type': 'loss', 'content': 0.07260271906852722, 'timestamp': '2025-09-10 02:49:32.437020', 'step': 20031, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 3322488817984}, 'timestamp': '2025-09-10 02:49:32.466844', 'step': 20031, 'epoch': 3} {'type': 'loss', 'content': 0.07247167080640793, 'timestamp': '2025-09-10 02:49:32.490580', 'step': 20032, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 128], 'flops': 3797092544000}, 'timestamp': '2025-09-10 02:49:32.520426', 'step': 20032, 'epoch': 3} {'type': 'loss', 'content': 0.07618481665849686, 'timestamp': '2025-09-10 02:49:32.522320', 'step': 20033, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 144], 'flops': 4271696270016}, 'timestamp': '2025-09-10 02:49:32.553844', 'step': 20033, 'epoch': 3} {'type': 'loss', 'content': 0.10179392993450165, 'timestamp': '2025-09-10 02:49:32.556478', 'step': 20034, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 80], 'flops': 2373281365952}, 'timestamp': '2025-09-10 02:49:32.589032', 'step': 20034, 'epoch': 3} {'type': 'loss', 'content': 0.09027349203824997, 'timestamp': '2025-09-10 02:49:32.591311', 'step': 20035, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 3322488817984}, 'timestamp': '2025-09-10 02:49:32.622294', 'step': 20035, 'epoch': 3} {'type': 'loss', 'content': 0.0416422113776207, 'timestamp': '2025-09-10 02:49:32.645979', 'step': 20036, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 80], 'flops': 2373281365952}, 'timestamp': '2025-09-10 02:49:32.678601', 'step': 20036, 'epoch': 3} {'type': 'loss', 'content': 0.06714381277561188, 'timestamp': '2025-09-10 02:49:32.681367', 'step': 20037, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 128], 'flops': 3797092544000}, 'timestamp': '2025-09-10 02:49:32.712776', 'step': 20037, 'epoch': 3} {'type': 'loss', 'content': 0.10367380827665329, 'timestamp': '2025-09-10 02:49:32.715284', 'step': 20038, 'epoch': 3} {'type': 'flops', 'content': [{'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1582003754624}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1898404492032}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1898404492032}, {'type': 'perplexity', 'in_batch_dim': [8, 144], 'batch_size': 8, 'flops': 2847606704256}, {'type': 'perplexity', 'in_batch_dim': [8, 64], 'batch_size': 8, 'flops': 1265603017216}, {'type': 'perplexity', 'in_batch_dim': [8, 112], 'batch_size': 8, 'flops': 2214805229440}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1898404492032}, {'type': 'perplexity', 'in_batch_dim': [8, 112], 'batch_size': 8, 'flops': 2214805229440}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1898404492032}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1898404492032}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1898404492032}, {'type': 'perplexity', 'in_batch_dim': [8, 112], 'batch_size': 8, 'flops': 2214805229440}, {'type': 'perplexity', 'in_batch_dim': [8, 112], 'batch_size': 8, 'flops': 2214805229440}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1582003754624}, {'type': 'perplexity', 'in_batch_dim': [8, 144], 'batch_size': 8, 'flops': 2847606704256}, {'type': 'perplexity', 'in_batch_dim': [8, 112], 'batch_size': 8, 'flops': 2214805229440}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1582003754624}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1898404492032}, {'type': 'perplexity', 'in_batch_dim': [8, 64], 'batch_size': 8, 'flops': 1265603017216}, {'type': 'perplexity', 'in_batch_dim': [8, 64], 'batch_size': 8, 'flops': 1265603017216}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1898404492032}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1582003754624}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1582003754624}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1582003754624}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1582003754624}, {'type': 'perplexity', 'in_batch_dim': [8, 64], 'batch_size': 8, 'flops': 1265603017216}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1898404492032}, {'type': 'perplexity', 'in_batch_dim': [8, 64], 'batch_size': 8, 'flops': 1265603017216}, {'type': 'perplexity', 'in_batch_dim': [8, 64], 'batch_size': 8, 'flops': 1265603017216}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1582003754624}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1582003754624}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1582003754624}, {'type': 'perplexity', 'in_batch_dim': [8, 112], 'batch_size': 8, 'flops': 2214805229440}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1582003754624}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1582003754624}, {'type': 'perplexity', 'in_batch_dim': [8, 160], 'batch_size': 8, 'flops': 3164007441664}, {'type': 'perplexity', 'in_batch_dim': [8, 64], 'batch_size': 8, 'flops': 1265603017216}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1898404492032}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1898404492032}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1898404492032}, {'type': 'perplexity', 'in_batch_dim': [8, 64], 'batch_size': 8, 'flops': 1265603017216}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1582003754624}, {'type': 'perplexity', 'in_batch_dim': [8, 64], 'batch_size': 8, 'flops': 1265603017216}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1582003754624}, {'type': 'perplexity', 'in_batch_dim': [8, 64], 'batch_size': 8, 'flops': 1265603017216}, {'type': 'perplexity', 'in_batch_dim': [8, 112], 'batch_size': 8, 'flops': 2214805229440}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1582003754624}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1898404492032}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1898404492032}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1898404492032}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1898404492032}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1898404492032}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1898404492032}, {'type': 'perplexity', 'in_batch_dim': [8, 64], 'batch_size': 8, 'flops': 1265603017216}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1898404492032}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1582003754624}, {'type': 'perplexity', 'in_batch_dim': [8, 64], 'batch_size': 8, 'flops': 1265603017216}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1898404492032}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1898404492032}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1898404492032}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1898404492032}, {'type': 'perplexity', 'in_batch_dim': [8, 128], 'batch_size': 8, 'flops': 2531205966848}, {'type': 'perplexity', 'in_batch_dim': [8, 112], 'batch_size': 8, 'flops': 2214805229440}, {'type': 'perplexity', 'in_batch_dim': [8, 64], 'batch_size': 8, 'flops': 1265603017216}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1582003754624}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1898404492032}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1582003754624}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1898404492032}, {'type': 'perplexity', 'in_batch_dim': [8, 64], 'batch_size': 8, 'flops': 1265603017216}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1898404492032}, {'type': 'perplexity', 'in_batch_dim': [8, 112], 'batch_size': 8, 'flops': 2214805229440}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1898404492032}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1582003754624}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1582003754624}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1582003754624}, {'type': 'perplexity', 'in_batch_dim': [8, 64], 'batch_size': 8, 'flops': 1265603017216}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1582003754624}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1898404492032}, {'type': 'perplexity', 'in_batch_dim': [8, 64], 'batch_size': 8, 'flops': 1265603017216}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1582003754624}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1898404492032}, {'type': 'perplexity', 'in_batch_dim': [8, 64], 'batch_size': 8, 'flops': 1265603017216}, {'type': 'perplexity', 'in_batch_dim': [8, 112], 'batch_size': 8, 'flops': 2214805229440}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1582003754624}, {'type': 'perplexity', 'in_batch_dim': [8, 144], 'batch_size': 8, 'flops': 2847606704256}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1582003754624}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1582003754624}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1582003754624}, {'type': 'perplexity', 'in_batch_dim': [8, 64], 'batch_size': 8, 'flops': 1265603017216}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1898404492032}, {'type': 'perplexity', 'in_batch_dim': [8, 112], 'batch_size': 8, 'flops': 2214805229440}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1898404492032}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1582003754624}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1582003754624}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1582003754624}, {'type': 'perplexity', 'in_batch_dim': [8, 112], 'batch_size': 8, 'flops': 2214805229440}, {'type': 'perplexity', 'in_batch_dim': [8, 64], 'batch_size': 8, 'flops': 1265603017216}, {'type': 'perplexity', 'in_batch_dim': [8, 112], 'batch_size': 8, 'flops': 2214805229440}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1582003754624}, {'type': 'perplexity', 'in_batch_dim': [8, 64], 'batch_size': 8, 'flops': 1265603017216}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1582003754624}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1582003754624}, {'type': 'perplexity', 'in_batch_dim': [8, 64], 'batch_size': 8, 'flops': 1265603017216}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1582003754624}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1582003754624}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1582003754624}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1898404492032}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1582003754624}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1582003754624}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1898404492032}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1898404492032}, {'type': 'perplexity', 'in_batch_dim': [8, 112], 'batch_size': 8, 'flops': 2214805229440}, {'type': 'perplexity', 'in_batch_dim': [8, 64], 'batch_size': 8, 'flops': 1265603017216}, {'type': 'perplexity', 'in_batch_dim': [8, 112], 'batch_size': 8, 'flops': 2214805229440}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1898404492032}, {'type': 'perplexity', 'in_batch_dim': [8, 112], 'batch_size': 8, 'flops': 2214805229440}, {'type': 'perplexity', 'in_batch_dim': [8, 128], 'batch_size': 8, 'flops': 2531205966848}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1582003754624}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1898404492032}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1898404492032}, {'type': 'perplexity', 'in_batch_dim': [8, 192], 'batch_size': 8, 'flops': 3796808916480}, {'type': 'perplexity', 'in_batch_dim': [8, 64], 'batch_size': 8, 'flops': 1265603017216}, {'type': 'perplexity', 'in_batch_dim': [8, 144], 'batch_size': 8, 'flops': 2847606704256}, {'type': 'perplexity', 'in_batch_dim': [8, 64], 'batch_size': 8, 'flops': 1265603017216}, {'type': 'perplexity', 'in_batch_dim': [8, 144], 'batch_size': 8, 'flops': 2847606704256}, {'type': 'perplexity', 'in_batch_dim': [8, 64], 'batch_size': 8, 'flops': 1265603017216}, {'type': 'perplexity', 'in_batch_dim': [8, 64], 'batch_size': 8, 'flops': 1265603017216}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1898404492032}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1898404492032}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1582003754624}, {'type': 'perplexity', 'in_batch_dim': [8, 128], 'batch_size': 8, 'flops': 2531205966848}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1898404492032}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1898404492032}, {'type': 'perplexity', 'in_batch_dim': [8, 112], 'batch_size': 8, 'flops': 2214805229440}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1582003754624}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1582003754624}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1898404492032}, {'type': 'perplexity', 'in_batch_dim': [8, 64], 'batch_size': 8, 'flops': 1265603017216}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1898404492032}, {'type': 'perplexity', 'in_batch_dim': [8, 112], 'batch_size': 8, 'flops': 2214805229440}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1898404492032}, {'type': 'perplexity', 'in_batch_dim': [8, 64], 'batch_size': 8, 'flops': 1265603017216}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1582003754624}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1898404492032}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1898404492032}, {'type': 'perplexity', 'in_batch_dim': [8, 64], 'batch_size': 8, 'flops': 1265603017216}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1898404492032}, {'type': 'perplexity', 'in_batch_dim': [8, 64], 'batch_size': 8, 'flops': 1265603017216}, {'type': 'perplexity', 'in_batch_dim': [8, 112], 'batch_size': 8, 'flops': 2214805229440}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1582003754624}, {'type': 'perplexity', 'in_batch_dim': [8, 128], 'batch_size': 8, 'flops': 2531205966848}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1582003754624}, {'type': 'perplexity', 'in_batch_dim': [8, 112], 'batch_size': 8, 'flops': 2214805229440}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1898404492032}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1582003754624}, {'type': 'perplexity', 'in_batch_dim': [8, 112], 'batch_size': 8, 'flops': 2214805229440}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1582003754624}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1898404492032}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1898404492032}, {'type': 'perplexity', 'in_batch_dim': [8, 128], 'batch_size': 8, 'flops': 2531205966848}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1898404492032}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1898404492032}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1582003754624}, {'type': 'perplexity', 'in_batch_dim': [8, 112], 'batch_size': 8, 'flops': 2214805229440}, {'type': 'perplexity', 'in_batch_dim': [8, 128], 'batch_size': 8, 'flops': 2531205966848}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1898404492032}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1582003754624}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1582003754624}, {'type': 'perplexity', 'in_batch_dim': [8, 128], 'batch_size': 8, 'flops': 2531205966848}, {'type': 'perplexity', 'in_batch_dim': [8, 112], 'batch_size': 8, 'flops': 2214805229440}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1582003754624}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1582003754624}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1898404492032}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1582003754624}, {'type': 'perplexity', 'in_batch_dim': [8, 64], 'batch_size': 8, 'flops': 1265603017216}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1582003754624}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1582003754624}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1582003754624}, {'type': 'perplexity', 'in_batch_dim': [8, 112], 'batch_size': 8, 'flops': 2214805229440}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1582003754624}, {'type': 'perplexity', 'in_batch_dim': [8, 64], 'batch_size': 8, 'flops': 1265603017216}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1582003754624}, {'type': 'perplexity', 'in_batch_dim': [8, 128], 'batch_size': 8, 'flops': 2531205966848}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1898404492032}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1898404492032}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1582003754624}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1898404492032}, {'type': 'perplexity', 'in_batch_dim': [8, 64], 'batch_size': 8, 'flops': 1265603017216}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1898404492032}, {'type': 'perplexity', 'in_batch_dim': [8, 112], 'batch_size': 8, 'flops': 2214805229440}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1898404492032}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1582003754624}, {'type': 'perplexity', 'in_batch_dim': [8, 112], 'batch_size': 8, 'flops': 2214805229440}, {'type': 'perplexity', 'in_batch_dim': [8, 112], 'batch_size': 8, 'flops': 2214805229440}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1898404492032}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1898404492032}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1898404492032}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1898404492032}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1582003754624}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1582003754624}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1582003754624}, {'type': 'perplexity', 'in_batch_dim': [8, 112], 'batch_size': 8, 'flops': 2214805229440}, {'type': 'perplexity', 'in_batch_dim': [8, 64], 'batch_size': 8, 'flops': 1265603017216}, {'type': 'perplexity', 'in_batch_dim': [8, 144], 'batch_size': 8, 'flops': 2847606704256}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1898404492032}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1898404492032}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1898404492032}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1898404492032}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1898404492032}, {'type': 'perplexity', 'in_batch_dim': [8, 112], 'batch_size': 8, 'flops': 2214805229440}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1582003754624}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1582003754624}, {'type': 'perplexity', 'in_batch_dim': [8, 64], 'batch_size': 8, 'flops': 1265603017216}, {'type': 'perplexity', 'in_batch_dim': [8, 112], 'batch_size': 8, 'flops': 2214805229440}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1582003754624}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1898404492032}, {'type': 'perplexity', 'in_batch_dim': [8, 64], 'batch_size': 8, 'flops': 1265603017216}, {'type': 'perplexity', 'in_batch_dim': [8, 112], 'batch_size': 8, 'flops': 2214805229440}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1898404492032}, {'type': 'perplexity', 'in_batch_dim': [8, 64], 'batch_size': 8, 'flops': 1265603017216}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1582003754624}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1582003754624}, {'type': 'perplexity', 'in_batch_dim': [8, 64], 'batch_size': 8, 'flops': 1265603017216}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1898404492032}, {'type': 'perplexity', 'in_batch_dim': [8, 64], 'batch_size': 8, 'flops': 1265603017216}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1582003754624}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1898404492032}, {'type': 'perplexity', 'in_batch_dim': [8, 112], 'batch_size': 8, 'flops': 2214805229440}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1582003754624}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1898404492032}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1582003754624}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1898404492032}, {'type': 'perplexity', 'in_batch_dim': [8, 112], 'batch_size': 8, 'flops': 2214805229440}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1898404492032}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1898404492032}, {'type': 'perplexity', 'in_batch_dim': [8, 64], 'batch_size': 8, 'flops': 1265603017216}, {'type': 'perplexity', 'in_batch_dim': [8, 64], 'batch_size': 8, 'flops': 1265603017216}, {'type': 'perplexity', 'in_batch_dim': [8, 128], 'batch_size': 8, 'flops': 2531205966848}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1582003754624}, {'type': 'perplexity', 'in_batch_dim': [8, 64], 'batch_size': 8, 'flops': 1265603017216}, {'type': 'perplexity', 'in_batch_dim': [8, 112], 'batch_size': 8, 'flops': 2214805229440}, {'type': 'perplexity', 'in_batch_dim': [8, 112], 'batch_size': 8, 'flops': 2214805229440}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1898404492032}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1582003754624}, {'type': 'perplexity', 'in_batch_dim': [8, 128], 'batch_size': 8, 'flops': 2531205966848}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1898404492032}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1582003754624}, {'type': 'perplexity', 'in_batch_dim': [8, 112], 'batch_size': 8, 'flops': 2214805229440}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1898404492032}, {'type': 'perplexity', 'in_batch_dim': [8, 64], 'batch_size': 8, 'flops': 1265603017216}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1582003754624}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1898404492032}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1898404492032}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1582003754624}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1582003754624}, {'type': 'perplexity', 'in_batch_dim': [8, 64], 'batch_size': 8, 'flops': 1265603017216}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1898404492032}, {'type': 'perplexity', 'in_batch_dim': [8, 112], 'batch_size': 8, 'flops': 2214805229440}, {'type': 'perplexity', 'in_batch_dim': [8, 64], 'batch_size': 8, 'flops': 1265603017216}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1898404492032}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1582003754624}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1582003754624}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1898404492032}, {'type': 'perplexity', 'in_batch_dim': [8, 112], 'batch_size': 8, 'flops': 2214805229440}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1898404492032}, {'type': 'perplexity', 'in_batch_dim': [8, 112], 'batch_size': 8, 'flops': 2214805229440}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1582003754624}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1582003754624}, {'type': 'perplexity', 'in_batch_dim': [8, 64], 'batch_size': 8, 'flops': 1265603017216}, {'type': 'perplexity', 'in_batch_dim': [8, 112], 'batch_size': 8, 'flops': 2214805229440}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1582003754624}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1898404492032}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1582003754624}, {'type': 'perplexity', 'in_batch_dim': [8, 112], 'batch_size': 8, 'flops': 2214805229440}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1898404492032}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1898404492032}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1582003754624}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1898404492032}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1582003754624}, {'type': 'perplexity', 'in_batch_dim': [8, 112], 'batch_size': 8, 'flops': 2214805229440}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1582003754624}, {'type': 'perplexity', 'in_batch_dim': [8, 64], 'batch_size': 8, 'flops': 1265603017216}, {'type': 'perplexity', 'in_batch_dim': [8, 64], 'batch_size': 8, 'flops': 1265603017216}, {'type': 'perplexity', 'in_batch_dim': [8, 48], 'batch_size': 8, 'flops': 949202279808}, {'type': 'perplexity', 'in_batch_dim': [8, 112], 'batch_size': 8, 'flops': 2214805229440}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1582003754624}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1898404492032}, {'type': 'perplexity', 'in_batch_dim': [8, 128], 'batch_size': 8, 'flops': 2531205966848}, {'type': 'perplexity', 'in_batch_dim': [8, 112], 'batch_size': 8, 'flops': 2214805229440}, {'type': 'perplexity', 'in_batch_dim': [8, 128], 'batch_size': 8, 'flops': 2531205966848}, {'type': 'perplexity', 'in_batch_dim': [8, 112], 'batch_size': 8, 'flops': 2214805229440}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1582003754624}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1898404492032}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1582003754624}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1582003754624}, {'type': 'perplexity', 'in_batch_dim': [8, 112], 'batch_size': 8, 'flops': 2214805229440}, {'type': 'perplexity', 'in_batch_dim': [8, 112], 'batch_size': 8, 'flops': 2214805229440}, {'type': 'perplexity', 'in_batch_dim': [8, 64], 'batch_size': 8, 'flops': 1265603017216}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1898404492032}, {'type': 'perplexity', 'in_batch_dim': [8, 112], 'batch_size': 8, 'flops': 2214805229440}, {'type': 'perplexity', 'in_batch_dim': [8, 64], 'batch_size': 8, 'flops': 1265603017216}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1898404492032}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1582003754624}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1898404492032}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1898404492032}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1898404492032}, {'type': 'perplexity', 'in_batch_dim': [8, 48], 'batch_size': 8, 'flops': 949202279808}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1898404492032}, {'type': 'perplexity', 'in_batch_dim': [8, 112], 'batch_size': 8, 'flops': 2214805229440}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1582003754624}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1898404492032}, {'type': 'perplexity', 'in_batch_dim': [8, 64], 'batch_size': 8, 'flops': 1265603017216}, {'type': 'perplexity', 'in_batch_dim': [8, 112], 'batch_size': 8, 'flops': 2214805229440}, {'type': 'perplexity', 'in_batch_dim': [8, 64], 'batch_size': 8, 'flops': 1265603017216}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1582003754624}, {'type': 'perplexity', 'in_batch_dim': [8, 64], 'batch_size': 8, 'flops': 1265603017216}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1582003754624}, {'type': 'perplexity', 'in_batch_dim': [8, 112], 'batch_size': 8, 'flops': 2214805229440}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1582003754624}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1582003754624}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1582003754624}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1582003754624}, {'type': 'perplexity', 'in_batch_dim': [8, 112], 'batch_size': 8, 'flops': 2214805229440}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1582003754624}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1898404492032}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1898404492032}, {'type': 'perplexity', 'in_batch_dim': [8, 112], 'batch_size': 8, 'flops': 2214805229440}, {'type': 'perplexity', 'in_batch_dim': [8, 144], 'batch_size': 8, 'flops': 2847606704256}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1582003754624}, {'type': 'perplexity', 'in_batch_dim': [8, 64], 'batch_size': 8, 'flops': 1265603017216}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1898404492032}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1898404492032}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1582003754624}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1898404492032}, {'type': 'perplexity', 'in_batch_dim': [8, 128], 'batch_size': 8, 'flops': 2531205966848}, {'type': 'perplexity', 'in_batch_dim': [8, 112], 'batch_size': 8, 'flops': 2214805229440}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1898404492032}, {'type': 'perplexity', 'in_batch_dim': [8, 64], 'batch_size': 8, 'flops': 1265603017216}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1898404492032}, {'type': 'perplexity', 'in_batch_dim': [8, 160], 'batch_size': 8, 'flops': 3164007441664}, {'type': 'perplexity', 'in_batch_dim': [8, 64], 'batch_size': 8, 'flops': 1265603017216}, {'type': 'perplexity', 'in_batch_dim': [8, 112], 'batch_size': 8, 'flops': 2214805229440}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1898404492032}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1898404492032}, {'type': 'perplexity', 'in_batch_dim': [8, 144], 'batch_size': 8, 'flops': 2847606704256}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1582003754624}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1898404492032}, {'type': 'perplexity', 'in_batch_dim': [8, 64], 'batch_size': 8, 'flops': 1265603017216}, {'type': 'perplexity', 'in_batch_dim': [8, 64], 'batch_size': 8, 'flops': 1265603017216}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1898404492032}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1898404492032}, {'type': 'perplexity', 'in_batch_dim': [8, 112], 'batch_size': 8, 'flops': 2214805229440}, {'type': 'perplexity', 'in_batch_dim': [8, 112], 'batch_size': 8, 'flops': 2214805229440}, {'type': 'perplexity', 'in_batch_dim': [8, 128], 'batch_size': 8, 'flops': 2531205966848}, {'type': 'perplexity', 'in_batch_dim': [8, 112], 'batch_size': 8, 'flops': 2214805229440}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1582003754624}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1582003754624}, {'type': 'perplexity', 'in_batch_dim': [8, 112], 'batch_size': 8, 'flops': 2214805229440}, {'type': 'perplexity', 'in_batch_dim': [8, 128], 'batch_size': 8, 'flops': 2531205966848}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1582003754624}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1582003754624}, {'type': 'perplexity', 'in_batch_dim': [8, 64], 'batch_size': 8, 'flops': 1265603017216}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1898404492032}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1582003754624}, {'type': 'perplexity', 'in_batch_dim': [8, 112], 'batch_size': 8, 'flops': 2214805229440}, {'type': 'perplexity', 'in_batch_dim': [8, 64], 'batch_size': 8, 'flops': 1265603017216}, {'type': 'perplexity', 'in_batch_dim': [8, 64], 'batch_size': 8, 'flops': 1265603017216}, {'type': 'perplexity', 'in_batch_dim': [8, 128], 'batch_size': 8, 'flops': 2531205966848}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1898404492032}, {'type': 'perplexity', 'in_batch_dim': [8, 112], 'batch_size': 8, 'flops': 2214805229440}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1582003754624}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1582003754624}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1582003754624}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1582003754624}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1582003754624}, {'type': 'perplexity', 'in_batch_dim': [8, 112], 'batch_size': 8, 'flops': 2214805229440}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1898404492032}, {'type': 'perplexity', 'in_batch_dim': [8, 112], 'batch_size': 8, 'flops': 2214805229440}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1898404492032}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1582003754624}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1582003754624}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1898404492032}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1898404492032}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1582003754624}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1582003754624}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1582003754624}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1582003754624}, {'type': 'perplexity', 'in_batch_dim': [8, 48], 'batch_size': 8, 'flops': 949202279808}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1582003754624}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1898404492032}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1582003754624}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1898404492032}, {'type': 'perplexity', 'in_batch_dim': [8, 112], 'batch_size': 8, 'flops': 2214805229440}, {'type': 'perplexity', 'in_batch_dim': [8, 128], 'batch_size': 8, 'flops': 2531205966848}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1898404492032}, {'type': 'perplexity', 'in_batch_dim': [8, 64], 'batch_size': 8, 'flops': 1265603017216}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1898404492032}, {'type': 'perplexity', 'in_batch_dim': [8, 64], 'batch_size': 8, 'flops': 1265603017216}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1582003754624}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1582003754624}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1582003754624}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1582003754624}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1898404492032}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1582003754624}, {'type': 'perplexity', 'in_batch_dim': [8, 112], 'batch_size': 8, 'flops': 2214805229440}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1898404492032}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1582003754624}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1898404492032}, {'type': 'perplexity', 'in_batch_dim': [8, 64], 'batch_size': 8, 'flops': 1265603017216}, {'type': 'perplexity', 'in_batch_dim': [8, 64], 'batch_size': 8, 'flops': 1265603017216}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1898404492032}, {'type': 'perplexity', 'in_batch_dim': [8, 64], 'batch_size': 8, 'flops': 1265603017216}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1582003754624}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1582003754624}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1582003754624}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1582003754624}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1582003754624}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1582003754624}, {'type': 'perplexity', 'in_batch_dim': [8, 64], 'batch_size': 8, 'flops': 1265603017216}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1582003754624}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1582003754624}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1898404492032}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1898404492032}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1898404492032}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1898404492032}, {'type': 'perplexity', 'in_batch_dim': [8, 128], 'batch_size': 8, 'flops': 2531205966848}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1582003754624}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1898404492032}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1898404492032}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1898404492032}, {'type': 'perplexity', 'in_batch_dim': [8, 64], 'batch_size': 8, 'flops': 1265603017216}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1898404492032}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1898404492032}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1582003754624}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1898404492032}, {'type': 'perplexity', 'in_batch_dim': [8, 112], 'batch_size': 8, 'flops': 2214805229440}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1898404492032}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1582003754624}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1898404492032}, {'type': 'perplexity', 'in_batch_dim': [8, 64], 'batch_size': 8, 'flops': 1265603017216}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1898404492032}, {'type': 'perplexity', 'in_batch_dim': [8, 112], 'batch_size': 8, 'flops': 2214805229440}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1898404492032}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1898404492032}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1582003754624}, {'type': 'perplexity', 'in_batch_dim': [8, 64], 'batch_size': 8, 'flops': 1265603017216}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1582003754624}, {'type': 'perplexity', 'in_batch_dim': [8, 128], 'batch_size': 8, 'flops': 2531205966848}, {'type': 'perplexity', 'in_batch_dim': [8, 64], 'batch_size': 8, 'flops': 1265603017216}, {'type': 'perplexity', 'in_batch_dim': [8, 128], 'batch_size': 8, 'flops': 2531205966848}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1898404492032}, {'type': 'perplexity', 'in_batch_dim': [8, 112], 'batch_size': 8, 'flops': 2214805229440}, {'type': 'perplexity', 'in_batch_dim': [8, 112], 'batch_size': 8, 'flops': 2214805229440}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1898404492032}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1898404492032}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1582003754624}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1582003754624}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1582003754624}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1898404492032}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1582003754624}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1582003754624}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1898404492032}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1582003754624}, {'type': 'perplexity', 'in_batch_dim': [8, 64], 'batch_size': 8, 'flops': 1265603017216}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1898404492032}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1898404492032}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1898404492032}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1898404492032}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1582003754624}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1898404492032}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1582003754624}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1582003754624}, {'type': 'perplexity', 'in_batch_dim': [8, 112], 'batch_size': 8, 'flops': 2214805229440}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1582003754624}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1898404492032}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1582003754624}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1582003754624}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1582003754624}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1898404492032}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1582003754624}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1898404492032}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1582003754624}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1582003754624}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1898404492032}, {'type': 'perplexity', 'in_batch_dim': [8, 112], 'batch_size': 8, 'flops': 2214805229440}, {'type': 'perplexity', 'in_batch_dim': [8, 128], 'batch_size': 8, 'flops': 2531205966848}, {'type': 'perplexity', 'in_batch_dim': [8, 112], 'batch_size': 8, 'flops': 2214805229440}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1582003754624}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1898404492032}, {'type': 'perplexity', 'in_batch_dim': [8, 64], 'batch_size': 8, 'flops': 1265603017216}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1898404492032}, {'type': 'perplexity', 'in_batch_dim': [8, 64], 'batch_size': 8, 'flops': 1265603017216}, {'type': 'perplexity', 'in_batch_dim': [8, 144], 'batch_size': 8, 'flops': 2847606704256}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1582003754624}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1898404492032}, {'type': 'perplexity', 'in_batch_dim': [8, 112], 'batch_size': 8, 'flops': 2214805229440}, {'type': 'perplexity', 'in_batch_dim': [8, 144], 'batch_size': 8, 'flops': 2847606704256}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1582003754624}, {'type': 'perplexity', 'in_batch_dim': [8, 112], 'batch_size': 8, 'flops': 2214805229440}, {'type': 'perplexity', 'in_batch_dim': [8, 64], 'batch_size': 8, 'flops': 1265603017216}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1582003754624}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1898404492032}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1898404492032}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1582003754624}, {'type': 'perplexity', 'in_batch_dim': [8, 112], 'batch_size': 8, 'flops': 2214805229440}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1582003754624}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1582003754624}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1582003754624}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1582003754624}, {'type': 'perplexity', 'in_batch_dim': [8, 112], 'batch_size': 8, 'flops': 2214805229440}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1898404492032}, {'type': 'perplexity', 'in_batch_dim': [8, 128], 'batch_size': 8, 'flops': 2531205966848}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1582003754624}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1582003754624}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1898404492032}, {'type': 'perplexity', 'in_batch_dim': [8, 64], 'batch_size': 8, 'flops': 1265603017216}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1898404492032}, {'type': 'perplexity', 'in_batch_dim': [8, 64], 'batch_size': 8, 'flops': 1265603017216}, {'type': 'perplexity', 'in_batch_dim': [8, 112], 'batch_size': 8, 'flops': 2214805229440}, {'type': 'perplexity', 'in_batch_dim': [8, 112], 'batch_size': 8, 'flops': 2214805229440}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1582003754624}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1898404492032}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1582003754624}, {'type': 'perplexity', 'in_batch_dim': [8, 64], 'batch_size': 8, 'flops': 1265603017216}, {'type': 'perplexity', 'in_batch_dim': [8, 64], 'batch_size': 8, 'flops': 1265603017216}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1898404492032}, {'type': 'perplexity', 'in_batch_dim': [8, 64], 'batch_size': 8, 'flops': 1265603017216}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1898404492032}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1898404492032}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1582003754624}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1898404492032}, {'type': 'perplexity', 'in_batch_dim': [8, 144], 'batch_size': 8, 'flops': 2847606704256}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1898404492032}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1582003754624}, {'type': 'perplexity', 'in_batch_dim': [8, 128], 'batch_size': 8, 'flops': 2531205966848}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1898404492032}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1898404492032}, {'type': 'perplexity', 'in_batch_dim': [8, 112], 'batch_size': 8, 'flops': 2214805229440}, {'type': 'perplexity', 'in_batch_dim': [8, 112], 'batch_size': 8, 'flops': 2214805229440}, {'type': 'perplexity', 'in_batch_dim': [8, 64], 'batch_size': 8, 'flops': 1265603017216}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1582003754624}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1898404492032}, {'type': 'perplexity', 'in_batch_dim': [8, 128], 'batch_size': 8, 'flops': 2531205966848}, {'type': 'perplexity', 'in_batch_dim': [8, 144], 'batch_size': 8, 'flops': 2847606704256}, {'type': 'perplexity', 'in_batch_dim': [8, 112], 'batch_size': 8, 'flops': 2214805229440}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1582003754624}, {'type': 'perplexity', 'in_batch_dim': [8, 64], 'batch_size': 8, 'flops': 1265603017216}, {'type': 'perplexity', 'in_batch_dim': [8, 64], 'batch_size': 8, 'flops': 1265603017216}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1898404492032}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1582003754624}, {'type': 'perplexity', 'in_batch_dim': [8, 64], 'batch_size': 8, 'flops': 1265603017216}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1898404492032}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1582003754624}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1898404492032}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1898404492032}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1582003754624}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1582003754624}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1582003754624}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1582003754624}, {'type': 'perplexity', 'in_batch_dim': [3, 48], 'batch_size': 8, 'flops': 949202279808}], 'timestamp': '2025-09-10 02:49:40.767870', 'step': 20038, 'epoch': 3} {'type': 'pplx', 'content': 15450.52846822636, 'timestamp': '2025-09-10 02:49:40.771401', 'step': 20038, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 2847885091968}, 'timestamp': '2025-09-10 02:49:40.802969', 'step': 20038, 'epoch': 3} {'type': 'loss', 'content': 0.030369797721505165, 'timestamp': '2025-09-10 02:49:40.804993', 'step': 20039, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 128], 'flops': 3797092544000}, 'timestamp': '2025-09-10 02:49:40.835367', 'step': 20039, 'epoch': 3} {'type': 'loss', 'content': 0.1381424218416214, 'timestamp': '2025-09-10 02:49:40.859010', 'step': 20040, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 2847885091968}, 'timestamp': '2025-09-10 02:49:40.889479', 'step': 20040, 'epoch': 3} {'type': 'loss', 'content': 0.07611999660730362, 'timestamp': '2025-09-10 02:49:40.891779', 'step': 20041, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 2847885091968}, 'timestamp': '2025-09-10 02:49:40.922533', 'step': 20041, 'epoch': 3} {'type': 'loss', 'content': 0.03946288302540779, 'timestamp': '2025-09-10 02:49:40.924892', 'step': 20042, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 3322488817984}, 'timestamp': '2025-09-10 02:49:40.955777', 'step': 20042, 'epoch': 3} {'type': 'loss', 'content': 0.03152972832322121, 'timestamp': '2025-09-10 02:49:40.957980', 'step': 20043, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 128], 'flops': 3797092544000}, 'timestamp': '2025-09-10 02:49:40.992963', 'step': 20043, 'epoch': 3} {'type': 'loss', 'content': 0.0787271037697792, 'timestamp': '2025-09-10 02:49:41.016653', 'step': 20044, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 2847885091968}, 'timestamp': '2025-09-10 02:49:41.047104', 'step': 20044, 'epoch': 3} {'type': 'loss', 'content': 0.035383258014917374, 'timestamp': '2025-09-10 02:49:41.050042', 'step': 20045, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 160], 'flops': 4746299996032}, 'timestamp': '2025-09-10 02:49:41.080890', 'step': 20045, 'epoch': 3} {'type': 'loss', 'content': 0.09366852045059204, 'timestamp': '2025-09-10 02:49:41.083444', 'step': 20046, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 2847885091968}, 'timestamp': '2025-09-10 02:49:41.113462', 'step': 20046, 'epoch': 3} {'type': 'loss', 'content': 0.02299726940691471, 'timestamp': '2025-09-10 02:49:41.115685', 'step': 20047, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 128], 'flops': 3797092544000}, 'timestamp': '2025-09-10 02:49:41.146310', 'step': 20047, 'epoch': 3} {'type': 'loss', 'content': 0.06620922684669495, 'timestamp': '2025-09-10 02:49:41.170560', 'step': 20048, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 128], 'flops': 3797092544000}, 'timestamp': '2025-09-10 02:49:41.201466', 'step': 20048, 'epoch': 3} {'type': 'loss', 'content': 0.04001398757100105, 'timestamp': '2025-09-10 02:49:41.203888', 'step': 20049, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 144], 'flops': 4271696270016}, 'timestamp': '2025-09-10 02:49:41.234694', 'step': 20049, 'epoch': 3} {'type': 'loss', 'content': 0.06880643218755722, 'timestamp': '2025-09-10 02:49:41.236971', 'step': 20050, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 128], 'flops': 3797092544000}, 'timestamp': '2025-09-10 02:49:41.268134', 'step': 20050, 'epoch': 3} {'type': 'loss', 'content': 0.05135028809309006, 'timestamp': '2025-09-10 02:49:41.270299', 'step': 20051, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 128], 'flops': 3797092544000}, 'timestamp': '2025-09-10 02:49:41.303369', 'step': 20051, 'epoch': 3} {'type': 'loss', 'content': 0.03955259174108505, 'timestamp': '2025-09-10 02:49:41.328584', 'step': 20052, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 2847885091968}, 'timestamp': '2025-09-10 02:49:41.359206', 'step': 20052, 'epoch': 3} {'type': 'loss', 'content': 0.05050904303789139, 'timestamp': '2025-09-10 02:49:41.361735', 'step': 20053, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 3322488817984}, 'timestamp': '2025-09-10 02:49:41.392244', 'step': 20053, 'epoch': 3} {'type': 'loss', 'content': 0.06479441374540329, 'timestamp': '2025-09-10 02:49:41.394807', 'step': 20054, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 3322488817984}, 'timestamp': '2025-09-10 02:49:41.425313', 'step': 20054, 'epoch': 3} {'type': 'loss', 'content': 0.03655582293868065, 'timestamp': '2025-09-10 02:49:41.427583', 'step': 20055, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 128], 'flops': 3797092544000}, 'timestamp': '2025-09-10 02:49:41.458246', 'step': 20055, 'epoch': 3} {'type': 'loss', 'content': 0.116398386657238, 'timestamp': '2025-09-10 02:49:41.481767', 'step': 20056, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 128], 'flops': 3797092544000}, 'timestamp': '2025-09-10 02:49:41.512955', 'step': 20056, 'epoch': 3} {'type': 'loss', 'content': 0.043260008096694946, 'timestamp': '2025-09-10 02:49:41.515711', 'step': 20057, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 2847885091968}, 'timestamp': '2025-09-10 02:49:41.546342', 'step': 20057, 'epoch': 3} {'type': 'loss', 'content': 0.056531645357608795, 'timestamp': '2025-09-10 02:49:41.548947', 'step': 20058, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 2847885091968}, 'timestamp': '2025-09-10 02:49:41.581179', 'step': 20058, 'epoch': 3} {'type': 'loss', 'content': 0.14431826770305634, 'timestamp': '2025-09-10 02:49:41.583413', 'step': 20059, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 144], 'flops': 4271696270016}, 'timestamp': '2025-09-10 02:49:41.615530', 'step': 20059, 'epoch': 3} {'type': 'loss', 'content': 0.07791578024625778, 'timestamp': '2025-09-10 02:49:41.638963', 'step': 20060, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 128], 'flops': 3797092544000}, 'timestamp': '2025-09-10 02:49:41.669394', 'step': 20060, 'epoch': 3} {'type': 'loss', 'content': 0.17119444906711578, 'timestamp': '2025-09-10 02:49:41.671571', 'step': 20061, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 3322488817984}, 'timestamp': '2025-09-10 02:49:41.702111', 'step': 20061, 'epoch': 3} {'type': 'loss', 'content': 0.07579275965690613, 'timestamp': '2025-09-10 02:49:41.704349', 'step': 20062, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 3322488817984}, 'timestamp': '2025-09-10 02:49:41.733536', 'step': 20062, 'epoch': 3} {'type': 'loss', 'content': 0.04921148717403412, 'timestamp': '2025-09-10 02:49:41.736000', 'step': 20063, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 128], 'flops': 3797092544000}, 'timestamp': '2025-09-10 02:49:41.767129', 'step': 20063, 'epoch': 3} {'type': 'loss', 'content': 0.0476149283349514, 'timestamp': '2025-09-10 02:49:41.790629', 'step': 20064, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 128], 'flops': 3797092544000}, 'timestamp': '2025-09-10 02:49:41.821071', 'step': 20064, 'epoch': 3} {'type': 'loss', 'content': 0.043460555374622345, 'timestamp': '2025-09-10 02:49:41.823308', 'step': 20065, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 3322488817984}, 'timestamp': '2025-09-10 02:49:41.853444', 'step': 20065, 'epoch': 3} {'type': 'loss', 'content': 0.055422261357307434, 'timestamp': '2025-09-10 02:49:41.856891', 'step': 20066, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 2847885091968}, 'timestamp': '2025-09-10 02:49:41.886650', 'step': 20066, 'epoch': 3} {'type': 'loss', 'content': 0.03799125552177429, 'timestamp': '2025-09-10 02:49:41.889004', 'step': 20067, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 2847885091968}, 'timestamp': '2025-09-10 02:49:41.919477', 'step': 20067, 'epoch': 3} {'type': 'loss', 'content': 0.050538308918476105, 'timestamp': '2025-09-10 02:49:41.942948', 'step': 20068, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 2847885091968}, 'timestamp': '2025-09-10 02:49:41.975381', 'step': 20068, 'epoch': 3} {'type': 'loss', 'content': 0.006040473934262991, 'timestamp': '2025-09-10 02:49:41.977695', 'step': 20069, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 128], 'flops': 3797092544000}, 'timestamp': '2025-09-10 02:49:42.007738', 'step': 20069, 'epoch': 3} {'type': 'loss', 'content': 0.07120618969202042, 'timestamp': '2025-09-10 02:49:42.009765', 'step': 20070, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 80], 'flops': 2373281365952}, 'timestamp': '2025-09-10 02:49:42.038773', 'step': 20070, 'epoch': 3} {'type': 'loss', 'content': 0.033098552376031876, 'timestamp': '2025-09-10 02:49:42.040895', 'step': 20071, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 144], 'flops': 4271696270016}, 'timestamp': '2025-09-10 02:49:42.071241', 'step': 20071, 'epoch': 3} {'type': 'loss', 'content': 0.07217230647802353, 'timestamp': '2025-09-10 02:49:42.094847', 'step': 20072, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 144], 'flops': 4271696270016}, 'timestamp': '2025-09-10 02:49:42.128799', 'step': 20072, 'epoch': 3} {'type': 'loss', 'content': 0.09746919572353363, 'timestamp': '2025-09-10 02:49:42.131571', 'step': 20073, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 2847885091968}, 'timestamp': '2025-09-10 02:49:42.162044', 'step': 20073, 'epoch': 3} {'type': 'loss', 'content': 0.0852234959602356, 'timestamp': '2025-09-10 02:49:42.164380', 'step': 20074, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 80], 'flops': 2373281365952}, 'timestamp': '2025-09-10 02:49:42.194734', 'step': 20074, 'epoch': 3} {'type': 'loss', 'content': 0.06056887283921242, 'timestamp': '2025-09-10 02:49:42.197241', 'step': 20075, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 3322488817984}, 'timestamp': '2025-09-10 02:49:42.228023', 'step': 20075, 'epoch': 3} {'type': 'loss', 'content': 0.05662374198436737, 'timestamp': '2025-09-10 02:49:42.251491', 'step': 20076, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 128], 'flops': 3797092544000}, 'timestamp': '2025-09-10 02:49:42.282503', 'step': 20076, 'epoch': 3} {'type': 'loss', 'content': 0.038021333515644073, 'timestamp': '2025-09-10 02:49:42.285913', 'step': 20077, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 128], 'flops': 3797092544000}, 'timestamp': '2025-09-10 02:49:42.316182', 'step': 20077, 'epoch': 3} {'type': 'loss', 'content': 0.03545229509472847, 'timestamp': '2025-09-10 02:49:42.318806', 'step': 20078, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 128], 'flops': 3797092544000}, 'timestamp': '2025-09-10 02:49:42.348888', 'step': 20078, 'epoch': 3} {'type': 'loss', 'content': 0.0510517917573452, 'timestamp': '2025-09-10 02:49:42.351177', 'step': 20079, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 128], 'flops': 3797092544000}, 'timestamp': '2025-09-10 02:49:42.381353', 'step': 20079, 'epoch': 3} {'type': 'loss', 'content': 0.15389040112495422, 'timestamp': '2025-09-10 02:49:42.404584', 'step': 20080, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 128], 'flops': 3797092544000}, 'timestamp': '2025-09-10 02:49:42.434299', 'step': 20080, 'epoch': 3} {'type': 'loss', 'content': 0.017699109390378, 'timestamp': '2025-09-10 02:49:42.436536', 'step': 20081, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 128], 'flops': 3797092544000}, 'timestamp': '2025-09-10 02:49:42.466604', 'step': 20081, 'epoch': 3} {'type': 'loss', 'content': 0.06797691434621811, 'timestamp': '2025-09-10 02:49:42.469332', 'step': 20082, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 144], 'flops': 4271696270016}, 'timestamp': '2025-09-10 02:49:42.500299', 'step': 20082, 'epoch': 3} {'type': 'loss', 'content': 0.09137415885925293, 'timestamp': '2025-09-10 02:49:42.502907', 'step': 20083, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 2847885091968}, 'timestamp': '2025-09-10 02:49:42.534571', 'step': 20083, 'epoch': 3} {'type': 'loss', 'content': 0.07215616852045059, 'timestamp': '2025-09-10 02:49:42.558167', 'step': 20084, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 80], 'flops': 2373281365952}, 'timestamp': '2025-09-10 02:49:42.590129', 'step': 20084, 'epoch': 3} {'type': 'loss', 'content': 0.06686754524707794, 'timestamp': '2025-09-10 02:49:42.592534', 'step': 20085, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 3322488817984}, 'timestamp': '2025-09-10 02:49:42.622988', 'step': 20085, 'epoch': 3} {'type': 'loss', 'content': 0.0656694695353508, 'timestamp': '2025-09-10 02:49:42.624894', 'step': 20086, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 128], 'flops': 3797092544000}, 'timestamp': '2025-09-10 02:49:42.655341', 'step': 20086, 'epoch': 3} {'type': 'loss', 'content': 0.09568064659833908, 'timestamp': '2025-09-10 02:49:42.657972', 'step': 20087, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 2847885091968}, 'timestamp': '2025-09-10 02:49:42.687967', 'step': 20087, 'epoch': 3} {'type': 'loss', 'content': 0.05372905358672142, 'timestamp': '2025-09-10 02:49:42.711421', 'step': 20088, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 80], 'flops': 2373281365952}, 'timestamp': '2025-09-10 02:49:42.742191', 'step': 20088, 'epoch': 3} {'type': 'loss', 'content': 0.09502767026424408, 'timestamp': '2025-09-10 02:49:42.744611', 'step': 20089, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 2847885091968}, 'timestamp': '2025-09-10 02:49:42.775938', 'step': 20089, 'epoch': 3} {'type': 'loss', 'content': 0.06394726783037186, 'timestamp': '2025-09-10 02:49:42.778401', 'step': 20090, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 128], 'flops': 3797092544000}, 'timestamp': '2025-09-10 02:49:42.808489', 'step': 20090, 'epoch': 3} {'type': 'loss', 'content': 0.10291620343923569, 'timestamp': '2025-09-10 02:49:42.810993', 'step': 20091, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 3322488817984}, 'timestamp': '2025-09-10 02:49:42.840137', 'step': 20091, 'epoch': 3} {'type': 'loss', 'content': 0.09494555741548538, 'timestamp': '2025-09-10 02:49:42.864006', 'step': 20092, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 2847885091968}, 'timestamp': '2025-09-10 02:49:42.896034', 'step': 20092, 'epoch': 3} {'type': 'loss', 'content': 0.025730866938829422, 'timestamp': '2025-09-10 02:49:42.898228', 'step': 20093, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 3322488817984}, 'timestamp': '2025-09-10 02:49:42.928564', 'step': 20093, 'epoch': 3} {'type': 'loss', 'content': 0.05974351242184639, 'timestamp': '2025-09-10 02:49:42.930942', 'step': 20094, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 160], 'flops': 4746299996032}, 'timestamp': '2025-09-10 02:49:42.962187', 'step': 20094, 'epoch': 3} {'type': 'loss', 'content': 0.0713753029704094, 'timestamp': '2025-09-10 02:49:42.964921', 'step': 20095, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 3322488817984}, 'timestamp': '2025-09-10 02:49:42.997771', 'step': 20095, 'epoch': 3} {'type': 'loss', 'content': 0.031137842684984207, 'timestamp': '2025-09-10 02:49:43.021177', 'step': 20096, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 192], 'flops': 5695507448064}, 'timestamp': '2025-09-10 02:49:43.051544', 'step': 20096, 'epoch': 3} {'type': 'loss', 'content': 0.027322323992848396, 'timestamp': '2025-09-10 02:49:43.054365', 'step': 20097, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 80], 'flops': 2373281365952}, 'timestamp': '2025-09-10 02:49:43.084129', 'step': 20097, 'epoch': 3} {'type': 'loss', 'content': 0.016234517097473145, 'timestamp': '2025-09-10 02:49:43.086766', 'step': 20098, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 128], 'flops': 3797092544000}, 'timestamp': '2025-09-10 02:49:43.117564', 'step': 20098, 'epoch': 3} {'type': 'loss', 'content': 0.012497041374444962, 'timestamp': '2025-09-10 02:49:43.119790', 'step': 20099, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 128], 'flops': 3797092544000}, 'timestamp': '2025-09-10 02:49:43.151604', 'step': 20099, 'epoch': 3} {'type': 'loss', 'content': 0.048067767173051834, 'timestamp': '2025-09-10 02:49:43.182927', 'step': 20100, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 3322488817984}, 'timestamp': '2025-09-10 02:49:43.215037', 'step': 20100, 'epoch': 3} {'type': 'loss', 'content': 0.06860409677028656, 'timestamp': '2025-09-10 02:49:43.217406', 'step': 20101, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 128], 'flops': 3797092544000}, 'timestamp': '2025-09-10 02:49:43.247175', 'step': 20101, 'epoch': 3} {'type': 'loss', 'content': 0.08897766470909119, 'timestamp': '2025-09-10 02:49:43.249268', 'step': 20102, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 3322488817984}, 'timestamp': '2025-09-10 02:49:43.280339', 'step': 20102, 'epoch': 3} {'type': 'loss', 'content': 0.07162509858608246, 'timestamp': '2025-09-10 02:49:43.283785', 'step': 20103, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 3322488817984}, 'timestamp': '2025-09-10 02:49:43.315860', 'step': 20103, 'epoch': 3} {'type': 'loss', 'content': 0.07183340936899185, 'timestamp': '2025-09-10 02:49:43.349690', 'step': 20104, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 2847885091968}, 'timestamp': '2025-09-10 02:49:43.380171', 'step': 20104, 'epoch': 3} {'type': 'loss', 'content': 0.05330873653292656, 'timestamp': '2025-09-10 02:49:43.382697', 'step': 20105, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 3322488817984}, 'timestamp': '2025-09-10 02:49:43.413397', 'step': 20105, 'epoch': 3} {'type': 'loss', 'content': 0.0808858722448349, 'timestamp': '2025-09-10 02:49:43.418926', 'step': 20106, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 2847885091968}, 'timestamp': '2025-09-10 02:49:43.462195', 'step': 20106, 'epoch': 3} {'type': 'loss', 'content': 0.02406330220401287, 'timestamp': '2025-09-10 02:49:43.468451', 'step': 20107, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 2847885091968}, 'timestamp': '2025-09-10 02:49:43.508296', 'step': 20107, 'epoch': 3} {'type': 'loss', 'content': 0.07292229682207108, 'timestamp': '2025-09-10 02:49:43.531631', 'step': 20108, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 2847885091968}, 'timestamp': '2025-09-10 02:49:43.562457', 'step': 20108, 'epoch': 3} {'type': 'loss', 'content': 0.044923774898052216, 'timestamp': '2025-09-10 02:49:43.564953', 'step': 20109, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 3322488817984}, 'timestamp': '2025-09-10 02:49:43.594977', 'step': 20109, 'epoch': 3} {'type': 'loss', 'content': 0.050150614231824875, 'timestamp': '2025-09-10 02:49:43.597147', 'step': 20110, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 2847885091968}, 'timestamp': '2025-09-10 02:49:43.627558', 'step': 20110, 'epoch': 3} {'type': 'loss', 'content': 0.058554649353027344, 'timestamp': '2025-09-10 02:49:43.630023', 'step': 20111, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 128], 'flops': 3797092544000}, 'timestamp': '2025-09-10 02:49:43.660242', 'step': 20111, 'epoch': 3} {'type': 'loss', 'content': 0.10022293776273727, 'timestamp': '2025-09-10 02:49:43.683772', 'step': 20112, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 80], 'flops': 2373281365952}, 'timestamp': '2025-09-10 02:49:43.714577', 'step': 20112, 'epoch': 3} {'type': 'loss', 'content': 0.04423481225967407, 'timestamp': '2025-09-10 02:49:43.716715', 'step': 20113, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 2847885091968}, 'timestamp': '2025-09-10 02:49:43.746672', 'step': 20113, 'epoch': 3} {'type': 'loss', 'content': 0.08787471801042557, 'timestamp': '2025-09-10 02:49:43.749105', 'step': 20114, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 144], 'flops': 4271696270016}, 'timestamp': '2025-09-10 02:49:43.780765', 'step': 20114, 'epoch': 3} {'type': 'loss', 'content': 0.028617147356271744, 'timestamp': '2025-09-10 02:49:43.783308', 'step': 20115, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 2847885091968}, 'timestamp': '2025-09-10 02:49:43.813236', 'step': 20115, 'epoch': 3} {'type': 'loss', 'content': 0.04094497859477997, 'timestamp': '2025-09-10 02:49:43.836614', 'step': 20116, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 144], 'flops': 4271696270016}, 'timestamp': '2025-09-10 02:49:43.866969', 'step': 20116, 'epoch': 3} {'type': 'loss', 'content': 0.14038623869419098, 'timestamp': '2025-09-10 02:49:43.869229', 'step': 20117, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 2847885091968}, 'timestamp': '2025-09-10 02:49:43.899305', 'step': 20117, 'epoch': 3} {'type': 'loss', 'content': 0.06185755506157875, 'timestamp': '2025-09-10 02:49:43.901530', 'step': 20118, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 128], 'flops': 3797092544000}, 'timestamp': '2025-09-10 02:49:43.931712', 'step': 20118, 'epoch': 3} {'type': 'loss', 'content': 0.037287235260009766, 'timestamp': '2025-09-10 02:49:43.933751', 'step': 20119, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 3322488817984}, 'timestamp': '2025-09-10 02:49:43.963443', 'step': 20119, 'epoch': 3} {'type': 'loss', 'content': 0.12228760868310928, 'timestamp': '2025-09-10 02:49:43.987055', 'step': 20120, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 2847885091968}, 'timestamp': '2025-09-10 02:49:44.017360', 'step': 20120, 'epoch': 3} {'type': 'loss', 'content': 0.12417453527450562, 'timestamp': '2025-09-10 02:49:44.019358', 'step': 20121, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 3322488817984}, 'timestamp': '2025-09-10 02:49:44.049175', 'step': 20121, 'epoch': 3} {'type': 'loss', 'content': 0.09616276621818542, 'timestamp': '2025-09-10 02:49:44.051277', 'step': 20122, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 176], 'flops': 5220903722048}, 'timestamp': '2025-09-10 02:49:44.082266', 'step': 20122, 'epoch': 3} {'type': 'loss', 'content': 0.03244169428944588, 'timestamp': '2025-09-10 02:49:44.086843', 'step': 20123, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 2847885091968}, 'timestamp': '2025-09-10 02:49:44.117245', 'step': 20123, 'epoch': 3} {'type': 'loss', 'content': 0.045951467007398605, 'timestamp': '2025-09-10 02:49:44.140231', 'step': 20124, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 3322488817984}, 'timestamp': '2025-09-10 02:49:44.170436', 'step': 20124, 'epoch': 3} {'type': 'loss', 'content': 0.028301902115345, 'timestamp': '2025-09-10 02:49:44.172963', 'step': 20125, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 144], 'flops': 4271696270016}, 'timestamp': '2025-09-10 02:49:44.203684', 'step': 20125, 'epoch': 3} {'type': 'loss', 'content': 0.04014693573117256, 'timestamp': '2025-09-10 02:49:44.206203', 'step': 20126, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 128], 'flops': 3797092544000}, 'timestamp': '2025-09-10 02:49:44.236741', 'step': 20126, 'epoch': 3} {'type': 'loss', 'content': 0.05351511389017105, 'timestamp': '2025-09-10 02:49:44.239061', 'step': 20127, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 144], 'flops': 4271696270016}, 'timestamp': '2025-09-10 02:49:44.271449', 'step': 20127, 'epoch': 3} {'type': 'loss', 'content': 0.04777977243065834, 'timestamp': '2025-09-10 02:49:44.294859', 'step': 20128, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 3322488817984}, 'timestamp': '2025-09-10 02:49:44.326534', 'step': 20128, 'epoch': 3} {'type': 'loss', 'content': 0.07914262264966965, 'timestamp': '2025-09-10 02:49:44.328657', 'step': 20129, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 3322488817984}, 'timestamp': '2025-09-10 02:49:44.358868', 'step': 20129, 'epoch': 3} {'type': 'loss', 'content': 0.06292114406824112, 'timestamp': '2025-09-10 02:49:44.361084', 'step': 20130, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 128], 'flops': 3797092544000}, 'timestamp': '2025-09-10 02:49:44.391429', 'step': 20130, 'epoch': 3} {'type': 'loss', 'content': 0.055791303515434265, 'timestamp': '2025-09-10 02:49:44.393867', 'step': 20131, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 3322488817984}, 'timestamp': '2025-09-10 02:49:44.424377', 'step': 20131, 'epoch': 3} {'type': 'loss', 'content': 0.01651848666369915, 'timestamp': '2025-09-10 02:49:44.448021', 'step': 20132, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 3322488817984}, 'timestamp': '2025-09-10 02:49:44.478204', 'step': 20132, 'epoch': 3} {'type': 'loss', 'content': 0.044132694602012634, 'timestamp': '2025-09-10 02:49:44.480748', 'step': 20133, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 128], 'flops': 3797092544000}, 'timestamp': '2025-09-10 02:49:44.511564', 'step': 20133, 'epoch': 3} {'type': 'loss', 'content': 0.1373330056667328, 'timestamp': '2025-09-10 02:49:44.513409', 'step': 20134, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 128], 'flops': 3797092544000}, 'timestamp': '2025-09-10 02:49:44.542962', 'step': 20134, 'epoch': 3} {'type': 'loss', 'content': 0.09654270112514496, 'timestamp': '2025-09-10 02:49:44.545152', 'step': 20135, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 3322488817984}, 'timestamp': '2025-09-10 02:49:44.575242', 'step': 20135, 'epoch': 3} {'type': 'loss', 'content': 0.06547138094902039, 'timestamp': '2025-09-10 02:49:44.598665', 'step': 20136, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 3322488817984}, 'timestamp': '2025-09-10 02:49:44.630143', 'step': 20136, 'epoch': 3} {'type': 'loss', 'content': 0.04522280395030975, 'timestamp': '2025-09-10 02:49:44.632480', 'step': 20137, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 128], 'flops': 3797092544000}, 'timestamp': '2025-09-10 02:49:44.662971', 'step': 20137, 'epoch': 3} {'type': 'loss', 'content': 0.044427935034036636, 'timestamp': '2025-09-10 02:49:44.665595', 'step': 20138, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 3322488817984}, 'timestamp': '2025-09-10 02:49:44.696234', 'step': 20138, 'epoch': 3} {'type': 'loss', 'content': 0.051060013473033905, 'timestamp': '2025-09-10 02:49:44.698375', 'step': 20139, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 128], 'flops': 3797092544000}, 'timestamp': '2025-09-10 02:49:44.727944', 'step': 20139, 'epoch': 3} {'type': 'loss', 'content': 0.04476609081029892, 'timestamp': '2025-09-10 02:49:44.751500', 'step': 20140, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 144], 'flops': 4271696270016}, 'timestamp': '2025-09-10 02:49:44.783252', 'step': 20140, 'epoch': 3} {'type': 'loss', 'content': 0.12240589410066605, 'timestamp': '2025-09-10 02:49:44.786408', 'step': 20141, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 128], 'flops': 3797092544000}, 'timestamp': '2025-09-10 02:49:44.819278', 'step': 20141, 'epoch': 3} {'type': 'loss', 'content': 0.03883139789104462, 'timestamp': '2025-09-10 02:49:44.821325', 'step': 20142, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 2847885091968}, 'timestamp': '2025-09-10 02:49:44.850933', 'step': 20142, 'epoch': 3} {'type': 'loss', 'content': 0.022245310246944427, 'timestamp': '2025-09-10 02:49:44.853196', 'step': 20143, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 2847885091968}, 'timestamp': '2025-09-10 02:49:44.884206', 'step': 20143, 'epoch': 3} {'type': 'loss', 'content': 0.10119105875492096, 'timestamp': '2025-09-10 02:49:44.907694', 'step': 20144, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 128], 'flops': 3797092544000}, 'timestamp': '2025-09-10 02:49:44.938361', 'step': 20144, 'epoch': 3} {'type': 'loss', 'content': 0.014416033402085304, 'timestamp': '2025-09-10 02:49:44.940710', 'step': 20145, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 2847885091968}, 'timestamp': '2025-09-10 02:49:44.970512', 'step': 20145, 'epoch': 3} {'type': 'loss', 'content': 0.059767864644527435, 'timestamp': '2025-09-10 02:49:44.973476', 'step': 20146, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 128], 'flops': 3797092544000}, 'timestamp': '2025-09-10 02:49:45.004382', 'step': 20146, 'epoch': 3} {'type': 'loss', 'content': 0.06717924773693085, 'timestamp': '2025-09-10 02:49:45.007317', 'step': 20147, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 3322488817984}, 'timestamp': '2025-09-10 02:49:45.037394', 'step': 20147, 'epoch': 3} {'type': 'loss', 'content': 0.018807366490364075, 'timestamp': '2025-09-10 02:49:45.060889', 'step': 20148, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 2847885091968}, 'timestamp': '2025-09-10 02:49:45.091292', 'step': 20148, 'epoch': 3} {'type': 'loss', 'content': 0.07978171855211258, 'timestamp': '2025-09-10 02:49:45.092987', 'step': 20149, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 2847885091968}, 'timestamp': '2025-09-10 02:49:45.123517', 'step': 20149, 'epoch': 3} {'type': 'loss', 'content': 0.1121903732419014, 'timestamp': '2025-09-10 02:49:45.125805', 'step': 20150, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 3322488817984}, 'timestamp': '2025-09-10 02:49:45.155808', 'step': 20150, 'epoch': 3} {'type': 'loss', 'content': 0.013296815566718578, 'timestamp': '2025-09-10 02:49:45.159285', 'step': 20151, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 80], 'flops': 2373281365952}, 'timestamp': '2025-09-10 02:49:45.188782', 'step': 20151, 'epoch': 3} {'type': 'loss', 'content': 0.012744725681841373, 'timestamp': '2025-09-10 02:49:45.212507', 'step': 20152, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 2847885091968}, 'timestamp': '2025-09-10 02:49:45.241970', 'step': 20152, 'epoch': 3} {'type': 'loss', 'content': 0.007995215244591236, 'timestamp': '2025-09-10 02:49:45.244620', 'step': 20153, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 128], 'flops': 3797092544000}, 'timestamp': '2025-09-10 02:49:45.276717', 'step': 20153, 'epoch': 3} {'type': 'loss', 'content': 0.11909854412078857, 'timestamp': '2025-09-10 02:49:45.279228', 'step': 20154, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 2847885091968}, 'timestamp': '2025-09-10 02:49:45.309884', 'step': 20154, 'epoch': 3} {'type': 'loss', 'content': 0.007068650331348181, 'timestamp': '2025-09-10 02:49:45.312000', 'step': 20155, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 128], 'flops': 3797092544000}, 'timestamp': '2025-09-10 02:49:45.341029', 'step': 20155, 'epoch': 3} {'type': 'loss', 'content': 0.05436920374631882, 'timestamp': '2025-09-10 02:49:45.364616', 'step': 20156, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 3322488817984}, 'timestamp': '2025-09-10 02:49:45.394858', 'step': 20156, 'epoch': 3} {'type': 'loss', 'content': 0.04386330395936966, 'timestamp': '2025-09-10 02:49:45.397090', 'step': 20157, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 80], 'flops': 2373281365952}, 'timestamp': '2025-09-10 02:49:45.429290', 'step': 20157, 'epoch': 3} {'type': 'loss', 'content': 0.05397539585828781, 'timestamp': '2025-09-10 02:49:45.431514', 'step': 20158, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 2847885091968}, 'timestamp': '2025-09-10 02:49:45.461408', 'step': 20158, 'epoch': 3} {'type': 'loss', 'content': 0.08696066588163376, 'timestamp': '2025-09-10 02:49:45.463474', 'step': 20159, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 3322488817984}, 'timestamp': '2025-09-10 02:49:45.493496', 'step': 20159, 'epoch': 3} {'type': 'loss', 'content': 0.08635818958282471, 'timestamp': '2025-09-10 02:49:45.517199', 'step': 20160, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 3322488817984}, 'timestamp': '2025-09-10 02:49:45.547537', 'step': 20160, 'epoch': 3} {'type': 'loss', 'content': 0.04582022503018379, 'timestamp': '2025-09-10 02:49:45.550512', 'step': 20161, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 2847885091968}, 'timestamp': '2025-09-10 02:49:45.581647', 'step': 20161, 'epoch': 3} {'type': 'loss', 'content': 0.09859916567802429, 'timestamp': '2025-09-10 02:49:45.584487', 'step': 20162, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 2847885091968}, 'timestamp': '2025-09-10 02:49:45.614748', 'step': 20162, 'epoch': 3} {'type': 'loss', 'content': 0.03205329552292824, 'timestamp': '2025-09-10 02:49:45.616895', 'step': 20163, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 80], 'flops': 2373281365952}, 'timestamp': '2025-09-10 02:49:45.646336', 'step': 20163, 'epoch': 3} {'type': 'loss', 'content': 0.10132760554552078, 'timestamp': '2025-09-10 02:49:45.670125', 'step': 20164, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 2847885091968}, 'timestamp': '2025-09-10 02:49:45.700522', 'step': 20164, 'epoch': 3} {'type': 'loss', 'content': 0.016558395698666573, 'timestamp': '2025-09-10 02:49:45.702848', 'step': 20165, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 3322488817984}, 'timestamp': '2025-09-10 02:49:45.734954', 'step': 20165, 'epoch': 3} {'type': 'loss', 'content': 0.05092356353998184, 'timestamp': '2025-09-10 02:49:45.737243', 'step': 20166, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 128], 'flops': 3797092544000}, 'timestamp': '2025-09-10 02:49:45.767890', 'step': 20166, 'epoch': 3} {'type': 'loss', 'content': 0.058347005397081375, 'timestamp': '2025-09-10 02:49:45.770694', 'step': 20167, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 144], 'flops': 4271696270016}, 'timestamp': '2025-09-10 02:49:45.801328', 'step': 20167, 'epoch': 3} {'type': 'loss', 'content': 0.07942529022693634, 'timestamp': '2025-09-10 02:49:45.825080', 'step': 20168, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 3322488817984}, 'timestamp': '2025-09-10 02:49:45.858006', 'step': 20168, 'epoch': 3} {'type': 'loss', 'content': 0.019961180165410042, 'timestamp': '2025-09-10 02:49:45.860354', 'step': 20169, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 128], 'flops': 3797092544000}, 'timestamp': '2025-09-10 02:49:45.890574', 'step': 20169, 'epoch': 3} {'type': 'loss', 'content': 0.12526345252990723, 'timestamp': '2025-09-10 02:49:45.893001', 'step': 20170, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 3322488817984}, 'timestamp': '2025-09-10 02:49:45.923183', 'step': 20170, 'epoch': 3} {'type': 'loss', 'content': 0.06784214079380035, 'timestamp': '2025-09-10 02:49:45.925489', 'step': 20171, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 2847885091968}, 'timestamp': '2025-09-10 02:49:45.955367', 'step': 20171, 'epoch': 3} {'type': 'loss', 'content': 0.05456063151359558, 'timestamp': '2025-09-10 02:49:45.978970', 'step': 20172, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 3322488817984}, 'timestamp': '2025-09-10 02:49:46.010909', 'step': 20172, 'epoch': 3} {'type': 'loss', 'content': 0.06811711192131042, 'timestamp': '2025-09-10 02:49:46.013284', 'step': 20173, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 3322488817984}, 'timestamp': '2025-09-10 02:49:46.043381', 'step': 20173, 'epoch': 3} {'type': 'loss', 'content': 0.057562123984098434, 'timestamp': '2025-09-10 02:49:46.045996', 'step': 20174, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 128], 'flops': 3797092544000}, 'timestamp': '2025-09-10 02:49:46.076185', 'step': 20174, 'epoch': 3} {'type': 'loss', 'content': 0.08341408520936966, 'timestamp': '2025-09-10 02:49:46.078607', 'step': 20175, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 3322488817984}, 'timestamp': '2025-09-10 02:49:46.108459', 'step': 20175, 'epoch': 3} {'type': 'loss', 'content': 0.05817805603146553, 'timestamp': '2025-09-10 02:49:46.133177', 'step': 20176, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 80], 'flops': 2373281365952}, 'timestamp': '2025-09-10 02:49:46.163136', 'step': 20176, 'epoch': 3} {'type': 'loss', 'content': 0.06411593407392502, 'timestamp': '2025-09-10 02:49:46.165731', 'step': 20177, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 128], 'flops': 3797092544000}, 'timestamp': '2025-09-10 02:49:46.196365', 'step': 20177, 'epoch': 3} {'type': 'loss', 'content': 0.064786396920681, 'timestamp': '2025-09-10 02:49:46.198630', 'step': 20178, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 2847885091968}, 'timestamp': '2025-09-10 02:49:46.230576', 'step': 20178, 'epoch': 3} {'type': 'loss', 'content': 0.04233473166823387, 'timestamp': '2025-09-10 02:49:46.232984', 'step': 20179, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 3322488817984}, 'timestamp': '2025-09-10 02:49:46.275576', 'step': 20179, 'epoch': 3} {'type': 'loss', 'content': 0.03569700941443443, 'timestamp': '2025-09-10 02:49:46.299556', 'step': 20180, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 144], 'flops': 4271696270016}, 'timestamp': '2025-09-10 02:49:46.338568', 'step': 20180, 'epoch': 3} {'type': 'loss', 'content': 0.012906042858958244, 'timestamp': '2025-09-10 02:49:46.341396', 'step': 20181, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 3322488817984}, 'timestamp': '2025-09-10 02:49:46.374865', 'step': 20181, 'epoch': 3} {'type': 'loss', 'content': 0.034995730966329575, 'timestamp': '2025-09-10 02:49:46.377590', 'step': 20182, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 128], 'flops': 3797092544000}, 'timestamp': '2025-09-10 02:49:46.408373', 'step': 20182, 'epoch': 3} {'type': 'loss', 'content': 0.02650424838066101, 'timestamp': '2025-09-10 02:49:46.410789', 'step': 20183, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 128], 'flops': 3797092544000}, 'timestamp': '2025-09-10 02:49:46.441580', 'step': 20183, 'epoch': 3} {'type': 'loss', 'content': 0.015153053216636181, 'timestamp': '2025-09-10 02:49:46.465536', 'step': 20184, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 2847885091968}, 'timestamp': '2025-09-10 02:49:46.500516', 'step': 20184, 'epoch': 3} {'type': 'loss', 'content': 0.0926559641957283, 'timestamp': '2025-09-10 02:49:46.506093', 'step': 20185, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 2847885091968}, 'timestamp': '2025-09-10 02:49:46.541042', 'step': 20185, 'epoch': 3} {'type': 'loss', 'content': 0.06485193222761154, 'timestamp': '2025-09-10 02:49:46.543391', 'step': 20186, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 80], 'flops': 2373281365952}, 'timestamp': '2025-09-10 02:49:46.573714', 'step': 20186, 'epoch': 3} {'type': 'loss', 'content': 0.06416034698486328, 'timestamp': '2025-09-10 02:49:46.576729', 'step': 20187, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 3322488817984}, 'timestamp': '2025-09-10 02:49:46.610065', 'step': 20187, 'epoch': 3} {'type': 'loss', 'content': 0.008267948403954506, 'timestamp': '2025-09-10 02:49:46.636118', 'step': 20188, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 144], 'flops': 4271696270016}, 'timestamp': '2025-09-10 02:49:46.671649', 'step': 20188, 'epoch': 3} {'type': 'loss', 'content': 0.09391650557518005, 'timestamp': '2025-09-10 02:49:46.676520', 'step': 20189, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 3322488817984}, 'timestamp': '2025-09-10 02:49:46.707955', 'step': 20189, 'epoch': 3} {'type': 'loss', 'content': 0.06410104036331177, 'timestamp': '2025-09-10 02:49:46.710790', 'step': 20190, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 3322488817984}, 'timestamp': '2025-09-10 02:49:46.742639', 'step': 20190, 'epoch': 3} {'type': 'loss', 'content': 0.07379629462957382, 'timestamp': '2025-09-10 02:49:46.749710', 'step': 20191, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 3322488817984}, 'timestamp': '2025-09-10 02:49:46.784519', 'step': 20191, 'epoch': 3} {'type': 'loss', 'content': 0.02069843001663685, 'timestamp': '2025-09-10 02:49:46.807935', 'step': 20192, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 144], 'flops': 4271696270016}, 'timestamp': '2025-09-10 02:49:46.839448', 'step': 20192, 'epoch': 3} {'type': 'loss', 'content': 0.11814209818840027, 'timestamp': '2025-09-10 02:49:46.842097', 'step': 20193, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 128], 'flops': 3797092544000}, 'timestamp': '2025-09-10 02:49:46.883047', 'step': 20193, 'epoch': 3} {'type': 'loss', 'content': 0.08604718744754791, 'timestamp': '2025-09-10 02:49:46.896553', 'step': 20194, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 80], 'flops': 2373281365952}, 'timestamp': '2025-09-10 02:49:46.937450', 'step': 20194, 'epoch': 3} {'type': 'loss', 'content': 0.057171497493982315, 'timestamp': '2025-09-10 02:49:46.939866', 'step': 20195, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 128], 'flops': 3797092544000}, 'timestamp': '2025-09-10 02:49:46.970868', 'step': 20195, 'epoch': 3} {'type': 'loss', 'content': 0.049965713173151016, 'timestamp': '2025-09-10 02:49:46.995593', 'step': 20196, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 128], 'flops': 3797092544000}, 'timestamp': '2025-09-10 02:49:47.027154', 'step': 20196, 'epoch': 3} {'type': 'loss', 'content': 0.027522891759872437, 'timestamp': '2025-09-10 02:49:47.029638', 'step': 20197, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 3322488817984}, 'timestamp': '2025-09-10 02:49:47.075054', 'step': 20197, 'epoch': 3} {'type': 'loss', 'content': 0.07425841689109802, 'timestamp': '2025-09-10 02:49:47.077877', 'step': 20198, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 2847885091968}, 'timestamp': '2025-09-10 02:49:47.108331', 'step': 20198, 'epoch': 3} {'type': 'loss', 'content': 0.15745891630649567, 'timestamp': '2025-09-10 02:49:47.114405', 'step': 20199, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 3322488817984}, 'timestamp': '2025-09-10 02:49:47.149286', 'step': 20199, 'epoch': 3} {'type': 'loss', 'content': 0.06491285562515259, 'timestamp': '2025-09-10 02:49:47.173085', 'step': 20200, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 3322488817984}, 'timestamp': '2025-09-10 02:49:47.204485', 'step': 20200, 'epoch': 3} {'type': 'loss', 'content': 0.09529457241296768, 'timestamp': '2025-09-10 02:49:47.206922', 'step': 20201, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 80], 'flops': 2373281365952}, 'timestamp': '2025-09-10 02:49:47.237467', 'step': 20201, 'epoch': 3} {'type': 'loss', 'content': 0.13203966617584229, 'timestamp': '2025-09-10 02:49:47.240985', 'step': 20202, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 144], 'flops': 4271696270016}, 'timestamp': '2025-09-10 02:49:47.272565', 'step': 20202, 'epoch': 3} {'type': 'loss', 'content': 0.08142494410276413, 'timestamp': '2025-09-10 02:49:47.275080', 'step': 20203, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 2847885091968}, 'timestamp': '2025-09-10 02:49:47.309945', 'step': 20203, 'epoch': 3} {'type': 'loss', 'content': 0.08182253688573837, 'timestamp': '2025-09-10 02:49:47.333660', 'step': 20204, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 3322488817984}, 'timestamp': '2025-09-10 02:49:47.365617', 'step': 20204, 'epoch': 3} {'type': 'loss', 'content': 0.11684603989124298, 'timestamp': '2025-09-10 02:49:47.368916', 'step': 20205, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 80], 'flops': 2373281365952}, 'timestamp': '2025-09-10 02:49:47.404346', 'step': 20205, 'epoch': 3} {'type': 'loss', 'content': 0.1606302559375763, 'timestamp': '2025-09-10 02:49:47.407247', 'step': 20206, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 144], 'flops': 4271696270016}, 'timestamp': '2025-09-10 02:49:47.440702', 'step': 20206, 'epoch': 3} {'type': 'loss', 'content': 0.043757569044828415, 'timestamp': '2025-09-10 02:49:47.447026', 'step': 20207, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 3322488817984}, 'timestamp': '2025-09-10 02:49:47.481681', 'step': 20207, 'epoch': 3} {'type': 'loss', 'content': 0.052915945649147034, 'timestamp': '2025-09-10 02:49:47.505335', 'step': 20208, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 2847885091968}, 'timestamp': '2025-09-10 02:49:47.545212', 'step': 20208, 'epoch': 3} {'type': 'loss', 'content': 0.05802835896611214, 'timestamp': '2025-09-10 02:49:47.547752', 'step': 20209, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 128], 'flops': 3797092544000}, 'timestamp': '2025-09-10 02:49:47.593328', 'step': 20209, 'epoch': 3} {'type': 'loss', 'content': 0.10034310072660446, 'timestamp': '2025-09-10 02:49:47.596650', 'step': 20210, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 80], 'flops': 2373281365952}, 'timestamp': '2025-09-10 02:49:47.629367', 'step': 20210, 'epoch': 3} {'type': 'loss', 'content': 0.09692586213350296, 'timestamp': '2025-09-10 02:49:47.632649', 'step': 20211, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 128], 'flops': 3797092544000}, 'timestamp': '2025-09-10 02:49:47.665207', 'step': 20211, 'epoch': 3} {'type': 'loss', 'content': 0.06373263150453568, 'timestamp': '2025-09-10 02:49:47.689434', 'step': 20212, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 128], 'flops': 3797092544000}, 'timestamp': '2025-09-10 02:49:47.720701', 'step': 20212, 'epoch': 3} {'type': 'loss', 'content': 0.1569349318742752, 'timestamp': '2025-09-10 02:49:47.723787', 'step': 20213, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 3322488817984}, 'timestamp': '2025-09-10 02:49:47.760625', 'step': 20213, 'epoch': 3} {'type': 'loss', 'content': 0.03436984866857529, 'timestamp': '2025-09-10 02:49:47.764960', 'step': 20214, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 2847885091968}, 'timestamp': '2025-09-10 02:49:47.797628', 'step': 20214, 'epoch': 3} {'type': 'loss', 'content': 0.033526863902807236, 'timestamp': '2025-09-10 02:49:47.800855', 'step': 20215, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 144], 'flops': 4271696270016}, 'timestamp': '2025-09-10 02:49:47.832378', 'step': 20215, 'epoch': 3} {'type': 'loss', 'content': 0.07049847394227982, 'timestamp': '2025-09-10 02:49:47.856389', 'step': 20216, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 3322488817984}, 'timestamp': '2025-09-10 02:49:47.888947', 'step': 20216, 'epoch': 3} {'type': 'loss', 'content': 0.09560097008943558, 'timestamp': '2025-09-10 02:49:47.891349', 'step': 20217, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 3322488817984}, 'timestamp': '2025-09-10 02:49:47.921297', 'step': 20217, 'epoch': 3} {'type': 'loss', 'content': 0.06810903549194336, 'timestamp': '2025-09-10 02:49:47.924240', 'step': 20218, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 2847885091968}, 'timestamp': '2025-09-10 02:49:47.954863', 'step': 20218, 'epoch': 3} {'type': 'loss', 'content': 0.09168379753828049, 'timestamp': '2025-09-10 02:49:47.957491', 'step': 20219, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 2847885091968}, 'timestamp': '2025-09-10 02:49:47.987744', 'step': 20219, 'epoch': 3} {'type': 'loss', 'content': 0.10743623226881027, 'timestamp': '2025-09-10 02:49:48.011109', 'step': 20220, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 3322488817984}, 'timestamp': '2025-09-10 02:49:48.042353', 'step': 20220, 'epoch': 3} {'type': 'loss', 'content': 0.06440120190382004, 'timestamp': '2025-09-10 02:49:48.044915', 'step': 20221, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 144], 'flops': 4271696270016}, 'timestamp': '2025-09-10 02:49:48.075378', 'step': 20221, 'epoch': 3} {'type': 'loss', 'content': 0.070409394800663, 'timestamp': '2025-09-10 02:49:48.078025', 'step': 20222, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 3322488817984}, 'timestamp': '2025-09-10 02:49:48.108464', 'step': 20222, 'epoch': 3} {'type': 'loss', 'content': 0.11268899589776993, 'timestamp': '2025-09-10 02:49:48.111999', 'step': 20223, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 3322488817984}, 'timestamp': '2025-09-10 02:49:48.148602', 'step': 20223, 'epoch': 3} {'type': 'loss', 'content': 0.03558938950300217, 'timestamp': '2025-09-10 02:49:48.172315', 'step': 20224, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 3322488817984}, 'timestamp': '2025-09-10 02:49:48.203397', 'step': 20224, 'epoch': 3} {'type': 'loss', 'content': 0.028995810076594353, 'timestamp': '2025-09-10 02:49:48.206082', 'step': 20225, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 128], 'flops': 3797092544000}, 'timestamp': '2025-09-10 02:49:48.237513', 'step': 20225, 'epoch': 3} {'type': 'loss', 'content': 0.07924303412437439, 'timestamp': '2025-09-10 02:49:48.240158', 'step': 20226, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 3322488817984}, 'timestamp': '2025-09-10 02:49:48.270816', 'step': 20226, 'epoch': 3} {'type': 'loss', 'content': 0.04033000022172928, 'timestamp': '2025-09-10 02:49:48.282592', 'step': 20227, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 128], 'flops': 3797092544000}, 'timestamp': '2025-09-10 02:49:48.321790', 'step': 20227, 'epoch': 3} {'type': 'loss', 'content': 0.06922008842229843, 'timestamp': '2025-09-10 02:49:48.345820', 'step': 20228, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 3322488817984}, 'timestamp': '2025-09-10 02:49:48.376541', 'step': 20228, 'epoch': 3} {'type': 'loss', 'content': 0.09065908193588257, 'timestamp': '2025-09-10 02:49:48.378939', 'step': 20229, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 144], 'flops': 4271696270016}, 'timestamp': '2025-09-10 02:49:48.410340', 'step': 20229, 'epoch': 3} {'type': 'loss', 'content': 0.1193552315235138, 'timestamp': '2025-09-10 02:49:48.412957', 'step': 20230, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 3322488817984}, 'timestamp': '2025-09-10 02:49:48.443225', 'step': 20230, 'epoch': 3} {'type': 'loss', 'content': 0.045109570026397705, 'timestamp': '2025-09-10 02:49:48.445370', 'step': 20231, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 128], 'flops': 3797092544000}, 'timestamp': '2025-09-10 02:49:48.477992', 'step': 20231, 'epoch': 3} {'type': 'loss', 'content': 0.07813577353954315, 'timestamp': '2025-09-10 02:49:48.502029', 'step': 20232, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 128], 'flops': 3797092544000}, 'timestamp': '2025-09-10 02:49:48.533154', 'step': 20232, 'epoch': 3} {'type': 'loss', 'content': 0.1188032254576683, 'timestamp': '2025-09-10 02:49:48.535609', 'step': 20233, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 3322488817984}, 'timestamp': '2025-09-10 02:49:48.566677', 'step': 20233, 'epoch': 3} {'type': 'loss', 'content': 0.19596613943576813, 'timestamp': '2025-09-10 02:49:48.569330', 'step': 20234, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 2847885091968}, 'timestamp': '2025-09-10 02:49:48.599581', 'step': 20234, 'epoch': 3} {'type': 'loss', 'content': 0.0914146676659584, 'timestamp': '2025-09-10 02:49:48.602884', 'step': 20235, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 2847885091968}, 'timestamp': '2025-09-10 02:49:48.634058', 'step': 20235, 'epoch': 3} {'type': 'loss', 'content': 0.10249752551317215, 'timestamp': '2025-09-10 02:49:48.658081', 'step': 20236, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 80], 'flops': 2373281365952}, 'timestamp': '2025-09-10 02:49:48.689508', 'step': 20236, 'epoch': 3} {'type': 'loss', 'content': 0.023390745744109154, 'timestamp': '2025-09-10 02:49:48.692312', 'step': 20237, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 3322488817984}, 'timestamp': '2025-09-10 02:49:48.722179', 'step': 20237, 'epoch': 3} {'type': 'loss', 'content': 0.04859783500432968, 'timestamp': '2025-09-10 02:49:48.724581', 'step': 20238, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 3322488817984}, 'timestamp': '2025-09-10 02:49:48.754241', 'step': 20238, 'epoch': 3} {'type': 'loss', 'content': 0.04885781183838844, 'timestamp': '2025-09-10 02:49:48.757265', 'step': 20239, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 3322488817984}, 'timestamp': '2025-09-10 02:49:48.790044', 'step': 20239, 'epoch': 3} {'type': 'loss', 'content': 0.10643580555915833, 'timestamp': '2025-09-10 02:49:48.813511', 'step': 20240, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 2847885091968}, 'timestamp': '2025-09-10 02:49:48.848278', 'step': 20240, 'epoch': 3} {'type': 'loss', 'content': 0.06604062765836716, 'timestamp': '2025-09-10 02:49:48.851712', 'step': 20241, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 144], 'flops': 4271696270016}, 'timestamp': '2025-09-10 02:49:48.885146', 'step': 20241, 'epoch': 3} {'type': 'loss', 'content': 0.05062456801533699, 'timestamp': '2025-09-10 02:49:48.889771', 'step': 20242, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 2847885091968}, 'timestamp': '2025-09-10 02:49:48.923586', 'step': 20242, 'epoch': 3} {'type': 'loss', 'content': 0.07787102460861206, 'timestamp': '2025-09-10 02:49:48.926282', 'step': 20243, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 2847885091968}, 'timestamp': '2025-09-10 02:49:48.958516', 'step': 20243, 'epoch': 3} {'type': 'loss', 'content': 0.145650714635849, 'timestamp': '2025-09-10 02:49:48.983406', 'step': 20244, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 2847885091968}, 'timestamp': '2025-09-10 02:49:49.015367', 'step': 20244, 'epoch': 3} {'type': 'loss', 'content': 0.03490527346730232, 'timestamp': '2025-09-10 02:49:49.019528', 'step': 20245, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 3322488817984}, 'timestamp': '2025-09-10 02:49:49.058801', 'step': 20245, 'epoch': 3} {'type': 'loss', 'content': 0.07318224757909775, 'timestamp': '2025-09-10 02:49:49.066744', 'step': 20246, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 3322488817984}, 'timestamp': '2025-09-10 02:49:49.113192', 'step': 20246, 'epoch': 3} {'type': 'loss', 'content': 0.04054836556315422, 'timestamp': '2025-09-10 02:49:49.115646', 'step': 20247, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 3322488817984}, 'timestamp': '2025-09-10 02:49:49.146034', 'step': 20247, 'epoch': 3} {'type': 'loss', 'content': 0.018851570785045624, 'timestamp': '2025-09-10 02:49:49.169812', 'step': 20248, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 128], 'flops': 3797092544000}, 'timestamp': '2025-09-10 02:49:49.200457', 'step': 20248, 'epoch': 3} {'type': 'loss', 'content': 0.04672626033425331, 'timestamp': '2025-09-10 02:49:49.203903', 'step': 20249, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 192], 'flops': 5695507448064}, 'timestamp': '2025-09-10 02:49:49.234510', 'step': 20249, 'epoch': 3} {'type': 'loss', 'content': 0.04653181508183479, 'timestamp': '2025-09-10 02:49:49.239589', 'step': 20250, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 3322488817984}, 'timestamp': '2025-09-10 02:49:49.271030', 'step': 20250, 'epoch': 3} {'type': 'loss', 'content': 0.08149585872888565, 'timestamp': '2025-09-10 02:49:49.273460', 'step': 20251, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 128], 'flops': 3797092544000}, 'timestamp': '2025-09-10 02:49:49.304761', 'step': 20251, 'epoch': 3} {'type': 'loss', 'content': 0.08962012827396393, 'timestamp': '2025-09-10 02:49:49.330136', 'step': 20252, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 3322488817984}, 'timestamp': '2025-09-10 02:49:49.363295', 'step': 20252, 'epoch': 3} {'type': 'loss', 'content': 0.07403580099344254, 'timestamp': '2025-09-10 02:49:49.367817', 'step': 20253, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 2847885091968}, 'timestamp': '2025-09-10 02:49:49.401299', 'step': 20253, 'epoch': 3} {'type': 'loss', 'content': 0.029910219833254814, 'timestamp': '2025-09-10 02:49:49.404471', 'step': 20254, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 3322488817984}, 'timestamp': '2025-09-10 02:49:49.434922', 'step': 20254, 'epoch': 3} {'type': 'loss', 'content': 0.17116887867450714, 'timestamp': '2025-09-10 02:49:49.437317', 'step': 20255, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 128], 'flops': 3797092544000}, 'timestamp': '2025-09-10 02:49:49.468129', 'step': 20255, 'epoch': 3} {'type': 'loss', 'content': 0.07793468981981277, 'timestamp': '2025-09-10 02:49:49.492302', 'step': 20256, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 3322488817984}, 'timestamp': '2025-09-10 02:49:49.524820', 'step': 20256, 'epoch': 3} {'type': 'loss', 'content': 0.04821797087788582, 'timestamp': '2025-09-10 02:49:49.528314', 'step': 20257, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 3322488817984}, 'timestamp': '2025-09-10 02:49:49.564100', 'step': 20257, 'epoch': 3} {'type': 'loss', 'content': 0.08462133258581161, 'timestamp': '2025-09-10 02:49:49.568045', 'step': 20258, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 144], 'flops': 4271696270016}, 'timestamp': '2025-09-10 02:49:49.598868', 'step': 20258, 'epoch': 3} {'type': 'loss', 'content': 0.0851551815867424, 'timestamp': '2025-09-10 02:49:49.601597', 'step': 20259, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 2847885091968}, 'timestamp': '2025-09-10 02:49:49.632413', 'step': 20259, 'epoch': 3} {'type': 'loss', 'content': 0.07496566325426102, 'timestamp': '2025-09-10 02:49:49.656170', 'step': 20260, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 3322488817984}, 'timestamp': '2025-09-10 02:49:49.687743', 'step': 20260, 'epoch': 3} {'type': 'loss', 'content': 0.024992510676383972, 'timestamp': '2025-09-10 02:49:49.690002', 'step': 20261, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 2847885091968}, 'timestamp': '2025-09-10 02:49:49.719300', 'step': 20261, 'epoch': 3} {'type': 'loss', 'content': 0.0033895850647240877, 'timestamp': '2025-09-10 02:49:49.721700', 'step': 20262, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 2847885091968}, 'timestamp': '2025-09-10 02:49:49.752586', 'step': 20262, 'epoch': 3} {'type': 'loss', 'content': 0.07282388210296631, 'timestamp': '2025-09-10 02:49:49.755245', 'step': 20263, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 2847885091968}, 'timestamp': '2025-09-10 02:49:49.785927', 'step': 20263, 'epoch': 3} {'type': 'loss', 'content': 0.12098768353462219, 'timestamp': '2025-09-10 02:49:49.809491', 'step': 20264, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 128], 'flops': 3797092544000}, 'timestamp': '2025-09-10 02:49:49.840285', 'step': 20264, 'epoch': 3} {'type': 'loss', 'content': 0.057691000401973724, 'timestamp': '2025-09-10 02:49:49.843536', 'step': 20265, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 128], 'flops': 3797092544000}, 'timestamp': '2025-09-10 02:49:49.873569', 'step': 20265, 'epoch': 3} {'type': 'loss', 'content': 0.07346916198730469, 'timestamp': '2025-09-10 02:49:49.875893', 'step': 20266, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 128], 'flops': 3797092544000}, 'timestamp': '2025-09-10 02:49:49.905533', 'step': 20266, 'epoch': 3} {'type': 'loss', 'content': 0.10214880853891373, 'timestamp': '2025-09-10 02:49:49.908228', 'step': 20267, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 3322488817984}, 'timestamp': '2025-09-10 02:49:49.938253', 'step': 20267, 'epoch': 3} {'type': 'loss', 'content': 0.07102641463279724, 'timestamp': '2025-09-10 02:49:49.961836', 'step': 20268, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 2847885091968}, 'timestamp': '2025-09-10 02:49:49.992383', 'step': 20268, 'epoch': 3} {'type': 'loss', 'content': 0.07943259179592133, 'timestamp': '2025-09-10 02:49:49.994757', 'step': 20269, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 144], 'flops': 4271696270016}, 'timestamp': '2025-09-10 02:49:50.024588', 'step': 20269, 'epoch': 3} {'type': 'loss', 'content': 0.0909700021147728, 'timestamp': '2025-09-10 02:49:50.027271', 'step': 20270, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 2847885091968}, 'timestamp': '2025-09-10 02:49:50.059052', 'step': 20270, 'epoch': 3} {'type': 'loss', 'content': 0.10159911960363388, 'timestamp': '2025-09-10 02:49:50.063185', 'step': 20271, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 3322488817984}, 'timestamp': '2025-09-10 02:49:50.093220', 'step': 20271, 'epoch': 3} {'type': 'loss', 'content': 0.07662320882081985, 'timestamp': '2025-09-10 02:49:50.116513', 'step': 20272, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 3322488817984}, 'timestamp': '2025-09-10 02:49:50.146565', 'step': 20272, 'epoch': 3} {'type': 'loss', 'content': 0.0480772964656353, 'timestamp': '2025-09-10 02:49:50.149011', 'step': 20273, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 144], 'flops': 4271696270016}, 'timestamp': '2025-09-10 02:49:50.179305', 'step': 20273, 'epoch': 3} {'type': 'loss', 'content': 0.08940092474222183, 'timestamp': '2025-09-10 02:49:50.181908', 'step': 20274, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 128], 'flops': 3797092544000}, 'timestamp': '2025-09-10 02:49:50.211643', 'step': 20274, 'epoch': 3} {'type': 'loss', 'content': 0.15889495611190796, 'timestamp': '2025-09-10 02:49:50.213827', 'step': 20275, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 2847885091968}, 'timestamp': '2025-09-10 02:49:50.243625', 'step': 20275, 'epoch': 3} {'type': 'loss', 'content': 0.08173854649066925, 'timestamp': '2025-09-10 02:49:50.267382', 'step': 20276, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 3322488817984}, 'timestamp': '2025-09-10 02:49:50.299315', 'step': 20276, 'epoch': 3} {'type': 'loss', 'content': 0.0846303179860115, 'timestamp': '2025-09-10 02:49:50.301695', 'step': 20277, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 144], 'flops': 4271696270016}, 'timestamp': '2025-09-10 02:49:50.331657', 'step': 20277, 'epoch': 3} {'type': 'loss', 'content': 0.054268427193164825, 'timestamp': '2025-09-10 02:49:50.334343', 'step': 20278, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 3322488817984}, 'timestamp': '2025-09-10 02:49:50.364097', 'step': 20278, 'epoch': 3} {'type': 'loss', 'content': 0.03732999041676521, 'timestamp': '2025-09-10 02:49:50.367758', 'step': 20279, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 128], 'flops': 3797092544000}, 'timestamp': '2025-09-10 02:49:50.397779', 'step': 20279, 'epoch': 3} {'type': 'loss', 'content': 0.0563405379652977, 'timestamp': '2025-09-10 02:49:50.421428', 'step': 20280, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 3322488817984}, 'timestamp': '2025-09-10 02:49:50.452544', 'step': 20280, 'epoch': 3} {'type': 'loss', 'content': 0.04967012256383896, 'timestamp': '2025-09-10 02:49:50.454823', 'step': 20281, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 3322488817984}, 'timestamp': '2025-09-10 02:49:50.485382', 'step': 20281, 'epoch': 3} {'type': 'loss', 'content': 0.03924993798136711, 'timestamp': '2025-09-10 02:49:50.489194', 'step': 20282, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 3322488817984}, 'timestamp': '2025-09-10 02:49:50.518790', 'step': 20282, 'epoch': 3} {'type': 'loss', 'content': 0.08163020759820938, 'timestamp': '2025-09-10 02:49:50.523630', 'step': 20283, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 3322488817984}, 'timestamp': '2025-09-10 02:49:50.559886', 'step': 20283, 'epoch': 3} {'type': 'loss', 'content': 0.06740918755531311, 'timestamp': '2025-09-10 02:49:50.583765', 'step': 20284, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 144], 'flops': 4271696270016}, 'timestamp': '2025-09-10 02:49:50.615488', 'step': 20284, 'epoch': 3} {'type': 'loss', 'content': 0.05077844485640526, 'timestamp': '2025-09-10 02:49:50.617953', 'step': 20285, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 2847885091968}, 'timestamp': '2025-09-10 02:49:50.648164', 'step': 20285, 'epoch': 3} {'type': 'loss', 'content': 0.12250523269176483, 'timestamp': '2025-09-10 02:49:50.653083', 'step': 20286, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 3322488817984}, 'timestamp': '2025-09-10 02:49:50.684253', 'step': 20286, 'epoch': 3} {'type': 'loss', 'content': 0.04715714603662491, 'timestamp': '2025-09-10 02:49:50.687450', 'step': 20287, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 3322488817984}, 'timestamp': '2025-09-10 02:49:50.717492', 'step': 20287, 'epoch': 3} {'type': 'loss', 'content': 0.021117905154824257, 'timestamp': '2025-09-10 02:49:50.744960', 'step': 20288, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 2847885091968}, 'timestamp': '2025-09-10 02:49:50.782913', 'step': 20288, 'epoch': 3} {'type': 'loss', 'content': 0.07557756453752518, 'timestamp': '2025-09-10 02:49:50.788915', 'step': 20289, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 2847885091968}, 'timestamp': '2025-09-10 02:49:50.827733', 'step': 20289, 'epoch': 3} {'type': 'loss', 'content': 0.05322171747684479, 'timestamp': '2025-09-10 02:49:50.829963', 'step': 20290, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 80], 'flops': 2373281365952}, 'timestamp': '2025-09-10 02:49:50.859436', 'step': 20290, 'epoch': 3} {'type': 'loss', 'content': 0.05070486292243004, 'timestamp': '2025-09-10 02:49:50.862379', 'step': 20291, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 3322488817984}, 'timestamp': '2025-09-10 02:49:50.892391', 'step': 20291, 'epoch': 3} {'type': 'loss', 'content': 0.10480161011219025, 'timestamp': '2025-09-10 02:49:50.916308', 'step': 20292, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 144], 'flops': 4271696270016}, 'timestamp': '2025-09-10 02:49:50.946265', 'step': 20292, 'epoch': 3} {'type': 'loss', 'content': 0.051552239805459976, 'timestamp': '2025-09-10 02:49:50.951512', 'step': 20293, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 3322488817984}, 'timestamp': '2025-09-10 02:49:50.982180', 'step': 20293, 'epoch': 3} {'type': 'loss', 'content': 0.05260516330599785, 'timestamp': '2025-09-10 02:49:50.984351', 'step': 20294, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 128], 'flops': 3797092544000}, 'timestamp': '2025-09-10 02:49:51.018677', 'step': 20294, 'epoch': 3} {'type': 'loss', 'content': 0.019878501072525978, 'timestamp': '2025-09-10 02:49:51.021240', 'step': 20295, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 3322488817984}, 'timestamp': '2025-09-10 02:49:51.050924', 'step': 20295, 'epoch': 3} {'type': 'loss', 'content': 0.04215184971690178, 'timestamp': '2025-09-10 02:49:51.074637', 'step': 20296, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 3322488817984}, 'timestamp': '2025-09-10 02:49:51.105763', 'step': 20296, 'epoch': 3} {'type': 'loss', 'content': 0.0383710041642189, 'timestamp': '2025-09-10 02:49:51.108375', 'step': 20297, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 2847885091968}, 'timestamp': '2025-09-10 02:49:51.138681', 'step': 20297, 'epoch': 3} {'type': 'loss', 'content': 0.05249250680208206, 'timestamp': '2025-09-10 02:49:51.140795', 'step': 20298, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 2847885091968}, 'timestamp': '2025-09-10 02:49:51.170492', 'step': 20298, 'epoch': 3} {'type': 'loss', 'content': 0.06618262082338333, 'timestamp': '2025-09-10 02:49:51.173170', 'step': 20299, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 2847885091968}, 'timestamp': '2025-09-10 02:49:51.205180', 'step': 20299, 'epoch': 3} {'type': 'loss', 'content': 0.020930584520101547, 'timestamp': '2025-09-10 02:49:51.229034', 'step': 20300, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 144], 'flops': 4271696270016}, 'timestamp': '2025-09-10 02:49:51.259991', 'step': 20300, 'epoch': 3} {'type': 'loss', 'content': 0.057288363575935364, 'timestamp': '2025-09-10 02:49:51.262382', 'step': 20301, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 3322488817984}, 'timestamp': '2025-09-10 02:49:51.292881', 'step': 20301, 'epoch': 3} {'type': 'loss', 'content': 0.09777114540338516, 'timestamp': '2025-09-10 02:49:51.295137', 'step': 20302, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 2847885091968}, 'timestamp': '2025-09-10 02:49:51.325217', 'step': 20302, 'epoch': 3} {'type': 'loss', 'content': 0.029303131625056267, 'timestamp': '2025-09-10 02:49:51.327433', 'step': 20303, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 3322488817984}, 'timestamp': '2025-09-10 02:49:51.357445', 'step': 20303, 'epoch': 3} {'type': 'loss', 'content': 0.08056323975324631, 'timestamp': '2025-09-10 02:49:51.381155', 'step': 20304, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 128], 'flops': 3797092544000}, 'timestamp': '2025-09-10 02:49:51.412326', 'step': 20304, 'epoch': 3} {'type': 'loss', 'content': 0.06278713047504425, 'timestamp': '2025-09-10 02:49:51.414881', 'step': 20305, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 3322488817984}, 'timestamp': '2025-09-10 02:49:51.445084', 'step': 20305, 'epoch': 3} {'type': 'loss', 'content': 0.008622849360108376, 'timestamp': '2025-09-10 02:49:51.447731', 'step': 20306, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 2847885091968}, 'timestamp': '2025-09-10 02:49:51.478466', 'step': 20306, 'epoch': 3} {'type': 'loss', 'content': 0.06140205264091492, 'timestamp': '2025-09-10 02:49:51.483629', 'step': 20307, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 144], 'flops': 4271696270016}, 'timestamp': '2025-09-10 02:49:51.516885', 'step': 20307, 'epoch': 3} {'type': 'loss', 'content': 0.11214778572320938, 'timestamp': '2025-09-10 02:49:51.541012', 'step': 20308, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 128], 'flops': 3797092544000}, 'timestamp': '2025-09-10 02:49:51.570923', 'step': 20308, 'epoch': 3} {'type': 'loss', 'content': 0.0633050873875618, 'timestamp': '2025-09-10 02:49:51.573470', 'step': 20309, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 2847885091968}, 'timestamp': '2025-09-10 02:49:51.604193', 'step': 20309, 'epoch': 3} {'type': 'loss', 'content': 0.10299945622682571, 'timestamp': '2025-09-10 02:49:51.606955', 'step': 20310, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 128], 'flops': 3797092544000}, 'timestamp': '2025-09-10 02:49:51.637931', 'step': 20310, 'epoch': 3} {'type': 'loss', 'content': 0.039254866540431976, 'timestamp': '2025-09-10 02:49:51.640360', 'step': 20311, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 2847885091968}, 'timestamp': '2025-09-10 02:49:51.672704', 'step': 20311, 'epoch': 3} {'type': 'loss', 'content': 0.10927236080169678, 'timestamp': '2025-09-10 02:49:51.696241', 'step': 20312, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 128], 'flops': 3797092544000}, 'timestamp': '2025-09-10 02:49:51.727905', 'step': 20312, 'epoch': 3} {'type': 'loss', 'content': 0.04476744309067726, 'timestamp': '2025-09-10 02:49:51.730663', 'step': 20313, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 2847885091968}, 'timestamp': '2025-09-10 02:49:51.761770', 'step': 20313, 'epoch': 3} {'type': 'loss', 'content': 0.05930543690919876, 'timestamp': '2025-09-10 02:49:51.764315', 'step': 20314, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 3322488817984}, 'timestamp': '2025-09-10 02:49:51.796261', 'step': 20314, 'epoch': 3} {'type': 'loss', 'content': 0.07466065883636475, 'timestamp': '2025-09-10 02:49:51.798679', 'step': 20315, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 128], 'flops': 3797092544000}, 'timestamp': '2025-09-10 02:49:51.828869', 'step': 20315, 'epoch': 3} {'type': 'loss', 'content': 0.05228772759437561, 'timestamp': '2025-09-10 02:49:51.852326', 'step': 20316, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 3322488817984}, 'timestamp': '2025-09-10 02:49:51.883400', 'step': 20316, 'epoch': 3} {'type': 'loss', 'content': 0.0643419474363327, 'timestamp': '2025-09-10 02:49:51.891537', 'step': 20317, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 3322488817984}, 'timestamp': '2025-09-10 02:49:51.923140', 'step': 20317, 'epoch': 3} {'type': 'loss', 'content': 0.11814920604228973, 'timestamp': '2025-09-10 02:49:51.925671', 'step': 20318, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 2847885091968}, 'timestamp': '2025-09-10 02:49:51.960857', 'step': 20318, 'epoch': 3} {'type': 'loss', 'content': 0.07836445420980453, 'timestamp': '2025-09-10 02:49:51.962955', 'step': 20319, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 128], 'flops': 3797092544000}, 'timestamp': '2025-09-10 02:49:51.992631', 'step': 20319, 'epoch': 3} {'type': 'loss', 'content': 0.0522761270403862, 'timestamp': '2025-09-10 02:49:52.016956', 'step': 20320, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 3322488817984}, 'timestamp': '2025-09-10 02:49:52.047610', 'step': 20320, 'epoch': 3} {'type': 'loss', 'content': 0.053867727518081665, 'timestamp': '2025-09-10 02:49:52.050028', 'step': 20321, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 2847885091968}, 'timestamp': '2025-09-10 02:49:52.081432', 'step': 20321, 'epoch': 3} {'type': 'loss', 'content': 0.035582609474658966, 'timestamp': '2025-09-10 02:49:52.092769', 'step': 20322, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 144], 'flops': 4271696270016}, 'timestamp': '2025-09-10 02:49:52.124268', 'step': 20322, 'epoch': 3} {'type': 'loss', 'content': 0.07329632341861725, 'timestamp': '2025-09-10 02:49:52.127659', 'step': 20323, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 2847885091968}, 'timestamp': '2025-09-10 02:49:52.164743', 'step': 20323, 'epoch': 3} {'type': 'loss', 'content': 0.026144614443182945, 'timestamp': '2025-09-10 02:49:52.189137', 'step': 20324, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 3322488817984}, 'timestamp': '2025-09-10 02:49:52.219867', 'step': 20324, 'epoch': 3} {'type': 'loss', 'content': 0.02265840210020542, 'timestamp': '2025-09-10 02:49:52.222446', 'step': 20325, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 3322488817984}, 'timestamp': '2025-09-10 02:49:52.253012', 'step': 20325, 'epoch': 3} {'type': 'loss', 'content': 0.05921974033117294, 'timestamp': '2025-09-10 02:49:52.255649', 'step': 20326, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 3322488817984}, 'timestamp': '2025-09-10 02:49:52.286556', 'step': 20326, 'epoch': 3} {'type': 'loss', 'content': 0.059147316962480545, 'timestamp': '2025-09-10 02:49:52.289664', 'step': 20327, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 128], 'flops': 3797092544000}, 'timestamp': '2025-09-10 02:49:52.324991', 'step': 20327, 'epoch': 3} {'type': 'loss', 'content': 0.07122795283794403, 'timestamp': '2025-09-10 02:49:52.348763', 'step': 20328, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 2847885091968}, 'timestamp': '2025-09-10 02:49:52.381186', 'step': 20328, 'epoch': 3} {'type': 'loss', 'content': 0.04999573156237602, 'timestamp': '2025-09-10 02:49:52.383988', 'step': 20329, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 128], 'flops': 3797092544000}, 'timestamp': '2025-09-10 02:49:52.422133', 'step': 20329, 'epoch': 3} {'type': 'loss', 'content': 0.0886688306927681, 'timestamp': '2025-09-10 02:49:52.424370', 'step': 20330, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 3322488817984}, 'timestamp': '2025-09-10 02:49:52.456091', 'step': 20330, 'epoch': 3} {'type': 'loss', 'content': 0.11580631881952286, 'timestamp': '2025-09-10 02:49:52.458569', 'step': 20331, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 160], 'flops': 4746299996032}, 'timestamp': '2025-09-10 02:49:52.488314', 'step': 20331, 'epoch': 3} {'type': 'loss', 'content': 0.1360866129398346, 'timestamp': '2025-09-10 02:49:52.512208', 'step': 20332, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 3322488817984}, 'timestamp': '2025-09-10 02:49:52.544811', 'step': 20332, 'epoch': 3} {'type': 'loss', 'content': 0.05726158246397972, 'timestamp': '2025-09-10 02:49:52.547249', 'step': 20333, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 2847885091968}, 'timestamp': '2025-09-10 02:49:52.580485', 'step': 20333, 'epoch': 3} {'type': 'loss', 'content': 0.03335878252983093, 'timestamp': '2025-09-10 02:49:52.583251', 'step': 20334, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 3322488817984}, 'timestamp': '2025-09-10 02:49:52.613041', 'step': 20334, 'epoch': 3} {'type': 'loss', 'content': 0.07991982996463776, 'timestamp': '2025-09-10 02:49:52.622447', 'step': 20335, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 3322488817984}, 'timestamp': '2025-09-10 02:49:52.659595', 'step': 20335, 'epoch': 3} {'type': 'loss', 'content': 0.13543160259723663, 'timestamp': '2025-09-10 02:49:52.683287', 'step': 20336, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 128], 'flops': 3797092544000}, 'timestamp': '2025-09-10 02:49:52.713495', 'step': 20336, 'epoch': 3} {'type': 'loss', 'content': 0.06389124691486359, 'timestamp': '2025-09-10 02:49:52.716115', 'step': 20337, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 144], 'flops': 4271696270016}, 'timestamp': '2025-09-10 02:49:52.746342', 'step': 20337, 'epoch': 3} {'type': 'loss', 'content': 0.10232912003993988, 'timestamp': '2025-09-10 02:49:52.748836', 'step': 20338, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 3322488817984}, 'timestamp': '2025-09-10 02:49:52.779035', 'step': 20338, 'epoch': 3} {'type': 'loss', 'content': 0.019540030509233475, 'timestamp': '2025-09-10 02:49:52.781994', 'step': 20339, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 2847885091968}, 'timestamp': '2025-09-10 02:49:52.811807', 'step': 20339, 'epoch': 3} {'type': 'loss', 'content': 0.017655115574598312, 'timestamp': '2025-09-10 02:49:52.835850', 'step': 20340, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 3322488817984}, 'timestamp': '2025-09-10 02:49:52.865762', 'step': 20340, 'epoch': 3} {'type': 'loss', 'content': 0.05315517261624336, 'timestamp': '2025-09-10 02:49:52.867879', 'step': 20341, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 3322488817984}, 'timestamp': '2025-09-10 02:49:52.897936', 'step': 20341, 'epoch': 3} {'type': 'loss', 'content': 0.1153295636177063, 'timestamp': '2025-09-10 02:49:52.901267', 'step': 20342, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 3322488817984}, 'timestamp': '2025-09-10 02:49:52.931041', 'step': 20342, 'epoch': 3} {'type': 'loss', 'content': 0.01274669636040926, 'timestamp': '2025-09-10 02:49:52.933285', 'step': 20343, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 128], 'flops': 3797092544000}, 'timestamp': '2025-09-10 02:49:52.962940', 'step': 20343, 'epoch': 3} {'type': 'loss', 'content': 0.0914430171251297, 'timestamp': '2025-09-10 02:49:52.986661', 'step': 20344, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 144], 'flops': 4271696270016}, 'timestamp': '2025-09-10 02:49:53.016465', 'step': 20344, 'epoch': 3} {'type': 'loss', 'content': 0.08522467315196991, 'timestamp': '2025-09-10 02:49:53.018765', 'step': 20345, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 3322488817984}, 'timestamp': '2025-09-10 02:49:53.049226', 'step': 20345, 'epoch': 3} {'type': 'loss', 'content': 0.04691345989704132, 'timestamp': '2025-09-10 02:49:53.052257', 'step': 20346, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 2847885091968}, 'timestamp': '2025-09-10 02:49:53.082400', 'step': 20346, 'epoch': 3} {'type': 'loss', 'content': 0.04824717342853546, 'timestamp': '2025-09-10 02:49:53.085236', 'step': 20347, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 2847885091968}, 'timestamp': '2025-09-10 02:49:53.115545', 'step': 20347, 'epoch': 3} {'type': 'loss', 'content': 0.1290096491575241, 'timestamp': '2025-09-10 02:49:53.139183', 'step': 20348, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 2847885091968}, 'timestamp': '2025-09-10 02:49:53.169859', 'step': 20348, 'epoch': 3} {'type': 'loss', 'content': 0.023760341107845306, 'timestamp': '2025-09-10 02:49:53.171912', 'step': 20349, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 3322488817984}, 'timestamp': '2025-09-10 02:49:53.202357', 'step': 20349, 'epoch': 3} {'type': 'loss', 'content': 0.04233440384268761, 'timestamp': '2025-09-10 02:49:53.205153', 'step': 20350, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 2847885091968}, 'timestamp': '2025-09-10 02:49:53.235105', 'step': 20350, 'epoch': 3} {'type': 'loss', 'content': 0.06103026121854782, 'timestamp': '2025-09-10 02:49:53.238854', 'step': 20351, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 128], 'flops': 3797092544000}, 'timestamp': '2025-09-10 02:49:53.270867', 'step': 20351, 'epoch': 3} {'type': 'loss', 'content': 0.055513620376586914, 'timestamp': '2025-09-10 02:49:53.294675', 'step': 20352, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 3322488817984}, 'timestamp': '2025-09-10 02:49:53.327849', 'step': 20352, 'epoch': 3} {'type': 'loss', 'content': 0.08524994552135468, 'timestamp': '2025-09-10 02:49:53.330184', 'step': 20353, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 2847885091968}, 'timestamp': '2025-09-10 02:49:53.360846', 'step': 20353, 'epoch': 3} {'type': 'loss', 'content': 0.07238447666168213, 'timestamp': '2025-09-10 02:49:53.363422', 'step': 20354, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 128], 'flops': 3797092544000}, 'timestamp': '2025-09-10 02:49:53.393752', 'step': 20354, 'epoch': 3} {'type': 'loss', 'content': 0.05133094638586044, 'timestamp': '2025-09-10 02:49:53.396096', 'step': 20355, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 128], 'flops': 3797092544000}, 'timestamp': '2025-09-10 02:49:53.425272', 'step': 20355, 'epoch': 3} {'type': 'loss', 'content': 0.07833029329776764, 'timestamp': '2025-09-10 02:49:53.449213', 'step': 20356, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 128], 'flops': 3797092544000}, 'timestamp': '2025-09-10 02:49:53.479179', 'step': 20356, 'epoch': 3} {'type': 'loss', 'content': 0.031118543818593025, 'timestamp': '2025-09-10 02:49:53.481921', 'step': 20357, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 3322488817984}, 'timestamp': '2025-09-10 02:49:53.512038', 'step': 20357, 'epoch': 3} {'type': 'loss', 'content': 0.10819089412689209, 'timestamp': '2025-09-10 02:49:53.514048', 'step': 20358, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 128], 'flops': 3797092544000}, 'timestamp': '2025-09-10 02:49:53.544444', 'step': 20358, 'epoch': 3} {'type': 'loss', 'content': 0.08440417051315308, 'timestamp': '2025-09-10 02:49:53.546847', 'step': 20359, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 128], 'flops': 3797092544000}, 'timestamp': '2025-09-10 02:49:53.576225', 'step': 20359, 'epoch': 3} {'type': 'loss', 'content': 0.07489084452390671, 'timestamp': '2025-09-10 02:49:53.599834', 'step': 20360, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 3322488817984}, 'timestamp': '2025-09-10 02:49:53.631647', 'step': 20360, 'epoch': 3} {'type': 'loss', 'content': 0.13115009665489197, 'timestamp': '2025-09-10 02:49:53.633864', 'step': 20361, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 3322488817984}, 'timestamp': '2025-09-10 02:49:53.664171', 'step': 20361, 'epoch': 3} {'type': 'loss', 'content': 0.09196946769952774, 'timestamp': '2025-09-10 02:49:53.666468', 'step': 20362, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 2847885091968}, 'timestamp': '2025-09-10 02:49:53.696494', 'step': 20362, 'epoch': 3} {'type': 'loss', 'content': 0.05162752792239189, 'timestamp': '2025-09-10 02:49:53.699184', 'step': 20363, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 128], 'flops': 3797092544000}, 'timestamp': '2025-09-10 02:49:53.729610', 'step': 20363, 'epoch': 3} {'type': 'loss', 'content': 0.06100638210773468, 'timestamp': '2025-09-10 02:49:53.753475', 'step': 20364, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 128], 'flops': 3797092544000}, 'timestamp': '2025-09-10 02:49:53.785155', 'step': 20364, 'epoch': 3} {'type': 'loss', 'content': 0.034729763865470886, 'timestamp': '2025-09-10 02:49:53.787764', 'step': 20365, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 3322488817984}, 'timestamp': '2025-09-10 02:49:53.819348', 'step': 20365, 'epoch': 3} {'type': 'loss', 'content': 0.10448811948299408, 'timestamp': '2025-09-10 02:49:53.821841', 'step': 20366, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 3322488817984}, 'timestamp': '2025-09-10 02:49:53.851974', 'step': 20366, 'epoch': 3} {'type': 'loss', 'content': 0.015316338278353214, 'timestamp': '2025-09-10 02:49:53.854653', 'step': 20367, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 2847885091968}, 'timestamp': '2025-09-10 02:49:53.884751', 'step': 20367, 'epoch': 3} {'type': 'loss', 'content': 0.04055432602763176, 'timestamp': '2025-09-10 02:49:53.908616', 'step': 20368, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 3322488817984}, 'timestamp': '2025-09-10 02:49:53.941058', 'step': 20368, 'epoch': 3} {'type': 'loss', 'content': 0.08399798721075058, 'timestamp': '2025-09-10 02:49:53.943358', 'step': 20369, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 144], 'flops': 4271696270016}, 'timestamp': '2025-09-10 02:49:53.973791', 'step': 20369, 'epoch': 3} {'type': 'loss', 'content': 0.03839890658855438, 'timestamp': '2025-09-10 02:49:53.977013', 'step': 20370, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 2847885091968}, 'timestamp': '2025-09-10 02:49:54.009278', 'step': 20370, 'epoch': 3} {'type': 'loss', 'content': 0.09993065893650055, 'timestamp': '2025-09-10 02:49:54.013094', 'step': 20371, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 2847885091968}, 'timestamp': '2025-09-10 02:49:54.043913', 'step': 20371, 'epoch': 3} {'type': 'loss', 'content': 0.020323360338807106, 'timestamp': '2025-09-10 02:49:54.067732', 'step': 20372, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 160], 'flops': 4746299996032}, 'timestamp': '2025-09-10 02:49:54.098442', 'step': 20372, 'epoch': 3} {'type': 'loss', 'content': 0.060738705098629, 'timestamp': '2025-09-10 02:49:54.100660', 'step': 20373, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 3322488817984}, 'timestamp': '2025-09-10 02:49:54.131285', 'step': 20373, 'epoch': 3} {'type': 'loss', 'content': 0.09042180329561234, 'timestamp': '2025-09-10 02:49:54.133517', 'step': 20374, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 3322488817984}, 'timestamp': '2025-09-10 02:49:54.163520', 'step': 20374, 'epoch': 3} {'type': 'loss', 'content': 0.04979320615530014, 'timestamp': '2025-09-10 02:49:54.166508', 'step': 20375, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 3322488817984}, 'timestamp': '2025-09-10 02:49:54.197126', 'step': 20375, 'epoch': 3} {'type': 'loss', 'content': 0.08324568718671799, 'timestamp': '2025-09-10 02:49:54.220877', 'step': 20376, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 128], 'flops': 3797092544000}, 'timestamp': '2025-09-10 02:49:54.254914', 'step': 20376, 'epoch': 3} {'type': 'loss', 'content': 0.08606483042240143, 'timestamp': '2025-09-10 02:49:54.257379', 'step': 20377, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 128], 'flops': 3797092544000}, 'timestamp': '2025-09-10 02:49:54.288215', 'step': 20377, 'epoch': 3} {'type': 'loss', 'content': 0.0967637225985527, 'timestamp': '2025-09-10 02:49:54.290229', 'step': 20378, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 3322488817984}, 'timestamp': '2025-09-10 02:49:54.321078', 'step': 20378, 'epoch': 3} {'type': 'loss', 'content': 0.05433594062924385, 'timestamp': '2025-09-10 02:49:54.323418', 'step': 20379, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 2847885091968}, 'timestamp': '2025-09-10 02:49:54.355079', 'step': 20379, 'epoch': 3} {'type': 'loss', 'content': 0.07319820672273636, 'timestamp': '2025-09-10 02:49:54.378963', 'step': 20380, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 144], 'flops': 4271696270016}, 'timestamp': '2025-09-10 02:49:54.410478', 'step': 20380, 'epoch': 3} {'type': 'loss', 'content': 0.03575116768479347, 'timestamp': '2025-09-10 02:49:54.412913', 'step': 20381, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 3322488817984}, 'timestamp': '2025-09-10 02:49:54.445203', 'step': 20381, 'epoch': 3} {'type': 'loss', 'content': 0.055838778614997864, 'timestamp': '2025-09-10 02:49:54.447459', 'step': 20382, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 2847885091968}, 'timestamp': '2025-09-10 02:49:54.477791', 'step': 20382, 'epoch': 3} {'type': 'loss', 'content': 0.026614278554916382, 'timestamp': '2025-09-10 02:49:54.480451', 'step': 20383, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 3322488817984}, 'timestamp': '2025-09-10 02:49:54.510710', 'step': 20383, 'epoch': 3} {'type': 'loss', 'content': 0.030261917039752007, 'timestamp': '2025-09-10 02:49:54.534496', 'step': 20384, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 2847885091968}, 'timestamp': '2025-09-10 02:49:54.565203', 'step': 20384, 'epoch': 3} {'type': 'loss', 'content': 0.03226285055279732, 'timestamp': '2025-09-10 02:49:54.567771', 'step': 20385, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 2847885091968}, 'timestamp': '2025-09-10 02:49:54.598788', 'step': 20385, 'epoch': 3} {'type': 'loss', 'content': 0.09388965368270874, 'timestamp': '2025-09-10 02:49:54.601869', 'step': 20386, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 128], 'flops': 3797092544000}, 'timestamp': '2025-09-10 02:49:54.632425', 'step': 20386, 'epoch': 3} {'type': 'loss', 'content': 0.05730002745985985, 'timestamp': '2025-09-10 02:49:54.634763', 'step': 20387, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 2847885091968}, 'timestamp': '2025-09-10 02:49:54.664718', 'step': 20387, 'epoch': 3} {'type': 'loss', 'content': 0.12767893075942993, 'timestamp': '2025-09-10 02:49:54.688222', 'step': 20388, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 144], 'flops': 4271696270016}, 'timestamp': '2025-09-10 02:49:54.719283', 'step': 20388, 'epoch': 3} {'type': 'loss', 'content': 0.024104375392198563, 'timestamp': '2025-09-10 02:49:54.723426', 'step': 20389, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 2847885091968}, 'timestamp': '2025-09-10 02:49:54.753522', 'step': 20389, 'epoch': 3} {'type': 'loss', 'content': 0.15220022201538086, 'timestamp': '2025-09-10 02:49:54.755802', 'step': 20390, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 128], 'flops': 3797092544000}, 'timestamp': '2025-09-10 02:49:54.785901', 'step': 20390, 'epoch': 3} {'type': 'loss', 'content': 0.08305234462022781, 'timestamp': '2025-09-10 02:49:54.788597', 'step': 20391, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 2847885091968}, 'timestamp': '2025-09-10 02:49:54.820127', 'step': 20391, 'epoch': 3} {'type': 'loss', 'content': 0.019781244918704033, 'timestamp': '2025-09-10 02:49:54.845848', 'step': 20392, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 128], 'flops': 3797092544000}, 'timestamp': '2025-09-10 02:49:54.876951', 'step': 20392, 'epoch': 3} {'type': 'loss', 'content': 0.05333938077092171, 'timestamp': '2025-09-10 02:49:54.880596', 'step': 20393, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 2847885091968}, 'timestamp': '2025-09-10 02:49:54.911143', 'step': 20393, 'epoch': 3} {'type': 'loss', 'content': 0.06668905913829803, 'timestamp': '2025-09-10 02:49:54.913443', 'step': 20394, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 3322488817984}, 'timestamp': '2025-09-10 02:49:54.943621', 'step': 20394, 'epoch': 3} {'type': 'loss', 'content': 0.054150596261024475, 'timestamp': '2025-09-10 02:49:54.945910', 'step': 20395, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 128], 'flops': 3797092544000}, 'timestamp': '2025-09-10 02:49:54.975869', 'step': 20395, 'epoch': 3} {'type': 'loss', 'content': 0.060408882796764374, 'timestamp': '2025-09-10 02:49:54.999851', 'step': 20396, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 128], 'flops': 3797092544000}, 'timestamp': '2025-09-10 02:49:55.031616', 'step': 20396, 'epoch': 3} {'type': 'loss', 'content': 0.13577806949615479, 'timestamp': '2025-09-10 02:49:55.034265', 'step': 20397, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 128], 'flops': 3797092544000}, 'timestamp': '2025-09-10 02:49:55.064556', 'step': 20397, 'epoch': 3} {'type': 'loss', 'content': 0.04209721088409424, 'timestamp': '2025-09-10 02:49:55.066594', 'step': 20398, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 3322488817984}, 'timestamp': '2025-09-10 02:49:55.096346', 'step': 20398, 'epoch': 3} {'type': 'loss', 'content': 0.10322760790586472, 'timestamp': '2025-09-10 02:49:55.098730', 'step': 20399, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 3322488817984}, 'timestamp': '2025-09-10 02:49:55.129424', 'step': 20399, 'epoch': 3} {'type': 'loss', 'content': 0.05098021402955055, 'timestamp': '2025-09-10 02:49:55.153003', 'step': 20400, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 3322488817984}, 'timestamp': '2025-09-10 02:49:55.183801', 'step': 20400, 'epoch': 3} {'type': 'loss', 'content': 0.09788589924573898, 'timestamp': '2025-09-10 02:49:55.186132', 'step': 20401, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 2847885091968}, 'timestamp': '2025-09-10 02:49:55.216198', 'step': 20401, 'epoch': 3} {'type': 'loss', 'content': 0.05010625720024109, 'timestamp': '2025-09-10 02:49:55.218281', 'step': 20402, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 2847885091968}, 'timestamp': '2025-09-10 02:49:55.248398', 'step': 20402, 'epoch': 3} {'type': 'loss', 'content': 0.054024454206228256, 'timestamp': '2025-09-10 02:49:55.251191', 'step': 20403, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 128], 'flops': 3797092544000}, 'timestamp': '2025-09-10 02:49:55.281841', 'step': 20403, 'epoch': 3} {'type': 'loss', 'content': 0.052302900701761246, 'timestamp': '2025-09-10 02:49:55.305529', 'step': 20404, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 2847885091968}, 'timestamp': '2025-09-10 02:49:55.336639', 'step': 20404, 'epoch': 3} {'type': 'loss', 'content': 0.0637555718421936, 'timestamp': '2025-09-10 02:49:55.338980', 'step': 20405, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 128], 'flops': 3797092544000}, 'timestamp': '2025-09-10 02:49:55.369641', 'step': 20405, 'epoch': 3} {'type': 'loss', 'content': 0.09791115671396255, 'timestamp': '2025-09-10 02:49:55.372106', 'step': 20406, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 128], 'flops': 3797092544000}, 'timestamp': '2025-09-10 02:49:55.402511', 'step': 20406, 'epoch': 3} {'type': 'loss', 'content': 0.045588452368974686, 'timestamp': '2025-09-10 02:49:55.405707', 'step': 20407, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 3322488817984}, 'timestamp': '2025-09-10 02:49:55.439721', 'step': 20407, 'epoch': 3} {'type': 'loss', 'content': 0.05035972222685814, 'timestamp': '2025-09-10 02:49:55.464140', 'step': 20408, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 3322488817984}, 'timestamp': '2025-09-10 02:49:55.505727', 'step': 20408, 'epoch': 3} {'type': 'loss', 'content': 0.04602128639817238, 'timestamp': '2025-09-10 02:49:55.508584', 'step': 20409, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 128], 'flops': 3797092544000}, 'timestamp': '2025-09-10 02:49:55.539955', 'step': 20409, 'epoch': 3} {'type': 'loss', 'content': 0.08676654100418091, 'timestamp': '2025-09-10 02:49:55.542547', 'step': 20410, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 2847885091968}, 'timestamp': '2025-09-10 02:49:55.579051', 'step': 20410, 'epoch': 3} {'type': 'loss', 'content': 0.027573266997933388, 'timestamp': '2025-09-10 02:49:55.581689', 'step': 20411, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 3322488817984}, 'timestamp': '2025-09-10 02:49:55.615287', 'step': 20411, 'epoch': 3} {'type': 'loss', 'content': 0.061545465141534805, 'timestamp': '2025-09-10 02:49:55.639426', 'step': 20412, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 3322488817984}, 'timestamp': '2025-09-10 02:49:55.671355', 'step': 20412, 'epoch': 3} {'type': 'loss', 'content': 0.07738993316888809, 'timestamp': '2025-09-10 02:49:55.673769', 'step': 20413, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 144], 'flops': 4271696270016}, 'timestamp': '2025-09-10 02:49:55.705097', 'step': 20413, 'epoch': 3} {'type': 'loss', 'content': 0.07429680228233337, 'timestamp': '2025-09-10 02:49:55.707774', 'step': 20414, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 2847885091968}, 'timestamp': '2025-09-10 02:49:55.738177', 'step': 20414, 'epoch': 3} {'type': 'loss', 'content': 0.08688397705554962, 'timestamp': '2025-09-10 02:49:55.741029', 'step': 20415, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 3322488817984}, 'timestamp': '2025-09-10 02:49:55.778120', 'step': 20415, 'epoch': 3} {'type': 'loss', 'content': 0.1230117604136467, 'timestamp': '2025-09-10 02:49:55.803179', 'step': 20416, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 3322488817984}, 'timestamp': '2025-09-10 02:49:55.840375', 'step': 20416, 'epoch': 3} {'type': 'loss', 'content': 0.06058524549007416, 'timestamp': '2025-09-10 02:49:55.846258', 'step': 20417, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 2847885091968}, 'timestamp': '2025-09-10 02:49:55.877974', 'step': 20417, 'epoch': 3} {'type': 'loss', 'content': 0.033449724316596985, 'timestamp': '2025-09-10 02:49:55.881480', 'step': 20418, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 3322488817984}, 'timestamp': '2025-09-10 02:49:55.913646', 'step': 20418, 'epoch': 3} {'type': 'loss', 'content': 0.1000458225607872, 'timestamp': '2025-09-10 02:49:55.916182', 'step': 20419, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 160], 'flops': 4746299996032}, 'timestamp': '2025-09-10 02:49:55.948419', 'step': 20419, 'epoch': 3} {'type': 'loss', 'content': 0.09797905385494232, 'timestamp': '2025-09-10 02:49:55.972832', 'step': 20420, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 3322488817984}, 'timestamp': '2025-09-10 02:49:56.019744', 'step': 20420, 'epoch': 3} {'type': 'loss', 'content': 0.04882626235485077, 'timestamp': '2025-09-10 02:49:56.031567', 'step': 20421, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 3322488817984}, 'timestamp': '2025-09-10 02:49:56.066651', 'step': 20421, 'epoch': 3} {'type': 'loss', 'content': 0.0555543415248394, 'timestamp': '2025-09-10 02:49:56.070535', 'step': 20422, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 2847885091968}, 'timestamp': '2025-09-10 02:49:56.101327', 'step': 20422, 'epoch': 3} {'type': 'loss', 'content': 0.0025389008224010468, 'timestamp': '2025-09-10 02:49:56.106086', 'step': 20423, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 2847885091968}, 'timestamp': '2025-09-10 02:49:56.150429', 'step': 20423, 'epoch': 3} {'type': 'loss', 'content': 0.08026044070720673, 'timestamp': '2025-09-10 02:49:56.176109', 'step': 20424, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 160], 'flops': 4746299996032}, 'timestamp': '2025-09-10 02:49:56.213182', 'step': 20424, 'epoch': 3} {'type': 'loss', 'content': 0.07402835041284561, 'timestamp': '2025-09-10 02:49:56.215646', 'step': 20425, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 176], 'flops': 5220903722048}, 'timestamp': '2025-09-10 02:49:56.251192', 'step': 20425, 'epoch': 3} {'type': 'loss', 'content': 0.12289661914110184, 'timestamp': '2025-09-10 02:49:56.255565', 'step': 20426, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 3322488817984}, 'timestamp': '2025-09-10 02:49:56.286216', 'step': 20426, 'epoch': 3} {'type': 'loss', 'content': 0.11506272852420807, 'timestamp': '2025-09-10 02:49:56.288630', 'step': 20427, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 2847885091968}, 'timestamp': '2025-09-10 02:49:56.319628', 'step': 20427, 'epoch': 3} {'type': 'loss', 'content': 0.05615876987576485, 'timestamp': '2025-09-10 02:49:56.343222', 'step': 20428, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 2847885091968}, 'timestamp': '2025-09-10 02:49:56.375125', 'step': 20428, 'epoch': 3} {'type': 'loss', 'content': 0.04319828003644943, 'timestamp': '2025-09-10 02:49:56.378080', 'step': 20429, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 2847885091968}, 'timestamp': '2025-09-10 02:49:56.408146', 'step': 20429, 'epoch': 3} {'type': 'loss', 'content': 0.06073666736483574, 'timestamp': '2025-09-10 02:49:56.410588', 'step': 20430, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 128], 'flops': 3797092544000}, 'timestamp': '2025-09-10 02:49:56.440534', 'step': 20430, 'epoch': 3} {'type': 'loss', 'content': 0.0865512266755104, 'timestamp': '2025-09-10 02:49:56.443334', 'step': 20431, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 128], 'flops': 3797092544000}, 'timestamp': '2025-09-10 02:49:56.474578', 'step': 20431, 'epoch': 3} {'type': 'loss', 'content': 0.017614450305700302, 'timestamp': '2025-09-10 02:49:56.498104', 'step': 20432, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 80], 'flops': 2373281365952}, 'timestamp': '2025-09-10 02:49:56.528773', 'step': 20432, 'epoch': 3} {'type': 'loss', 'content': 0.03565766662359238, 'timestamp': '2025-09-10 02:49:56.531372', 'step': 20433, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 2847885091968}, 'timestamp': '2025-09-10 02:49:56.561334', 'step': 20433, 'epoch': 3} {'type': 'loss', 'content': 0.037508998066186905, 'timestamp': '2025-09-10 02:49:56.564011', 'step': 20434, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 144], 'flops': 4271696270016}, 'timestamp': '2025-09-10 02:49:56.594705', 'step': 20434, 'epoch': 3} {'type': 'loss', 'content': 0.0634092390537262, 'timestamp': '2025-09-10 02:49:56.597298', 'step': 20435, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 2847885091968}, 'timestamp': '2025-09-10 02:49:56.627143', 'step': 20435, 'epoch': 3} {'type': 'loss', 'content': 0.03460155427455902, 'timestamp': '2025-09-10 02:49:56.650974', 'step': 20436, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 144], 'flops': 4271696270016}, 'timestamp': '2025-09-10 02:49:56.681202', 'step': 20436, 'epoch': 3} {'type': 'loss', 'content': 0.07598290592432022, 'timestamp': '2025-09-10 02:49:56.683555', 'step': 20437, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 128], 'flops': 3797092544000}, 'timestamp': '2025-09-10 02:49:56.715453', 'step': 20437, 'epoch': 3} {'type': 'loss', 'content': 0.09568686038255692, 'timestamp': '2025-09-10 02:49:56.717998', 'step': 20438, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 3322488817984}, 'timestamp': '2025-09-10 02:49:56.756296', 'step': 20438, 'epoch': 3} {'type': 'loss', 'content': 0.07110944390296936, 'timestamp': '2025-09-10 02:49:56.758618', 'step': 20439, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 2847885091968}, 'timestamp': '2025-09-10 02:49:56.790415', 'step': 20439, 'epoch': 3} {'type': 'loss', 'content': 0.03394835442304611, 'timestamp': '2025-09-10 02:49:56.814391', 'step': 20440, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 128], 'flops': 3797092544000}, 'timestamp': '2025-09-10 02:49:56.855043', 'step': 20440, 'epoch': 3} {'type': 'loss', 'content': 0.053614675998687744, 'timestamp': '2025-09-10 02:49:56.857923', 'step': 20441, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 3322488817984}, 'timestamp': '2025-09-10 02:49:56.891689', 'step': 20441, 'epoch': 3} {'type': 'loss', 'content': 0.0406842939555645, 'timestamp': '2025-09-10 02:49:56.895575', 'step': 20442, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 80], 'flops': 2373281365952}, 'timestamp': '2025-09-10 02:49:56.925660', 'step': 20442, 'epoch': 3} {'type': 'loss', 'content': 0.053604476153850555, 'timestamp': '2025-09-10 02:49:56.928462', 'step': 20443, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 3322488817984}, 'timestamp': '2025-09-10 02:49:56.958444', 'step': 20443, 'epoch': 3} {'type': 'loss', 'content': 0.03813152387738228, 'timestamp': '2025-09-10 02:49:56.982610', 'step': 20444, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 3322488817984}, 'timestamp': '2025-09-10 02:49:57.017233', 'step': 20444, 'epoch': 3} {'type': 'loss', 'content': 0.01860024780035019, 'timestamp': '2025-09-10 02:49:57.020428', 'step': 20445, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 208], 'flops': 6170111174080}, 'timestamp': '2025-09-10 02:49:57.051635', 'step': 20445, 'epoch': 3} {'type': 'loss', 'content': 0.11610250920057297, 'timestamp': '2025-09-10 02:49:57.058525', 'step': 20446, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 176], 'flops': 5220903722048}, 'timestamp': '2025-09-10 02:49:57.096489', 'step': 20446, 'epoch': 3} {'type': 'loss', 'content': 0.06295685470104218, 'timestamp': '2025-09-10 02:49:57.102632', 'step': 20447, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 128], 'flops': 3797092544000}, 'timestamp': '2025-09-10 02:49:57.132839', 'step': 20447, 'epoch': 3} {'type': 'loss', 'content': 0.09439343214035034, 'timestamp': '2025-09-10 02:49:57.156404', 'step': 20448, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 2847885091968}, 'timestamp': '2025-09-10 02:49:57.186789', 'step': 20448, 'epoch': 3} {'type': 'loss', 'content': 0.03996311500668526, 'timestamp': '2025-09-10 02:49:57.189365', 'step': 20449, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 3322488817984}, 'timestamp': '2025-09-10 02:49:57.221337', 'step': 20449, 'epoch': 3} {'type': 'loss', 'content': 0.0226906705647707, 'timestamp': '2025-09-10 02:49:57.223821', 'step': 20450, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 2847885091968}, 'timestamp': '2025-09-10 02:49:57.253137', 'step': 20450, 'epoch': 3} {'type': 'loss', 'content': 0.05821262300014496, 'timestamp': '2025-09-10 02:49:57.256594', 'step': 20451, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 2847885091968}, 'timestamp': '2025-09-10 02:49:57.493709', 'step': 20451, 'epoch': 3} {'type': 'loss', 'content': 0.09993317723274231, 'timestamp': '2025-09-10 02:49:57.518345', 'step': 20452, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 3322488817984}, 'timestamp': '2025-09-10 02:49:57.550724', 'step': 20452, 'epoch': 3} {'type': 'loss', 'content': 0.01974366419017315, 'timestamp': '2025-09-10 02:49:57.553254', 'step': 20453, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 3322488817984}, 'timestamp': '2025-09-10 02:49:57.591949', 'step': 20453, 'epoch': 3} {'type': 'loss', 'content': 0.09817883372306824, 'timestamp': '2025-09-10 02:49:57.595724', 'step': 20454, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 192], 'flops': 5695507448064}, 'timestamp': '2025-09-10 02:49:57.627645', 'step': 20454, 'epoch': 3} {'type': 'loss', 'content': 0.10649965703487396, 'timestamp': '2025-09-10 02:49:57.632040', 'step': 20455, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 2847885091968}, 'timestamp': '2025-09-10 02:49:57.663127', 'step': 20455, 'epoch': 3} {'type': 'loss', 'content': 0.09147977083921432, 'timestamp': '2025-09-10 02:49:57.687259', 'step': 20456, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 2847885091968}, 'timestamp': '2025-09-10 02:49:57.722654', 'step': 20456, 'epoch': 3} {'type': 'loss', 'content': 0.03766573593020439, 'timestamp': '2025-09-10 02:49:57.726182', 'step': 20457, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 3322488817984}, 'timestamp': '2025-09-10 02:49:57.757430', 'step': 20457, 'epoch': 3} {'type': 'loss', 'content': 0.043199971318244934, 'timestamp': '2025-09-10 02:49:57.762122', 'step': 20458, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 160], 'flops': 4746299996032}, 'timestamp': '2025-09-10 02:49:57.792505', 'step': 20458, 'epoch': 3} {'type': 'loss', 'content': 0.09802065044641495, 'timestamp': '2025-09-10 02:49:57.795511', 'step': 20459, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 144], 'flops': 4271696270016}, 'timestamp': '2025-09-10 02:49:57.825609', 'step': 20459, 'epoch': 3} {'type': 'loss', 'content': 0.05638546124100685, 'timestamp': '2025-09-10 02:49:57.849077', 'step': 20460, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 160], 'flops': 4746299996032}, 'timestamp': '2025-09-10 02:49:57.880706', 'step': 20460, 'epoch': 3} {'type': 'loss', 'content': 0.03347137197852135, 'timestamp': '2025-09-10 02:49:57.883765', 'step': 20461, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 2847885091968}, 'timestamp': '2025-09-10 02:49:57.916973', 'step': 20461, 'epoch': 3} {'type': 'loss', 'content': 0.070468969643116, 'timestamp': '2025-09-10 02:49:57.919407', 'step': 20462, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 128], 'flops': 3797092544000}, 'timestamp': '2025-09-10 02:49:57.949759', 'step': 20462, 'epoch': 3} {'type': 'loss', 'content': 0.07228773832321167, 'timestamp': '2025-09-10 02:49:57.952123', 'step': 20463, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 3322488817984}, 'timestamp': '2025-09-10 02:49:57.982147', 'step': 20463, 'epoch': 3} {'type': 'loss', 'content': 0.06629619002342224, 'timestamp': '2025-09-10 02:49:58.005658', 'step': 20464, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 128], 'flops': 3797092544000}, 'timestamp': '2025-09-10 02:49:58.036198', 'step': 20464, 'epoch': 3} {'type': 'loss', 'content': 0.05676829814910889, 'timestamp': '2025-09-10 02:49:58.038889', 'step': 20465, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 3322488817984}, 'timestamp': '2025-09-10 02:49:58.073504', 'step': 20465, 'epoch': 3} {'type': 'loss', 'content': 0.05071133002638817, 'timestamp': '2025-09-10 02:49:58.075994', 'step': 20466, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 3322488817984}, 'timestamp': '2025-09-10 02:49:58.106756', 'step': 20466, 'epoch': 3} {'type': 'loss', 'content': 0.025854000821709633, 'timestamp': '2025-09-10 02:49:58.109004', 'step': 20467, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 2847885091968}, 'timestamp': '2025-09-10 02:49:58.139616', 'step': 20467, 'epoch': 3} {'type': 'loss', 'content': 0.08919789642095566, 'timestamp': '2025-09-10 02:49:58.166537', 'step': 20468, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 128], 'flops': 3797092544000}, 'timestamp': '2025-09-10 02:49:58.197230', 'step': 20468, 'epoch': 3} {'type': 'loss', 'content': 0.06319493055343628, 'timestamp': '2025-09-10 02:49:58.199342', 'step': 20469, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 3322488817984}, 'timestamp': '2025-09-10 02:49:58.229442', 'step': 20469, 'epoch': 3} {'type': 'loss', 'content': 0.04451795294880867, 'timestamp': '2025-09-10 02:49:58.231922', 'step': 20470, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 3322488817984}, 'timestamp': '2025-09-10 02:49:58.262810', 'step': 20470, 'epoch': 3} {'type': 'loss', 'content': 0.11536839604377747, 'timestamp': '2025-09-10 02:49:58.268510', 'step': 20471, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 2847885091968}, 'timestamp': '2025-09-10 02:49:58.301555', 'step': 20471, 'epoch': 3} {'type': 'loss', 'content': 0.12544465065002441, 'timestamp': '2025-09-10 02:49:58.325647', 'step': 20472, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 3322488817984}, 'timestamp': '2025-09-10 02:49:58.356693', 'step': 20472, 'epoch': 3} {'type': 'loss', 'content': 0.10766533017158508, 'timestamp': '2025-09-10 02:49:58.359145', 'step': 20473, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 2847885091968}, 'timestamp': '2025-09-10 02:49:58.389726', 'step': 20473, 'epoch': 3} {'type': 'loss', 'content': 0.06456410884857178, 'timestamp': '2025-09-10 02:49:58.392520', 'step': 20474, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 128], 'flops': 3797092544000}, 'timestamp': '2025-09-10 02:49:58.427678', 'step': 20474, 'epoch': 3} {'type': 'loss', 'content': 0.05989842116832733, 'timestamp': '2025-09-10 02:49:58.430082', 'step': 20475, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 128], 'flops': 3797092544000}, 'timestamp': '2025-09-10 02:49:58.461400', 'step': 20475, 'epoch': 3} {'type': 'loss', 'content': 0.040229637175798416, 'timestamp': '2025-09-10 02:49:58.485011', 'step': 20476, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 3322488817984}, 'timestamp': '2025-09-10 02:49:58.520703', 'step': 20476, 'epoch': 3} {'type': 'loss', 'content': 0.06597423553466797, 'timestamp': '2025-09-10 02:49:58.524418', 'step': 20477, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 144], 'flops': 4271696270016}, 'timestamp': '2025-09-10 02:49:58.554809', 'step': 20477, 'epoch': 3} {'type': 'loss', 'content': 0.08942072838544846, 'timestamp': '2025-09-10 02:49:58.557306', 'step': 20478, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 128], 'flops': 3797092544000}, 'timestamp': '2025-09-10 02:49:58.588476', 'step': 20478, 'epoch': 3} {'type': 'loss', 'content': 0.05229763314127922, 'timestamp': '2025-09-10 02:49:58.591235', 'step': 20479, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 3322488817984}, 'timestamp': '2025-09-10 02:49:58.622706', 'step': 20479, 'epoch': 3} {'type': 'loss', 'content': 0.0556267686188221, 'timestamp': '2025-09-10 02:49:58.646971', 'step': 20480, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 128], 'flops': 3797092544000}, 'timestamp': '2025-09-10 02:49:58.677286', 'step': 20480, 'epoch': 3} {'type': 'loss', 'content': 0.06427901238203049, 'timestamp': '2025-09-10 02:49:58.679840', 'step': 20481, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 2847885091968}, 'timestamp': '2025-09-10 02:49:58.710016', 'step': 20481, 'epoch': 3} {'type': 'loss', 'content': 0.01921711303293705, 'timestamp': '2025-09-10 02:49:58.712734', 'step': 20482, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 176], 'flops': 5220903722048}, 'timestamp': '2025-09-10 02:49:58.743692', 'step': 20482, 'epoch': 3} {'type': 'loss', 'content': 0.06781090050935745, 'timestamp': '2025-09-10 02:49:58.748142', 'step': 20483, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 192], 'flops': 5695507448064}, 'timestamp': '2025-09-10 02:49:58.778370', 'step': 20483, 'epoch': 3} {'type': 'loss', 'content': 0.10071805119514465, 'timestamp': '2025-09-10 02:49:58.803880', 'step': 20484, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 176], 'flops': 5220903722048}, 'timestamp': '2025-09-10 02:49:58.835360', 'step': 20484, 'epoch': 3} {'type': 'loss', 'content': 0.06541508436203003, 'timestamp': '2025-09-10 02:49:58.837729', 'step': 20485, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 3322488817984}, 'timestamp': '2025-09-10 02:49:58.871447', 'step': 20485, 'epoch': 3} {'type': 'loss', 'content': 0.1987958699464798, 'timestamp': '2025-09-10 02:49:58.875899', 'step': 20486, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 3322488817984}, 'timestamp': '2025-09-10 02:49:58.906349', 'step': 20486, 'epoch': 3} {'type': 'loss', 'content': 0.03244374319911003, 'timestamp': '2025-09-10 02:49:58.910380', 'step': 20487, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 144], 'flops': 4271696270016}, 'timestamp': '2025-09-10 02:49:58.942281', 'step': 20487, 'epoch': 3} {'type': 'loss', 'content': 0.038226861506700516, 'timestamp': '2025-09-10 02:49:58.965745', 'step': 20488, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 3322488817984}, 'timestamp': '2025-09-10 02:49:58.996024', 'step': 20488, 'epoch': 3} {'type': 'loss', 'content': 0.0830126404762268, 'timestamp': '2025-09-10 02:49:58.998304', 'step': 20489, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 128], 'flops': 3797092544000}, 'timestamp': '2025-09-10 02:49:59.028617', 'step': 20489, 'epoch': 3} {'type': 'loss', 'content': 0.05485604703426361, 'timestamp': '2025-09-10 02:49:59.031209', 'step': 20490, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 128], 'flops': 3797092544000}, 'timestamp': '2025-09-10 02:49:59.062126', 'step': 20490, 'epoch': 3} {'type': 'loss', 'content': 0.03664632886648178, 'timestamp': '2025-09-10 02:49:59.064477', 'step': 20491, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 160], 'flops': 4746299996032}, 'timestamp': '2025-09-10 02:49:59.095410', 'step': 20491, 'epoch': 3} {'type': 'loss', 'content': 0.06740956008434296, 'timestamp': '2025-09-10 02:49:59.119136', 'step': 20492, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 3322488817984}, 'timestamp': '2025-09-10 02:49:59.150634', 'step': 20492, 'epoch': 3} {'type': 'loss', 'content': 0.0035527097061276436, 'timestamp': '2025-09-10 02:49:59.153332', 'step': 20493, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 2847885091968}, 'timestamp': '2025-09-10 02:49:59.183196', 'step': 20493, 'epoch': 3} {'type': 'loss', 'content': 0.0483974851667881, 'timestamp': '2025-09-10 02:49:59.185811', 'step': 20494, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 176], 'flops': 5220903722048}, 'timestamp': '2025-09-10 02:49:59.218580', 'step': 20494, 'epoch': 3} {'type': 'loss', 'content': 0.016485553234815598, 'timestamp': '2025-09-10 02:49:59.222881', 'step': 20495, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 128], 'flops': 3797092544000}, 'timestamp': '2025-09-10 02:49:59.253683', 'step': 20495, 'epoch': 3} {'type': 'loss', 'content': 0.023299306631088257, 'timestamp': '2025-09-10 02:49:59.280535', 'step': 20496, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 128], 'flops': 3797092544000}, 'timestamp': '2025-09-10 02:49:59.311746', 'step': 20496, 'epoch': 3} {'type': 'loss', 'content': 0.10190799087285995, 'timestamp': '2025-09-10 02:49:59.314190', 'step': 20497, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 2847885091968}, 'timestamp': '2025-09-10 02:49:59.344310', 'step': 20497, 'epoch': 3} {'type': 'loss', 'content': 0.036833278834819794, 'timestamp': '2025-09-10 02:49:59.346956', 'step': 20498, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 128], 'flops': 3797092544000}, 'timestamp': '2025-09-10 02:49:59.377909', 'step': 20498, 'epoch': 3} {'type': 'loss', 'content': 0.052057672291994095, 'timestamp': '2025-09-10 02:49:59.380753', 'step': 20499, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 80], 'flops': 2373281365952}, 'timestamp': '2025-09-10 02:49:59.413473', 'step': 20499, 'epoch': 3} {'type': 'loss', 'content': 0.1002366840839386, 'timestamp': '2025-09-10 02:49:59.437785', 'step': 20500, 'epoch': 3} {'type': 'info', 'content': 'Checkpoint saved at step 20500', 'timestamp': '2025-09-10 02:50:04.142093', 'step': 20500, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 3322488817984}, 'timestamp': '2025-09-10 02:50:04.175090', 'step': 20500, 'epoch': 3} {'type': 'loss', 'content': 0.03254511579871178, 'timestamp': '2025-09-10 02:50:04.177987', 'step': 20501, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 2847885091968}, 'timestamp': '2025-09-10 02:50:04.210130', 'step': 20501, 'epoch': 3} {'type': 'loss', 'content': 0.04107515886425972, 'timestamp': '2025-09-10 02:50:04.212480', 'step': 20502, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 3322488817984}, 'timestamp': '2025-09-10 02:50:04.243272', 'step': 20502, 'epoch': 3} {'type': 'loss', 'content': 0.042469706386327744, 'timestamp': '2025-09-10 02:50:04.246443', 'step': 20503, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 3322488817984}, 'timestamp': '2025-09-10 02:50:04.277216', 'step': 20503, 'epoch': 3} {'type': 'loss', 'content': 0.05123497173190117, 'timestamp': '2025-09-10 02:50:04.300891', 'step': 20504, 'epoch': 3} {'type': 'flops', 'content': [{'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1582003754624}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1898404492032}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1898404492032}, {'type': 'perplexity', 'in_batch_dim': [8, 144], 'batch_size': 8, 'flops': 2847606704256}, {'type': 'perplexity', 'in_batch_dim': [8, 64], 'batch_size': 8, 'flops': 1265603017216}, {'type': 'perplexity', 'in_batch_dim': [8, 112], 'batch_size': 8, 'flops': 2214805229440}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1898404492032}, {'type': 'perplexity', 'in_batch_dim': [8, 112], 'batch_size': 8, 'flops': 2214805229440}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1898404492032}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1898404492032}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1898404492032}, {'type': 'perplexity', 'in_batch_dim': [8, 112], 'batch_size': 8, 'flops': 2214805229440}, {'type': 'perplexity', 'in_batch_dim': [8, 112], 'batch_size': 8, 'flops': 2214805229440}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1582003754624}, {'type': 'perplexity', 'in_batch_dim': [8, 144], 'batch_size': 8, 'flops': 2847606704256}, {'type': 'perplexity', 'in_batch_dim': [8, 112], 'batch_size': 8, 'flops': 2214805229440}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1582003754624}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1898404492032}, {'type': 'perplexity', 'in_batch_dim': [8, 64], 'batch_size': 8, 'flops': 1265603017216}, {'type': 'perplexity', 'in_batch_dim': [8, 64], 'batch_size': 8, 'flops': 1265603017216}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1898404492032}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1582003754624}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1582003754624}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1582003754624}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1582003754624}, {'type': 'perplexity', 'in_batch_dim': [8, 64], 'batch_size': 8, 'flops': 1265603017216}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1898404492032}, {'type': 'perplexity', 'in_batch_dim': [8, 64], 'batch_size': 8, 'flops': 1265603017216}, {'type': 'perplexity', 'in_batch_dim': [8, 64], 'batch_size': 8, 'flops': 1265603017216}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1582003754624}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1582003754624}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1582003754624}, {'type': 'perplexity', 'in_batch_dim': [8, 112], 'batch_size': 8, 'flops': 2214805229440}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1582003754624}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1582003754624}, {'type': 'perplexity', 'in_batch_dim': [8, 160], 'batch_size': 8, 'flops': 3164007441664}, {'type': 'perplexity', 'in_batch_dim': [8, 64], 'batch_size': 8, 'flops': 1265603017216}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1898404492032}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1898404492032}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1898404492032}, {'type': 'perplexity', 'in_batch_dim': [8, 64], 'batch_size': 8, 'flops': 1265603017216}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1582003754624}, {'type': 'perplexity', 'in_batch_dim': [8, 64], 'batch_size': 8, 'flops': 1265603017216}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1582003754624}, {'type': 'perplexity', 'in_batch_dim': [8, 64], 'batch_size': 8, 'flops': 1265603017216}, {'type': 'perplexity', 'in_batch_dim': [8, 112], 'batch_size': 8, 'flops': 2214805229440}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1582003754624}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1898404492032}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1898404492032}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1898404492032}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1898404492032}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1898404492032}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1898404492032}, {'type': 'perplexity', 'in_batch_dim': [8, 64], 'batch_size': 8, 'flops': 1265603017216}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1898404492032}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1582003754624}, {'type': 'perplexity', 'in_batch_dim': [8, 64], 'batch_size': 8, 'flops': 1265603017216}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1898404492032}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1898404492032}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1898404492032}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1898404492032}, {'type': 'perplexity', 'in_batch_dim': [8, 128], 'batch_size': 8, 'flops': 2531205966848}, {'type': 'perplexity', 'in_batch_dim': [8, 112], 'batch_size': 8, 'flops': 2214805229440}, {'type': 'perplexity', 'in_batch_dim': [8, 64], 'batch_size': 8, 'flops': 1265603017216}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1582003754624}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1898404492032}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1582003754624}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1898404492032}, {'type': 'perplexity', 'in_batch_dim': [8, 64], 'batch_size': 8, 'flops': 1265603017216}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1898404492032}, {'type': 'perplexity', 'in_batch_dim': [8, 112], 'batch_size': 8, 'flops': 2214805229440}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1898404492032}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1582003754624}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1582003754624}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1582003754624}, {'type': 'perplexity', 'in_batch_dim': [8, 64], 'batch_size': 8, 'flops': 1265603017216}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1582003754624}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1898404492032}, {'type': 'perplexity', 'in_batch_dim': [8, 64], 'batch_size': 8, 'flops': 1265603017216}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1582003754624}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1898404492032}, {'type': 'perplexity', 'in_batch_dim': [8, 64], 'batch_size': 8, 'flops': 1265603017216}, {'type': 'perplexity', 'in_batch_dim': [8, 112], 'batch_size': 8, 'flops': 2214805229440}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1582003754624}, {'type': 'perplexity', 'in_batch_dim': [8, 144], 'batch_size': 8, 'flops': 2847606704256}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1582003754624}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1582003754624}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1582003754624}, {'type': 'perplexity', 'in_batch_dim': [8, 64], 'batch_size': 8, 'flops': 1265603017216}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1898404492032}, {'type': 'perplexity', 'in_batch_dim': [8, 112], 'batch_size': 8, 'flops': 2214805229440}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1898404492032}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1582003754624}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1582003754624}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1582003754624}, {'type': 'perplexity', 'in_batch_dim': [8, 112], 'batch_size': 8, 'flops': 2214805229440}, {'type': 'perplexity', 'in_batch_dim': [8, 64], 'batch_size': 8, 'flops': 1265603017216}, {'type': 'perplexity', 'in_batch_dim': [8, 112], 'batch_size': 8, 'flops': 2214805229440}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1582003754624}, {'type': 'perplexity', 'in_batch_dim': [8, 64], 'batch_size': 8, 'flops': 1265603017216}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1582003754624}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1582003754624}, {'type': 'perplexity', 'in_batch_dim': [8, 64], 'batch_size': 8, 'flops': 1265603017216}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1582003754624}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1582003754624}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1582003754624}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1898404492032}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1582003754624}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1582003754624}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1898404492032}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1898404492032}, {'type': 'perplexity', 'in_batch_dim': [8, 112], 'batch_size': 8, 'flops': 2214805229440}, {'type': 'perplexity', 'in_batch_dim': [8, 64], 'batch_size': 8, 'flops': 1265603017216}, {'type': 'perplexity', 'in_batch_dim': [8, 112], 'batch_size': 8, 'flops': 2214805229440}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1898404492032}, {'type': 'perplexity', 'in_batch_dim': [8, 112], 'batch_size': 8, 'flops': 2214805229440}, {'type': 'perplexity', 'in_batch_dim': [8, 128], 'batch_size': 8, 'flops': 2531205966848}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1582003754624}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1898404492032}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1898404492032}, {'type': 'perplexity', 'in_batch_dim': [8, 192], 'batch_size': 8, 'flops': 3796808916480}, {'type': 'perplexity', 'in_batch_dim': [8, 64], 'batch_size': 8, 'flops': 1265603017216}, {'type': 'perplexity', 'in_batch_dim': [8, 144], 'batch_size': 8, 'flops': 2847606704256}, {'type': 'perplexity', 'in_batch_dim': [8, 64], 'batch_size': 8, 'flops': 1265603017216}, {'type': 'perplexity', 'in_batch_dim': [8, 144], 'batch_size': 8, 'flops': 2847606704256}, {'type': 'perplexity', 'in_batch_dim': [8, 64], 'batch_size': 8, 'flops': 1265603017216}, {'type': 'perplexity', 'in_batch_dim': [8, 64], 'batch_size': 8, 'flops': 1265603017216}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1898404492032}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1898404492032}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1582003754624}, {'type': 'perplexity', 'in_batch_dim': [8, 128], 'batch_size': 8, 'flops': 2531205966848}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1898404492032}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1898404492032}, {'type': 'perplexity', 'in_batch_dim': [8, 112], 'batch_size': 8, 'flops': 2214805229440}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1582003754624}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1582003754624}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1898404492032}, {'type': 'perplexity', 'in_batch_dim': [8, 64], 'batch_size': 8, 'flops': 1265603017216}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1898404492032}, {'type': 'perplexity', 'in_batch_dim': [8, 112], 'batch_size': 8, 'flops': 2214805229440}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1898404492032}, {'type': 'perplexity', 'in_batch_dim': [8, 64], 'batch_size': 8, 'flops': 1265603017216}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1582003754624}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1898404492032}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1898404492032}, {'type': 'perplexity', 'in_batch_dim': [8, 64], 'batch_size': 8, 'flops': 1265603017216}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1898404492032}, {'type': 'perplexity', 'in_batch_dim': [8, 64], 'batch_size': 8, 'flops': 1265603017216}, {'type': 'perplexity', 'in_batch_dim': [8, 112], 'batch_size': 8, 'flops': 2214805229440}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1582003754624}, {'type': 'perplexity', 'in_batch_dim': [8, 128], 'batch_size': 8, 'flops': 2531205966848}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1582003754624}, {'type': 'perplexity', 'in_batch_dim': [8, 112], 'batch_size': 8, 'flops': 2214805229440}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1898404492032}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1582003754624}, {'type': 'perplexity', 'in_batch_dim': [8, 112], 'batch_size': 8, 'flops': 2214805229440}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1582003754624}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1898404492032}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1898404492032}, {'type': 'perplexity', 'in_batch_dim': [8, 128], 'batch_size': 8, 'flops': 2531205966848}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1898404492032}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1898404492032}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1582003754624}, {'type': 'perplexity', 'in_batch_dim': [8, 112], 'batch_size': 8, 'flops': 2214805229440}, {'type': 'perplexity', 'in_batch_dim': [8, 128], 'batch_size': 8, 'flops': 2531205966848}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1898404492032}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1582003754624}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1582003754624}, {'type': 'perplexity', 'in_batch_dim': [8, 128], 'batch_size': 8, 'flops': 2531205966848}, {'type': 'perplexity', 'in_batch_dim': [8, 112], 'batch_size': 8, 'flops': 2214805229440}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1582003754624}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1582003754624}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1898404492032}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1582003754624}, {'type': 'perplexity', 'in_batch_dim': [8, 64], 'batch_size': 8, 'flops': 1265603017216}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1582003754624}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1582003754624}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1582003754624}, {'type': 'perplexity', 'in_batch_dim': [8, 112], 'batch_size': 8, 'flops': 2214805229440}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1582003754624}, {'type': 'perplexity', 'in_batch_dim': [8, 64], 'batch_size': 8, 'flops': 1265603017216}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1582003754624}, {'type': 'perplexity', 'in_batch_dim': [8, 128], 'batch_size': 8, 'flops': 2531205966848}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1898404492032}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1898404492032}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1582003754624}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1898404492032}, {'type': 'perplexity', 'in_batch_dim': [8, 64], 'batch_size': 8, 'flops': 1265603017216}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1898404492032}, {'type': 'perplexity', 'in_batch_dim': [8, 112], 'batch_size': 8, 'flops': 2214805229440}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1898404492032}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1582003754624}, {'type': 'perplexity', 'in_batch_dim': [8, 112], 'batch_size': 8, 'flops': 2214805229440}, {'type': 'perplexity', 'in_batch_dim': [8, 112], 'batch_size': 8, 'flops': 2214805229440}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1898404492032}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1898404492032}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1898404492032}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1898404492032}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1582003754624}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1582003754624}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1582003754624}, {'type': 'perplexity', 'in_batch_dim': [8, 112], 'batch_size': 8, 'flops': 2214805229440}, {'type': 'perplexity', 'in_batch_dim': [8, 64], 'batch_size': 8, 'flops': 1265603017216}, {'type': 'perplexity', 'in_batch_dim': [8, 144], 'batch_size': 8, 'flops': 2847606704256}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1898404492032}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1898404492032}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1898404492032}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1898404492032}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1898404492032}, {'type': 'perplexity', 'in_batch_dim': [8, 112], 'batch_size': 8, 'flops': 2214805229440}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1582003754624}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1582003754624}, {'type': 'perplexity', 'in_batch_dim': [8, 64], 'batch_size': 8, 'flops': 1265603017216}, {'type': 'perplexity', 'in_batch_dim': [8, 112], 'batch_size': 8, 'flops': 2214805229440}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1582003754624}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1898404492032}, {'type': 'perplexity', 'in_batch_dim': [8, 64], 'batch_size': 8, 'flops': 1265603017216}, {'type': 'perplexity', 'in_batch_dim': [8, 112], 'batch_size': 8, 'flops': 2214805229440}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1898404492032}, {'type': 'perplexity', 'in_batch_dim': [8, 64], 'batch_size': 8, 'flops': 1265603017216}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1582003754624}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1582003754624}, {'type': 'perplexity', 'in_batch_dim': [8, 64], 'batch_size': 8, 'flops': 1265603017216}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1898404492032}, {'type': 'perplexity', 'in_batch_dim': [8, 64], 'batch_size': 8, 'flops': 1265603017216}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1582003754624}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1898404492032}, {'type': 'perplexity', 'in_batch_dim': [8, 112], 'batch_size': 8, 'flops': 2214805229440}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1582003754624}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1898404492032}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1582003754624}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1898404492032}, {'type': 'perplexity', 'in_batch_dim': [8, 112], 'batch_size': 8, 'flops': 2214805229440}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1898404492032}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1898404492032}, {'type': 'perplexity', 'in_batch_dim': [8, 64], 'batch_size': 8, 'flops': 1265603017216}, {'type': 'perplexity', 'in_batch_dim': [8, 64], 'batch_size': 8, 'flops': 1265603017216}, {'type': 'perplexity', 'in_batch_dim': [8, 128], 'batch_size': 8, 'flops': 2531205966848}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1582003754624}, {'type': 'perplexity', 'in_batch_dim': [8, 64], 'batch_size': 8, 'flops': 1265603017216}, {'type': 'perplexity', 'in_batch_dim': [8, 112], 'batch_size': 8, 'flops': 2214805229440}, {'type': 'perplexity', 'in_batch_dim': [8, 112], 'batch_size': 8, 'flops': 2214805229440}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1898404492032}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1582003754624}, {'type': 'perplexity', 'in_batch_dim': [8, 128], 'batch_size': 8, 'flops': 2531205966848}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1898404492032}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1582003754624}, {'type': 'perplexity', 'in_batch_dim': [8, 112], 'batch_size': 8, 'flops': 2214805229440}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1898404492032}, {'type': 'perplexity', 'in_batch_dim': [8, 64], 'batch_size': 8, 'flops': 1265603017216}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1582003754624}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1898404492032}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1898404492032}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1582003754624}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1582003754624}, {'type': 'perplexity', 'in_batch_dim': [8, 64], 'batch_size': 8, 'flops': 1265603017216}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1898404492032}, {'type': 'perplexity', 'in_batch_dim': [8, 112], 'batch_size': 8, 'flops': 2214805229440}, {'type': 'perplexity', 'in_batch_dim': [8, 64], 'batch_size': 8, 'flops': 1265603017216}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1898404492032}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1582003754624}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1582003754624}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1898404492032}, {'type': 'perplexity', 'in_batch_dim': [8, 112], 'batch_size': 8, 'flops': 2214805229440}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1898404492032}, {'type': 'perplexity', 'in_batch_dim': [8, 112], 'batch_size': 8, 'flops': 2214805229440}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1582003754624}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1582003754624}, {'type': 'perplexity', 'in_batch_dim': [8, 64], 'batch_size': 8, 'flops': 1265603017216}, {'type': 'perplexity', 'in_batch_dim': [8, 112], 'batch_size': 8, 'flops': 2214805229440}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1582003754624}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1898404492032}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1582003754624}, {'type': 'perplexity', 'in_batch_dim': [8, 112], 'batch_size': 8, 'flops': 2214805229440}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1898404492032}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1898404492032}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1582003754624}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1898404492032}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1582003754624}, {'type': 'perplexity', 'in_batch_dim': [8, 112], 'batch_size': 8, 'flops': 2214805229440}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1582003754624}, {'type': 'perplexity', 'in_batch_dim': [8, 64], 'batch_size': 8, 'flops': 1265603017216}, {'type': 'perplexity', 'in_batch_dim': [8, 64], 'batch_size': 8, 'flops': 1265603017216}, {'type': 'perplexity', 'in_batch_dim': [8, 48], 'batch_size': 8, 'flops': 949202279808}, {'type': 'perplexity', 'in_batch_dim': [8, 112], 'batch_size': 8, 'flops': 2214805229440}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1582003754624}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1898404492032}, {'type': 'perplexity', 'in_batch_dim': [8, 128], 'batch_size': 8, 'flops': 2531205966848}, {'type': 'perplexity', 'in_batch_dim': [8, 112], 'batch_size': 8, 'flops': 2214805229440}, {'type': 'perplexity', 'in_batch_dim': [8, 128], 'batch_size': 8, 'flops': 2531205966848}, {'type': 'perplexity', 'in_batch_dim': [8, 112], 'batch_size': 8, 'flops': 2214805229440}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1582003754624}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1898404492032}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1582003754624}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1582003754624}, {'type': 'perplexity', 'in_batch_dim': [8, 112], 'batch_size': 8, 'flops': 2214805229440}, {'type': 'perplexity', 'in_batch_dim': [8, 112], 'batch_size': 8, 'flops': 2214805229440}, {'type': 'perplexity', 'in_batch_dim': [8, 64], 'batch_size': 8, 'flops': 1265603017216}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1898404492032}, {'type': 'perplexity', 'in_batch_dim': [8, 112], 'batch_size': 8, 'flops': 2214805229440}, {'type': 'perplexity', 'in_batch_dim': [8, 64], 'batch_size': 8, 'flops': 1265603017216}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1898404492032}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1582003754624}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1898404492032}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1898404492032}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1898404492032}, {'type': 'perplexity', 'in_batch_dim': [8, 48], 'batch_size': 8, 'flops': 949202279808}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1898404492032}, {'type': 'perplexity', 'in_batch_dim': [8, 112], 'batch_size': 8, 'flops': 2214805229440}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1582003754624}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1898404492032}, {'type': 'perplexity', 'in_batch_dim': [8, 64], 'batch_size': 8, 'flops': 1265603017216}, {'type': 'perplexity', 'in_batch_dim': [8, 112], 'batch_size': 8, 'flops': 2214805229440}, {'type': 'perplexity', 'in_batch_dim': [8, 64], 'batch_size': 8, 'flops': 1265603017216}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1582003754624}, {'type': 'perplexity', 'in_batch_dim': [8, 64], 'batch_size': 8, 'flops': 1265603017216}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1582003754624}, {'type': 'perplexity', 'in_batch_dim': [8, 112], 'batch_size': 8, 'flops': 2214805229440}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1582003754624}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1582003754624}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1582003754624}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1582003754624}, {'type': 'perplexity', 'in_batch_dim': [8, 112], 'batch_size': 8, 'flops': 2214805229440}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1582003754624}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1898404492032}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1898404492032}, {'type': 'perplexity', 'in_batch_dim': [8, 112], 'batch_size': 8, 'flops': 2214805229440}, {'type': 'perplexity', 'in_batch_dim': [8, 144], 'batch_size': 8, 'flops': 2847606704256}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1582003754624}, {'type': 'perplexity', 'in_batch_dim': [8, 64], 'batch_size': 8, 'flops': 1265603017216}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1898404492032}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1898404492032}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1582003754624}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1898404492032}, {'type': 'perplexity', 'in_batch_dim': [8, 128], 'batch_size': 8, 'flops': 2531205966848}, {'type': 'perplexity', 'in_batch_dim': [8, 112], 'batch_size': 8, 'flops': 2214805229440}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1898404492032}, {'type': 'perplexity', 'in_batch_dim': [8, 64], 'batch_size': 8, 'flops': 1265603017216}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1898404492032}, {'type': 'perplexity', 'in_batch_dim': [8, 160], 'batch_size': 8, 'flops': 3164007441664}, {'type': 'perplexity', 'in_batch_dim': [8, 64], 'batch_size': 8, 'flops': 1265603017216}, {'type': 'perplexity', 'in_batch_dim': [8, 112], 'batch_size': 8, 'flops': 2214805229440}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1898404492032}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1898404492032}, {'type': 'perplexity', 'in_batch_dim': [8, 144], 'batch_size': 8, 'flops': 2847606704256}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1582003754624}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1898404492032}, {'type': 'perplexity', 'in_batch_dim': [8, 64], 'batch_size': 8, 'flops': 1265603017216}, {'type': 'perplexity', 'in_batch_dim': [8, 64], 'batch_size': 8, 'flops': 1265603017216}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1898404492032}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1898404492032}, {'type': 'perplexity', 'in_batch_dim': [8, 112], 'batch_size': 8, 'flops': 2214805229440}, {'type': 'perplexity', 'in_batch_dim': [8, 112], 'batch_size': 8, 'flops': 2214805229440}, {'type': 'perplexity', 'in_batch_dim': [8, 128], 'batch_size': 8, 'flops': 2531205966848}, {'type': 'perplexity', 'in_batch_dim': [8, 112], 'batch_size': 8, 'flops': 2214805229440}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1582003754624}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1582003754624}, {'type': 'perplexity', 'in_batch_dim': [8, 112], 'batch_size': 8, 'flops': 2214805229440}, {'type': 'perplexity', 'in_batch_dim': [8, 128], 'batch_size': 8, 'flops': 2531205966848}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1582003754624}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1582003754624}, {'type': 'perplexity', 'in_batch_dim': [8, 64], 'batch_size': 8, 'flops': 1265603017216}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1898404492032}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1582003754624}, {'type': 'perplexity', 'in_batch_dim': [8, 112], 'batch_size': 8, 'flops': 2214805229440}, {'type': 'perplexity', 'in_batch_dim': [8, 64], 'batch_size': 8, 'flops': 1265603017216}, {'type': 'perplexity', 'in_batch_dim': [8, 64], 'batch_size': 8, 'flops': 1265603017216}, {'type': 'perplexity', 'in_batch_dim': [8, 128], 'batch_size': 8, 'flops': 2531205966848}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1898404492032}, {'type': 'perplexity', 'in_batch_dim': [8, 112], 'batch_size': 8, 'flops': 2214805229440}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1582003754624}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1582003754624}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1582003754624}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1582003754624}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1582003754624}, {'type': 'perplexity', 'in_batch_dim': [8, 112], 'batch_size': 8, 'flops': 2214805229440}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1898404492032}, {'type': 'perplexity', 'in_batch_dim': [8, 112], 'batch_size': 8, 'flops': 2214805229440}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1898404492032}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1582003754624}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1582003754624}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1898404492032}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1898404492032}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1582003754624}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1582003754624}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1582003754624}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1582003754624}, {'type': 'perplexity', 'in_batch_dim': [8, 48], 'batch_size': 8, 'flops': 949202279808}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1582003754624}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1898404492032}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1582003754624}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1898404492032}, {'type': 'perplexity', 'in_batch_dim': [8, 112], 'batch_size': 8, 'flops': 2214805229440}, {'type': 'perplexity', 'in_batch_dim': [8, 128], 'batch_size': 8, 'flops': 2531205966848}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1898404492032}, {'type': 'perplexity', 'in_batch_dim': [8, 64], 'batch_size': 8, 'flops': 1265603017216}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1898404492032}, {'type': 'perplexity', 'in_batch_dim': [8, 64], 'batch_size': 8, 'flops': 1265603017216}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1582003754624}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1582003754624}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1582003754624}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1582003754624}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1898404492032}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1582003754624}, {'type': 'perplexity', 'in_batch_dim': [8, 112], 'batch_size': 8, 'flops': 2214805229440}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1898404492032}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1582003754624}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1898404492032}, {'type': 'perplexity', 'in_batch_dim': [8, 64], 'batch_size': 8, 'flops': 1265603017216}, {'type': 'perplexity', 'in_batch_dim': [8, 64], 'batch_size': 8, 'flops': 1265603017216}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1898404492032}, {'type': 'perplexity', 'in_batch_dim': [8, 64], 'batch_size': 8, 'flops': 1265603017216}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1582003754624}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1582003754624}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1582003754624}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1582003754624}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1582003754624}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1582003754624}, {'type': 'perplexity', 'in_batch_dim': [8, 64], 'batch_size': 8, 'flops': 1265603017216}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1582003754624}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1582003754624}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1898404492032}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1898404492032}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1898404492032}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1898404492032}, {'type': 'perplexity', 'in_batch_dim': [8, 128], 'batch_size': 8, 'flops': 2531205966848}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1582003754624}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1898404492032}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1898404492032}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1898404492032}, {'type': 'perplexity', 'in_batch_dim': [8, 64], 'batch_size': 8, 'flops': 1265603017216}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1898404492032}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1898404492032}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1582003754624}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1898404492032}, {'type': 'perplexity', 'in_batch_dim': [8, 112], 'batch_size': 8, 'flops': 2214805229440}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1898404492032}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1582003754624}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1898404492032}, {'type': 'perplexity', 'in_batch_dim': [8, 64], 'batch_size': 8, 'flops': 1265603017216}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1898404492032}, {'type': 'perplexity', 'in_batch_dim': [8, 112], 'batch_size': 8, 'flops': 2214805229440}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1898404492032}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1898404492032}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1582003754624}, {'type': 'perplexity', 'in_batch_dim': [8, 64], 'batch_size': 8, 'flops': 1265603017216}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1582003754624}, {'type': 'perplexity', 'in_batch_dim': [8, 128], 'batch_size': 8, 'flops': 2531205966848}, {'type': 'perplexity', 'in_batch_dim': [8, 64], 'batch_size': 8, 'flops': 1265603017216}, {'type': 'perplexity', 'in_batch_dim': [8, 128], 'batch_size': 8, 'flops': 2531205966848}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1898404492032}, {'type': 'perplexity', 'in_batch_dim': [8, 112], 'batch_size': 8, 'flops': 2214805229440}, {'type': 'perplexity', 'in_batch_dim': [8, 112], 'batch_size': 8, 'flops': 2214805229440}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1898404492032}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1898404492032}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1582003754624}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1582003754624}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1582003754624}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1898404492032}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1582003754624}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1582003754624}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1898404492032}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1582003754624}, {'type': 'perplexity', 'in_batch_dim': [8, 64], 'batch_size': 8, 'flops': 1265603017216}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1898404492032}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1898404492032}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1898404492032}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1898404492032}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1582003754624}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1898404492032}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1582003754624}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1582003754624}, {'type': 'perplexity', 'in_batch_dim': [8, 112], 'batch_size': 8, 'flops': 2214805229440}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1582003754624}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1898404492032}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1582003754624}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1582003754624}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1582003754624}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1898404492032}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1582003754624}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1898404492032}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1582003754624}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1582003754624}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1898404492032}, {'type': 'perplexity', 'in_batch_dim': [8, 112], 'batch_size': 8, 'flops': 2214805229440}, {'type': 'perplexity', 'in_batch_dim': [8, 128], 'batch_size': 8, 'flops': 2531205966848}, {'type': 'perplexity', 'in_batch_dim': [8, 112], 'batch_size': 8, 'flops': 2214805229440}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1582003754624}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1898404492032}, {'type': 'perplexity', 'in_batch_dim': [8, 64], 'batch_size': 8, 'flops': 1265603017216}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1898404492032}, {'type': 'perplexity', 'in_batch_dim': [8, 64], 'batch_size': 8, 'flops': 1265603017216}, {'type': 'perplexity', 'in_batch_dim': [8, 144], 'batch_size': 8, 'flops': 2847606704256}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1582003754624}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1898404492032}, {'type': 'perplexity', 'in_batch_dim': [8, 112], 'batch_size': 8, 'flops': 2214805229440}, {'type': 'perplexity', 'in_batch_dim': [8, 144], 'batch_size': 8, 'flops': 2847606704256}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1582003754624}, {'type': 'perplexity', 'in_batch_dim': [8, 112], 'batch_size': 8, 'flops': 2214805229440}, {'type': 'perplexity', 'in_batch_dim': [8, 64], 'batch_size': 8, 'flops': 1265603017216}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1582003754624}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1898404492032}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1898404492032}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1582003754624}, {'type': 'perplexity', 'in_batch_dim': [8, 112], 'batch_size': 8, 'flops': 2214805229440}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1582003754624}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1582003754624}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1582003754624}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1582003754624}, {'type': 'perplexity', 'in_batch_dim': [8, 112], 'batch_size': 8, 'flops': 2214805229440}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1898404492032}, {'type': 'perplexity', 'in_batch_dim': [8, 128], 'batch_size': 8, 'flops': 2531205966848}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1582003754624}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1582003754624}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1898404492032}, {'type': 'perplexity', 'in_batch_dim': [8, 64], 'batch_size': 8, 'flops': 1265603017216}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1898404492032}, {'type': 'perplexity', 'in_batch_dim': [8, 64], 'batch_size': 8, 'flops': 1265603017216}, {'type': 'perplexity', 'in_batch_dim': [8, 112], 'batch_size': 8, 'flops': 2214805229440}, {'type': 'perplexity', 'in_batch_dim': [8, 112], 'batch_size': 8, 'flops': 2214805229440}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1582003754624}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1898404492032}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1582003754624}, {'type': 'perplexity', 'in_batch_dim': [8, 64], 'batch_size': 8, 'flops': 1265603017216}, {'type': 'perplexity', 'in_batch_dim': [8, 64], 'batch_size': 8, 'flops': 1265603017216}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1898404492032}, {'type': 'perplexity', 'in_batch_dim': [8, 64], 'batch_size': 8, 'flops': 1265603017216}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1898404492032}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1898404492032}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1582003754624}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1898404492032}, {'type': 'perplexity', 'in_batch_dim': [8, 144], 'batch_size': 8, 'flops': 2847606704256}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1898404492032}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1582003754624}, {'type': 'perplexity', 'in_batch_dim': [8, 128], 'batch_size': 8, 'flops': 2531205966848}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1898404492032}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1898404492032}, {'type': 'perplexity', 'in_batch_dim': [8, 112], 'batch_size': 8, 'flops': 2214805229440}, {'type': 'perplexity', 'in_batch_dim': [8, 112], 'batch_size': 8, 'flops': 2214805229440}, {'type': 'perplexity', 'in_batch_dim': [8, 64], 'batch_size': 8, 'flops': 1265603017216}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1582003754624}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1898404492032}, {'type': 'perplexity', 'in_batch_dim': [8, 128], 'batch_size': 8, 'flops': 2531205966848}, {'type': 'perplexity', 'in_batch_dim': [8, 144], 'batch_size': 8, 'flops': 2847606704256}, {'type': 'perplexity', 'in_batch_dim': [8, 112], 'batch_size': 8, 'flops': 2214805229440}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1582003754624}, {'type': 'perplexity', 'in_batch_dim': [8, 64], 'batch_size': 8, 'flops': 1265603017216}, {'type': 'perplexity', 'in_batch_dim': [8, 64], 'batch_size': 8, 'flops': 1265603017216}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1898404492032}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1582003754624}, {'type': 'perplexity', 'in_batch_dim': [8, 64], 'batch_size': 8, 'flops': 1265603017216}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1898404492032}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1582003754624}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1898404492032}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1898404492032}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1582003754624}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1582003754624}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1582003754624}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1582003754624}, {'type': 'perplexity', 'in_batch_dim': [3, 48], 'batch_size': 8, 'flops': 949202279808}], 'timestamp': '2025-09-10 02:50:12.610763', 'step': 20504, 'epoch': 3} {'type': 'pplx', 'content': 11632.161174267065, 'timestamp': '2025-09-10 02:50:12.613882', 'step': 20504, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 128], 'flops': 3797092544000}, 'timestamp': '2025-09-10 02:50:12.642783', 'step': 20504, 'epoch': 3} {'type': 'loss', 'content': 0.10807196795940399, 'timestamp': '2025-09-10 02:50:12.644563', 'step': 20505, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 3322488817984}, 'timestamp': '2025-09-10 02:50:12.673942', 'step': 20505, 'epoch': 3} {'type': 'loss', 'content': 0.01631820574402809, 'timestamp': '2025-09-10 02:50:12.675795', 'step': 20506, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 80], 'flops': 2373281365952}, 'timestamp': '2025-09-10 02:50:12.705331', 'step': 20506, 'epoch': 3} {'type': 'loss', 'content': 0.03240253031253815, 'timestamp': '2025-09-10 02:50:12.707568', 'step': 20507, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 2847885091968}, 'timestamp': '2025-09-10 02:50:12.738690', 'step': 20507, 'epoch': 3} {'type': 'loss', 'content': 0.04487115889787674, 'timestamp': '2025-09-10 02:50:12.762524', 'step': 20508, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 3322488817984}, 'timestamp': '2025-09-10 02:50:12.792853', 'step': 20508, 'epoch': 3} {'type': 'loss', 'content': 0.05086836963891983, 'timestamp': '2025-09-10 02:50:12.795926', 'step': 20509, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 2847885091968}, 'timestamp': '2025-09-10 02:50:12.826769', 'step': 20509, 'epoch': 3} {'type': 'loss', 'content': 0.04655734449625015, 'timestamp': '2025-09-10 02:50:12.829385', 'step': 20510, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 3322488817984}, 'timestamp': '2025-09-10 02:50:12.860039', 'step': 20510, 'epoch': 3} {'type': 'loss', 'content': 0.0601150318980217, 'timestamp': '2025-09-10 02:50:12.862209', 'step': 20511, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 144], 'flops': 4271696270016}, 'timestamp': '2025-09-10 02:50:12.892443', 'step': 20511, 'epoch': 3} {'type': 'loss', 'content': 0.02711363695561886, 'timestamp': '2025-09-10 02:50:12.916682', 'step': 20512, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 2847885091968}, 'timestamp': '2025-09-10 02:50:12.948500', 'step': 20512, 'epoch': 3} {'type': 'loss', 'content': 0.05710894986987114, 'timestamp': '2025-09-10 02:50:12.951098', 'step': 20513, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 3322488817984}, 'timestamp': '2025-09-10 02:50:12.983515', 'step': 20513, 'epoch': 3} {'type': 'loss', 'content': 0.0943300724029541, 'timestamp': '2025-09-10 02:50:12.985603', 'step': 20514, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 2847885091968}, 'timestamp': '2025-09-10 02:50:13.014663', 'step': 20514, 'epoch': 3} {'type': 'loss', 'content': 0.05354802682995796, 'timestamp': '2025-09-10 02:50:13.016825', 'step': 20515, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 144], 'flops': 4271696270016}, 'timestamp': '2025-09-10 02:50:13.046615', 'step': 20515, 'epoch': 3} {'type': 'loss', 'content': 0.08851144462823868, 'timestamp': '2025-09-10 02:50:13.069737', 'step': 20516, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 3322488817984}, 'timestamp': '2025-09-10 02:50:13.099924', 'step': 20516, 'epoch': 3} {'type': 'loss', 'content': 0.10924886167049408, 'timestamp': '2025-09-10 02:50:13.102249', 'step': 20517, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 3322488817984}, 'timestamp': '2025-09-10 02:50:13.131433', 'step': 20517, 'epoch': 3} {'type': 'loss', 'content': 0.04433546960353851, 'timestamp': '2025-09-10 02:50:13.133833', 'step': 20518, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 2847885091968}, 'timestamp': '2025-09-10 02:50:13.163745', 'step': 20518, 'epoch': 3} {'type': 'loss', 'content': 0.10797390341758728, 'timestamp': '2025-09-10 02:50:13.165803', 'step': 20519, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 128], 'flops': 3797092544000}, 'timestamp': '2025-09-10 02:50:13.196193', 'step': 20519, 'epoch': 3} {'type': 'loss', 'content': 0.06350139528512955, 'timestamp': '2025-09-10 02:50:13.219397', 'step': 20520, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 3322488817984}, 'timestamp': '2025-09-10 02:50:13.250184', 'step': 20520, 'epoch': 3} {'type': 'loss', 'content': 0.06664671003818512, 'timestamp': '2025-09-10 02:50:13.252437', 'step': 20521, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 128], 'flops': 3797092544000}, 'timestamp': '2025-09-10 02:50:13.282888', 'step': 20521, 'epoch': 3} {'type': 'loss', 'content': 0.0838167816400528, 'timestamp': '2025-09-10 02:50:13.285713', 'step': 20522, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 3322488817984}, 'timestamp': '2025-09-10 02:50:13.317189', 'step': 20522, 'epoch': 3} {'type': 'loss', 'content': 0.08415921032428741, 'timestamp': '2025-09-10 02:50:13.319470', 'step': 20523, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 3322488817984}, 'timestamp': '2025-09-10 02:50:13.350293', 'step': 20523, 'epoch': 3} {'type': 'loss', 'content': 0.039423227310180664, 'timestamp': '2025-09-10 02:50:13.377154', 'step': 20524, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 3322488817984}, 'timestamp': '2025-09-10 02:50:13.406901', 'step': 20524, 'epoch': 3} {'type': 'loss', 'content': 0.07408826053142548, 'timestamp': '2025-09-10 02:50:13.409318', 'step': 20525, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 2847885091968}, 'timestamp': '2025-09-10 02:50:13.439704', 'step': 20525, 'epoch': 3} {'type': 'loss', 'content': 0.04395245760679245, 'timestamp': '2025-09-10 02:50:13.442413', 'step': 20526, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 144], 'flops': 4271696270016}, 'timestamp': '2025-09-10 02:50:13.475691', 'step': 20526, 'epoch': 3} {'type': 'loss', 'content': 0.08660587668418884, 'timestamp': '2025-09-10 02:50:13.479353', 'step': 20527, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 128], 'flops': 3797092544000}, 'timestamp': '2025-09-10 02:50:13.513742', 'step': 20527, 'epoch': 3} {'type': 'loss', 'content': 0.09400715678930283, 'timestamp': '2025-09-10 02:50:13.538664', 'step': 20528, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 3322488817984}, 'timestamp': '2025-09-10 02:50:13.574624', 'step': 20528, 'epoch': 3} {'type': 'loss', 'content': 0.03393164277076721, 'timestamp': '2025-09-10 02:50:13.578347', 'step': 20529, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 144], 'flops': 4271696270016}, 'timestamp': '2025-09-10 02:50:13.612191', 'step': 20529, 'epoch': 3} {'type': 'loss', 'content': 0.04610573500394821, 'timestamp': '2025-09-10 02:50:13.615847', 'step': 20530, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 3322488817984}, 'timestamp': '2025-09-10 02:50:13.650269', 'step': 20530, 'epoch': 3} {'type': 'loss', 'content': 0.03594738617539406, 'timestamp': '2025-09-10 02:50:13.652677', 'step': 20531, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 80], 'flops': 2373281365952}, 'timestamp': '2025-09-10 02:50:13.682959', 'step': 20531, 'epoch': 3} {'type': 'loss', 'content': 0.043820809572935104, 'timestamp': '2025-09-10 02:50:13.706382', 'step': 20532, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 128], 'flops': 3797092544000}, 'timestamp': '2025-09-10 02:50:13.736205', 'step': 20532, 'epoch': 3} {'type': 'loss', 'content': 0.0428910031914711, 'timestamp': '2025-09-10 02:50:13.738662', 'step': 20533, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 3322488817984}, 'timestamp': '2025-09-10 02:50:13.769218', 'step': 20533, 'epoch': 3} {'type': 'loss', 'content': 0.01719038374722004, 'timestamp': '2025-09-10 02:50:13.771626', 'step': 20534, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 3322488817984}, 'timestamp': '2025-09-10 02:50:13.802136', 'step': 20534, 'epoch': 3} {'type': 'loss', 'content': 0.07035563886165619, 'timestamp': '2025-09-10 02:50:13.805019', 'step': 20535, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 128], 'flops': 3797092544000}, 'timestamp': '2025-09-10 02:50:13.835812', 'step': 20535, 'epoch': 3} {'type': 'loss', 'content': 0.042610205709934235, 'timestamp': '2025-09-10 02:50:13.859384', 'step': 20536, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 3322488817984}, 'timestamp': '2025-09-10 02:50:13.890522', 'step': 20536, 'epoch': 3} {'type': 'loss', 'content': 0.05170337110757828, 'timestamp': '2025-09-10 02:50:13.892959', 'step': 20537, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 3322488817984}, 'timestamp': '2025-09-10 02:50:13.923267', 'step': 20537, 'epoch': 3} {'type': 'loss', 'content': 0.09146426618099213, 'timestamp': '2025-09-10 02:50:13.925768', 'step': 20538, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 80], 'flops': 2373281365952}, 'timestamp': '2025-09-10 02:50:13.956909', 'step': 20538, 'epoch': 3} {'type': 'loss', 'content': 0.024183625355362892, 'timestamp': '2025-09-10 02:50:13.959397', 'step': 20539, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 3322488817984}, 'timestamp': '2025-09-10 02:50:13.995226', 'step': 20539, 'epoch': 3} {'type': 'loss', 'content': 0.09075421094894409, 'timestamp': '2025-09-10 02:50:14.019085', 'step': 20540, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 2847885091968}, 'timestamp': '2025-09-10 02:50:14.049538', 'step': 20540, 'epoch': 3} {'type': 'loss', 'content': 0.1254647970199585, 'timestamp': '2025-09-10 02:50:14.051858', 'step': 20541, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 144], 'flops': 4271696270016}, 'timestamp': '2025-09-10 02:50:14.082458', 'step': 20541, 'epoch': 3} {'type': 'loss', 'content': 0.03444123640656471, 'timestamp': '2025-09-10 02:50:14.084753', 'step': 20542, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 128], 'flops': 3797092544000}, 'timestamp': '2025-09-10 02:50:14.114532', 'step': 20542, 'epoch': 3} {'type': 'loss', 'content': 0.08690058439970016, 'timestamp': '2025-09-10 02:50:14.116554', 'step': 20543, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 2847885091968}, 'timestamp': '2025-09-10 02:50:14.149304', 'step': 20543, 'epoch': 3} {'type': 'loss', 'content': 0.06692246347665787, 'timestamp': '2025-09-10 02:50:14.173986', 'step': 20544, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 3322488817984}, 'timestamp': '2025-09-10 02:50:14.204569', 'step': 20544, 'epoch': 3} {'type': 'loss', 'content': 0.12241648882627487, 'timestamp': '2025-09-10 02:50:14.207007', 'step': 20545, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 3322488817984}, 'timestamp': '2025-09-10 02:50:14.237138', 'step': 20545, 'epoch': 3} {'type': 'loss', 'content': 0.14164499938488007, 'timestamp': '2025-09-10 02:50:14.239355', 'step': 20546, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 3322488817984}, 'timestamp': '2025-09-10 02:50:14.269615', 'step': 20546, 'epoch': 3} {'type': 'loss', 'content': 0.14516408741474152, 'timestamp': '2025-09-10 02:50:14.271966', 'step': 20547, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 3322488817984}, 'timestamp': '2025-09-10 02:50:14.302616', 'step': 20547, 'epoch': 3} {'type': 'loss', 'content': 0.11893031001091003, 'timestamp': '2025-09-10 02:50:14.326603', 'step': 20548, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 3322488817984}, 'timestamp': '2025-09-10 02:50:14.357579', 'step': 20548, 'epoch': 3} {'type': 'loss', 'content': 0.05788225308060646, 'timestamp': '2025-09-10 02:50:14.359826', 'step': 20549, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 3322488817984}, 'timestamp': '2025-09-10 02:50:14.390223', 'step': 20549, 'epoch': 3} {'type': 'loss', 'content': 0.09037622809410095, 'timestamp': '2025-09-10 02:50:14.392551', 'step': 20550, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 128], 'flops': 3797092544000}, 'timestamp': '2025-09-10 02:50:14.424479', 'step': 20550, 'epoch': 3} {'type': 'loss', 'content': 0.13336126506328583, 'timestamp': '2025-09-10 02:50:14.426872', 'step': 20551, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 128], 'flops': 3797092544000}, 'timestamp': '2025-09-10 02:50:14.457820', 'step': 20551, 'epoch': 3} {'type': 'loss', 'content': 0.07955364882946014, 'timestamp': '2025-09-10 02:50:14.481409', 'step': 20552, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 128], 'flops': 3797092544000}, 'timestamp': '2025-09-10 02:50:14.511584', 'step': 20552, 'epoch': 3} {'type': 'loss', 'content': 0.0805739238858223, 'timestamp': '2025-09-10 02:50:14.514122', 'step': 20553, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 3322488817984}, 'timestamp': '2025-09-10 02:50:14.544728', 'step': 20553, 'epoch': 3} {'type': 'loss', 'content': 0.05013258010149002, 'timestamp': '2025-09-10 02:50:14.546985', 'step': 20554, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 128], 'flops': 3797092544000}, 'timestamp': '2025-09-10 02:50:14.578763', 'step': 20554, 'epoch': 3} {'type': 'loss', 'content': 0.15227892994880676, 'timestamp': '2025-09-10 02:50:14.581144', 'step': 20555, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 2847885091968}, 'timestamp': '2025-09-10 02:50:14.611692', 'step': 20555, 'epoch': 3} {'type': 'loss', 'content': 0.04606879875063896, 'timestamp': '2025-09-10 02:50:14.634965', 'step': 20556, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 3322488817984}, 'timestamp': '2025-09-10 02:50:14.665306', 'step': 20556, 'epoch': 3} {'type': 'loss', 'content': 0.08923052996397018, 'timestamp': '2025-09-10 02:50:14.668188', 'step': 20557, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 176], 'flops': 5220903722048}, 'timestamp': '2025-09-10 02:50:14.710686', 'step': 20557, 'epoch': 3} {'type': 'loss', 'content': 0.06616828590631485, 'timestamp': '2025-09-10 02:50:14.715433', 'step': 20558, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 128], 'flops': 3797092544000}, 'timestamp': '2025-09-10 02:50:14.753973', 'step': 20558, 'epoch': 3} {'type': 'loss', 'content': 0.09144113212823868, 'timestamp': '2025-09-10 02:50:14.757250', 'step': 20559, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 144], 'flops': 4271696270016}, 'timestamp': '2025-09-10 02:50:14.793786', 'step': 20559, 'epoch': 3} {'type': 'loss', 'content': 0.03689214587211609, 'timestamp': '2025-09-10 02:50:14.818700', 'step': 20560, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 128], 'flops': 3797092544000}, 'timestamp': '2025-09-10 02:50:14.856583', 'step': 20560, 'epoch': 3} {'type': 'loss', 'content': 0.04783456400036812, 'timestamp': '2025-09-10 02:50:14.858912', 'step': 20561, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 144], 'flops': 4271696270016}, 'timestamp': '2025-09-10 02:50:14.889698', 'step': 20561, 'epoch': 3} {'type': 'loss', 'content': 0.21484845876693726, 'timestamp': '2025-09-10 02:50:14.892511', 'step': 20562, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 80], 'flops': 2373281365952}, 'timestamp': '2025-09-10 02:50:14.924332', 'step': 20562, 'epoch': 3} {'type': 'loss', 'content': 0.06328203529119492, 'timestamp': '2025-09-10 02:50:14.927148', 'step': 20563, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 128], 'flops': 3797092544000}, 'timestamp': '2025-09-10 02:50:14.957683', 'step': 20563, 'epoch': 3} {'type': 'loss', 'content': 0.1302861124277115, 'timestamp': '2025-09-10 02:50:14.980744', 'step': 20564, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 128], 'flops': 3797092544000}, 'timestamp': '2025-09-10 02:50:15.012583', 'step': 20564, 'epoch': 3} {'type': 'loss', 'content': 0.08520317822694778, 'timestamp': '2025-09-10 02:50:15.015063', 'step': 20565, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 128], 'flops': 3797092544000}, 'timestamp': '2025-09-10 02:50:15.044573', 'step': 20565, 'epoch': 3} {'type': 'loss', 'content': 0.07899016886949539, 'timestamp': '2025-09-10 02:50:15.046601', 'step': 20566, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 3322488817984}, 'timestamp': '2025-09-10 02:50:15.077665', 'step': 20566, 'epoch': 3} {'type': 'loss', 'content': 0.04275832697749138, 'timestamp': '2025-09-10 02:50:15.079703', 'step': 20567, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 176], 'flops': 5220903722048}, 'timestamp': '2025-09-10 02:50:15.111396', 'step': 20567, 'epoch': 3} {'type': 'loss', 'content': 0.09341881424188614, 'timestamp': '2025-09-10 02:50:15.136476', 'step': 20568, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 2847885091968}, 'timestamp': '2025-09-10 02:50:15.166511', 'step': 20568, 'epoch': 3} {'type': 'loss', 'content': 0.07297036051750183, 'timestamp': '2025-09-10 02:50:15.168529', 'step': 20569, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 3322488817984}, 'timestamp': '2025-09-10 02:50:15.198591', 'step': 20569, 'epoch': 3} {'type': 'loss', 'content': 0.053434208035469055, 'timestamp': '2025-09-10 02:50:15.201394', 'step': 20570, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 2847885091968}, 'timestamp': '2025-09-10 02:50:15.231945', 'step': 20570, 'epoch': 3} {'type': 'loss', 'content': 0.06515275686979294, 'timestamp': '2025-09-10 02:50:15.233980', 'step': 20571, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 128], 'flops': 3797092544000}, 'timestamp': '2025-09-10 02:50:15.264455', 'step': 20571, 'epoch': 3} {'type': 'loss', 'content': 0.009134850464761257, 'timestamp': '2025-09-10 02:50:15.288132', 'step': 20572, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 128], 'flops': 3797092544000}, 'timestamp': '2025-09-10 02:50:15.318305', 'step': 20572, 'epoch': 3} {'type': 'loss', 'content': 0.03381440043449402, 'timestamp': '2025-09-10 02:50:15.320501', 'step': 20573, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 144], 'flops': 4271696270016}, 'timestamp': '2025-09-10 02:50:15.351869', 'step': 20573, 'epoch': 3} {'type': 'loss', 'content': 0.10493379831314087, 'timestamp': '2025-09-10 02:50:15.356227', 'step': 20574, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 128], 'flops': 3797092544000}, 'timestamp': '2025-09-10 02:50:15.390491', 'step': 20574, 'epoch': 3} {'type': 'loss', 'content': 0.10536392778158188, 'timestamp': '2025-09-10 02:50:15.392567', 'step': 20575, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 2847885091968}, 'timestamp': '2025-09-10 02:50:15.422571', 'step': 20575, 'epoch': 3} {'type': 'loss', 'content': 0.12405788153409958, 'timestamp': '2025-09-10 02:50:15.446631', 'step': 20576, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 2847885091968}, 'timestamp': '2025-09-10 02:50:15.479838', 'step': 20576, 'epoch': 3} {'type': 'loss', 'content': 0.03868212178349495, 'timestamp': '2025-09-10 02:50:15.482124', 'step': 20577, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 128], 'flops': 3797092544000}, 'timestamp': '2025-09-10 02:50:15.512295', 'step': 20577, 'epoch': 3} {'type': 'loss', 'content': 0.128910094499588, 'timestamp': '2025-09-10 02:50:15.514475', 'step': 20578, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 3322488817984}, 'timestamp': '2025-09-10 02:50:15.544738', 'step': 20578, 'epoch': 3} {'type': 'loss', 'content': 0.03757563233375549, 'timestamp': '2025-09-10 02:50:15.547666', 'step': 20579, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 3322488817984}, 'timestamp': '2025-09-10 02:50:15.578316', 'step': 20579, 'epoch': 3} {'type': 'loss', 'content': 0.07444903254508972, 'timestamp': '2025-09-10 02:50:15.601685', 'step': 20580, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 2847885091968}, 'timestamp': '2025-09-10 02:50:15.633238', 'step': 20580, 'epoch': 3} {'type': 'loss', 'content': 0.06418995559215546, 'timestamp': '2025-09-10 02:50:15.635707', 'step': 20581, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 3322488817984}, 'timestamp': '2025-09-10 02:50:15.665910', 'step': 20581, 'epoch': 3} {'type': 'loss', 'content': 0.05797174200415611, 'timestamp': '2025-09-10 02:50:15.668263', 'step': 20582, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 3322488817984}, 'timestamp': '2025-09-10 02:50:15.699365', 'step': 20582, 'epoch': 3} {'type': 'loss', 'content': 0.04113753139972687, 'timestamp': '2025-09-10 02:50:15.705199', 'step': 20583, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 2847885091968}, 'timestamp': '2025-09-10 02:50:15.745636', 'step': 20583, 'epoch': 3} {'type': 'loss', 'content': 0.04841434955596924, 'timestamp': '2025-09-10 02:50:15.776725', 'step': 20584, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 128], 'flops': 3797092544000}, 'timestamp': '2025-09-10 02:50:15.826221', 'step': 20584, 'epoch': 3} {'type': 'loss', 'content': 0.01390210073441267, 'timestamp': '2025-09-10 02:50:15.832031', 'step': 20585, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 80], 'flops': 2373281365952}, 'timestamp': '2025-09-10 02:50:15.872051', 'step': 20585, 'epoch': 3} {'type': 'loss', 'content': 0.010969267226755619, 'timestamp': '2025-09-10 02:50:15.875741', 'step': 20586, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 128], 'flops': 3797092544000}, 'timestamp': '2025-09-10 02:50:15.912789', 'step': 20586, 'epoch': 3} {'type': 'loss', 'content': 0.045327287167310715, 'timestamp': '2025-09-10 02:50:15.916780', 'step': 20587, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 2847885091968}, 'timestamp': '2025-09-10 02:50:15.951535', 'step': 20587, 'epoch': 3} {'type': 'loss', 'content': 0.08065104484558105, 'timestamp': '2025-09-10 02:50:15.979598', 'step': 20588, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 176], 'flops': 5220903722048}, 'timestamp': '2025-09-10 02:50:16.011687', 'step': 20588, 'epoch': 3} {'type': 'loss', 'content': 0.028090380132198334, 'timestamp': '2025-09-10 02:50:16.014134', 'step': 20589, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 128], 'flops': 3797092544000}, 'timestamp': '2025-09-10 02:50:16.044348', 'step': 20589, 'epoch': 3} {'type': 'loss', 'content': 0.026227131485939026, 'timestamp': '2025-09-10 02:50:16.049903', 'step': 20590, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 3322488817984}, 'timestamp': '2025-09-10 02:50:16.088807', 'step': 20590, 'epoch': 3} {'type': 'loss', 'content': 0.0775751918554306, 'timestamp': '2025-09-10 02:50:16.093334', 'step': 20591, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 128], 'flops': 3797092544000}, 'timestamp': '2025-09-10 02:50:16.132525', 'step': 20591, 'epoch': 3} {'type': 'loss', 'content': 0.12085149437189102, 'timestamp': '2025-09-10 02:50:16.155883', 'step': 20592, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 160], 'flops': 4746299996032}, 'timestamp': '2025-09-10 02:50:16.189184', 'step': 20592, 'epoch': 3} {'type': 'loss', 'content': 0.11983480304479599, 'timestamp': '2025-09-10 02:50:16.191730', 'step': 20593, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 128], 'flops': 3797092544000}, 'timestamp': '2025-09-10 02:50:16.224624', 'step': 20593, 'epoch': 3} {'type': 'loss', 'content': 0.027015244588255882, 'timestamp': '2025-09-10 02:50:16.227188', 'step': 20594, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 3322488817984}, 'timestamp': '2025-09-10 02:50:16.263181', 'step': 20594, 'epoch': 3} {'type': 'loss', 'content': 0.0269903726875782, 'timestamp': '2025-09-10 02:50:16.265585', 'step': 20595, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 128], 'flops': 3797092544000}, 'timestamp': '2025-09-10 02:50:16.296583', 'step': 20595, 'epoch': 3} {'type': 'loss', 'content': 0.0893779769539833, 'timestamp': '2025-09-10 02:50:16.320703', 'step': 20596, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 2847885091968}, 'timestamp': '2025-09-10 02:50:16.363927', 'step': 20596, 'epoch': 3} {'type': 'loss', 'content': 0.04013647511601448, 'timestamp': '2025-09-10 02:50:16.366690', 'step': 20597, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 2847885091968}, 'timestamp': '2025-09-10 02:50:16.398055', 'step': 20597, 'epoch': 3} {'type': 'loss', 'content': 0.030018171295523643, 'timestamp': '2025-09-10 02:50:16.415346', 'step': 20598, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 128], 'flops': 3797092544000}, 'timestamp': '2025-09-10 02:50:16.454279', 'step': 20598, 'epoch': 3} {'type': 'loss', 'content': 0.051037076860666275, 'timestamp': '2025-09-10 02:50:16.456577', 'step': 20599, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 2847885091968}, 'timestamp': '2025-09-10 02:50:16.487828', 'step': 20599, 'epoch': 3} {'type': 'loss', 'content': 0.06440030038356781, 'timestamp': '2025-09-10 02:50:16.511041', 'step': 20600, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 3322488817984}, 'timestamp': '2025-09-10 02:50:16.553176', 'step': 20600, 'epoch': 3} {'type': 'loss', 'content': 0.06395836919546127, 'timestamp': '2025-09-10 02:50:16.555680', 'step': 20601, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 2847885091968}, 'timestamp': '2025-09-10 02:50:16.586677', 'step': 20601, 'epoch': 3} {'type': 'loss', 'content': 0.08678127825260162, 'timestamp': '2025-09-10 02:50:16.594666', 'step': 20602, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 80], 'flops': 2373281365952}, 'timestamp': '2025-09-10 02:50:16.635190', 'step': 20602, 'epoch': 3} {'type': 'loss', 'content': 0.04666045680642128, 'timestamp': '2025-09-10 02:50:16.637871', 'step': 20603, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 2847885091968}, 'timestamp': '2025-09-10 02:50:16.670282', 'step': 20603, 'epoch': 3} {'type': 'loss', 'content': 0.1177627220749855, 'timestamp': '2025-09-10 02:50:16.693546', 'step': 20604, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 2847885091968}, 'timestamp': '2025-09-10 02:50:16.724578', 'step': 20604, 'epoch': 3} {'type': 'loss', 'content': 0.03642855957150459, 'timestamp': '2025-09-10 02:50:16.726512', 'step': 20605, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 3322488817984}, 'timestamp': '2025-09-10 02:50:16.759680', 'step': 20605, 'epoch': 3} {'type': 'loss', 'content': 0.0906202644109726, 'timestamp': '2025-09-10 02:50:16.762567', 'step': 20606, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 2847885091968}, 'timestamp': '2025-09-10 02:50:16.793885', 'step': 20606, 'epoch': 3} {'type': 'loss', 'content': 0.07743076235055923, 'timestamp': '2025-09-10 02:50:16.796458', 'step': 20607, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 128], 'flops': 3797092544000}, 'timestamp': '2025-09-10 02:50:16.830269', 'step': 20607, 'epoch': 3} {'type': 'loss', 'content': 0.05762530118227005, 'timestamp': '2025-09-10 02:50:16.853802', 'step': 20608, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 144], 'flops': 4271696270016}, 'timestamp': '2025-09-10 02:50:16.890886', 'step': 20608, 'epoch': 3} {'type': 'loss', 'content': 0.08688804507255554, 'timestamp': '2025-09-10 02:50:16.896204', 'step': 20609, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 3322488817984}, 'timestamp': '2025-09-10 02:50:16.934654', 'step': 20609, 'epoch': 3} {'type': 'loss', 'content': 0.11367228627204895, 'timestamp': '2025-09-10 02:50:16.937316', 'step': 20610, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 144], 'flops': 4271696270016}, 'timestamp': '2025-09-10 02:50:16.972019', 'step': 20610, 'epoch': 3} {'type': 'loss', 'content': 0.03940654173493385, 'timestamp': '2025-09-10 02:50:16.974845', 'step': 20611, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 144], 'flops': 4271696270016}, 'timestamp': '2025-09-10 02:50:17.006328', 'step': 20611, 'epoch': 3} {'type': 'loss', 'content': 0.038083549588918686, 'timestamp': '2025-09-10 02:50:17.029791', 'step': 20612, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 160], 'flops': 4746299996032}, 'timestamp': '2025-09-10 02:50:17.061292', 'step': 20612, 'epoch': 3} {'type': 'loss', 'content': 0.025228600949048996, 'timestamp': '2025-09-10 02:50:17.063617', 'step': 20613, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 128], 'flops': 3797092544000}, 'timestamp': '2025-09-10 02:50:17.095442', 'step': 20613, 'epoch': 3} {'type': 'loss', 'content': 0.06502681225538254, 'timestamp': '2025-09-10 02:50:17.097985', 'step': 20614, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 144], 'flops': 4271696270016}, 'timestamp': '2025-09-10 02:50:17.129719', 'step': 20614, 'epoch': 3} {'type': 'loss', 'content': 0.07216805964708328, 'timestamp': '2025-09-10 02:50:17.132098', 'step': 20615, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 3322488817984}, 'timestamp': '2025-09-10 02:50:17.162579', 'step': 20615, 'epoch': 3} {'type': 'loss', 'content': 0.07677403837442398, 'timestamp': '2025-09-10 02:50:17.186507', 'step': 20616, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 128], 'flops': 3797092544000}, 'timestamp': '2025-09-10 02:50:17.218238', 'step': 20616, 'epoch': 3} {'type': 'loss', 'content': 0.03291071951389313, 'timestamp': '2025-09-10 02:50:17.220558', 'step': 20617, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 128], 'flops': 3797092544000}, 'timestamp': '2025-09-10 02:50:17.251362', 'step': 20617, 'epoch': 3} {'type': 'loss', 'content': 0.059829507023096085, 'timestamp': '2025-09-10 02:50:17.253770', 'step': 20618, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 2847885091968}, 'timestamp': '2025-09-10 02:50:17.285018', 'step': 20618, 'epoch': 3} {'type': 'loss', 'content': 0.052870724350214005, 'timestamp': '2025-09-10 02:50:17.287775', 'step': 20619, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 3322488817984}, 'timestamp': '2025-09-10 02:50:17.319610', 'step': 20619, 'epoch': 3} {'type': 'loss', 'content': 0.04608037695288658, 'timestamp': '2025-09-10 02:50:17.343571', 'step': 20620, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 128], 'flops': 3797092544000}, 'timestamp': '2025-09-10 02:50:17.374657', 'step': 20620, 'epoch': 3} {'type': 'loss', 'content': 0.08313123881816864, 'timestamp': '2025-09-10 02:50:17.377285', 'step': 20621, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 3322488817984}, 'timestamp': '2025-09-10 02:50:17.408053', 'step': 20621, 'epoch': 3} {'type': 'loss', 'content': 0.03136202692985535, 'timestamp': '2025-09-10 02:50:17.412027', 'step': 20622, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 2847885091968}, 'timestamp': '2025-09-10 02:50:17.445231', 'step': 20622, 'epoch': 3} {'type': 'loss', 'content': 0.1255417913198471, 'timestamp': '2025-09-10 02:50:17.447912', 'step': 20623, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 2847885091968}, 'timestamp': '2025-09-10 02:50:17.478692', 'step': 20623, 'epoch': 3} {'type': 'loss', 'content': 0.04156089574098587, 'timestamp': '2025-09-10 02:50:17.502423', 'step': 20624, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 3322488817984}, 'timestamp': '2025-09-10 02:50:17.533747', 'step': 20624, 'epoch': 3} {'type': 'loss', 'content': 0.07352001219987869, 'timestamp': '2025-09-10 02:50:17.536664', 'step': 20625, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 128], 'flops': 3797092544000}, 'timestamp': '2025-09-10 02:50:17.567464', 'step': 20625, 'epoch': 3} {'type': 'loss', 'content': 0.03958122804760933, 'timestamp': '2025-09-10 02:50:17.570450', 'step': 20626, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 160], 'flops': 4746299996032}, 'timestamp': '2025-09-10 02:50:17.602512', 'step': 20626, 'epoch': 3} {'type': 'loss', 'content': 0.10555125027894974, 'timestamp': '2025-09-10 02:50:17.605187', 'step': 20627, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 144], 'flops': 4271696270016}, 'timestamp': '2025-09-10 02:50:17.636962', 'step': 20627, 'epoch': 3} {'type': 'loss', 'content': 0.03803698346018791, 'timestamp': '2025-09-10 02:50:17.660952', 'step': 20628, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 2847885091968}, 'timestamp': '2025-09-10 02:50:17.693653', 'step': 20628, 'epoch': 3} {'type': 'loss', 'content': 0.07463778555393219, 'timestamp': '2025-09-10 02:50:17.696976', 'step': 20629, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 3322488817984}, 'timestamp': '2025-09-10 02:50:17.728455', 'step': 20629, 'epoch': 3} {'type': 'loss', 'content': 0.0586780421435833, 'timestamp': '2025-09-10 02:50:17.730719', 'step': 20630, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 128], 'flops': 3797092544000}, 'timestamp': '2025-09-10 02:50:17.763603', 'step': 20630, 'epoch': 3} {'type': 'loss', 'content': 0.12376625090837479, 'timestamp': '2025-09-10 02:50:17.766407', 'step': 20631, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 2847885091968}, 'timestamp': '2025-09-10 02:50:17.796661', 'step': 20631, 'epoch': 3} {'type': 'loss', 'content': 0.03301025182008743, 'timestamp': '2025-09-10 02:50:17.820307', 'step': 20632, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 128], 'flops': 3797092544000}, 'timestamp': '2025-09-10 02:50:17.852542', 'step': 20632, 'epoch': 3} {'type': 'loss', 'content': 0.027288485318422318, 'timestamp': '2025-09-10 02:50:17.855366', 'step': 20633, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 3322488817984}, 'timestamp': '2025-09-10 02:50:17.886125', 'step': 20633, 'epoch': 3} {'type': 'loss', 'content': 0.05013330653309822, 'timestamp': '2025-09-10 02:50:17.888105', 'step': 20634, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 144], 'flops': 4271696270016}, 'timestamp': '2025-09-10 02:50:17.920130', 'step': 20634, 'epoch': 3} {'type': 'loss', 'content': 0.05360505357384682, 'timestamp': '2025-09-10 02:50:17.924075', 'step': 20635, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 144], 'flops': 4271696270016}, 'timestamp': '2025-09-10 02:50:17.955456', 'step': 20635, 'epoch': 3} {'type': 'loss', 'content': 0.040127478539943695, 'timestamp': '2025-09-10 02:50:17.979920', 'step': 20636, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 2847885091968}, 'timestamp': '2025-09-10 02:50:18.011667', 'step': 20636, 'epoch': 3} {'type': 'loss', 'content': 0.06448639184236526, 'timestamp': '2025-09-10 02:50:18.014425', 'step': 20637, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 144], 'flops': 4271696270016}, 'timestamp': '2025-09-10 02:50:18.047932', 'step': 20637, 'epoch': 3} {'type': 'loss', 'content': 0.012793362140655518, 'timestamp': '2025-09-10 02:50:18.050490', 'step': 20638, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 3322488817984}, 'timestamp': '2025-09-10 02:50:18.081365', 'step': 20638, 'epoch': 3} {'type': 'loss', 'content': 0.05111315846443176, 'timestamp': '2025-09-10 02:50:18.085179', 'step': 20639, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 2847885091968}, 'timestamp': '2025-09-10 02:50:18.121546', 'step': 20639, 'epoch': 3} {'type': 'loss', 'content': 0.09902879595756531, 'timestamp': '2025-09-10 02:50:18.148319', 'step': 20640, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 144], 'flops': 4271696270016}, 'timestamp': '2025-09-10 02:50:18.181811', 'step': 20640, 'epoch': 3} {'type': 'loss', 'content': 0.03369075059890747, 'timestamp': '2025-09-10 02:50:18.184273', 'step': 20641, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 3322488817984}, 'timestamp': '2025-09-10 02:50:18.219967', 'step': 20641, 'epoch': 3} {'type': 'loss', 'content': 0.10566631704568863, 'timestamp': '2025-09-10 02:50:18.223274', 'step': 20642, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 3322488817984}, 'timestamp': '2025-09-10 02:50:18.259537', 'step': 20642, 'epoch': 3} {'type': 'loss', 'content': 0.08436334878206253, 'timestamp': '2025-09-10 02:50:18.263310', 'step': 20643, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 3322488817984}, 'timestamp': '2025-09-10 02:50:18.295407', 'step': 20643, 'epoch': 3} {'type': 'loss', 'content': 0.011264681816101074, 'timestamp': '2025-09-10 02:50:18.320089', 'step': 20644, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 2847885091968}, 'timestamp': '2025-09-10 02:50:18.352591', 'step': 20644, 'epoch': 3} {'type': 'loss', 'content': 0.09177973121404648, 'timestamp': '2025-09-10 02:50:18.355003', 'step': 20645, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 144], 'flops': 4271696270016}, 'timestamp': '2025-09-10 02:50:18.391560', 'step': 20645, 'epoch': 3} {'type': 'loss', 'content': 0.12079674750566483, 'timestamp': '2025-09-10 02:50:18.394073', 'step': 20646, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 144], 'flops': 4271696270016}, 'timestamp': '2025-09-10 02:50:18.425828', 'step': 20646, 'epoch': 3} {'type': 'loss', 'content': 0.031534742563962936, 'timestamp': '2025-09-10 02:50:18.429916', 'step': 20647, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 3322488817984}, 'timestamp': '2025-09-10 02:50:18.463000', 'step': 20647, 'epoch': 3} {'type': 'loss', 'content': 0.024638622999191284, 'timestamp': '2025-09-10 02:50:18.486980', 'step': 20648, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 2847885091968}, 'timestamp': '2025-09-10 02:50:18.518627', 'step': 20648, 'epoch': 3} {'type': 'loss', 'content': 0.06458648294210434, 'timestamp': '2025-09-10 02:50:18.521071', 'step': 20649, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 2847885091968}, 'timestamp': '2025-09-10 02:50:18.552379', 'step': 20649, 'epoch': 3} {'type': 'loss', 'content': 0.08500231057405472, 'timestamp': '2025-09-10 02:50:18.554772', 'step': 20650, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 144], 'flops': 4271696270016}, 'timestamp': '2025-09-10 02:50:18.585550', 'step': 20650, 'epoch': 3} {'type': 'loss', 'content': 0.028815286234021187, 'timestamp': '2025-09-10 02:50:18.587898', 'step': 20651, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 3322488817984}, 'timestamp': '2025-09-10 02:50:18.617991', 'step': 20651, 'epoch': 3} {'type': 'loss', 'content': 0.04730469733476639, 'timestamp': '2025-09-10 02:50:18.642684', 'step': 20652, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 2847885091968}, 'timestamp': '2025-09-10 02:50:18.673054', 'step': 20652, 'epoch': 3} {'type': 'loss', 'content': 0.049201950430870056, 'timestamp': '2025-09-10 02:50:18.675710', 'step': 20653, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 3322488817984}, 'timestamp': '2025-09-10 02:50:18.706742', 'step': 20653, 'epoch': 3} {'type': 'loss', 'content': 0.03946613520383835, 'timestamp': '2025-09-10 02:50:18.710090', 'step': 20654, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 3322488817984}, 'timestamp': '2025-09-10 02:50:18.740914', 'step': 20654, 'epoch': 3} {'type': 'loss', 'content': 0.041082024574279785, 'timestamp': '2025-09-10 02:50:18.744597', 'step': 20655, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 144], 'flops': 4271696270016}, 'timestamp': '2025-09-10 02:50:18.775048', 'step': 20655, 'epoch': 3} {'type': 'loss', 'content': 0.13118286430835724, 'timestamp': '2025-09-10 02:50:18.798850', 'step': 20656, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 144], 'flops': 4271696270016}, 'timestamp': '2025-09-10 02:50:18.830287', 'step': 20656, 'epoch': 3} {'type': 'loss', 'content': 0.06195378303527832, 'timestamp': '2025-09-10 02:50:18.834712', 'step': 20657, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 128], 'flops': 3797092544000}, 'timestamp': '2025-09-10 02:50:18.869413', 'step': 20657, 'epoch': 3} {'type': 'loss', 'content': 0.08191732317209244, 'timestamp': '2025-09-10 02:50:18.871838', 'step': 20658, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 2847885091968}, 'timestamp': '2025-09-10 02:50:18.902186', 'step': 20658, 'epoch': 3} {'type': 'loss', 'content': 0.05380701646208763, 'timestamp': '2025-09-10 02:50:18.904955', 'step': 20659, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 3322488817984}, 'timestamp': '2025-09-10 02:50:18.935188', 'step': 20659, 'epoch': 3} {'type': 'loss', 'content': 0.09093034267425537, 'timestamp': '2025-09-10 02:50:18.961416', 'step': 20660, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 2847885091968}, 'timestamp': '2025-09-10 02:50:18.993124', 'step': 20660, 'epoch': 3} {'type': 'loss', 'content': 0.017724277451634407, 'timestamp': '2025-09-10 02:50:18.995571', 'step': 20661, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 160], 'flops': 4746299996032}, 'timestamp': '2025-09-10 02:50:19.026821', 'step': 20661, 'epoch': 3} {'type': 'loss', 'content': 0.04239509254693985, 'timestamp': '2025-09-10 02:50:19.029523', 'step': 20662, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 2847885091968}, 'timestamp': '2025-09-10 02:50:19.062694', 'step': 20662, 'epoch': 3} {'type': 'loss', 'content': 0.02773956023156643, 'timestamp': '2025-09-10 02:50:19.065246', 'step': 20663, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 128], 'flops': 3797092544000}, 'timestamp': '2025-09-10 02:50:19.095431', 'step': 20663, 'epoch': 3} {'type': 'loss', 'content': 0.05119076371192932, 'timestamp': '2025-09-10 02:50:19.119021', 'step': 20664, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 2847885091968}, 'timestamp': '2025-09-10 02:50:19.149388', 'step': 20664, 'epoch': 3} {'type': 'loss', 'content': 0.08908005803823471, 'timestamp': '2025-09-10 02:50:19.152525', 'step': 20665, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 2847885091968}, 'timestamp': '2025-09-10 02:50:19.183609', 'step': 20665, 'epoch': 3} {'type': 'loss', 'content': 0.08753392100334167, 'timestamp': '2025-09-10 02:50:19.186198', 'step': 20666, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 2847885091968}, 'timestamp': '2025-09-10 02:50:19.216413', 'step': 20666, 'epoch': 3} {'type': 'loss', 'content': 0.04826131835579872, 'timestamp': '2025-09-10 02:50:19.219079', 'step': 20667, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 3322488817984}, 'timestamp': '2025-09-10 02:50:19.249487', 'step': 20667, 'epoch': 3} {'type': 'loss', 'content': 0.07001414149999619, 'timestamp': '2025-09-10 02:50:19.273290', 'step': 20668, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 2847885091968}, 'timestamp': '2025-09-10 02:50:19.304657', 'step': 20668, 'epoch': 3} {'type': 'loss', 'content': 0.06588823348283768, 'timestamp': '2025-09-10 02:50:19.307337', 'step': 20669, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 2847885091968}, 'timestamp': '2025-09-10 02:50:19.337800', 'step': 20669, 'epoch': 3} {'type': 'loss', 'content': 0.06411192566156387, 'timestamp': '2025-09-10 02:50:19.342358', 'step': 20670, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 2847885091968}, 'timestamp': '2025-09-10 02:50:19.372687', 'step': 20670, 'epoch': 3} {'type': 'loss', 'content': 0.09450148791074753, 'timestamp': '2025-09-10 02:50:19.377921', 'step': 20671, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 2847885091968}, 'timestamp': '2025-09-10 02:50:19.410477', 'step': 20671, 'epoch': 3} {'type': 'loss', 'content': 0.07929761707782745, 'timestamp': '2025-09-10 02:50:19.434287', 'step': 20672, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 144], 'flops': 4271696270016}, 'timestamp': '2025-09-10 02:50:19.467192', 'step': 20672, 'epoch': 3} {'type': 'loss', 'content': 0.027445456013083458, 'timestamp': '2025-09-10 02:50:19.470288', 'step': 20673, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 3322488817984}, 'timestamp': '2025-09-10 02:50:19.501824', 'step': 20673, 'epoch': 3} {'type': 'loss', 'content': 0.039778292179107666, 'timestamp': '2025-09-10 02:50:19.504416', 'step': 20674, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 128], 'flops': 3797092544000}, 'timestamp': '2025-09-10 02:50:19.535861', 'step': 20674, 'epoch': 3} {'type': 'loss', 'content': 0.0584772564470768, 'timestamp': '2025-09-10 02:50:19.538662', 'step': 20675, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 2847885091968}, 'timestamp': '2025-09-10 02:50:19.571676', 'step': 20675, 'epoch': 3} {'type': 'loss', 'content': 0.022359980270266533, 'timestamp': '2025-09-10 02:50:19.595850', 'step': 20676, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 3322488817984}, 'timestamp': '2025-09-10 02:50:19.627127', 'step': 20676, 'epoch': 3} {'type': 'loss', 'content': 0.09718243032693863, 'timestamp': '2025-09-10 02:50:19.630171', 'step': 20677, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 144], 'flops': 4271696270016}, 'timestamp': '2025-09-10 02:50:19.661778', 'step': 20677, 'epoch': 3} {'type': 'loss', 'content': 0.12348328530788422, 'timestamp': '2025-09-10 02:50:19.664279', 'step': 20678, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 3322488817984}, 'timestamp': '2025-09-10 02:50:19.695238', 'step': 20678, 'epoch': 3} {'type': 'loss', 'content': 0.07059868425130844, 'timestamp': '2025-09-10 02:50:19.697883', 'step': 20679, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 2847885091968}, 'timestamp': '2025-09-10 02:50:19.728466', 'step': 20679, 'epoch': 3} {'type': 'loss', 'content': 0.05752555653452873, 'timestamp': '2025-09-10 02:50:19.752269', 'step': 20680, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 2847885091968}, 'timestamp': '2025-09-10 02:50:19.783895', 'step': 20680, 'epoch': 3} {'type': 'loss', 'content': 0.07201583683490753, 'timestamp': '2025-09-10 02:50:19.786876', 'step': 20681, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 128], 'flops': 3797092544000}, 'timestamp': '2025-09-10 02:50:19.818254', 'step': 20681, 'epoch': 3} {'type': 'loss', 'content': 0.04886946454644203, 'timestamp': '2025-09-10 02:50:19.820926', 'step': 20682, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 2847885091968}, 'timestamp': '2025-09-10 02:50:19.851486', 'step': 20682, 'epoch': 3} {'type': 'loss', 'content': 0.0201927088201046, 'timestamp': '2025-09-10 02:50:19.853843', 'step': 20683, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 2847885091968}, 'timestamp': '2025-09-10 02:50:19.884913', 'step': 20683, 'epoch': 3} {'type': 'loss', 'content': 0.06861468404531479, 'timestamp': '2025-09-10 02:50:19.908334', 'step': 20684, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 2847885091968}, 'timestamp': '2025-09-10 02:50:19.940187', 'step': 20684, 'epoch': 3} {'type': 'loss', 'content': 0.09724614024162292, 'timestamp': '2025-09-10 02:50:19.942440', 'step': 20685, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 160], 'flops': 4746299996032}, 'timestamp': '2025-09-10 02:50:19.974377', 'step': 20685, 'epoch': 3} {'type': 'loss', 'content': 0.13453440368175507, 'timestamp': '2025-09-10 02:50:19.977203', 'step': 20686, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 144], 'flops': 4271696270016}, 'timestamp': '2025-09-10 02:50:20.007978', 'step': 20686, 'epoch': 3} {'type': 'loss', 'content': 0.10079152137041092, 'timestamp': '2025-09-10 02:50:20.010299', 'step': 20687, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 128], 'flops': 3797092544000}, 'timestamp': '2025-09-10 02:50:20.041933', 'step': 20687, 'epoch': 3} {'type': 'loss', 'content': 0.1153438538312912, 'timestamp': '2025-09-10 02:50:20.065893', 'step': 20688, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 3322488817984}, 'timestamp': '2025-09-10 02:50:20.097378', 'step': 20688, 'epoch': 3} {'type': 'loss', 'content': 0.05576068162918091, 'timestamp': '2025-09-10 02:50:20.099911', 'step': 20689, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 80], 'flops': 2373281365952}, 'timestamp': '2025-09-10 02:50:20.130381', 'step': 20689, 'epoch': 3} {'type': 'loss', 'content': 0.021806620061397552, 'timestamp': '2025-09-10 02:50:20.132944', 'step': 20690, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 128], 'flops': 3797092544000}, 'timestamp': '2025-09-10 02:50:20.163596', 'step': 20690, 'epoch': 3} {'type': 'loss', 'content': 0.11678002029657364, 'timestamp': '2025-09-10 02:50:20.166938', 'step': 20691, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 2847885091968}, 'timestamp': '2025-09-10 02:50:20.199384', 'step': 20691, 'epoch': 3} {'type': 'loss', 'content': 0.16553188860416412, 'timestamp': '2025-09-10 02:50:20.222950', 'step': 20692, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 3322488817984}, 'timestamp': '2025-09-10 02:50:20.254274', 'step': 20692, 'epoch': 3} {'type': 'loss', 'content': 0.05337469279766083, 'timestamp': '2025-09-10 02:50:20.256939', 'step': 20693, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 2847885091968}, 'timestamp': '2025-09-10 02:50:20.287378', 'step': 20693, 'epoch': 3} {'type': 'loss', 'content': 0.0037375541869550943, 'timestamp': '2025-09-10 02:50:20.289698', 'step': 20694, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 128], 'flops': 3797092544000}, 'timestamp': '2025-09-10 02:50:20.321799', 'step': 20694, 'epoch': 3} {'type': 'loss', 'content': 0.051851317286491394, 'timestamp': '2025-09-10 02:50:20.324424', 'step': 20695, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 2847885091968}, 'timestamp': '2025-09-10 02:50:20.355132', 'step': 20695, 'epoch': 3} {'type': 'loss', 'content': 0.059701353311538696, 'timestamp': '2025-09-10 02:50:20.378474', 'step': 20696, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 2847885091968}, 'timestamp': '2025-09-10 02:50:20.417931', 'step': 20696, 'epoch': 3} {'type': 'loss', 'content': 0.06262477487325668, 'timestamp': '2025-09-10 02:50:20.420693', 'step': 20697, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 2847885091968}, 'timestamp': '2025-09-10 02:50:20.451717', 'step': 20697, 'epoch': 3} {'type': 'loss', 'content': 0.03688271343708038, 'timestamp': '2025-09-10 02:50:20.456274', 'step': 20698, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 128], 'flops': 3797092544000}, 'timestamp': '2025-09-10 02:50:20.486445', 'step': 20698, 'epoch': 3} {'type': 'loss', 'content': 0.07798762619495392, 'timestamp': '2025-09-10 02:50:20.489340', 'step': 20699, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 144], 'flops': 4271696270016}, 'timestamp': '2025-09-10 02:50:20.520707', 'step': 20699, 'epoch': 3} {'type': 'loss', 'content': 0.030285226181149483, 'timestamp': '2025-09-10 02:50:20.544414', 'step': 20700, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 128], 'flops': 3797092544000}, 'timestamp': '2025-09-10 02:50:20.575694', 'step': 20700, 'epoch': 3} {'type': 'loss', 'content': 0.11219578236341476, 'timestamp': '2025-09-10 02:50:20.578080', 'step': 20701, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 3322488817984}, 'timestamp': '2025-09-10 02:50:20.608641', 'step': 20701, 'epoch': 3} {'type': 'loss', 'content': 0.03279836103320122, 'timestamp': '2025-09-10 02:50:20.611341', 'step': 20702, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 3322488817984}, 'timestamp': '2025-09-10 02:50:20.642379', 'step': 20702, 'epoch': 3} {'type': 'loss', 'content': 0.0938679426908493, 'timestamp': '2025-09-10 02:50:20.646245', 'step': 20703, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 128], 'flops': 3797092544000}, 'timestamp': '2025-09-10 02:50:20.676363', 'step': 20703, 'epoch': 3} {'type': 'loss', 'content': 0.0272963959723711, 'timestamp': '2025-09-10 02:50:20.700495', 'step': 20704, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 128], 'flops': 3797092544000}, 'timestamp': '2025-09-10 02:50:20.731395', 'step': 20704, 'epoch': 3} {'type': 'loss', 'content': 0.15667372941970825, 'timestamp': '2025-09-10 02:50:20.733839', 'step': 20705, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 2847885091968}, 'timestamp': '2025-09-10 02:50:20.764377', 'step': 20705, 'epoch': 3} {'type': 'loss', 'content': 0.1131121963262558, 'timestamp': '2025-09-10 02:50:20.766806', 'step': 20706, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 3322488817984}, 'timestamp': '2025-09-10 02:50:20.797377', 'step': 20706, 'epoch': 3} {'type': 'loss', 'content': 0.06394190341234207, 'timestamp': '2025-09-10 02:50:20.800101', 'step': 20707, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 3322488817984}, 'timestamp': '2025-09-10 02:50:20.830532', 'step': 20707, 'epoch': 3} {'type': 'loss', 'content': 0.019660506397485733, 'timestamp': '2025-09-10 02:50:20.854444', 'step': 20708, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 3322488817984}, 'timestamp': '2025-09-10 02:50:20.887173', 'step': 20708, 'epoch': 3} {'type': 'loss', 'content': 0.042137712240219116, 'timestamp': '2025-09-10 02:50:20.889442', 'step': 20709, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 3322488817984}, 'timestamp': '2025-09-10 02:50:20.920260', 'step': 20709, 'epoch': 3} {'type': 'loss', 'content': 0.09726571291685104, 'timestamp': '2025-09-10 02:50:20.923813', 'step': 20710, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 3322488817984}, 'timestamp': '2025-09-10 02:50:20.955590', 'step': 20710, 'epoch': 3} {'type': 'loss', 'content': 0.06555028259754181, 'timestamp': '2025-09-10 02:50:20.958277', 'step': 20711, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 3322488817984}, 'timestamp': '2025-09-10 02:50:20.991204', 'step': 20711, 'epoch': 3} {'type': 'loss', 'content': 0.045803964138031006, 'timestamp': '2025-09-10 02:50:21.015715', 'step': 20712, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 128], 'flops': 3797092544000}, 'timestamp': '2025-09-10 02:50:21.049566', 'step': 20712, 'epoch': 3} {'type': 'loss', 'content': 0.04341302812099457, 'timestamp': '2025-09-10 02:50:21.052768', 'step': 20713, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 3322488817984}, 'timestamp': '2025-09-10 02:50:21.083961', 'step': 20713, 'epoch': 3} {'type': 'loss', 'content': 0.052333444356918335, 'timestamp': '2025-09-10 02:50:21.086664', 'step': 20714, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 2847885091968}, 'timestamp': '2025-09-10 02:50:21.117869', 'step': 20714, 'epoch': 3} {'type': 'loss', 'content': 0.050480473786592484, 'timestamp': '2025-09-10 02:50:21.120195', 'step': 20715, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 128], 'flops': 3797092544000}, 'timestamp': '2025-09-10 02:50:21.150587', 'step': 20715, 'epoch': 3} {'type': 'loss', 'content': 0.07380473613739014, 'timestamp': '2025-09-10 02:50:21.174815', 'step': 20716, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 2847885091968}, 'timestamp': '2025-09-10 02:50:21.206329', 'step': 20716, 'epoch': 3} {'type': 'loss', 'content': 0.041851650923490524, 'timestamp': '2025-09-10 02:50:21.209261', 'step': 20717, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 3322488817984}, 'timestamp': '2025-09-10 02:50:21.241266', 'step': 20717, 'epoch': 3} {'type': 'loss', 'content': 0.012344371527433395, 'timestamp': '2025-09-10 02:50:21.244728', 'step': 20718, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 2847885091968}, 'timestamp': '2025-09-10 02:50:21.276301', 'step': 20718, 'epoch': 3} {'type': 'loss', 'content': 0.022286413237452507, 'timestamp': '2025-09-10 02:50:21.280065', 'step': 20719, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 144], 'flops': 4271696270016}, 'timestamp': '2025-09-10 02:50:21.312516', 'step': 20719, 'epoch': 3} {'type': 'loss', 'content': 0.04685563966631889, 'timestamp': '2025-09-10 02:50:21.337041', 'step': 20720, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 2847885091968}, 'timestamp': '2025-09-10 02:50:21.368581', 'step': 20720, 'epoch': 3} {'type': 'loss', 'content': 0.1102852001786232, 'timestamp': '2025-09-10 02:50:21.371889', 'step': 20721, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 144], 'flops': 4271696270016}, 'timestamp': '2025-09-10 02:50:21.404569', 'step': 20721, 'epoch': 3} {'type': 'loss', 'content': 0.09750193357467651, 'timestamp': '2025-09-10 02:50:21.407079', 'step': 20722, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 3322488817984}, 'timestamp': '2025-09-10 02:50:21.436391', 'step': 20722, 'epoch': 3} {'type': 'loss', 'content': 0.0024118644651025534, 'timestamp': '2025-09-10 02:50:21.438920', 'step': 20723, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 2847885091968}, 'timestamp': '2025-09-10 02:50:21.470029', 'step': 20723, 'epoch': 3} {'type': 'loss', 'content': 0.031216561794281006, 'timestamp': '2025-09-10 02:50:21.494358', 'step': 20724, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 2847885091968}, 'timestamp': '2025-09-10 02:50:21.526085', 'step': 20724, 'epoch': 3} {'type': 'loss', 'content': 0.03521530330181122, 'timestamp': '2025-09-10 02:50:21.528502', 'step': 20725, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 144], 'flops': 4271696270016}, 'timestamp': '2025-09-10 02:50:21.560504', 'step': 20725, 'epoch': 3} {'type': 'loss', 'content': 0.08345498889684677, 'timestamp': '2025-09-10 02:50:21.563022', 'step': 20726, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 128], 'flops': 3797092544000}, 'timestamp': '2025-09-10 02:50:21.595918', 'step': 20726, 'epoch': 3} {'type': 'loss', 'content': 0.05600493401288986, 'timestamp': '2025-09-10 02:50:21.598400', 'step': 20727, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 3322488817984}, 'timestamp': '2025-09-10 02:50:21.630149', 'step': 20727, 'epoch': 3} {'type': 'loss', 'content': 0.11949533969163895, 'timestamp': '2025-09-10 02:50:21.654003', 'step': 20728, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 144], 'flops': 4271696270016}, 'timestamp': '2025-09-10 02:50:21.684915', 'step': 20728, 'epoch': 3} {'type': 'loss', 'content': 0.045320406556129456, 'timestamp': '2025-09-10 02:50:21.687977', 'step': 20729, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 2847885091968}, 'timestamp': '2025-09-10 02:50:21.718419', 'step': 20729, 'epoch': 3} {'type': 'loss', 'content': 0.02100539579987526, 'timestamp': '2025-09-10 02:50:21.721412', 'step': 20730, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 2847885091968}, 'timestamp': '2025-09-10 02:50:21.752567', 'step': 20730, 'epoch': 3} {'type': 'loss', 'content': 0.0370502807199955, 'timestamp': '2025-09-10 02:50:21.755557', 'step': 20731, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 80], 'flops': 2373281365952}, 'timestamp': '2025-09-10 02:50:21.786459', 'step': 20731, 'epoch': 3} {'type': 'loss', 'content': 0.04839851334691048, 'timestamp': '2025-09-10 02:50:21.810014', 'step': 20732, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 3322488817984}, 'timestamp': '2025-09-10 02:50:21.841859', 'step': 20732, 'epoch': 3} {'type': 'loss', 'content': 0.052837565541267395, 'timestamp': '2025-09-10 02:50:21.844762', 'step': 20733, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 2847885091968}, 'timestamp': '2025-09-10 02:50:21.875293', 'step': 20733, 'epoch': 3} {'type': 'loss', 'content': 0.07912765443325043, 'timestamp': '2025-09-10 02:50:21.877676', 'step': 20734, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 128], 'flops': 3797092544000}, 'timestamp': '2025-09-10 02:50:21.908201', 'step': 20734, 'epoch': 3} {'type': 'loss', 'content': 0.10633129626512527, 'timestamp': '2025-09-10 02:50:21.911089', 'step': 20735, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 2847885091968}, 'timestamp': '2025-09-10 02:50:21.941612', 'step': 20735, 'epoch': 3} {'type': 'loss', 'content': 0.04096478596329689, 'timestamp': '2025-09-10 02:50:21.965781', 'step': 20736, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 3322488817984}, 'timestamp': '2025-09-10 02:50:21.998447', 'step': 20736, 'epoch': 3} {'type': 'loss', 'content': 0.06455358117818832, 'timestamp': '2025-09-10 02:50:22.001339', 'step': 20737, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 2847885091968}, 'timestamp': '2025-09-10 02:50:22.032628', 'step': 20737, 'epoch': 3} {'type': 'loss', 'content': 0.10504291206598282, 'timestamp': '2025-09-10 02:50:22.035086', 'step': 20738, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 2847885091968}, 'timestamp': '2025-09-10 02:50:22.064982', 'step': 20738, 'epoch': 3} {'type': 'loss', 'content': 0.014668243937194347, 'timestamp': '2025-09-10 02:50:22.067493', 'step': 20739, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 2847885091968}, 'timestamp': '2025-09-10 02:50:22.097942', 'step': 20739, 'epoch': 3} {'type': 'loss', 'content': 0.12019898742437363, 'timestamp': '2025-09-10 02:50:22.123058', 'step': 20740, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 3322488817984}, 'timestamp': '2025-09-10 02:50:22.164328', 'step': 20740, 'epoch': 3} {'type': 'loss', 'content': 0.035878926515579224, 'timestamp': '2025-09-10 02:50:22.167387', 'step': 20741, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 128], 'flops': 3797092544000}, 'timestamp': '2025-09-10 02:50:22.198667', 'step': 20741, 'epoch': 3} {'type': 'loss', 'content': 0.1489277184009552, 'timestamp': '2025-09-10 02:50:22.201247', 'step': 20742, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 128], 'flops': 3797092544000}, 'timestamp': '2025-09-10 02:50:22.231512', 'step': 20742, 'epoch': 3} {'type': 'loss', 'content': 0.07931814342737198, 'timestamp': '2025-09-10 02:50:22.234002', 'step': 20743, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 3322488817984}, 'timestamp': '2025-09-10 02:50:22.264382', 'step': 20743, 'epoch': 3} {'type': 'loss', 'content': 0.06596176326274872, 'timestamp': '2025-09-10 02:50:22.288356', 'step': 20744, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 2847885091968}, 'timestamp': '2025-09-10 02:50:22.318920', 'step': 20744, 'epoch': 3} {'type': 'loss', 'content': 0.0689525380730629, 'timestamp': '2025-09-10 02:50:22.321446', 'step': 20745, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 144], 'flops': 4271696270016}, 'timestamp': '2025-09-10 02:50:22.352305', 'step': 20745, 'epoch': 3} {'type': 'loss', 'content': 0.10799432545900345, 'timestamp': '2025-09-10 02:50:22.354765', 'step': 20746, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 128], 'flops': 3797092544000}, 'timestamp': '2025-09-10 02:50:22.385862', 'step': 20746, 'epoch': 3} {'type': 'loss', 'content': 0.032742902636528015, 'timestamp': '2025-09-10 02:50:22.391996', 'step': 20747, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 3322488817984}, 'timestamp': '2025-09-10 02:50:22.423528', 'step': 20747, 'epoch': 3} {'type': 'loss', 'content': 0.023224033415317535, 'timestamp': '2025-09-10 02:50:22.447428', 'step': 20748, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 2847885091968}, 'timestamp': '2025-09-10 02:50:22.478397', 'step': 20748, 'epoch': 3} {'type': 'loss', 'content': 0.025309573858976364, 'timestamp': '2025-09-10 02:50:22.483195', 'step': 20749, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 3322488817984}, 'timestamp': '2025-09-10 02:50:22.513078', 'step': 20749, 'epoch': 3} {'type': 'loss', 'content': 0.06879691034555435, 'timestamp': '2025-09-10 02:50:22.517260', 'step': 20750, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 3322488817984}, 'timestamp': '2025-09-10 02:50:22.550900', 'step': 20750, 'epoch': 3} {'type': 'loss', 'content': 0.13297830522060394, 'timestamp': '2025-09-10 02:50:22.553720', 'step': 20751, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 128], 'flops': 3797092544000}, 'timestamp': '2025-09-10 02:50:22.584773', 'step': 20751, 'epoch': 3} {'type': 'loss', 'content': 0.027498530223965645, 'timestamp': '2025-09-10 02:50:22.608521', 'step': 20752, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 192], 'flops': 5695507448064}, 'timestamp': '2025-09-10 02:50:22.639718', 'step': 20752, 'epoch': 3} {'type': 'loss', 'content': 0.08717511594295502, 'timestamp': '2025-09-10 02:50:22.642601', 'step': 20753, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 3322488817984}, 'timestamp': '2025-09-10 02:50:22.675622', 'step': 20753, 'epoch': 3} {'type': 'loss', 'content': 0.034148797392845154, 'timestamp': '2025-09-10 02:50:22.678481', 'step': 20754, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 3322488817984}, 'timestamp': '2025-09-10 02:50:22.712037', 'step': 20754, 'epoch': 3} {'type': 'loss', 'content': 0.041384562849998474, 'timestamp': '2025-09-10 02:50:22.714622', 'step': 20755, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 128], 'flops': 3797092544000}, 'timestamp': '2025-09-10 02:50:22.745923', 'step': 20755, 'epoch': 3} {'type': 'loss', 'content': 0.07955145835876465, 'timestamp': '2025-09-10 02:50:22.769729', 'step': 20756, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 3322488817984}, 'timestamp': '2025-09-10 02:50:22.801564', 'step': 20756, 'epoch': 3} {'type': 'loss', 'content': 0.057952187955379486, 'timestamp': '2025-09-10 02:50:22.804147', 'step': 20757, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 3322488817984}, 'timestamp': '2025-09-10 02:50:22.834238', 'step': 20757, 'epoch': 3} {'type': 'loss', 'content': 0.05594410374760628, 'timestamp': '2025-09-10 02:50:22.836768', 'step': 20758, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 3322488817984}, 'timestamp': '2025-09-10 02:50:22.866684', 'step': 20758, 'epoch': 3} {'type': 'loss', 'content': 0.06270171701908112, 'timestamp': '2025-09-10 02:50:22.869449', 'step': 20759, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 80], 'flops': 2373281365952}, 'timestamp': '2025-09-10 02:50:22.900202', 'step': 20759, 'epoch': 3} {'type': 'loss', 'content': 0.06185990199446678, 'timestamp': '2025-09-10 02:50:22.924544', 'step': 20760, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 144], 'flops': 4271696270016}, 'timestamp': '2025-09-10 02:50:22.955776', 'step': 20760, 'epoch': 3} {'type': 'loss', 'content': 0.054016053676605225, 'timestamp': '2025-09-10 02:50:22.958349', 'step': 20761, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 3322488817984}, 'timestamp': '2025-09-10 02:50:22.995690', 'step': 20761, 'epoch': 3} {'type': 'loss', 'content': 0.10142767429351807, 'timestamp': '2025-09-10 02:50:22.998376', 'step': 20762, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 2847885091968}, 'timestamp': '2025-09-10 02:50:23.029190', 'step': 20762, 'epoch': 3} {'type': 'loss', 'content': 0.03608446195721626, 'timestamp': '2025-09-10 02:50:23.031646', 'step': 20763, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 3322488817984}, 'timestamp': '2025-09-10 02:50:23.063659', 'step': 20763, 'epoch': 3} {'type': 'loss', 'content': 0.06589312851428986, 'timestamp': '2025-09-10 02:50:23.087390', 'step': 20764, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 144], 'flops': 4271696270016}, 'timestamp': '2025-09-10 02:50:23.119295', 'step': 20764, 'epoch': 3} {'type': 'loss', 'content': 0.11464601010084152, 'timestamp': '2025-09-10 02:50:23.121674', 'step': 20765, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 80], 'flops': 2373281365952}, 'timestamp': '2025-09-10 02:50:23.152174', 'step': 20765, 'epoch': 3} {'type': 'loss', 'content': 0.07436785846948624, 'timestamp': '2025-09-10 02:50:23.154828', 'step': 20766, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 80], 'flops': 2373281365952}, 'timestamp': '2025-09-10 02:50:23.185106', 'step': 20766, 'epoch': 3} {'type': 'loss', 'content': 0.10784627497196198, 'timestamp': '2025-09-10 02:50:23.187985', 'step': 20767, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 2847885091968}, 'timestamp': '2025-09-10 02:50:23.219355', 'step': 20767, 'epoch': 3} {'type': 'loss', 'content': 0.035294659435749054, 'timestamp': '2025-09-10 02:50:23.243688', 'step': 20768, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 2847885091968}, 'timestamp': '2025-09-10 02:50:23.273692', 'step': 20768, 'epoch': 3} {'type': 'loss', 'content': 0.07639691233634949, 'timestamp': '2025-09-10 02:50:23.276403', 'step': 20769, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 3322488817984}, 'timestamp': '2025-09-10 02:50:23.306560', 'step': 20769, 'epoch': 3} {'type': 'loss', 'content': 0.12464682012796402, 'timestamp': '2025-09-10 02:50:23.308945', 'step': 20770, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 144], 'flops': 4271696270016}, 'timestamp': '2025-09-10 02:50:23.338908', 'step': 20770, 'epoch': 3} {'type': 'loss', 'content': 0.04626430198550224, 'timestamp': '2025-09-10 02:50:23.341586', 'step': 20771, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 2847885091968}, 'timestamp': '2025-09-10 02:50:23.374457', 'step': 20771, 'epoch': 3} {'type': 'loss', 'content': 0.04364242032170296, 'timestamp': '2025-09-10 02:50:23.398457', 'step': 20772, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 128], 'flops': 3797092544000}, 'timestamp': '2025-09-10 02:50:23.440036', 'step': 20772, 'epoch': 3} {'type': 'loss', 'content': 0.0610080286860466, 'timestamp': '2025-09-10 02:50:23.442769', 'step': 20773, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 2847885091968}, 'timestamp': '2025-09-10 02:50:23.474053', 'step': 20773, 'epoch': 3} {'type': 'loss', 'content': 0.03914429619908333, 'timestamp': '2025-09-10 02:50:23.476681', 'step': 20774, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 3322488817984}, 'timestamp': '2025-09-10 02:50:23.507133', 'step': 20774, 'epoch': 3} {'type': 'loss', 'content': 0.08883120119571686, 'timestamp': '2025-09-10 02:50:23.512416', 'step': 20775, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 144], 'flops': 4271696270016}, 'timestamp': '2025-09-10 02:50:23.547416', 'step': 20775, 'epoch': 3} {'type': 'loss', 'content': 0.025671405717730522, 'timestamp': '2025-09-10 02:50:23.571286', 'step': 20776, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 128], 'flops': 3797092544000}, 'timestamp': '2025-09-10 02:50:23.601956', 'step': 20776, 'epoch': 3} {'type': 'loss', 'content': 0.05063799023628235, 'timestamp': '2025-09-10 02:50:23.604577', 'step': 20777, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 3322488817984}, 'timestamp': '2025-09-10 02:50:23.634624', 'step': 20777, 'epoch': 3} {'type': 'loss', 'content': 0.06241845339536667, 'timestamp': '2025-09-10 02:50:23.637982', 'step': 20778, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 3322488817984}, 'timestamp': '2025-09-10 02:50:23.667967', 'step': 20778, 'epoch': 3} {'type': 'loss', 'content': 0.053741905838251114, 'timestamp': '2025-09-10 02:50:23.670375', 'step': 20779, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 144], 'flops': 4271696270016}, 'timestamp': '2025-09-10 02:50:23.701485', 'step': 20779, 'epoch': 3} {'type': 'loss', 'content': 0.11563320457935333, 'timestamp': '2025-09-10 02:50:23.725213', 'step': 20780, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 3322488817984}, 'timestamp': '2025-09-10 02:50:23.757740', 'step': 20780, 'epoch': 3} {'type': 'loss', 'content': 0.06482870876789093, 'timestamp': '2025-09-10 02:50:23.760236', 'step': 20781, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 2847885091968}, 'timestamp': '2025-09-10 02:50:23.791139', 'step': 20781, 'epoch': 3} {'type': 'loss', 'content': 0.06218690797686577, 'timestamp': '2025-09-10 02:50:23.793914', 'step': 20782, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 144], 'flops': 4271696270016}, 'timestamp': '2025-09-10 02:50:23.826287', 'step': 20782, 'epoch': 3} {'type': 'loss', 'content': 0.05371016263961792, 'timestamp': '2025-09-10 02:50:23.830463', 'step': 20783, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 128], 'flops': 3797092544000}, 'timestamp': '2025-09-10 02:50:23.860892', 'step': 20783, 'epoch': 3} {'type': 'loss', 'content': 0.030982306227087975, 'timestamp': '2025-09-10 02:50:23.884736', 'step': 20784, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 3322488817984}, 'timestamp': '2025-09-10 02:50:23.915562', 'step': 20784, 'epoch': 3} {'type': 'loss', 'content': 0.05683445930480957, 'timestamp': '2025-09-10 02:50:23.917838', 'step': 20785, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 2847885091968}, 'timestamp': '2025-09-10 02:50:23.949046', 'step': 20785, 'epoch': 3} {'type': 'loss', 'content': 0.04658225178718567, 'timestamp': '2025-09-10 02:50:23.951674', 'step': 20786, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 2847885091968}, 'timestamp': '2025-09-10 02:50:23.982793', 'step': 20786, 'epoch': 3} {'type': 'loss', 'content': 0.04023267328739166, 'timestamp': '2025-09-10 02:50:23.985275', 'step': 20787, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 2847885091968}, 'timestamp': '2025-09-10 02:50:24.016439', 'step': 20787, 'epoch': 3} {'type': 'loss', 'content': 0.09313646703958511, 'timestamp': '2025-09-10 02:50:24.040095', 'step': 20788, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 128], 'flops': 3797092544000}, 'timestamp': '2025-09-10 02:50:24.070991', 'step': 20788, 'epoch': 3} {'type': 'loss', 'content': 0.06486379355192184, 'timestamp': '2025-09-10 02:50:24.073267', 'step': 20789, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 144], 'flops': 4271696270016}, 'timestamp': '2025-09-10 02:50:24.113600', 'step': 20789, 'epoch': 3} {'type': 'loss', 'content': 0.09711041301488876, 'timestamp': '2025-09-10 02:50:24.116360', 'step': 20790, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 3322488817984}, 'timestamp': '2025-09-10 02:50:24.148093', 'step': 20790, 'epoch': 3} {'type': 'loss', 'content': 0.047095224261283875, 'timestamp': '2025-09-10 02:50:24.151528', 'step': 20791, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 128], 'flops': 3797092544000}, 'timestamp': '2025-09-10 02:50:24.182345', 'step': 20791, 'epoch': 3} {'type': 'loss', 'content': 0.06922852247953415, 'timestamp': '2025-09-10 02:50:24.205877', 'step': 20792, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 3322488817984}, 'timestamp': '2025-09-10 02:50:24.238907', 'step': 20792, 'epoch': 3} {'type': 'loss', 'content': 0.08253998309373856, 'timestamp': '2025-09-10 02:50:24.241511', 'step': 20793, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 2847885091968}, 'timestamp': '2025-09-10 02:50:24.272675', 'step': 20793, 'epoch': 3} {'type': 'loss', 'content': 0.023634154349565506, 'timestamp': '2025-09-10 02:50:24.275515', 'step': 20794, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 128], 'flops': 3797092544000}, 'timestamp': '2025-09-10 02:50:24.306750', 'step': 20794, 'epoch': 3} {'type': 'loss', 'content': 0.05645398050546646, 'timestamp': '2025-09-10 02:50:24.309425', 'step': 20795, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 3322488817984}, 'timestamp': '2025-09-10 02:50:24.340342', 'step': 20795, 'epoch': 3} {'type': 'loss', 'content': 0.03524589538574219, 'timestamp': '2025-09-10 02:50:24.364002', 'step': 20796, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 3322488817984}, 'timestamp': '2025-09-10 02:50:24.394047', 'step': 20796, 'epoch': 3} {'type': 'loss', 'content': 0.040000490844249725, 'timestamp': '2025-09-10 02:50:24.396885', 'step': 20797, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 2847885091968}, 'timestamp': '2025-09-10 02:50:24.427316', 'step': 20797, 'epoch': 3} {'type': 'loss', 'content': 0.0316099151968956, 'timestamp': '2025-09-10 02:50:24.430206', 'step': 20798, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 2847885091968}, 'timestamp': '2025-09-10 02:50:24.460844', 'step': 20798, 'epoch': 3} {'type': 'loss', 'content': 0.08530091494321823, 'timestamp': '2025-09-10 02:50:24.463703', 'step': 20799, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 144], 'flops': 4271696270016}, 'timestamp': '2025-09-10 02:50:24.495150', 'step': 20799, 'epoch': 3} {'type': 'loss', 'content': 0.08179569244384766, 'timestamp': '2025-09-10 02:50:24.518733', 'step': 20800, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 128], 'flops': 3797092544000}, 'timestamp': '2025-09-10 02:50:24.549435', 'step': 20800, 'epoch': 3} {'type': 'loss', 'content': 0.13263937830924988, 'timestamp': '2025-09-10 02:50:24.551988', 'step': 20801, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 2847885091968}, 'timestamp': '2025-09-10 02:50:24.582029', 'step': 20801, 'epoch': 3} {'type': 'loss', 'content': 0.0648462176322937, 'timestamp': '2025-09-10 02:50:24.584073', 'step': 20802, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 80], 'flops': 2373281365952}, 'timestamp': '2025-09-10 02:50:24.614752', 'step': 20802, 'epoch': 3} {'type': 'loss', 'content': 0.06912604719400406, 'timestamp': '2025-09-10 02:50:24.617174', 'step': 20803, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 128], 'flops': 3797092544000}, 'timestamp': '2025-09-10 02:50:24.647844', 'step': 20803, 'epoch': 3} {'type': 'loss', 'content': 0.03414401784539223, 'timestamp': '2025-09-10 02:50:24.671804', 'step': 20804, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 128], 'flops': 3797092544000}, 'timestamp': '2025-09-10 02:50:24.704753', 'step': 20804, 'epoch': 3} {'type': 'loss', 'content': 0.021350042894482613, 'timestamp': '2025-09-10 02:50:24.707188', 'step': 20805, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 3322488817984}, 'timestamp': '2025-09-10 02:50:24.739190', 'step': 20805, 'epoch': 3} {'type': 'loss', 'content': 0.14279764890670776, 'timestamp': '2025-09-10 02:50:24.741971', 'step': 20806, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 3322488817984}, 'timestamp': '2025-09-10 02:50:24.772262', 'step': 20806, 'epoch': 3} {'type': 'loss', 'content': 0.040008675307035446, 'timestamp': '2025-09-10 02:50:24.774649', 'step': 20807, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 3322488817984}, 'timestamp': '2025-09-10 02:50:24.805345', 'step': 20807, 'epoch': 3} {'type': 'loss', 'content': 0.07642433047294617, 'timestamp': '2025-09-10 02:50:24.829078', 'step': 20808, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 128], 'flops': 3797092544000}, 'timestamp': '2025-09-10 02:50:24.859066', 'step': 20808, 'epoch': 3} {'type': 'loss', 'content': 0.12803959846496582, 'timestamp': '2025-09-10 02:50:24.861815', 'step': 20809, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 2847885091968}, 'timestamp': '2025-09-10 02:50:24.892407', 'step': 20809, 'epoch': 3} {'type': 'loss', 'content': 0.01707128994166851, 'timestamp': '2025-09-10 02:50:24.894789', 'step': 20810, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 144], 'flops': 4271696270016}, 'timestamp': '2025-09-10 02:50:24.924772', 'step': 20810, 'epoch': 3} {'type': 'loss', 'content': 0.10026424378156662, 'timestamp': '2025-09-10 02:50:24.927515', 'step': 20811, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 2847885091968}, 'timestamp': '2025-09-10 02:50:24.958070', 'step': 20811, 'epoch': 3} {'type': 'loss', 'content': 0.04294716566801071, 'timestamp': '2025-09-10 02:50:24.981807', 'step': 20812, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 2847885091968}, 'timestamp': '2025-09-10 02:50:25.012706', 'step': 20812, 'epoch': 3} {'type': 'loss', 'content': 0.047110557556152344, 'timestamp': '2025-09-10 02:50:25.015889', 'step': 20813, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 2847885091968}, 'timestamp': '2025-09-10 02:50:25.046116', 'step': 20813, 'epoch': 3} {'type': 'loss', 'content': 0.02739908918738365, 'timestamp': '2025-09-10 02:50:25.048677', 'step': 20814, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 2847885091968}, 'timestamp': '2025-09-10 02:50:25.079545', 'step': 20814, 'epoch': 3} {'type': 'loss', 'content': 0.09452712535858154, 'timestamp': '2025-09-10 02:50:25.082249', 'step': 20815, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 80], 'flops': 2373281365952}, 'timestamp': '2025-09-10 02:50:25.112650', 'step': 20815, 'epoch': 3} {'type': 'loss', 'content': 0.06642655283212662, 'timestamp': '2025-09-10 02:50:25.138597', 'step': 20816, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 2847885091968}, 'timestamp': '2025-09-10 02:50:25.175716', 'step': 20816, 'epoch': 3} {'type': 'loss', 'content': 0.09767872840166092, 'timestamp': '2025-09-10 02:50:25.178098', 'step': 20817, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 128], 'flops': 3797092544000}, 'timestamp': '2025-09-10 02:50:25.208377', 'step': 20817, 'epoch': 3} {'type': 'loss', 'content': 0.07804953306913376, 'timestamp': '2025-09-10 02:50:25.210738', 'step': 20818, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 80], 'flops': 2373281365952}, 'timestamp': '2025-09-10 02:50:25.243457', 'step': 20818, 'epoch': 3} {'type': 'loss', 'content': 0.014334038831293583, 'timestamp': '2025-09-10 02:50:25.246146', 'step': 20819, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 3322488817984}, 'timestamp': '2025-09-10 02:50:25.286183', 'step': 20819, 'epoch': 3} {'type': 'loss', 'content': 0.08030358701944351, 'timestamp': '2025-09-10 02:50:25.309732', 'step': 20820, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 128], 'flops': 3797092544000}, 'timestamp': '2025-09-10 02:50:25.341040', 'step': 20820, 'epoch': 3} {'type': 'loss', 'content': 0.06574375927448273, 'timestamp': '2025-09-10 02:50:25.343399', 'step': 20821, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 128], 'flops': 3797092544000}, 'timestamp': '2025-09-10 02:50:25.373859', 'step': 20821, 'epoch': 3} {'type': 'loss', 'content': 0.03655107319355011, 'timestamp': '2025-09-10 02:50:25.379178', 'step': 20822, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 128], 'flops': 3797092544000}, 'timestamp': '2025-09-10 02:50:25.409595', 'step': 20822, 'epoch': 3} {'type': 'loss', 'content': 0.03814522922039032, 'timestamp': '2025-09-10 02:50:25.415560', 'step': 20823, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 160], 'flops': 4746299996032}, 'timestamp': '2025-09-10 02:50:25.453908', 'step': 20823, 'epoch': 3} {'type': 'loss', 'content': 0.12011156231164932, 'timestamp': '2025-09-10 02:50:25.477953', 'step': 20824, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 2847885091968}, 'timestamp': '2025-09-10 02:50:25.508504', 'step': 20824, 'epoch': 3} {'type': 'loss', 'content': 0.08084017783403397, 'timestamp': '2025-09-10 02:50:25.510821', 'step': 20825, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 128], 'flops': 3797092544000}, 'timestamp': '2025-09-10 02:50:25.550107', 'step': 20825, 'epoch': 3} {'type': 'loss', 'content': 0.10670570284128189, 'timestamp': '2025-09-10 02:50:25.554068', 'step': 20826, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 128], 'flops': 3797092544000}, 'timestamp': '2025-09-10 02:50:25.585133', 'step': 20826, 'epoch': 3} {'type': 'loss', 'content': 0.04667803272604942, 'timestamp': '2025-09-10 02:50:25.587848', 'step': 20827, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 3322488817984}, 'timestamp': '2025-09-10 02:50:25.626357', 'step': 20827, 'epoch': 3} {'type': 'loss', 'content': 0.08705408126115799, 'timestamp': '2025-09-10 02:50:25.651838', 'step': 20828, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 3322488817984}, 'timestamp': '2025-09-10 02:50:25.683946', 'step': 20828, 'epoch': 3} {'type': 'loss', 'content': 0.0769585520029068, 'timestamp': '2025-09-10 02:50:25.690302', 'step': 20829, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 2847885091968}, 'timestamp': '2025-09-10 02:50:25.728065', 'step': 20829, 'epoch': 3} {'type': 'loss', 'content': 0.09419314563274384, 'timestamp': '2025-09-10 02:50:25.731002', 'step': 20830, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 3322488817984}, 'timestamp': '2025-09-10 02:50:25.761472', 'step': 20830, 'epoch': 3} {'type': 'loss', 'content': 0.07078266888856888, 'timestamp': '2025-09-10 02:50:25.765385', 'step': 20831, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 128], 'flops': 3797092544000}, 'timestamp': '2025-09-10 02:50:25.797381', 'step': 20831, 'epoch': 3} {'type': 'loss', 'content': 0.05460405722260475, 'timestamp': '2025-09-10 02:50:25.821173', 'step': 20832, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 2847885091968}, 'timestamp': '2025-09-10 02:50:25.853881', 'step': 20832, 'epoch': 3} {'type': 'loss', 'content': 0.06485771387815475, 'timestamp': '2025-09-10 02:50:25.856837', 'step': 20833, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 3322488817984}, 'timestamp': '2025-09-10 02:50:25.890866', 'step': 20833, 'epoch': 3} {'type': 'loss', 'content': 0.13542018830776215, 'timestamp': '2025-09-10 02:50:25.893271', 'step': 20834, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 144], 'flops': 4271696270016}, 'timestamp': '2025-09-10 02:50:25.924356', 'step': 20834, 'epoch': 3} {'type': 'loss', 'content': 0.06643666326999664, 'timestamp': '2025-09-10 02:50:25.927833', 'step': 20835, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 128], 'flops': 3797092544000}, 'timestamp': '2025-09-10 02:50:25.957879', 'step': 20835, 'epoch': 3} {'type': 'loss', 'content': 0.06876826286315918, 'timestamp': '2025-09-10 02:50:25.981759', 'step': 20836, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 128], 'flops': 3797092544000}, 'timestamp': '2025-09-10 02:50:26.015302', 'step': 20836, 'epoch': 3} {'type': 'loss', 'content': 0.04469170793890953, 'timestamp': '2025-09-10 02:50:26.017751', 'step': 20837, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 2847885091968}, 'timestamp': '2025-09-10 02:50:26.047960', 'step': 20837, 'epoch': 3} {'type': 'loss', 'content': 0.026836877688765526, 'timestamp': '2025-09-10 02:50:26.055731', 'step': 20838, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 3322488817984}, 'timestamp': '2025-09-10 02:50:26.086043', 'step': 20838, 'epoch': 3} {'type': 'loss', 'content': 0.11026351898908615, 'timestamp': '2025-09-10 02:50:26.089600', 'step': 20839, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 3322488817984}, 'timestamp': '2025-09-10 02:50:26.120905', 'step': 20839, 'epoch': 3} {'type': 'loss', 'content': 0.06862343847751617, 'timestamp': '2025-09-10 02:50:26.154711', 'step': 20840, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 3322488817984}, 'timestamp': '2025-09-10 02:50:26.194669', 'step': 20840, 'epoch': 3} {'type': 'loss', 'content': 0.0560670904815197, 'timestamp': '2025-09-10 02:50:26.199135', 'step': 20841, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 80], 'flops': 2373281365952}, 'timestamp': '2025-09-10 02:50:26.242306', 'step': 20841, 'epoch': 3} {'type': 'loss', 'content': 0.07860163599252701, 'timestamp': '2025-09-10 02:50:26.249509', 'step': 20842, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 3322488817984}, 'timestamp': '2025-09-10 02:50:26.286280', 'step': 20842, 'epoch': 3} {'type': 'loss', 'content': 0.10935277491807938, 'timestamp': '2025-09-10 02:50:26.292234', 'step': 20843, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 2847885091968}, 'timestamp': '2025-09-10 02:50:26.332848', 'step': 20843, 'epoch': 3} {'type': 'loss', 'content': 0.05322938784956932, 'timestamp': '2025-09-10 02:50:26.358597', 'step': 20844, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 2847885091968}, 'timestamp': '2025-09-10 02:50:26.400875', 'step': 20844, 'epoch': 3} {'type': 'loss', 'content': 0.04584065452218056, 'timestamp': '2025-09-10 02:50:26.406264', 'step': 20845, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 2847885091968}, 'timestamp': '2025-09-10 02:50:26.448038', 'step': 20845, 'epoch': 3} {'type': 'loss', 'content': 0.07224126905202866, 'timestamp': '2025-09-10 02:50:26.451073', 'step': 20846, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 3322488817984}, 'timestamp': '2025-09-10 02:50:26.483188', 'step': 20846, 'epoch': 3} {'type': 'loss', 'content': 0.03188794106245041, 'timestamp': '2025-09-10 02:50:26.486841', 'step': 20847, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 3322488817984}, 'timestamp': '2025-09-10 02:50:26.522548', 'step': 20847, 'epoch': 3} {'type': 'loss', 'content': 0.015355213545262814, 'timestamp': '2025-09-10 02:50:26.546502', 'step': 20848, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 3322488817984}, 'timestamp': '2025-09-10 02:50:26.588864', 'step': 20848, 'epoch': 3} {'type': 'loss', 'content': 0.06906206905841827, 'timestamp': '2025-09-10 02:50:26.591382', 'step': 20849, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 3322488817984}, 'timestamp': '2025-09-10 02:50:26.620852', 'step': 20849, 'epoch': 3} {'type': 'loss', 'content': 0.016252126544713974, 'timestamp': '2025-09-10 02:50:26.623479', 'step': 20850, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 3322488817984}, 'timestamp': '2025-09-10 02:50:26.654756', 'step': 20850, 'epoch': 3} {'type': 'loss', 'content': 0.01537395641207695, 'timestamp': '2025-09-10 02:50:26.657382', 'step': 20851, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 3322488817984}, 'timestamp': '2025-09-10 02:50:26.688627', 'step': 20851, 'epoch': 3} {'type': 'loss', 'content': 0.05938591808080673, 'timestamp': '2025-09-10 02:50:26.712592', 'step': 20852, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 128], 'flops': 3797092544000}, 'timestamp': '2025-09-10 02:50:26.743704', 'step': 20852, 'epoch': 3} {'type': 'loss', 'content': 0.12796799838542938, 'timestamp': '2025-09-10 02:50:26.746333', 'step': 20853, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 3322488817984}, 'timestamp': '2025-09-10 02:50:26.779876', 'step': 20853, 'epoch': 3} {'type': 'loss', 'content': 0.08741473406553268, 'timestamp': '2025-09-10 02:50:26.782406', 'step': 20854, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 128], 'flops': 3797092544000}, 'timestamp': '2025-09-10 02:50:26.812838', 'step': 20854, 'epoch': 3} {'type': 'loss', 'content': 0.06233038753271103, 'timestamp': '2025-09-10 02:50:26.817066', 'step': 20855, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 2847885091968}, 'timestamp': '2025-09-10 02:50:26.853936', 'step': 20855, 'epoch': 3} {'type': 'loss', 'content': 0.06904951483011246, 'timestamp': '2025-09-10 02:50:26.877672', 'step': 20856, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 160], 'flops': 4746299996032}, 'timestamp': '2025-09-10 02:50:26.923503', 'step': 20856, 'epoch': 3} {'type': 'loss', 'content': 0.01750761829316616, 'timestamp': '2025-09-10 02:50:26.926544', 'step': 20857, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 2847885091968}, 'timestamp': '2025-09-10 02:50:26.958624', 'step': 20857, 'epoch': 3} {'type': 'loss', 'content': 0.03215981647372246, 'timestamp': '2025-09-10 02:50:26.961263', 'step': 20858, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 2847885091968}, 'timestamp': '2025-09-10 02:50:26.992586', 'step': 20858, 'epoch': 3} {'type': 'loss', 'content': 0.044940605759620667, 'timestamp': '2025-09-10 02:50:26.995349', 'step': 20859, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 3322488817984}, 'timestamp': '2025-09-10 02:50:27.028639', 'step': 20859, 'epoch': 3} {'type': 'loss', 'content': 0.10396184772253036, 'timestamp': '2025-09-10 02:50:27.064055', 'step': 20860, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 3322488817984}, 'timestamp': '2025-09-10 02:50:27.102603', 'step': 20860, 'epoch': 3} {'type': 'loss', 'content': 0.09808385372161865, 'timestamp': '2025-09-10 02:50:27.104971', 'step': 20861, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 128], 'flops': 3797092544000}, 'timestamp': '2025-09-10 02:50:27.136137', 'step': 20861, 'epoch': 3} {'type': 'loss', 'content': 0.08423778414726257, 'timestamp': '2025-09-10 02:50:27.138550', 'step': 20862, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 160], 'flops': 4746299996032}, 'timestamp': '2025-09-10 02:50:27.171769', 'step': 20862, 'epoch': 3} {'type': 'loss', 'content': 0.01831979490816593, 'timestamp': '2025-09-10 02:50:27.174610', 'step': 20863, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 128], 'flops': 3797092544000}, 'timestamp': '2025-09-10 02:50:27.205314', 'step': 20863, 'epoch': 3} {'type': 'loss', 'content': 0.05173270404338837, 'timestamp': '2025-09-10 02:50:27.228904', 'step': 20864, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 144], 'flops': 4271696270016}, 'timestamp': '2025-09-10 02:50:27.260123', 'step': 20864, 'epoch': 3} {'type': 'loss', 'content': 0.1600823700428009, 'timestamp': '2025-09-10 02:50:27.262568', 'step': 20865, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 144], 'flops': 4271696270016}, 'timestamp': '2025-09-10 02:50:27.292897', 'step': 20865, 'epoch': 3} {'type': 'loss', 'content': 0.0735200047492981, 'timestamp': '2025-09-10 02:50:27.295398', 'step': 20866, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 144], 'flops': 4271696270016}, 'timestamp': '2025-09-10 02:50:27.325816', 'step': 20866, 'epoch': 3} {'type': 'loss', 'content': 0.07661262154579163, 'timestamp': '2025-09-10 02:50:27.328413', 'step': 20867, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 3322488817984}, 'timestamp': '2025-09-10 02:50:27.360671', 'step': 20867, 'epoch': 3} {'type': 'loss', 'content': 0.09637807309627533, 'timestamp': '2025-09-10 02:50:27.384100', 'step': 20868, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 3322488817984}, 'timestamp': '2025-09-10 02:50:27.414335', 'step': 20868, 'epoch': 3} {'type': 'loss', 'content': 0.06463008373975754, 'timestamp': '2025-09-10 02:50:27.416803', 'step': 20869, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 2847885091968}, 'timestamp': '2025-09-10 02:50:27.450395', 'step': 20869, 'epoch': 3} {'type': 'loss', 'content': 0.047490522265434265, 'timestamp': '2025-09-10 02:50:27.453097', 'step': 20870, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 80], 'flops': 2373281365952}, 'timestamp': '2025-09-10 02:50:27.484639', 'step': 20870, 'epoch': 3} {'type': 'loss', 'content': 0.040452297776937485, 'timestamp': '2025-09-10 02:50:27.487439', 'step': 20871, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 128], 'flops': 3797092544000}, 'timestamp': '2025-09-10 02:50:27.517238', 'step': 20871, 'epoch': 3} {'type': 'loss', 'content': 0.08198143541812897, 'timestamp': '2025-09-10 02:50:27.540785', 'step': 20872, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 128], 'flops': 3797092544000}, 'timestamp': '2025-09-10 02:50:27.571812', 'step': 20872, 'epoch': 3} {'type': 'loss', 'content': 0.120388925075531, 'timestamp': '2025-09-10 02:50:27.574617', 'step': 20873, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 2847885091968}, 'timestamp': '2025-09-10 02:50:27.605389', 'step': 20873, 'epoch': 3} {'type': 'loss', 'content': 0.059240225702524185, 'timestamp': '2025-09-10 02:50:27.607505', 'step': 20874, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 128], 'flops': 3797092544000}, 'timestamp': '2025-09-10 02:50:27.637456', 'step': 20874, 'epoch': 3} {'type': 'loss', 'content': 0.06218661740422249, 'timestamp': '2025-09-10 02:50:27.640389', 'step': 20875, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 3322488817984}, 'timestamp': '2025-09-10 02:50:27.673924', 'step': 20875, 'epoch': 3} {'type': 'loss', 'content': 0.04834582656621933, 'timestamp': '2025-09-10 02:50:27.697776', 'step': 20876, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 3322488817984}, 'timestamp': '2025-09-10 02:50:27.730608', 'step': 20876, 'epoch': 3} {'type': 'loss', 'content': 0.05967969074845314, 'timestamp': '2025-09-10 02:50:27.733253', 'step': 20877, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 3322488817984}, 'timestamp': '2025-09-10 02:50:27.763875', 'step': 20877, 'epoch': 3} {'type': 'loss', 'content': 0.042233821004629135, 'timestamp': '2025-09-10 02:50:27.766346', 'step': 20878, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 3322488817984}, 'timestamp': '2025-09-10 02:50:27.796570', 'step': 20878, 'epoch': 3} {'type': 'loss', 'content': 0.12095260620117188, 'timestamp': '2025-09-10 02:50:27.798891', 'step': 20879, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 2847885091968}, 'timestamp': '2025-09-10 02:50:27.829133', 'step': 20879, 'epoch': 3} {'type': 'loss', 'content': 0.03868066519498825, 'timestamp': '2025-09-10 02:50:27.853087', 'step': 20880, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 2847885091968}, 'timestamp': '2025-09-10 02:50:27.883958', 'step': 20880, 'epoch': 3} {'type': 'loss', 'content': 0.03784072771668434, 'timestamp': '2025-09-10 02:50:27.886467', 'step': 20881, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 128], 'flops': 3797092544000}, 'timestamp': '2025-09-10 02:50:27.917436', 'step': 20881, 'epoch': 3} {'type': 'loss', 'content': 0.05234073847532272, 'timestamp': '2025-09-10 02:50:27.919985', 'step': 20882, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 128], 'flops': 3797092544000}, 'timestamp': '2025-09-10 02:50:27.950188', 'step': 20882, 'epoch': 3} {'type': 'loss', 'content': 0.011466016992926598, 'timestamp': '2025-09-10 02:50:27.953581', 'step': 20883, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 2847885091968}, 'timestamp': '2025-09-10 02:50:27.984736', 'step': 20883, 'epoch': 3} {'type': 'loss', 'content': 0.009740712121129036, 'timestamp': '2025-09-10 02:50:28.009111', 'step': 20884, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 2847885091968}, 'timestamp': '2025-09-10 02:50:28.039495', 'step': 20884, 'epoch': 3} {'type': 'loss', 'content': 0.05502951517701149, 'timestamp': '2025-09-10 02:50:28.042398', 'step': 20885, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 3322488817984}, 'timestamp': '2025-09-10 02:50:28.073295', 'step': 20885, 'epoch': 3} {'type': 'loss', 'content': 0.043241236358881, 'timestamp': '2025-09-10 02:50:28.075675', 'step': 20886, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 128], 'flops': 3797092544000}, 'timestamp': '2025-09-10 02:50:28.106191', 'step': 20886, 'epoch': 3} {'type': 'loss', 'content': 0.04096372425556183, 'timestamp': '2025-09-10 02:50:28.108873', 'step': 20887, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 160], 'flops': 4746299996032}, 'timestamp': '2025-09-10 02:50:28.139947', 'step': 20887, 'epoch': 3} {'type': 'loss', 'content': 0.0688360184431076, 'timestamp': '2025-09-10 02:50:28.163961', 'step': 20888, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 144], 'flops': 4271696270016}, 'timestamp': '2025-09-10 02:50:28.195421', 'step': 20888, 'epoch': 3} {'type': 'loss', 'content': 0.047010235488414764, 'timestamp': '2025-09-10 02:50:28.197625', 'step': 20889, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 2847885091968}, 'timestamp': '2025-09-10 02:50:28.228399', 'step': 20889, 'epoch': 3} {'type': 'loss', 'content': 0.08217261731624603, 'timestamp': '2025-09-10 02:50:28.231072', 'step': 20890, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 160], 'flops': 4746299996032}, 'timestamp': '2025-09-10 02:50:28.262320', 'step': 20890, 'epoch': 3} {'type': 'loss', 'content': 0.015320820733904839, 'timestamp': '2025-09-10 02:50:28.265087', 'step': 20891, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 208], 'flops': 6170111174080}, 'timestamp': '2025-09-10 02:50:28.296611', 'step': 20891, 'epoch': 3} {'type': 'loss', 'content': 0.07341913878917694, 'timestamp': '2025-09-10 02:50:28.324492', 'step': 20892, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 2847885091968}, 'timestamp': '2025-09-10 02:50:28.355883', 'step': 20892, 'epoch': 3} {'type': 'loss', 'content': 0.10063320398330688, 'timestamp': '2025-09-10 02:50:28.360445', 'step': 20893, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 3322488817984}, 'timestamp': '2025-09-10 02:50:28.391477', 'step': 20893, 'epoch': 3} {'type': 'loss', 'content': 0.04728363826870918, 'timestamp': '2025-09-10 02:50:28.394800', 'step': 20894, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 3322488817984}, 'timestamp': '2025-09-10 02:50:28.426145', 'step': 20894, 'epoch': 3} {'type': 'loss', 'content': 0.11524567008018494, 'timestamp': '2025-09-10 02:50:28.435281', 'step': 20895, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 3322488817984}, 'timestamp': '2025-09-10 02:50:28.475161', 'step': 20895, 'epoch': 3} {'type': 'loss', 'content': 0.06751930713653564, 'timestamp': '2025-09-10 02:50:28.498845', 'step': 20896, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 2847885091968}, 'timestamp': '2025-09-10 02:50:28.530731', 'step': 20896, 'epoch': 3} {'type': 'loss', 'content': 0.11925818771123886, 'timestamp': '2025-09-10 02:50:28.533983', 'step': 20897, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 3322488817984}, 'timestamp': '2025-09-10 02:50:28.564641', 'step': 20897, 'epoch': 3} {'type': 'loss', 'content': 0.10978144407272339, 'timestamp': '2025-09-10 02:50:28.567358', 'step': 20898, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 80], 'flops': 2373281365952}, 'timestamp': '2025-09-10 02:50:28.598141', 'step': 20898, 'epoch': 3} {'type': 'loss', 'content': 0.0569315031170845, 'timestamp': '2025-09-10 02:50:28.601014', 'step': 20899, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 192], 'flops': 5695507448064}, 'timestamp': '2025-09-10 02:50:28.632130', 'step': 20899, 'epoch': 3} {'type': 'loss', 'content': 0.04964183270931244, 'timestamp': '2025-09-10 02:50:28.657539', 'step': 20900, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 128], 'flops': 3797092544000}, 'timestamp': '2025-09-10 02:50:28.691024', 'step': 20900, 'epoch': 3} {'type': 'loss', 'content': 0.06135665252804756, 'timestamp': '2025-09-10 02:50:28.693739', 'step': 20901, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 2847885091968}, 'timestamp': '2025-09-10 02:50:28.726845', 'step': 20901, 'epoch': 3} {'type': 'loss', 'content': 0.06721443682909012, 'timestamp': '2025-09-10 02:50:28.730191', 'step': 20902, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 3322488817984}, 'timestamp': '2025-09-10 02:50:28.761967', 'step': 20902, 'epoch': 3} {'type': 'loss', 'content': 0.07718154042959213, 'timestamp': '2025-09-10 02:50:28.765586', 'step': 20903, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 144], 'flops': 4271696270016}, 'timestamp': '2025-09-10 02:50:28.796558', 'step': 20903, 'epoch': 3} {'type': 'loss', 'content': 0.10170591622591019, 'timestamp': '2025-09-10 02:50:28.820546', 'step': 20904, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 2847885091968}, 'timestamp': '2025-09-10 02:50:28.851673', 'step': 20904, 'epoch': 3} {'type': 'loss', 'content': 0.05513766407966614, 'timestamp': '2025-09-10 02:50:28.854453', 'step': 20905, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 3322488817984}, 'timestamp': '2025-09-10 02:50:28.885513', 'step': 20905, 'epoch': 3} {'type': 'loss', 'content': 0.05780862271785736, 'timestamp': '2025-09-10 02:50:28.887690', 'step': 20906, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 2847885091968}, 'timestamp': '2025-09-10 02:50:28.918126', 'step': 20906, 'epoch': 3} {'type': 'loss', 'content': 0.01689337193965912, 'timestamp': '2025-09-10 02:50:28.922624', 'step': 20907, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 144], 'flops': 4271696270016}, 'timestamp': '2025-09-10 02:50:28.954189', 'step': 20907, 'epoch': 3} {'type': 'loss', 'content': 0.09357084333896637, 'timestamp': '2025-09-10 02:50:28.977763', 'step': 20908, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 128], 'flops': 3797092544000}, 'timestamp': '2025-09-10 02:50:29.008608', 'step': 20908, 'epoch': 3} {'type': 'loss', 'content': 0.037497930228710175, 'timestamp': '2025-09-10 02:50:29.011334', 'step': 20909, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 2847885091968}, 'timestamp': '2025-09-10 02:50:29.041317', 'step': 20909, 'epoch': 3} {'type': 'loss', 'content': 0.13948684930801392, 'timestamp': '2025-09-10 02:50:29.044081', 'step': 20910, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 2847885091968}, 'timestamp': '2025-09-10 02:50:29.074644', 'step': 20910, 'epoch': 3} {'type': 'loss', 'content': 0.05145537853240967, 'timestamp': '2025-09-10 02:50:29.081203', 'step': 20911, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 2847885091968}, 'timestamp': '2025-09-10 02:50:29.113378', 'step': 20911, 'epoch': 3} {'type': 'loss', 'content': 0.029962213709950447, 'timestamp': '2025-09-10 02:50:29.137100', 'step': 20912, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 3322488817984}, 'timestamp': '2025-09-10 02:50:29.167234', 'step': 20912, 'epoch': 3} {'type': 'loss', 'content': 0.047448981553316116, 'timestamp': '2025-09-10 02:50:29.172929', 'step': 20913, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 2847885091968}, 'timestamp': '2025-09-10 02:50:29.204127', 'step': 20913, 'epoch': 3} {'type': 'loss', 'content': 0.14013494551181793, 'timestamp': '2025-09-10 02:50:29.206637', 'step': 20914, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 128], 'flops': 3797092544000}, 'timestamp': '2025-09-10 02:50:29.238475', 'step': 20914, 'epoch': 3} {'type': 'loss', 'content': 0.11612766236066818, 'timestamp': '2025-09-10 02:50:29.241258', 'step': 20915, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 128], 'flops': 3797092544000}, 'timestamp': '2025-09-10 02:50:29.271664', 'step': 20915, 'epoch': 3} {'type': 'loss', 'content': 0.07000228762626648, 'timestamp': '2025-09-10 02:50:29.295232', 'step': 20916, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 2847885091968}, 'timestamp': '2025-09-10 02:50:29.327454', 'step': 20916, 'epoch': 3} {'type': 'loss', 'content': 0.14325354993343353, 'timestamp': '2025-09-10 02:50:29.329804', 'step': 20917, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 3322488817984}, 'timestamp': '2025-09-10 02:50:29.361480', 'step': 20917, 'epoch': 3} {'type': 'loss', 'content': 0.06956511735916138, 'timestamp': '2025-09-10 02:50:29.363929', 'step': 20918, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 128], 'flops': 3797092544000}, 'timestamp': '2025-09-10 02:50:29.396129', 'step': 20918, 'epoch': 3} {'type': 'loss', 'content': 0.13221368193626404, 'timestamp': '2025-09-10 02:50:29.398616', 'step': 20919, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 128], 'flops': 3797092544000}, 'timestamp': '2025-09-10 02:50:29.430820', 'step': 20919, 'epoch': 3} {'type': 'loss', 'content': 0.12891218066215515, 'timestamp': '2025-09-10 02:50:29.454506', 'step': 20920, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 2847885091968}, 'timestamp': '2025-09-10 02:50:29.485209', 'step': 20920, 'epoch': 3} {'type': 'loss', 'content': 0.06458574533462524, 'timestamp': '2025-09-10 02:50:29.488022', 'step': 20921, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 3322488817984}, 'timestamp': '2025-09-10 02:50:29.518293', 'step': 20921, 'epoch': 3} {'type': 'loss', 'content': 0.0980740487575531, 'timestamp': '2025-09-10 02:50:29.521449', 'step': 20922, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 2847885091968}, 'timestamp': '2025-09-10 02:50:29.552316', 'step': 20922, 'epoch': 3} {'type': 'loss', 'content': 0.08181023597717285, 'timestamp': '2025-09-10 02:50:29.554598', 'step': 20923, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 2847885091968}, 'timestamp': '2025-09-10 02:50:29.586029', 'step': 20923, 'epoch': 3} {'type': 'loss', 'content': 0.08265706151723862, 'timestamp': '2025-09-10 02:50:29.610358', 'step': 20924, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 2847885091968}, 'timestamp': '2025-09-10 02:50:29.641612', 'step': 20924, 'epoch': 3} {'type': 'loss', 'content': 0.0950455442070961, 'timestamp': '2025-09-10 02:50:29.644197', 'step': 20925, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 2847885091968}, 'timestamp': '2025-09-10 02:50:29.674502', 'step': 20925, 'epoch': 3} {'type': 'loss', 'content': 0.034989360719919205, 'timestamp': '2025-09-10 02:50:29.677866', 'step': 20926, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 3322488817984}, 'timestamp': '2025-09-10 02:50:29.708545', 'step': 20926, 'epoch': 3} {'type': 'loss', 'content': 0.10169264674186707, 'timestamp': '2025-09-10 02:50:29.710794', 'step': 20927, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 144], 'flops': 4271696270016}, 'timestamp': '2025-09-10 02:50:29.740615', 'step': 20927, 'epoch': 3} {'type': 'loss', 'content': 0.04601573199033737, 'timestamp': '2025-09-10 02:50:29.764620', 'step': 20928, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 3322488817984}, 'timestamp': '2025-09-10 02:50:29.795467', 'step': 20928, 'epoch': 3} {'type': 'loss', 'content': 0.08150726556777954, 'timestamp': '2025-09-10 02:50:29.798122', 'step': 20929, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 3322488817984}, 'timestamp': '2025-09-10 02:50:29.828964', 'step': 20929, 'epoch': 3} {'type': 'loss', 'content': 0.08030927926301956, 'timestamp': '2025-09-10 02:50:29.831242', 'step': 20930, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 3322488817984}, 'timestamp': '2025-09-10 02:50:29.861968', 'step': 20930, 'epoch': 3} {'type': 'loss', 'content': 0.10980074107646942, 'timestamp': '2025-09-10 02:50:29.864254', 'step': 20931, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 128], 'flops': 3797092544000}, 'timestamp': '2025-09-10 02:50:29.894264', 'step': 20931, 'epoch': 3} {'type': 'loss', 'content': 0.015165694057941437, 'timestamp': '2025-09-10 02:50:29.917981', 'step': 20932, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 144], 'flops': 4271696270016}, 'timestamp': '2025-09-10 02:50:29.948131', 'step': 20932, 'epoch': 3} {'type': 'loss', 'content': 0.149403914809227, 'timestamp': '2025-09-10 02:50:29.950943', 'step': 20933, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 2847885091968}, 'timestamp': '2025-09-10 02:50:29.981788', 'step': 20933, 'epoch': 3} {'type': 'loss', 'content': 0.06568044424057007, 'timestamp': '2025-09-10 02:50:29.984363', 'step': 20934, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 176], 'flops': 5220903722048}, 'timestamp': '2025-09-10 02:50:30.014683', 'step': 20934, 'epoch': 3} {'type': 'loss', 'content': 0.029894733801484108, 'timestamp': '2025-09-10 02:50:30.019137', 'step': 20935, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 3322488817984}, 'timestamp': '2025-09-10 02:50:30.050197', 'step': 20935, 'epoch': 3} {'type': 'loss', 'content': 0.07391778379678726, 'timestamp': '2025-09-10 02:50:30.073674', 'step': 20936, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 3322488817984}, 'timestamp': '2025-09-10 02:50:30.105341', 'step': 20936, 'epoch': 3} {'type': 'loss', 'content': 0.07356815785169601, 'timestamp': '2025-09-10 02:50:30.107702', 'step': 20937, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 3322488817984}, 'timestamp': '2025-09-10 02:50:30.138255', 'step': 20937, 'epoch': 3} {'type': 'loss', 'content': 0.02156192623078823, 'timestamp': '2025-09-10 02:50:30.140936', 'step': 20938, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 2847885091968}, 'timestamp': '2025-09-10 02:50:30.172204', 'step': 20938, 'epoch': 3} {'type': 'loss', 'content': 0.014075685292482376, 'timestamp': '2025-09-10 02:50:30.174699', 'step': 20939, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 3322488817984}, 'timestamp': '2025-09-10 02:50:30.207285', 'step': 20939, 'epoch': 3} {'type': 'loss', 'content': 0.08633295446634293, 'timestamp': '2025-09-10 02:50:30.230839', 'step': 20940, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 2847885091968}, 'timestamp': '2025-09-10 02:50:30.261171', 'step': 20940, 'epoch': 3} {'type': 'loss', 'content': 0.03928419575095177, 'timestamp': '2025-09-10 02:50:30.264277', 'step': 20941, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 2847885091968}, 'timestamp': '2025-09-10 02:50:30.298901', 'step': 20941, 'epoch': 3} {'type': 'loss', 'content': 0.05533042177557945, 'timestamp': '2025-09-10 02:50:30.301251', 'step': 20942, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 2847885091968}, 'timestamp': '2025-09-10 02:50:30.331432', 'step': 20942, 'epoch': 3} {'type': 'loss', 'content': 0.03725408762693405, 'timestamp': '2025-09-10 02:50:30.333900', 'step': 20943, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 2847885091968}, 'timestamp': '2025-09-10 02:50:30.365061', 'step': 20943, 'epoch': 3} {'type': 'loss', 'content': 0.11873753368854523, 'timestamp': '2025-09-10 02:50:30.388909', 'step': 20944, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 2847885091968}, 'timestamp': '2025-09-10 02:50:30.419132', 'step': 20944, 'epoch': 3} {'type': 'loss', 'content': 0.04740293323993683, 'timestamp': '2025-09-10 02:50:30.421981', 'step': 20945, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 2847885091968}, 'timestamp': '2025-09-10 02:50:30.453187', 'step': 20945, 'epoch': 3} {'type': 'loss', 'content': 0.05467955768108368, 'timestamp': '2025-09-10 02:50:30.455606', 'step': 20946, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 3322488817984}, 'timestamp': '2025-09-10 02:50:30.486238', 'step': 20946, 'epoch': 3} {'type': 'loss', 'content': 0.017498459666967392, 'timestamp': '2025-09-10 02:50:30.488908', 'step': 20947, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 3322488817984}, 'timestamp': '2025-09-10 02:50:30.520040', 'step': 20947, 'epoch': 3} {'type': 'loss', 'content': 0.06337202340364456, 'timestamp': '2025-09-10 02:50:30.543555', 'step': 20948, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 3322488817984}, 'timestamp': '2025-09-10 02:50:30.574366', 'step': 20948, 'epoch': 3} {'type': 'loss', 'content': 0.07694844901561737, 'timestamp': '2025-09-10 02:50:30.577114', 'step': 20949, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 2847885091968}, 'timestamp': '2025-09-10 02:50:30.606826', 'step': 20949, 'epoch': 3} {'type': 'loss', 'content': 0.09413878619670868, 'timestamp': '2025-09-10 02:50:30.609563', 'step': 20950, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 3322488817984}, 'timestamp': '2025-09-10 02:50:30.641042', 'step': 20950, 'epoch': 3} {'type': 'loss', 'content': 0.062355589121580124, 'timestamp': '2025-09-10 02:50:30.643436', 'step': 20951, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 128], 'flops': 3797092544000}, 'timestamp': '2025-09-10 02:50:30.673424', 'step': 20951, 'epoch': 3} {'type': 'loss', 'content': 0.07777328044176102, 'timestamp': '2025-09-10 02:50:30.696963', 'step': 20952, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 3322488817984}, 'timestamp': '2025-09-10 02:50:30.727197', 'step': 20952, 'epoch': 3} {'type': 'loss', 'content': 0.07876020669937134, 'timestamp': '2025-09-10 02:50:30.729867', 'step': 20953, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 3322488817984}, 'timestamp': '2025-09-10 02:50:30.759907', 'step': 20953, 'epoch': 3} {'type': 'loss', 'content': 0.07792641967535019, 'timestamp': '2025-09-10 02:50:30.762435', 'step': 20954, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 144], 'flops': 4271696270016}, 'timestamp': '2025-09-10 02:50:30.793168', 'step': 20954, 'epoch': 3} {'type': 'loss', 'content': 0.11482328176498413, 'timestamp': '2025-09-10 02:50:30.795811', 'step': 20955, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 3322488817984}, 'timestamp': '2025-09-10 02:50:30.826691', 'step': 20955, 'epoch': 3} {'type': 'loss', 'content': 0.12193158268928528, 'timestamp': '2025-09-10 02:50:30.850524', 'step': 20956, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 2847885091968}, 'timestamp': '2025-09-10 02:50:30.881621', 'step': 20956, 'epoch': 3} {'type': 'loss', 'content': 0.026357032358646393, 'timestamp': '2025-09-10 02:50:30.884257', 'step': 20957, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 2847885091968}, 'timestamp': '2025-09-10 02:50:30.916118', 'step': 20957, 'epoch': 3} {'type': 'loss', 'content': 0.06926178187131882, 'timestamp': '2025-09-10 02:50:30.918958', 'step': 20958, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 3322488817984}, 'timestamp': '2025-09-10 02:50:30.949118', 'step': 20958, 'epoch': 3} {'type': 'loss', 'content': 0.08439239859580994, 'timestamp': '2025-09-10 02:50:30.951775', 'step': 20959, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 3322488817984}, 'timestamp': '2025-09-10 02:50:30.982140', 'step': 20959, 'epoch': 3} {'type': 'loss', 'content': 0.04419741407036781, 'timestamp': '2025-09-10 02:50:31.005626', 'step': 20960, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 128], 'flops': 3797092544000}, 'timestamp': '2025-09-10 02:50:31.036694', 'step': 20960, 'epoch': 3} {'type': 'loss', 'content': 0.11948931962251663, 'timestamp': '2025-09-10 02:50:31.039145', 'step': 20961, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 3322488817984}, 'timestamp': '2025-09-10 02:50:31.069256', 'step': 20961, 'epoch': 3} {'type': 'loss', 'content': 0.010516298934817314, 'timestamp': '2025-09-10 02:50:31.071624', 'step': 20962, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 2847885091968}, 'timestamp': '2025-09-10 02:50:31.101893', 'step': 20962, 'epoch': 3} {'type': 'loss', 'content': 0.08209041506052017, 'timestamp': '2025-09-10 02:50:31.104265', 'step': 20963, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 3322488817984}, 'timestamp': '2025-09-10 02:50:31.134663', 'step': 20963, 'epoch': 3} {'type': 'loss', 'content': 0.018678918480873108, 'timestamp': '2025-09-10 02:50:31.158428', 'step': 20964, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 2847885091968}, 'timestamp': '2025-09-10 02:50:31.190509', 'step': 20964, 'epoch': 3} {'type': 'loss', 'content': 0.07170531898736954, 'timestamp': '2025-09-10 02:50:31.193342', 'step': 20965, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 128], 'flops': 3797092544000}, 'timestamp': '2025-09-10 02:50:31.225249', 'step': 20965, 'epoch': 3} {'type': 'loss', 'content': 0.06034279987215996, 'timestamp': '2025-09-10 02:50:31.227499', 'step': 20966, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 3322488817984}, 'timestamp': '2025-09-10 02:50:31.258402', 'step': 20966, 'epoch': 3} {'type': 'loss', 'content': 0.04568915069103241, 'timestamp': '2025-09-10 02:50:31.260852', 'step': 20967, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 3322488817984}, 'timestamp': '2025-09-10 02:50:31.293043', 'step': 20967, 'epoch': 3} {'type': 'loss', 'content': 0.034952711313962936, 'timestamp': '2025-09-10 02:50:31.318169', 'step': 20968, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 3322488817984}, 'timestamp': '2025-09-10 02:50:31.349296', 'step': 20968, 'epoch': 3} {'type': 'loss', 'content': 0.03973917290568352, 'timestamp': '2025-09-10 02:50:31.352090', 'step': 20969, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 128], 'flops': 3797092544000}, 'timestamp': '2025-09-10 02:50:31.383331', 'step': 20969, 'epoch': 3} {'type': 'loss', 'content': 0.03078603185713291, 'timestamp': '2025-09-10 02:50:31.385233', 'step': 20970, 'epoch': 3} {'type': 'flops', 'content': [{'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1582003754624}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1898404492032}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1898404492032}, {'type': 'perplexity', 'in_batch_dim': [8, 144], 'batch_size': 8, 'flops': 2847606704256}, {'type': 'perplexity', 'in_batch_dim': [8, 64], 'batch_size': 8, 'flops': 1265603017216}, {'type': 'perplexity', 'in_batch_dim': [8, 112], 'batch_size': 8, 'flops': 2214805229440}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1898404492032}, {'type': 'perplexity', 'in_batch_dim': [8, 112], 'batch_size': 8, 'flops': 2214805229440}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1898404492032}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1898404492032}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1898404492032}, {'type': 'perplexity', 'in_batch_dim': [8, 112], 'batch_size': 8, 'flops': 2214805229440}, {'type': 'perplexity', 'in_batch_dim': [8, 112], 'batch_size': 8, 'flops': 2214805229440}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1582003754624}, {'type': 'perplexity', 'in_batch_dim': [8, 144], 'batch_size': 8, 'flops': 2847606704256}, {'type': 'perplexity', 'in_batch_dim': [8, 112], 'batch_size': 8, 'flops': 2214805229440}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1582003754624}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1898404492032}, {'type': 'perplexity', 'in_batch_dim': [8, 64], 'batch_size': 8, 'flops': 1265603017216}, {'type': 'perplexity', 'in_batch_dim': [8, 64], 'batch_size': 8, 'flops': 1265603017216}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1898404492032}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1582003754624}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1582003754624}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1582003754624}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1582003754624}, {'type': 'perplexity', 'in_batch_dim': [8, 64], 'batch_size': 8, 'flops': 1265603017216}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1898404492032}, {'type': 'perplexity', 'in_batch_dim': [8, 64], 'batch_size': 8, 'flops': 1265603017216}, {'type': 'perplexity', 'in_batch_dim': [8, 64], 'batch_size': 8, 'flops': 1265603017216}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1582003754624}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1582003754624}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1582003754624}, {'type': 'perplexity', 'in_batch_dim': [8, 112], 'batch_size': 8, 'flops': 2214805229440}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1582003754624}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1582003754624}, {'type': 'perplexity', 'in_batch_dim': [8, 160], 'batch_size': 8, 'flops': 3164007441664}, {'type': 'perplexity', 'in_batch_dim': [8, 64], 'batch_size': 8, 'flops': 1265603017216}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1898404492032}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1898404492032}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1898404492032}, {'type': 'perplexity', 'in_batch_dim': [8, 64], 'batch_size': 8, 'flops': 1265603017216}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1582003754624}, {'type': 'perplexity', 'in_batch_dim': [8, 64], 'batch_size': 8, 'flops': 1265603017216}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1582003754624}, {'type': 'perplexity', 'in_batch_dim': [8, 64], 'batch_size': 8, 'flops': 1265603017216}, {'type': 'perplexity', 'in_batch_dim': [8, 112], 'batch_size': 8, 'flops': 2214805229440}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1582003754624}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1898404492032}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1898404492032}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1898404492032}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1898404492032}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1898404492032}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1898404492032}, {'type': 'perplexity', 'in_batch_dim': [8, 64], 'batch_size': 8, 'flops': 1265603017216}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1898404492032}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1582003754624}, {'type': 'perplexity', 'in_batch_dim': [8, 64], 'batch_size': 8, 'flops': 1265603017216}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1898404492032}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1898404492032}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1898404492032}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1898404492032}, {'type': 'perplexity', 'in_batch_dim': [8, 128], 'batch_size': 8, 'flops': 2531205966848}, {'type': 'perplexity', 'in_batch_dim': [8, 112], 'batch_size': 8, 'flops': 2214805229440}, {'type': 'perplexity', 'in_batch_dim': [8, 64], 'batch_size': 8, 'flops': 1265603017216}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1582003754624}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1898404492032}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1582003754624}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1898404492032}, {'type': 'perplexity', 'in_batch_dim': [8, 64], 'batch_size': 8, 'flops': 1265603017216}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1898404492032}, {'type': 'perplexity', 'in_batch_dim': [8, 112], 'batch_size': 8, 'flops': 2214805229440}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1898404492032}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1582003754624}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1582003754624}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1582003754624}, {'type': 'perplexity', 'in_batch_dim': [8, 64], 'batch_size': 8, 'flops': 1265603017216}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1582003754624}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1898404492032}, {'type': 'perplexity', 'in_batch_dim': [8, 64], 'batch_size': 8, 'flops': 1265603017216}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1582003754624}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1898404492032}, {'type': 'perplexity', 'in_batch_dim': [8, 64], 'batch_size': 8, 'flops': 1265603017216}, {'type': 'perplexity', 'in_batch_dim': [8, 112], 'batch_size': 8, 'flops': 2214805229440}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1582003754624}, {'type': 'perplexity', 'in_batch_dim': [8, 144], 'batch_size': 8, 'flops': 2847606704256}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1582003754624}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1582003754624}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1582003754624}, {'type': 'perplexity', 'in_batch_dim': [8, 64], 'batch_size': 8, 'flops': 1265603017216}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1898404492032}, {'type': 'perplexity', 'in_batch_dim': [8, 112], 'batch_size': 8, 'flops': 2214805229440}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1898404492032}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1582003754624}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1582003754624}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1582003754624}, {'type': 'perplexity', 'in_batch_dim': [8, 112], 'batch_size': 8, 'flops': 2214805229440}, {'type': 'perplexity', 'in_batch_dim': [8, 64], 'batch_size': 8, 'flops': 1265603017216}, {'type': 'perplexity', 'in_batch_dim': [8, 112], 'batch_size': 8, 'flops': 2214805229440}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1582003754624}, {'type': 'perplexity', 'in_batch_dim': [8, 64], 'batch_size': 8, 'flops': 1265603017216}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1582003754624}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1582003754624}, {'type': 'perplexity', 'in_batch_dim': [8, 64], 'batch_size': 8, 'flops': 1265603017216}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1582003754624}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1582003754624}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1582003754624}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1898404492032}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1582003754624}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1582003754624}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1898404492032}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1898404492032}, {'type': 'perplexity', 'in_batch_dim': [8, 112], 'batch_size': 8, 'flops': 2214805229440}, {'type': 'perplexity', 'in_batch_dim': [8, 64], 'batch_size': 8, 'flops': 1265603017216}, {'type': 'perplexity', 'in_batch_dim': [8, 112], 'batch_size': 8, 'flops': 2214805229440}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1898404492032}, {'type': 'perplexity', 'in_batch_dim': [8, 112], 'batch_size': 8, 'flops': 2214805229440}, {'type': 'perplexity', 'in_batch_dim': [8, 128], 'batch_size': 8, 'flops': 2531205966848}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1582003754624}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1898404492032}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1898404492032}, {'type': 'perplexity', 'in_batch_dim': [8, 192], 'batch_size': 8, 'flops': 3796808916480}, {'type': 'perplexity', 'in_batch_dim': [8, 64], 'batch_size': 8, 'flops': 1265603017216}, {'type': 'perplexity', 'in_batch_dim': [8, 144], 'batch_size': 8, 'flops': 2847606704256}, {'type': 'perplexity', 'in_batch_dim': [8, 64], 'batch_size': 8, 'flops': 1265603017216}, {'type': 'perplexity', 'in_batch_dim': [8, 144], 'batch_size': 8, 'flops': 2847606704256}, {'type': 'perplexity', 'in_batch_dim': [8, 64], 'batch_size': 8, 'flops': 1265603017216}, {'type': 'perplexity', 'in_batch_dim': [8, 64], 'batch_size': 8, 'flops': 1265603017216}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1898404492032}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1898404492032}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1582003754624}, {'type': 'perplexity', 'in_batch_dim': [8, 128], 'batch_size': 8, 'flops': 2531205966848}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1898404492032}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1898404492032}, {'type': 'perplexity', 'in_batch_dim': [8, 112], 'batch_size': 8, 'flops': 2214805229440}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1582003754624}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1582003754624}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1898404492032}, {'type': 'perplexity', 'in_batch_dim': [8, 64], 'batch_size': 8, 'flops': 1265603017216}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1898404492032}, {'type': 'perplexity', 'in_batch_dim': [8, 112], 'batch_size': 8, 'flops': 2214805229440}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1898404492032}, {'type': 'perplexity', 'in_batch_dim': [8, 64], 'batch_size': 8, 'flops': 1265603017216}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1582003754624}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1898404492032}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1898404492032}, {'type': 'perplexity', 'in_batch_dim': [8, 64], 'batch_size': 8, 'flops': 1265603017216}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1898404492032}, {'type': 'perplexity', 'in_batch_dim': [8, 64], 'batch_size': 8, 'flops': 1265603017216}, {'type': 'perplexity', 'in_batch_dim': [8, 112], 'batch_size': 8, 'flops': 2214805229440}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1582003754624}, {'type': 'perplexity', 'in_batch_dim': [8, 128], 'batch_size': 8, 'flops': 2531205966848}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1582003754624}, {'type': 'perplexity', 'in_batch_dim': [8, 112], 'batch_size': 8, 'flops': 2214805229440}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1898404492032}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1582003754624}, {'type': 'perplexity', 'in_batch_dim': [8, 112], 'batch_size': 8, 'flops': 2214805229440}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1582003754624}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1898404492032}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1898404492032}, {'type': 'perplexity', 'in_batch_dim': [8, 128], 'batch_size': 8, 'flops': 2531205966848}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1898404492032}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1898404492032}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1582003754624}, {'type': 'perplexity', 'in_batch_dim': [8, 112], 'batch_size': 8, 'flops': 2214805229440}, {'type': 'perplexity', 'in_batch_dim': [8, 128], 'batch_size': 8, 'flops': 2531205966848}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1898404492032}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1582003754624}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1582003754624}, {'type': 'perplexity', 'in_batch_dim': [8, 128], 'batch_size': 8, 'flops': 2531205966848}, {'type': 'perplexity', 'in_batch_dim': [8, 112], 'batch_size': 8, 'flops': 2214805229440}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1582003754624}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1582003754624}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1898404492032}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1582003754624}, {'type': 'perplexity', 'in_batch_dim': [8, 64], 'batch_size': 8, 'flops': 1265603017216}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1582003754624}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1582003754624}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1582003754624}, {'type': 'perplexity', 'in_batch_dim': [8, 112], 'batch_size': 8, 'flops': 2214805229440}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1582003754624}, {'type': 'perplexity', 'in_batch_dim': [8, 64], 'batch_size': 8, 'flops': 1265603017216}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1582003754624}, {'type': 'perplexity', 'in_batch_dim': [8, 128], 'batch_size': 8, 'flops': 2531205966848}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1898404492032}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1898404492032}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1582003754624}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1898404492032}, {'type': 'perplexity', 'in_batch_dim': [8, 64], 'batch_size': 8, 'flops': 1265603017216}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1898404492032}, {'type': 'perplexity', 'in_batch_dim': [8, 112], 'batch_size': 8, 'flops': 2214805229440}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1898404492032}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1582003754624}, {'type': 'perplexity', 'in_batch_dim': [8, 112], 'batch_size': 8, 'flops': 2214805229440}, {'type': 'perplexity', 'in_batch_dim': [8, 112], 'batch_size': 8, 'flops': 2214805229440}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1898404492032}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1898404492032}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1898404492032}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1898404492032}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1582003754624}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1582003754624}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1582003754624}, {'type': 'perplexity', 'in_batch_dim': [8, 112], 'batch_size': 8, 'flops': 2214805229440}, {'type': 'perplexity', 'in_batch_dim': [8, 64], 'batch_size': 8, 'flops': 1265603017216}, {'type': 'perplexity', 'in_batch_dim': [8, 144], 'batch_size': 8, 'flops': 2847606704256}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1898404492032}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1898404492032}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1898404492032}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1898404492032}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1898404492032}, {'type': 'perplexity', 'in_batch_dim': [8, 112], 'batch_size': 8, 'flops': 2214805229440}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1582003754624}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1582003754624}, {'type': 'perplexity', 'in_batch_dim': [8, 64], 'batch_size': 8, 'flops': 1265603017216}, {'type': 'perplexity', 'in_batch_dim': [8, 112], 'batch_size': 8, 'flops': 2214805229440}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1582003754624}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1898404492032}, {'type': 'perplexity', 'in_batch_dim': [8, 64], 'batch_size': 8, 'flops': 1265603017216}, {'type': 'perplexity', 'in_batch_dim': [8, 112], 'batch_size': 8, 'flops': 2214805229440}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1898404492032}, {'type': 'perplexity', 'in_batch_dim': [8, 64], 'batch_size': 8, 'flops': 1265603017216}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1582003754624}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1582003754624}, {'type': 'perplexity', 'in_batch_dim': [8, 64], 'batch_size': 8, 'flops': 1265603017216}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1898404492032}, {'type': 'perplexity', 'in_batch_dim': [8, 64], 'batch_size': 8, 'flops': 1265603017216}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1582003754624}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1898404492032}, {'type': 'perplexity', 'in_batch_dim': [8, 112], 'batch_size': 8, 'flops': 2214805229440}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1582003754624}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1898404492032}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1582003754624}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1898404492032}, {'type': 'perplexity', 'in_batch_dim': [8, 112], 'batch_size': 8, 'flops': 2214805229440}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1898404492032}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1898404492032}, {'type': 'perplexity', 'in_batch_dim': [8, 64], 'batch_size': 8, 'flops': 1265603017216}, {'type': 'perplexity', 'in_batch_dim': [8, 64], 'batch_size': 8, 'flops': 1265603017216}, {'type': 'perplexity', 'in_batch_dim': [8, 128], 'batch_size': 8, 'flops': 2531205966848}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1582003754624}, {'type': 'perplexity', 'in_batch_dim': [8, 64], 'batch_size': 8, 'flops': 1265603017216}, {'type': 'perplexity', 'in_batch_dim': [8, 112], 'batch_size': 8, 'flops': 2214805229440}, {'type': 'perplexity', 'in_batch_dim': [8, 112], 'batch_size': 8, 'flops': 2214805229440}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1898404492032}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1582003754624}, {'type': 'perplexity', 'in_batch_dim': [8, 128], 'batch_size': 8, 'flops': 2531205966848}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1898404492032}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1582003754624}, {'type': 'perplexity', 'in_batch_dim': [8, 112], 'batch_size': 8, 'flops': 2214805229440}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1898404492032}, {'type': 'perplexity', 'in_batch_dim': [8, 64], 'batch_size': 8, 'flops': 1265603017216}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1582003754624}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1898404492032}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1898404492032}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1582003754624}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1582003754624}, {'type': 'perplexity', 'in_batch_dim': [8, 64], 'batch_size': 8, 'flops': 1265603017216}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1898404492032}, {'type': 'perplexity', 'in_batch_dim': [8, 112], 'batch_size': 8, 'flops': 2214805229440}, {'type': 'perplexity', 'in_batch_dim': [8, 64], 'batch_size': 8, 'flops': 1265603017216}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1898404492032}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1582003754624}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1582003754624}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1898404492032}, {'type': 'perplexity', 'in_batch_dim': [8, 112], 'batch_size': 8, 'flops': 2214805229440}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1898404492032}, {'type': 'perplexity', 'in_batch_dim': [8, 112], 'batch_size': 8, 'flops': 2214805229440}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1582003754624}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1582003754624}, {'type': 'perplexity', 'in_batch_dim': [8, 64], 'batch_size': 8, 'flops': 1265603017216}, {'type': 'perplexity', 'in_batch_dim': [8, 112], 'batch_size': 8, 'flops': 2214805229440}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1582003754624}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1898404492032}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1582003754624}, {'type': 'perplexity', 'in_batch_dim': [8, 112], 'batch_size': 8, 'flops': 2214805229440}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1898404492032}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1898404492032}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1582003754624}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1898404492032}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1582003754624}, {'type': 'perplexity', 'in_batch_dim': [8, 112], 'batch_size': 8, 'flops': 2214805229440}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1582003754624}, {'type': 'perplexity', 'in_batch_dim': [8, 64], 'batch_size': 8, 'flops': 1265603017216}, {'type': 'perplexity', 'in_batch_dim': [8, 64], 'batch_size': 8, 'flops': 1265603017216}, {'type': 'perplexity', 'in_batch_dim': [8, 48], 'batch_size': 8, 'flops': 949202279808}, {'type': 'perplexity', 'in_batch_dim': [8, 112], 'batch_size': 8, 'flops': 2214805229440}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1582003754624}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1898404492032}, {'type': 'perplexity', 'in_batch_dim': [8, 128], 'batch_size': 8, 'flops': 2531205966848}, {'type': 'perplexity', 'in_batch_dim': [8, 112], 'batch_size': 8, 'flops': 2214805229440}, {'type': 'perplexity', 'in_batch_dim': [8, 128], 'batch_size': 8, 'flops': 2531205966848}, {'type': 'perplexity', 'in_batch_dim': [8, 112], 'batch_size': 8, 'flops': 2214805229440}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1582003754624}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1898404492032}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1582003754624}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1582003754624}, {'type': 'perplexity', 'in_batch_dim': [8, 112], 'batch_size': 8, 'flops': 2214805229440}, {'type': 'perplexity', 'in_batch_dim': [8, 112], 'batch_size': 8, 'flops': 2214805229440}, {'type': 'perplexity', 'in_batch_dim': [8, 64], 'batch_size': 8, 'flops': 1265603017216}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1898404492032}, {'type': 'perplexity', 'in_batch_dim': [8, 112], 'batch_size': 8, 'flops': 2214805229440}, {'type': 'perplexity', 'in_batch_dim': [8, 64], 'batch_size': 8, 'flops': 1265603017216}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1898404492032}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1582003754624}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1898404492032}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1898404492032}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1898404492032}, {'type': 'perplexity', 'in_batch_dim': [8, 48], 'batch_size': 8, 'flops': 949202279808}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1898404492032}, {'type': 'perplexity', 'in_batch_dim': [8, 112], 'batch_size': 8, 'flops': 2214805229440}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1582003754624}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1898404492032}, {'type': 'perplexity', 'in_batch_dim': [8, 64], 'batch_size': 8, 'flops': 1265603017216}, {'type': 'perplexity', 'in_batch_dim': [8, 112], 'batch_size': 8, 'flops': 2214805229440}, {'type': 'perplexity', 'in_batch_dim': [8, 64], 'batch_size': 8, 'flops': 1265603017216}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1582003754624}, {'type': 'perplexity', 'in_batch_dim': [8, 64], 'batch_size': 8, 'flops': 1265603017216}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1582003754624}, {'type': 'perplexity', 'in_batch_dim': [8, 112], 'batch_size': 8, 'flops': 2214805229440}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1582003754624}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1582003754624}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1582003754624}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1582003754624}, {'type': 'perplexity', 'in_batch_dim': [8, 112], 'batch_size': 8, 'flops': 2214805229440}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1582003754624}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1898404492032}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1898404492032}, {'type': 'perplexity', 'in_batch_dim': [8, 112], 'batch_size': 8, 'flops': 2214805229440}, {'type': 'perplexity', 'in_batch_dim': [8, 144], 'batch_size': 8, 'flops': 2847606704256}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1582003754624}, {'type': 'perplexity', 'in_batch_dim': [8, 64], 'batch_size': 8, 'flops': 1265603017216}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1898404492032}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1898404492032}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1582003754624}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1898404492032}, {'type': 'perplexity', 'in_batch_dim': [8, 128], 'batch_size': 8, 'flops': 2531205966848}, {'type': 'perplexity', 'in_batch_dim': [8, 112], 'batch_size': 8, 'flops': 2214805229440}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1898404492032}, {'type': 'perplexity', 'in_batch_dim': [8, 64], 'batch_size': 8, 'flops': 1265603017216}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1898404492032}, {'type': 'perplexity', 'in_batch_dim': [8, 160], 'batch_size': 8, 'flops': 3164007441664}, {'type': 'perplexity', 'in_batch_dim': [8, 64], 'batch_size': 8, 'flops': 1265603017216}, {'type': 'perplexity', 'in_batch_dim': [8, 112], 'batch_size': 8, 'flops': 2214805229440}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1898404492032}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1898404492032}, {'type': 'perplexity', 'in_batch_dim': [8, 144], 'batch_size': 8, 'flops': 2847606704256}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1582003754624}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1898404492032}, {'type': 'perplexity', 'in_batch_dim': [8, 64], 'batch_size': 8, 'flops': 1265603017216}, {'type': 'perplexity', 'in_batch_dim': [8, 64], 'batch_size': 8, 'flops': 1265603017216}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1898404492032}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1898404492032}, {'type': 'perplexity', 'in_batch_dim': [8, 112], 'batch_size': 8, 'flops': 2214805229440}, {'type': 'perplexity', 'in_batch_dim': [8, 112], 'batch_size': 8, 'flops': 2214805229440}, {'type': 'perplexity', 'in_batch_dim': [8, 128], 'batch_size': 8, 'flops': 2531205966848}, {'type': 'perplexity', 'in_batch_dim': [8, 112], 'batch_size': 8, 'flops': 2214805229440}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1582003754624}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1582003754624}, {'type': 'perplexity', 'in_batch_dim': [8, 112], 'batch_size': 8, 'flops': 2214805229440}, {'type': 'perplexity', 'in_batch_dim': [8, 128], 'batch_size': 8, 'flops': 2531205966848}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1582003754624}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1582003754624}, {'type': 'perplexity', 'in_batch_dim': [8, 64], 'batch_size': 8, 'flops': 1265603017216}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1898404492032}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1582003754624}, {'type': 'perplexity', 'in_batch_dim': [8, 112], 'batch_size': 8, 'flops': 2214805229440}, {'type': 'perplexity', 'in_batch_dim': [8, 64], 'batch_size': 8, 'flops': 1265603017216}, {'type': 'perplexity', 'in_batch_dim': [8, 64], 'batch_size': 8, 'flops': 1265603017216}, {'type': 'perplexity', 'in_batch_dim': [8, 128], 'batch_size': 8, 'flops': 2531205966848}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1898404492032}, {'type': 'perplexity', 'in_batch_dim': [8, 112], 'batch_size': 8, 'flops': 2214805229440}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1582003754624}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1582003754624}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1582003754624}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1582003754624}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1582003754624}, {'type': 'perplexity', 'in_batch_dim': [8, 112], 'batch_size': 8, 'flops': 2214805229440}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1898404492032}, {'type': 'perplexity', 'in_batch_dim': [8, 112], 'batch_size': 8, 'flops': 2214805229440}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1898404492032}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1582003754624}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1582003754624}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1898404492032}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1898404492032}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1582003754624}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1582003754624}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1582003754624}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1582003754624}, {'type': 'perplexity', 'in_batch_dim': [8, 48], 'batch_size': 8, 'flops': 949202279808}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1582003754624}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1898404492032}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1582003754624}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1898404492032}, {'type': 'perplexity', 'in_batch_dim': [8, 112], 'batch_size': 8, 'flops': 2214805229440}, {'type': 'perplexity', 'in_batch_dim': [8, 128], 'batch_size': 8, 'flops': 2531205966848}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1898404492032}, {'type': 'perplexity', 'in_batch_dim': [8, 64], 'batch_size': 8, 'flops': 1265603017216}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1898404492032}, {'type': 'perplexity', 'in_batch_dim': [8, 64], 'batch_size': 8, 'flops': 1265603017216}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1582003754624}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1582003754624}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1582003754624}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1582003754624}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1898404492032}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1582003754624}, {'type': 'perplexity', 'in_batch_dim': [8, 112], 'batch_size': 8, 'flops': 2214805229440}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1898404492032}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1582003754624}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1898404492032}, {'type': 'perplexity', 'in_batch_dim': [8, 64], 'batch_size': 8, 'flops': 1265603017216}, {'type': 'perplexity', 'in_batch_dim': [8, 64], 'batch_size': 8, 'flops': 1265603017216}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1898404492032}, {'type': 'perplexity', 'in_batch_dim': [8, 64], 'batch_size': 8, 'flops': 1265603017216}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1582003754624}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1582003754624}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1582003754624}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1582003754624}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1582003754624}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1582003754624}, {'type': 'perplexity', 'in_batch_dim': [8, 64], 'batch_size': 8, 'flops': 1265603017216}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1582003754624}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1582003754624}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1898404492032}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1898404492032}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1898404492032}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1898404492032}, {'type': 'perplexity', 'in_batch_dim': [8, 128], 'batch_size': 8, 'flops': 2531205966848}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1582003754624}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1898404492032}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1898404492032}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1898404492032}, {'type': 'perplexity', 'in_batch_dim': [8, 64], 'batch_size': 8, 'flops': 1265603017216}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1898404492032}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1898404492032}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1582003754624}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1898404492032}, {'type': 'perplexity', 'in_batch_dim': [8, 112], 'batch_size': 8, 'flops': 2214805229440}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1898404492032}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1582003754624}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1898404492032}, {'type': 'perplexity', 'in_batch_dim': [8, 64], 'batch_size': 8, 'flops': 1265603017216}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1898404492032}, {'type': 'perplexity', 'in_batch_dim': [8, 112], 'batch_size': 8, 'flops': 2214805229440}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1898404492032}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1898404492032}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1582003754624}, {'type': 'perplexity', 'in_batch_dim': [8, 64], 'batch_size': 8, 'flops': 1265603017216}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1582003754624}, {'type': 'perplexity', 'in_batch_dim': [8, 128], 'batch_size': 8, 'flops': 2531205966848}, {'type': 'perplexity', 'in_batch_dim': [8, 64], 'batch_size': 8, 'flops': 1265603017216}, {'type': 'perplexity', 'in_batch_dim': [8, 128], 'batch_size': 8, 'flops': 2531205966848}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1898404492032}, {'type': 'perplexity', 'in_batch_dim': [8, 112], 'batch_size': 8, 'flops': 2214805229440}, {'type': 'perplexity', 'in_batch_dim': [8, 112], 'batch_size': 8, 'flops': 2214805229440}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1898404492032}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1898404492032}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1582003754624}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1582003754624}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1582003754624}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1898404492032}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1582003754624}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1582003754624}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1898404492032}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1582003754624}, {'type': 'perplexity', 'in_batch_dim': [8, 64], 'batch_size': 8, 'flops': 1265603017216}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1898404492032}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1898404492032}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1898404492032}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1898404492032}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1582003754624}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1898404492032}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1582003754624}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1582003754624}, {'type': 'perplexity', 'in_batch_dim': [8, 112], 'batch_size': 8, 'flops': 2214805229440}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1582003754624}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1898404492032}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1582003754624}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1582003754624}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1582003754624}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1898404492032}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1582003754624}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1898404492032}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1582003754624}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1582003754624}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1898404492032}, {'type': 'perplexity', 'in_batch_dim': [8, 112], 'batch_size': 8, 'flops': 2214805229440}, {'type': 'perplexity', 'in_batch_dim': [8, 128], 'batch_size': 8, 'flops': 2531205966848}, {'type': 'perplexity', 'in_batch_dim': [8, 112], 'batch_size': 8, 'flops': 2214805229440}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1582003754624}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1898404492032}, {'type': 'perplexity', 'in_batch_dim': [8, 64], 'batch_size': 8, 'flops': 1265603017216}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1898404492032}, {'type': 'perplexity', 'in_batch_dim': [8, 64], 'batch_size': 8, 'flops': 1265603017216}, {'type': 'perplexity', 'in_batch_dim': [8, 144], 'batch_size': 8, 'flops': 2847606704256}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1582003754624}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1898404492032}, {'type': 'perplexity', 'in_batch_dim': [8, 112], 'batch_size': 8, 'flops': 2214805229440}, {'type': 'perplexity', 'in_batch_dim': [8, 144], 'batch_size': 8, 'flops': 2847606704256}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1582003754624}, {'type': 'perplexity', 'in_batch_dim': [8, 112], 'batch_size': 8, 'flops': 2214805229440}, {'type': 'perplexity', 'in_batch_dim': [8, 64], 'batch_size': 8, 'flops': 1265603017216}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1582003754624}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1898404492032}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1898404492032}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1582003754624}, {'type': 'perplexity', 'in_batch_dim': [8, 112], 'batch_size': 8, 'flops': 2214805229440}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1582003754624}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1582003754624}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1582003754624}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1582003754624}, {'type': 'perplexity', 'in_batch_dim': [8, 112], 'batch_size': 8, 'flops': 2214805229440}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1898404492032}, {'type': 'perplexity', 'in_batch_dim': [8, 128], 'batch_size': 8, 'flops': 2531205966848}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1582003754624}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1582003754624}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1898404492032}, {'type': 'perplexity', 'in_batch_dim': [8, 64], 'batch_size': 8, 'flops': 1265603017216}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1898404492032}, {'type': 'perplexity', 'in_batch_dim': [8, 64], 'batch_size': 8, 'flops': 1265603017216}, {'type': 'perplexity', 'in_batch_dim': [8, 112], 'batch_size': 8, 'flops': 2214805229440}, {'type': 'perplexity', 'in_batch_dim': [8, 112], 'batch_size': 8, 'flops': 2214805229440}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1582003754624}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1898404492032}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1582003754624}, {'type': 'perplexity', 'in_batch_dim': [8, 64], 'batch_size': 8, 'flops': 1265603017216}, {'type': 'perplexity', 'in_batch_dim': [8, 64], 'batch_size': 8, 'flops': 1265603017216}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1898404492032}, {'type': 'perplexity', 'in_batch_dim': [8, 64], 'batch_size': 8, 'flops': 1265603017216}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1898404492032}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1898404492032}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1582003754624}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1898404492032}, {'type': 'perplexity', 'in_batch_dim': [8, 144], 'batch_size': 8, 'flops': 2847606704256}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1898404492032}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1582003754624}, {'type': 'perplexity', 'in_batch_dim': [8, 128], 'batch_size': 8, 'flops': 2531205966848}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1898404492032}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1898404492032}, {'type': 'perplexity', 'in_batch_dim': [8, 112], 'batch_size': 8, 'flops': 2214805229440}, {'type': 'perplexity', 'in_batch_dim': [8, 112], 'batch_size': 8, 'flops': 2214805229440}, {'type': 'perplexity', 'in_batch_dim': [8, 64], 'batch_size': 8, 'flops': 1265603017216}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1582003754624}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1898404492032}, {'type': 'perplexity', 'in_batch_dim': [8, 128], 'batch_size': 8, 'flops': 2531205966848}, {'type': 'perplexity', 'in_batch_dim': [8, 144], 'batch_size': 8, 'flops': 2847606704256}, {'type': 'perplexity', 'in_batch_dim': [8, 112], 'batch_size': 8, 'flops': 2214805229440}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1582003754624}, {'type': 'perplexity', 'in_batch_dim': [8, 64], 'batch_size': 8, 'flops': 1265603017216}, {'type': 'perplexity', 'in_batch_dim': [8, 64], 'batch_size': 8, 'flops': 1265603017216}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1898404492032}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1582003754624}, {'type': 'perplexity', 'in_batch_dim': [8, 64], 'batch_size': 8, 'flops': 1265603017216}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1898404492032}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1582003754624}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1898404492032}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1898404492032}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1582003754624}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1582003754624}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1582003754624}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1582003754624}, {'type': 'perplexity', 'in_batch_dim': [3, 48], 'batch_size': 8, 'flops': 949202279808}], 'timestamp': '2025-09-10 02:50:39.708104', 'step': 20970, 'epoch': 3} {'type': 'pplx', 'content': 10672.219012882248, 'timestamp': '2025-09-10 02:50:39.711341', 'step': 20970, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 160], 'flops': 4746299996032}, 'timestamp': '2025-09-10 02:50:39.741364', 'step': 20970, 'epoch': 3} {'type': 'loss', 'content': 0.08771299570798874, 'timestamp': '2025-09-10 02:50:39.743738', 'step': 20971, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 3322488817984}, 'timestamp': '2025-09-10 02:50:39.773887', 'step': 20971, 'epoch': 3} {'type': 'loss', 'content': 0.06362126767635345, 'timestamp': '2025-09-10 02:50:39.797771', 'step': 20972, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 160], 'flops': 4746299996032}, 'timestamp': '2025-09-10 02:50:39.828446', 'step': 20972, 'epoch': 3} {'type': 'loss', 'content': 0.037252046167850494, 'timestamp': '2025-09-10 02:50:39.830865', 'step': 20973, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 2847885091968}, 'timestamp': '2025-09-10 02:50:39.860875', 'step': 20973, 'epoch': 3} {'type': 'loss', 'content': 0.02668362483382225, 'timestamp': '2025-09-10 02:50:39.863298', 'step': 20974, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 128], 'flops': 3797092544000}, 'timestamp': '2025-09-10 02:50:39.892887', 'step': 20974, 'epoch': 3} {'type': 'loss', 'content': 0.08795278519392014, 'timestamp': '2025-09-10 02:50:39.895406', 'step': 20975, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 2847885091968}, 'timestamp': '2025-09-10 02:50:39.925693', 'step': 20975, 'epoch': 3} {'type': 'loss', 'content': 0.0567777045071125, 'timestamp': '2025-09-10 02:50:39.949489', 'step': 20976, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 2847885091968}, 'timestamp': '2025-09-10 02:50:39.979361', 'step': 20976, 'epoch': 3} {'type': 'loss', 'content': 0.05748094990849495, 'timestamp': '2025-09-10 02:50:39.981733', 'step': 20977, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 3322488817984}, 'timestamp': '2025-09-10 02:50:40.014686', 'step': 20977, 'epoch': 3} {'type': 'loss', 'content': 0.0851702094078064, 'timestamp': '2025-09-10 02:50:40.017426', 'step': 20978, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 128], 'flops': 3797092544000}, 'timestamp': '2025-09-10 02:50:40.047083', 'step': 20978, 'epoch': 3} {'type': 'loss', 'content': 0.13690093159675598, 'timestamp': '2025-09-10 02:50:40.049868', 'step': 20979, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 2847885091968}, 'timestamp': '2025-09-10 02:50:40.079679', 'step': 20979, 'epoch': 3} {'type': 'loss', 'content': 0.06762499362230301, 'timestamp': '2025-09-10 02:50:40.103641', 'step': 20980, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 128], 'flops': 3797092544000}, 'timestamp': '2025-09-10 02:50:40.136923', 'step': 20980, 'epoch': 3} {'type': 'loss', 'content': 0.0736774355173111, 'timestamp': '2025-09-10 02:50:40.139539', 'step': 20981, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 144], 'flops': 4271696270016}, 'timestamp': '2025-09-10 02:50:40.172261', 'step': 20981, 'epoch': 3} {'type': 'loss', 'content': 0.0956256240606308, 'timestamp': '2025-09-10 02:50:40.174752', 'step': 20982, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 128], 'flops': 3797092544000}, 'timestamp': '2025-09-10 02:50:40.205091', 'step': 20982, 'epoch': 3} {'type': 'loss', 'content': 0.018511174246668816, 'timestamp': '2025-09-10 02:50:40.207085', 'step': 20983, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 2847885091968}, 'timestamp': '2025-09-10 02:50:40.238849', 'step': 20983, 'epoch': 3} {'type': 'loss', 'content': 0.0842197984457016, 'timestamp': '2025-09-10 02:50:40.265564', 'step': 20984, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 128], 'flops': 3797092544000}, 'timestamp': '2025-09-10 02:50:40.301818', 'step': 20984, 'epoch': 3} {'type': 'loss', 'content': 0.08301001787185669, 'timestamp': '2025-09-10 02:50:40.306108', 'step': 20985, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 2847885091968}, 'timestamp': '2025-09-10 02:50:40.346373', 'step': 20985, 'epoch': 3} {'type': 'loss', 'content': 0.023927021771669388, 'timestamp': '2025-09-10 02:50:40.349488', 'step': 20986, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 2847885091968}, 'timestamp': '2025-09-10 02:50:40.387812', 'step': 20986, 'epoch': 3} {'type': 'loss', 'content': 0.08047429472208023, 'timestamp': '2025-09-10 02:50:40.391309', 'step': 20987, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 128], 'flops': 3797092544000}, 'timestamp': '2025-09-10 02:50:40.428296', 'step': 20987, 'epoch': 3} {'type': 'loss', 'content': 0.06973837316036224, 'timestamp': '2025-09-10 02:50:40.454885', 'step': 20988, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 160], 'flops': 4746299996032}, 'timestamp': '2025-09-10 02:50:40.492034', 'step': 20988, 'epoch': 3} {'type': 'loss', 'content': 0.058014899492263794, 'timestamp': '2025-09-10 02:50:40.496088', 'step': 20989, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 128], 'flops': 3797092544000}, 'timestamp': '2025-09-10 02:50:40.537909', 'step': 20989, 'epoch': 3} {'type': 'loss', 'content': 0.02698521502315998, 'timestamp': '2025-09-10 02:50:40.543567', 'step': 20990, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 128], 'flops': 3797092544000}, 'timestamp': '2025-09-10 02:50:40.585708', 'step': 20990, 'epoch': 3} {'type': 'loss', 'content': 0.10459782928228378, 'timestamp': '2025-09-10 02:50:40.588257', 'step': 20991, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 3322488817984}, 'timestamp': '2025-09-10 02:50:40.617780', 'step': 20991, 'epoch': 3} {'type': 'loss', 'content': 0.03703520447015762, 'timestamp': '2025-09-10 02:50:40.642825', 'step': 20992, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 2847885091968}, 'timestamp': '2025-09-10 02:50:40.673530', 'step': 20992, 'epoch': 3} {'type': 'loss', 'content': 0.055538080632686615, 'timestamp': '2025-09-10 02:50:40.675986', 'step': 20993, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 3322488817984}, 'timestamp': '2025-09-10 02:50:40.706012', 'step': 20993, 'epoch': 3} {'type': 'loss', 'content': 0.07386906445026398, 'timestamp': '2025-09-10 02:50:40.708883', 'step': 20994, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 3322488817984}, 'timestamp': '2025-09-10 02:50:40.739319', 'step': 20994, 'epoch': 3} {'type': 'loss', 'content': 0.0756446123123169, 'timestamp': '2025-09-10 02:50:40.742165', 'step': 20995, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 160], 'flops': 4746299996032}, 'timestamp': '2025-09-10 02:50:40.772649', 'step': 20995, 'epoch': 3} {'type': 'loss', 'content': 0.09823411703109741, 'timestamp': '2025-09-10 02:50:40.796603', 'step': 20996, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 2847885091968}, 'timestamp': '2025-09-10 02:50:40.828291', 'step': 20996, 'epoch': 3} {'type': 'loss', 'content': 0.009319090284407139, 'timestamp': '2025-09-10 02:50:40.831201', 'step': 20997, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 3322488817984}, 'timestamp': '2025-09-10 02:50:40.861781', 'step': 20997, 'epoch': 3} {'type': 'loss', 'content': 0.04738945513963699, 'timestamp': '2025-09-10 02:50:40.864725', 'step': 20998, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 3322488817984}, 'timestamp': '2025-09-10 02:50:40.896798', 'step': 20998, 'epoch': 3} {'type': 'loss', 'content': 0.06841534376144409, 'timestamp': '2025-09-10 02:50:40.899922', 'step': 20999, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 3322488817984}, 'timestamp': '2025-09-10 02:50:40.932765', 'step': 20999, 'epoch': 3} {'type': 'loss', 'content': 0.053605854511260986, 'timestamp': '2025-09-10 02:50:40.957065', 'step': 21000, 'epoch': 3} {'type': 'info', 'content': 'Checkpoint saved at step 21000', 'timestamp': '2025-09-10 02:50:45.721322', 'step': 21000, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 144], 'flops': 4271696270016}, 'timestamp': '2025-09-10 02:50:45.771650', 'step': 21000, 'epoch': 3} {'type': 'loss', 'content': 0.11789939552545547, 'timestamp': '2025-09-10 02:50:45.774755', 'step': 21001, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 128], 'flops': 3797092544000}, 'timestamp': '2025-09-10 02:50:45.810139', 'step': 21001, 'epoch': 3} {'type': 'loss', 'content': 0.058908868581056595, 'timestamp': '2025-09-10 02:50:45.812727', 'step': 21002, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 128], 'flops': 3797092544000}, 'timestamp': '2025-09-10 02:50:45.845194', 'step': 21002, 'epoch': 3} {'type': 'loss', 'content': 0.11579602211713791, 'timestamp': '2025-09-10 02:50:45.847627', 'step': 21003, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 3322488817984}, 'timestamp': '2025-09-10 02:50:45.878807', 'step': 21003, 'epoch': 3} {'type': 'loss', 'content': 0.06115623936057091, 'timestamp': '2025-09-10 02:50:45.903048', 'step': 21004, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 128], 'flops': 3797092544000}, 'timestamp': '2025-09-10 02:50:45.939272', 'step': 21004, 'epoch': 3} {'type': 'loss', 'content': 0.06326466053724289, 'timestamp': '2025-09-10 02:50:45.941822', 'step': 21005, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 3322488817984}, 'timestamp': '2025-09-10 02:50:45.975177', 'step': 21005, 'epoch': 3} {'type': 'loss', 'content': 0.09328141808509827, 'timestamp': '2025-09-10 02:50:45.977784', 'step': 21006, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 128], 'flops': 3797092544000}, 'timestamp': '2025-09-10 02:50:46.011854', 'step': 21006, 'epoch': 3} {'type': 'loss', 'content': 0.08473402261734009, 'timestamp': '2025-09-10 02:50:46.014360', 'step': 21007, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 2847885091968}, 'timestamp': '2025-09-10 02:50:46.044720', 'step': 21007, 'epoch': 3} {'type': 'loss', 'content': 0.07131001353263855, 'timestamp': '2025-09-10 02:50:46.069795', 'step': 21008, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 3322488817984}, 'timestamp': '2025-09-10 02:50:46.101752', 'step': 21008, 'epoch': 3} {'type': 'loss', 'content': 0.02529243566095829, 'timestamp': '2025-09-10 02:50:46.104987', 'step': 21009, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 3322488817984}, 'timestamp': '2025-09-10 02:50:46.135729', 'step': 21009, 'epoch': 3} {'type': 'loss', 'content': 0.0469428189098835, 'timestamp': '2025-09-10 02:50:46.137907', 'step': 21010, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 2847885091968}, 'timestamp': '2025-09-10 02:50:46.176468', 'step': 21010, 'epoch': 3} {'type': 'loss', 'content': 0.09750690311193466, 'timestamp': '2025-09-10 02:50:46.181594', 'step': 21011, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 128], 'flops': 3797092544000}, 'timestamp': '2025-09-10 02:50:46.216616', 'step': 21011, 'epoch': 3} {'type': 'loss', 'content': 0.10056868940591812, 'timestamp': '2025-09-10 02:50:46.244101', 'step': 21012, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 3322488817984}, 'timestamp': '2025-09-10 02:50:46.286552', 'step': 21012, 'epoch': 3} {'type': 'loss', 'content': 0.07530833780765533, 'timestamp': '2025-09-10 02:50:46.289168', 'step': 21013, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 3322488817984}, 'timestamp': '2025-09-10 02:50:46.320555', 'step': 21013, 'epoch': 3} {'type': 'loss', 'content': 0.08096335828304291, 'timestamp': '2025-09-10 02:50:46.331107', 'step': 21014, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 3322488817984}, 'timestamp': '2025-09-10 02:50:46.361930', 'step': 21014, 'epoch': 3} {'type': 'loss', 'content': 0.1296066790819168, 'timestamp': '2025-09-10 02:50:46.364978', 'step': 21015, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 3322488817984}, 'timestamp': '2025-09-10 02:50:46.394101', 'step': 21015, 'epoch': 3} {'type': 'loss', 'content': 0.10254368185997009, 'timestamp': '2025-09-10 02:50:46.417598', 'step': 21016, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 2847885091968}, 'timestamp': '2025-09-10 02:50:46.452088', 'step': 21016, 'epoch': 3} {'type': 'loss', 'content': 0.06979914009571075, 'timestamp': '2025-09-10 02:50:46.455870', 'step': 21017, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 3322488817984}, 'timestamp': '2025-09-10 02:50:46.487354', 'step': 21017, 'epoch': 3} {'type': 'loss', 'content': 0.06514493376016617, 'timestamp': '2025-09-10 02:50:46.490193', 'step': 21018, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 2847885091968}, 'timestamp': '2025-09-10 02:50:46.523806', 'step': 21018, 'epoch': 3} {'type': 'loss', 'content': 0.07801744341850281, 'timestamp': '2025-09-10 02:50:46.526384', 'step': 21019, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 144], 'flops': 4271696270016}, 'timestamp': '2025-09-10 02:50:46.557711', 'step': 21019, 'epoch': 3} {'type': 'loss', 'content': 0.0470096580684185, 'timestamp': '2025-09-10 02:50:46.581883', 'step': 21020, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 2847885091968}, 'timestamp': '2025-09-10 02:50:46.616275', 'step': 21020, 'epoch': 3} {'type': 'loss', 'content': 0.07756263762712479, 'timestamp': '2025-09-10 02:50:46.621657', 'step': 21021, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 3322488817984}, 'timestamp': '2025-09-10 02:50:46.655559', 'step': 21021, 'epoch': 3} {'type': 'loss', 'content': 0.01916191726922989, 'timestamp': '2025-09-10 02:50:46.658182', 'step': 21022, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 3322488817984}, 'timestamp': '2025-09-10 02:50:46.688403', 'step': 21022, 'epoch': 3} {'type': 'loss', 'content': 0.08540026843547821, 'timestamp': '2025-09-10 02:50:46.695855', 'step': 21023, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 80], 'flops': 2373281365952}, 'timestamp': '2025-09-10 02:50:46.728404', 'step': 21023, 'epoch': 3} {'type': 'loss', 'content': 0.08114620298147202, 'timestamp': '2025-09-10 02:50:46.757825', 'step': 21024, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 128], 'flops': 3797092544000}, 'timestamp': '2025-09-10 02:50:46.788273', 'step': 21024, 'epoch': 3} {'type': 'loss', 'content': 0.05381236597895622, 'timestamp': '2025-09-10 02:50:46.790864', 'step': 21025, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 128], 'flops': 3797092544000}, 'timestamp': '2025-09-10 02:50:46.822423', 'step': 21025, 'epoch': 3} {'type': 'loss', 'content': 0.08389806002378464, 'timestamp': '2025-09-10 02:50:46.824787', 'step': 21026, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 128], 'flops': 3797092544000}, 'timestamp': '2025-09-10 02:50:46.859027', 'step': 21026, 'epoch': 3} {'type': 'loss', 'content': 0.13439148664474487, 'timestamp': '2025-09-10 02:50:46.861711', 'step': 21027, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 2847885091968}, 'timestamp': '2025-09-10 02:50:46.892060', 'step': 21027, 'epoch': 3} {'type': 'loss', 'content': 0.08452941477298737, 'timestamp': '2025-09-10 02:50:46.922063', 'step': 21028, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 128], 'flops': 3797092544000}, 'timestamp': '2025-09-10 02:50:46.952621', 'step': 21028, 'epoch': 3} {'type': 'loss', 'content': 0.05814005061984062, 'timestamp': '2025-09-10 02:50:46.955230', 'step': 21029, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 3322488817984}, 'timestamp': '2025-09-10 02:50:46.988954', 'step': 21029, 'epoch': 3} {'type': 'loss', 'content': 0.07195612043142319, 'timestamp': '2025-09-10 02:50:46.993658', 'step': 21030, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 3322488817984}, 'timestamp': '2025-09-10 02:50:47.028028', 'step': 21030, 'epoch': 3} {'type': 'loss', 'content': 0.0695275217294693, 'timestamp': '2025-09-10 02:50:47.030356', 'step': 21031, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 2847885091968}, 'timestamp': '2025-09-10 02:50:47.063004', 'step': 21031, 'epoch': 3} {'type': 'loss', 'content': 0.09458471089601517, 'timestamp': '2025-09-10 02:50:47.087550', 'step': 21032, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 3322488817984}, 'timestamp': '2025-09-10 02:50:47.121193', 'step': 21032, 'epoch': 3} {'type': 'loss', 'content': 0.07724004238843918, 'timestamp': '2025-09-10 02:50:47.123903', 'step': 21033, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 3322488817984}, 'timestamp': '2025-09-10 02:50:47.154124', 'step': 21033, 'epoch': 3} {'type': 'loss', 'content': 0.09023228287696838, 'timestamp': '2025-09-10 02:50:47.160823', 'step': 21034, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 128], 'flops': 3797092544000}, 'timestamp': '2025-09-10 02:50:47.191184', 'step': 21034, 'epoch': 3} {'type': 'loss', 'content': 0.058415379375219345, 'timestamp': '2025-09-10 02:50:47.195035', 'step': 21035, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 3322488817984}, 'timestamp': '2025-09-10 02:50:47.223859', 'step': 21035, 'epoch': 3} {'type': 'loss', 'content': 0.07147334516048431, 'timestamp': '2025-09-10 02:50:47.248698', 'step': 21036, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 128], 'flops': 3797092544000}, 'timestamp': '2025-09-10 02:50:47.282539', 'step': 21036, 'epoch': 3} {'type': 'loss', 'content': 0.0653136745095253, 'timestamp': '2025-09-10 02:50:47.285125', 'step': 21037, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 80], 'flops': 2373281365952}, 'timestamp': '2025-09-10 02:50:47.324657', 'step': 21037, 'epoch': 3} {'type': 'loss', 'content': 0.03465050458908081, 'timestamp': '2025-09-10 02:50:47.327447', 'step': 21038, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 144], 'flops': 4271696270016}, 'timestamp': '2025-09-10 02:50:47.359948', 'step': 21038, 'epoch': 3} {'type': 'loss', 'content': 0.028321295976638794, 'timestamp': '2025-09-10 02:50:47.362678', 'step': 21039, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 128], 'flops': 3797092544000}, 'timestamp': '2025-09-10 02:50:47.393789', 'step': 21039, 'epoch': 3} {'type': 'loss', 'content': 0.11317320913076401, 'timestamp': '2025-09-10 02:50:47.417372', 'step': 21040, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 128], 'flops': 3797092544000}, 'timestamp': '2025-09-10 02:50:47.448196', 'step': 21040, 'epoch': 3} {'type': 'loss', 'content': 0.055888403207063675, 'timestamp': '2025-09-10 02:50:47.450408', 'step': 21041, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 2847885091968}, 'timestamp': '2025-09-10 02:50:47.481285', 'step': 21041, 'epoch': 3} {'type': 'loss', 'content': 0.0983736515045166, 'timestamp': '2025-09-10 02:50:47.483334', 'step': 21042, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 3322488817984}, 'timestamp': '2025-09-10 02:50:47.512711', 'step': 21042, 'epoch': 3} {'type': 'loss', 'content': 0.1006074920296669, 'timestamp': '2025-09-10 02:50:47.515222', 'step': 21043, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 3322488817984}, 'timestamp': '2025-09-10 02:50:47.546146', 'step': 21043, 'epoch': 3} {'type': 'loss', 'content': 0.0667794868350029, 'timestamp': '2025-09-10 02:50:47.571687', 'step': 21044, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 2847885091968}, 'timestamp': '2025-09-10 02:50:47.601925', 'step': 21044, 'epoch': 3} {'type': 'loss', 'content': 0.11142130196094513, 'timestamp': '2025-09-10 02:50:47.604670', 'step': 21045, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 3322488817984}, 'timestamp': '2025-09-10 02:50:47.636263', 'step': 21045, 'epoch': 3} {'type': 'loss', 'content': 0.11724448949098587, 'timestamp': '2025-09-10 02:50:47.638948', 'step': 21046, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 144], 'flops': 4271696270016}, 'timestamp': '2025-09-10 02:50:47.669346', 'step': 21046, 'epoch': 3} {'type': 'loss', 'content': 0.030744682997465134, 'timestamp': '2025-09-10 02:50:47.672234', 'step': 21047, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 160], 'flops': 4746299996032}, 'timestamp': '2025-09-10 02:50:47.703557', 'step': 21047, 'epoch': 3} {'type': 'loss', 'content': 0.07340037822723389, 'timestamp': '2025-09-10 02:50:47.729029', 'step': 21048, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 3322488817984}, 'timestamp': '2025-09-10 02:50:47.759873', 'step': 21048, 'epoch': 3} {'type': 'loss', 'content': 0.07621186226606369, 'timestamp': '2025-09-10 02:50:47.762365', 'step': 21049, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 3322488817984}, 'timestamp': '2025-09-10 02:50:47.792572', 'step': 21049, 'epoch': 3} {'type': 'loss', 'content': 0.06427089869976044, 'timestamp': '2025-09-10 02:50:47.795314', 'step': 21050, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 3322488817984}, 'timestamp': '2025-09-10 02:50:47.825894', 'step': 21050, 'epoch': 3} {'type': 'loss', 'content': 0.06926204264163971, 'timestamp': '2025-09-10 02:50:47.828612', 'step': 21051, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 2847885091968}, 'timestamp': '2025-09-10 02:50:47.860295', 'step': 21051, 'epoch': 3} {'type': 'loss', 'content': 0.071159228682518, 'timestamp': '2025-09-10 02:50:47.884001', 'step': 21052, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 128], 'flops': 3797092544000}, 'timestamp': '2025-09-10 02:50:47.914237', 'step': 21052, 'epoch': 3} {'type': 'loss', 'content': 0.09085671603679657, 'timestamp': '2025-09-10 02:50:47.916703', 'step': 21053, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 144], 'flops': 4271696270016}, 'timestamp': '2025-09-10 02:50:47.947370', 'step': 21053, 'epoch': 3} {'type': 'loss', 'content': 0.06641343235969543, 'timestamp': '2025-09-10 02:50:47.949716', 'step': 21054, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 2847885091968}, 'timestamp': '2025-09-10 02:50:47.980131', 'step': 21054, 'epoch': 3} {'type': 'loss', 'content': 0.13988405466079712, 'timestamp': '2025-09-10 02:50:47.982512', 'step': 21055, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 128], 'flops': 3797092544000}, 'timestamp': '2025-09-10 02:50:48.011671', 'step': 21055, 'epoch': 3} {'type': 'loss', 'content': 0.03878886252641678, 'timestamp': '2025-09-10 02:50:48.036934', 'step': 21056, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 128], 'flops': 3797092544000}, 'timestamp': '2025-09-10 02:50:48.068128', 'step': 21056, 'epoch': 3} {'type': 'loss', 'content': 0.11575447022914886, 'timestamp': '2025-09-10 02:50:48.070326', 'step': 21057, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 2847885091968}, 'timestamp': '2025-09-10 02:50:48.100341', 'step': 21057, 'epoch': 3} {'type': 'loss', 'content': 0.16611219942569733, 'timestamp': '2025-09-10 02:50:48.102826', 'step': 21058, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 3322488817984}, 'timestamp': '2025-09-10 02:50:48.136740', 'step': 21058, 'epoch': 3} {'type': 'loss', 'content': 0.0825347900390625, 'timestamp': '2025-09-10 02:50:48.139294', 'step': 21059, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 2847885091968}, 'timestamp': '2025-09-10 02:50:48.170194', 'step': 21059, 'epoch': 3} {'type': 'loss', 'content': 0.042327333241701126, 'timestamp': '2025-09-10 02:50:48.194062', 'step': 21060, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 3322488817984}, 'timestamp': '2025-09-10 02:50:48.223825', 'step': 21060, 'epoch': 3} {'type': 'loss', 'content': 0.044084060937166214, 'timestamp': '2025-09-10 02:50:48.226574', 'step': 21061, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 3322488817984}, 'timestamp': '2025-09-10 02:50:48.256694', 'step': 21061, 'epoch': 3} {'type': 'loss', 'content': 0.06658533215522766, 'timestamp': '2025-09-10 02:50:48.259365', 'step': 21062, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 2847885091968}, 'timestamp': '2025-09-10 02:50:48.289325', 'step': 21062, 'epoch': 3} {'type': 'loss', 'content': 0.05489187687635422, 'timestamp': '2025-09-10 02:50:48.293090', 'step': 21063, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 3322488817984}, 'timestamp': '2025-09-10 02:50:48.325387', 'step': 21063, 'epoch': 3} {'type': 'loss', 'content': 0.06018616259098053, 'timestamp': '2025-09-10 02:50:48.350954', 'step': 21064, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 3322488817984}, 'timestamp': '2025-09-10 02:50:48.382368', 'step': 21064, 'epoch': 3} {'type': 'loss', 'content': 0.15810729563236237, 'timestamp': '2025-09-10 02:50:48.385447', 'step': 21065, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 2847885091968}, 'timestamp': '2025-09-10 02:50:48.418986', 'step': 21065, 'epoch': 3} {'type': 'loss', 'content': 0.05322624742984772, 'timestamp': '2025-09-10 02:50:48.422019', 'step': 21066, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 2847885091968}, 'timestamp': '2025-09-10 02:50:48.454483', 'step': 21066, 'epoch': 3} {'type': 'loss', 'content': 0.08457659929990768, 'timestamp': '2025-09-10 02:50:48.457859', 'step': 21067, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 3322488817984}, 'timestamp': '2025-09-10 02:50:48.490478', 'step': 21067, 'epoch': 3} {'type': 'loss', 'content': 0.07313970476388931, 'timestamp': '2025-09-10 02:50:48.514331', 'step': 21068, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 2847885091968}, 'timestamp': '2025-09-10 02:50:48.545558', 'step': 21068, 'epoch': 3} {'type': 'loss', 'content': 0.09722196310758591, 'timestamp': '2025-09-10 02:50:48.553244', 'step': 21069, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 128], 'flops': 3797092544000}, 'timestamp': '2025-09-10 02:50:48.595913', 'step': 21069, 'epoch': 3} {'type': 'loss', 'content': 0.07562390714883804, 'timestamp': '2025-09-10 02:50:48.598315', 'step': 21070, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 3322488817984}, 'timestamp': '2025-09-10 02:50:48.628310', 'step': 21070, 'epoch': 3} {'type': 'loss', 'content': 0.04769325628876686, 'timestamp': '2025-09-10 02:50:48.630505', 'step': 21071, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 128], 'flops': 3797092544000}, 'timestamp': '2025-09-10 02:50:48.660186', 'step': 21071, 'epoch': 3} {'type': 'loss', 'content': 0.03297731280326843, 'timestamp': '2025-09-10 02:50:48.684302', 'step': 21072, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 2847885091968}, 'timestamp': '2025-09-10 02:50:48.713828', 'step': 21072, 'epoch': 3} {'type': 'loss', 'content': 0.09249208867549896, 'timestamp': '2025-09-10 02:50:48.716480', 'step': 21073, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 128], 'flops': 3797092544000}, 'timestamp': '2025-09-10 02:50:48.754387', 'step': 21073, 'epoch': 3} {'type': 'loss', 'content': 0.04230824485421181, 'timestamp': '2025-09-10 02:50:48.756679', 'step': 21074, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 2847885091968}, 'timestamp': '2025-09-10 02:50:48.787089', 'step': 21074, 'epoch': 3} {'type': 'loss', 'content': 0.07164844870567322, 'timestamp': '2025-09-10 02:50:48.789429', 'step': 21075, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 3322488817984}, 'timestamp': '2025-09-10 02:50:48.820070', 'step': 21075, 'epoch': 3} {'type': 'loss', 'content': 0.13517269492149353, 'timestamp': '2025-09-10 02:50:48.843587', 'step': 21076, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 80], 'flops': 2373281365952}, 'timestamp': '2025-09-10 02:50:48.873764', 'step': 21076, 'epoch': 3} {'type': 'loss', 'content': 0.07676887512207031, 'timestamp': '2025-09-10 02:50:48.876535', 'step': 21077, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 128], 'flops': 3797092544000}, 'timestamp': '2025-09-10 02:50:48.906737', 'step': 21077, 'epoch': 3} {'type': 'loss', 'content': 0.1185707300901413, 'timestamp': '2025-09-10 02:50:48.909357', 'step': 21078, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 2847885091968}, 'timestamp': '2025-09-10 02:50:48.939409', 'step': 21078, 'epoch': 3} {'type': 'loss', 'content': 0.04256696626543999, 'timestamp': '2025-09-10 02:50:48.941913', 'step': 21079, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 3322488817984}, 'timestamp': '2025-09-10 02:50:48.972921', 'step': 21079, 'epoch': 3} {'type': 'loss', 'content': 0.045267265290021896, 'timestamp': '2025-09-10 02:50:48.996726', 'step': 21080, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 3322488817984}, 'timestamp': '2025-09-10 02:50:49.027473', 'step': 21080, 'epoch': 3} {'type': 'loss', 'content': 0.0582512691617012, 'timestamp': '2025-09-10 02:50:49.030210', 'step': 21081, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 3322488817984}, 'timestamp': '2025-09-10 02:50:49.061049', 'step': 21081, 'epoch': 3} {'type': 'loss', 'content': 0.08950026333332062, 'timestamp': '2025-09-10 02:50:49.064378', 'step': 21082, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 2847885091968}, 'timestamp': '2025-09-10 02:50:49.094786', 'step': 21082, 'epoch': 3} {'type': 'loss', 'content': 0.05063324794173241, 'timestamp': '2025-09-10 02:50:49.097349', 'step': 21083, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 3322488817984}, 'timestamp': '2025-09-10 02:50:49.128393', 'step': 21083, 'epoch': 3} {'type': 'loss', 'content': 0.08135402202606201, 'timestamp': '2025-09-10 02:50:49.152643', 'step': 21084, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 3322488817984}, 'timestamp': '2025-09-10 02:50:49.184531', 'step': 21084, 'epoch': 3} {'type': 'loss', 'content': 0.03690478578209877, 'timestamp': '2025-09-10 02:50:49.186886', 'step': 21085, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 2847885091968}, 'timestamp': '2025-09-10 02:50:49.216490', 'step': 21085, 'epoch': 3} {'type': 'loss', 'content': 0.041405849158763885, 'timestamp': '2025-09-10 02:50:49.219286', 'step': 21086, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 2847885091968}, 'timestamp': '2025-09-10 02:50:49.249816', 'step': 21086, 'epoch': 3} {'type': 'loss', 'content': 0.07985101640224457, 'timestamp': '2025-09-10 02:50:49.252223', 'step': 21087, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 128], 'flops': 3797092544000}, 'timestamp': '2025-09-10 02:50:49.282265', 'step': 21087, 'epoch': 3} {'type': 'loss', 'content': 0.03490789607167244, 'timestamp': '2025-09-10 02:50:49.305598', 'step': 21088, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 3322488817984}, 'timestamp': '2025-09-10 02:50:49.337329', 'step': 21088, 'epoch': 3} {'type': 'loss', 'content': 0.05811726301908493, 'timestamp': '2025-09-10 02:50:49.340173', 'step': 21089, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 3322488817984}, 'timestamp': '2025-09-10 02:50:49.372062', 'step': 21089, 'epoch': 3} {'type': 'loss', 'content': 0.05752906948328018, 'timestamp': '2025-09-10 02:50:49.374635', 'step': 21090, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 128], 'flops': 3797092544000}, 'timestamp': '2025-09-10 02:50:49.405962', 'step': 21090, 'epoch': 3} {'type': 'loss', 'content': 0.06931457668542862, 'timestamp': '2025-09-10 02:50:49.408691', 'step': 21091, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 3322488817984}, 'timestamp': '2025-09-10 02:50:49.439436', 'step': 21091, 'epoch': 3} {'type': 'loss', 'content': 0.058050643652677536, 'timestamp': '2025-09-10 02:50:49.463011', 'step': 21092, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 3322488817984}, 'timestamp': '2025-09-10 02:50:49.493520', 'step': 21092, 'epoch': 3} {'type': 'loss', 'content': 0.012880577705800533, 'timestamp': '2025-09-10 02:50:49.496009', 'step': 21093, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 2847885091968}, 'timestamp': '2025-09-10 02:50:49.526520', 'step': 21093, 'epoch': 3} {'type': 'loss', 'content': 0.1204744204878807, 'timestamp': '2025-09-10 02:50:49.529184', 'step': 21094, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 2847885091968}, 'timestamp': '2025-09-10 02:50:49.561773', 'step': 21094, 'epoch': 3} {'type': 'loss', 'content': 0.05414128303527832, 'timestamp': '2025-09-10 02:50:49.567151', 'step': 21095, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 3322488817984}, 'timestamp': '2025-09-10 02:50:49.598389', 'step': 21095, 'epoch': 3} {'type': 'loss', 'content': 0.056854620575904846, 'timestamp': '2025-09-10 02:50:49.622351', 'step': 21096, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 2847885091968}, 'timestamp': '2025-09-10 02:50:49.652785', 'step': 21096, 'epoch': 3} {'type': 'loss', 'content': 0.07519125193357468, 'timestamp': '2025-09-10 02:50:49.655580', 'step': 21097, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 3322488817984}, 'timestamp': '2025-09-10 02:50:49.686383', 'step': 21097, 'epoch': 3} {'type': 'loss', 'content': 0.08916069567203522, 'timestamp': '2025-09-10 02:50:49.688641', 'step': 21098, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 2847885091968}, 'timestamp': '2025-09-10 02:50:49.718329', 'step': 21098, 'epoch': 3} {'type': 'loss', 'content': 0.02936464548110962, 'timestamp': '2025-09-10 02:50:49.720985', 'step': 21099, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 2847885091968}, 'timestamp': '2025-09-10 02:50:49.753153', 'step': 21099, 'epoch': 3} {'type': 'loss', 'content': 0.10234873741865158, 'timestamp': '2025-09-10 02:50:49.776604', 'step': 21100, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 128], 'flops': 3797092544000}, 'timestamp': '2025-09-10 02:50:49.807675', 'step': 21100, 'epoch': 3} {'type': 'loss', 'content': 0.02598121017217636, 'timestamp': '2025-09-10 02:50:49.810248', 'step': 21101, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 2847885091968}, 'timestamp': '2025-09-10 02:50:49.840635', 'step': 21101, 'epoch': 3} {'type': 'loss', 'content': 0.06652998924255371, 'timestamp': '2025-09-10 02:50:49.843414', 'step': 21102, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 3322488817984}, 'timestamp': '2025-09-10 02:50:49.873603', 'step': 21102, 'epoch': 3} {'type': 'loss', 'content': 0.12955862283706665, 'timestamp': '2025-09-10 02:50:49.875877', 'step': 21103, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 3322488817984}, 'timestamp': '2025-09-10 02:50:49.906939', 'step': 21103, 'epoch': 3} {'type': 'loss', 'content': 0.07056993991136551, 'timestamp': '2025-09-10 02:50:49.932910', 'step': 21104, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 128], 'flops': 3797092544000}, 'timestamp': '2025-09-10 02:50:49.964519', 'step': 21104, 'epoch': 3} {'type': 'loss', 'content': 0.06795459240674973, 'timestamp': '2025-09-10 02:50:49.967309', 'step': 21105, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 160], 'flops': 4746299996032}, 'timestamp': '2025-09-10 02:50:50.000788', 'step': 21105, 'epoch': 3} {'type': 'loss', 'content': 0.04046366363763809, 'timestamp': '2025-09-10 02:50:50.003507', 'step': 21106, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 3322488817984}, 'timestamp': '2025-09-10 02:50:50.038652', 'step': 21106, 'epoch': 3} {'type': 'loss', 'content': 0.07155591249465942, 'timestamp': '2025-09-10 02:50:50.041471', 'step': 21107, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 3322488817984}, 'timestamp': '2025-09-10 02:50:50.072286', 'step': 21107, 'epoch': 3} {'type': 'loss', 'content': 0.06595100462436676, 'timestamp': '2025-09-10 02:50:50.096176', 'step': 21108, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 128], 'flops': 3797092544000}, 'timestamp': '2025-09-10 02:50:50.126766', 'step': 21108, 'epoch': 3} {'type': 'loss', 'content': 0.025942238047719002, 'timestamp': '2025-09-10 02:50:50.129668', 'step': 21109, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 2847885091968}, 'timestamp': '2025-09-10 02:50:50.161132', 'step': 21109, 'epoch': 3} {'type': 'loss', 'content': 0.14994999766349792, 'timestamp': '2025-09-10 02:50:50.163475', 'step': 21110, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 2847885091968}, 'timestamp': '2025-09-10 02:50:50.193913', 'step': 21110, 'epoch': 3} {'type': 'loss', 'content': 0.046591103076934814, 'timestamp': '2025-09-10 02:50:50.197251', 'step': 21111, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 144], 'flops': 4271696270016}, 'timestamp': '2025-09-10 02:50:50.229324', 'step': 21111, 'epoch': 3} {'type': 'loss', 'content': 0.1030743420124054, 'timestamp': '2025-09-10 02:50:50.252995', 'step': 21112, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 144], 'flops': 4271696270016}, 'timestamp': '2025-09-10 02:50:50.283538', 'step': 21112, 'epoch': 3} {'type': 'loss', 'content': 0.09180648624897003, 'timestamp': '2025-09-10 02:50:50.285941', 'step': 21113, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 3322488817984}, 'timestamp': '2025-09-10 02:50:50.315333', 'step': 21113, 'epoch': 3} {'type': 'loss', 'content': 0.15097355842590332, 'timestamp': '2025-09-10 02:50:50.317914', 'step': 21114, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 80], 'flops': 2373281365952}, 'timestamp': '2025-09-10 02:50:50.348628', 'step': 21114, 'epoch': 3} {'type': 'loss', 'content': 0.05902897194027901, 'timestamp': '2025-09-10 02:50:50.351755', 'step': 21115, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 3322488817984}, 'timestamp': '2025-09-10 02:50:50.383044', 'step': 21115, 'epoch': 3} {'type': 'loss', 'content': 0.04892631620168686, 'timestamp': '2025-09-10 02:50:50.407155', 'step': 21116, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 3322488817984}, 'timestamp': '2025-09-10 02:50:50.438519', 'step': 21116, 'epoch': 3} {'type': 'loss', 'content': 0.0778140053153038, 'timestamp': '2025-09-10 02:50:50.440906', 'step': 21117, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 80], 'flops': 2373281365952}, 'timestamp': '2025-09-10 02:50:50.472584', 'step': 21117, 'epoch': 3} {'type': 'loss', 'content': 0.03150792047381401, 'timestamp': '2025-09-10 02:50:50.475239', 'step': 21118, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 3322488817984}, 'timestamp': '2025-09-10 02:50:50.506353', 'step': 21118, 'epoch': 3} {'type': 'loss', 'content': 0.058762114495038986, 'timestamp': '2025-09-10 02:50:50.508977', 'step': 21119, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 128], 'flops': 3797092544000}, 'timestamp': '2025-09-10 02:50:50.539707', 'step': 21119, 'epoch': 3} {'type': 'loss', 'content': 0.025882095098495483, 'timestamp': '2025-09-10 02:50:50.563580', 'step': 21120, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 3322488817984}, 'timestamp': '2025-09-10 02:50:50.603676', 'step': 21120, 'epoch': 3} {'type': 'loss', 'content': 0.12844853103160858, 'timestamp': '2025-09-10 02:50:50.605986', 'step': 21121, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 128], 'flops': 3797092544000}, 'timestamp': '2025-09-10 02:50:50.636320', 'step': 21121, 'epoch': 3} {'type': 'loss', 'content': 0.06502541899681091, 'timestamp': '2025-09-10 02:50:50.638849', 'step': 21122, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 3322488817984}, 'timestamp': '2025-09-10 02:50:50.669471', 'step': 21122, 'epoch': 3} {'type': 'loss', 'content': 0.05552733317017555, 'timestamp': '2025-09-10 02:50:50.671876', 'step': 21123, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 2847885091968}, 'timestamp': '2025-09-10 02:50:50.702943', 'step': 21123, 'epoch': 3} {'type': 'loss', 'content': 0.10214601457118988, 'timestamp': '2025-09-10 02:50:50.726899', 'step': 21124, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 2847885091968}, 'timestamp': '2025-09-10 02:50:50.759351', 'step': 21124, 'epoch': 3} {'type': 'loss', 'content': 0.04767303541302681, 'timestamp': '2025-09-10 02:50:50.762020', 'step': 21125, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 128], 'flops': 3797092544000}, 'timestamp': '2025-09-10 02:50:50.792845', 'step': 21125, 'epoch': 3} {'type': 'loss', 'content': 0.20070545375347137, 'timestamp': '2025-09-10 02:50:50.794918', 'step': 21126, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 3322488817984}, 'timestamp': '2025-09-10 02:50:50.825709', 'step': 21126, 'epoch': 3} {'type': 'loss', 'content': 0.061901506036520004, 'timestamp': '2025-09-10 02:50:50.828094', 'step': 21127, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 80], 'flops': 2373281365952}, 'timestamp': '2025-09-10 02:50:50.858292', 'step': 21127, 'epoch': 3} {'type': 'loss', 'content': 0.048611901700496674, 'timestamp': '2025-09-10 02:50:50.882167', 'step': 21128, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 128], 'flops': 3797092544000}, 'timestamp': '2025-09-10 02:50:50.912681', 'step': 21128, 'epoch': 3} {'type': 'loss', 'content': 0.09486307203769684, 'timestamp': '2025-09-10 02:50:50.916122', 'step': 21129, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 144], 'flops': 4271696270016}, 'timestamp': '2025-09-10 02:50:50.946919', 'step': 21129, 'epoch': 3} {'type': 'loss', 'content': 0.11219312995672226, 'timestamp': '2025-09-10 02:50:50.949443', 'step': 21130, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 3322488817984}, 'timestamp': '2025-09-10 02:50:50.982122', 'step': 21130, 'epoch': 3} {'type': 'loss', 'content': 0.1009989082813263, 'timestamp': '2025-09-10 02:50:50.984779', 'step': 21131, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 128], 'flops': 3797092544000}, 'timestamp': '2025-09-10 02:50:51.015188', 'step': 21131, 'epoch': 3} {'type': 'loss', 'content': 0.054672304540872574, 'timestamp': '2025-09-10 02:50:51.038983', 'step': 21132, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 3322488817984}, 'timestamp': '2025-09-10 02:50:51.069978', 'step': 21132, 'epoch': 3} {'type': 'loss', 'content': 0.10542744398117065, 'timestamp': '2025-09-10 02:50:51.072571', 'step': 21133, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 144], 'flops': 4271696270016}, 'timestamp': '2025-09-10 02:50:51.103745', 'step': 21133, 'epoch': 3} {'type': 'loss', 'content': 0.0737602487206459, 'timestamp': '2025-09-10 02:50:51.106027', 'step': 21134, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 3322488817984}, 'timestamp': '2025-09-10 02:50:51.137524', 'step': 21134, 'epoch': 3} {'type': 'loss', 'content': 0.03323916345834732, 'timestamp': '2025-09-10 02:50:51.139866', 'step': 21135, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 2847885091968}, 'timestamp': '2025-09-10 02:50:51.170187', 'step': 21135, 'epoch': 3} {'type': 'loss', 'content': 0.03329968824982643, 'timestamp': '2025-09-10 02:50:51.193701', 'step': 21136, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 3322488817984}, 'timestamp': '2025-09-10 02:50:51.225498', 'step': 21136, 'epoch': 3} {'type': 'loss', 'content': 0.05249398574233055, 'timestamp': '2025-09-10 02:50:51.228154', 'step': 21137, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 128], 'flops': 3797092544000}, 'timestamp': '2025-09-10 02:50:51.258018', 'step': 21137, 'epoch': 3} {'type': 'loss', 'content': 0.10480881482362747, 'timestamp': '2025-09-10 02:50:51.260654', 'step': 21138, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 160], 'flops': 4746299996032}, 'timestamp': '2025-09-10 02:50:51.291223', 'step': 21138, 'epoch': 3} {'type': 'loss', 'content': 0.08369117230176926, 'timestamp': '2025-09-10 02:50:51.294085', 'step': 21139, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 160], 'flops': 4746299996032}, 'timestamp': '2025-09-10 02:50:51.324205', 'step': 21139, 'epoch': 3} {'type': 'loss', 'content': 0.06550193578004837, 'timestamp': '2025-09-10 02:50:51.347905', 'step': 21140, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 3322488817984}, 'timestamp': '2025-09-10 02:50:51.381725', 'step': 21140, 'epoch': 3} {'type': 'loss', 'content': 0.050702694803476334, 'timestamp': '2025-09-10 02:50:51.384889', 'step': 21141, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 80], 'flops': 2373281365952}, 'timestamp': '2025-09-10 02:50:51.415966', 'step': 21141, 'epoch': 3} {'type': 'loss', 'content': 0.05018943175673485, 'timestamp': '2025-09-10 02:50:51.418427', 'step': 21142, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 3322488817984}, 'timestamp': '2025-09-10 02:50:51.448841', 'step': 21142, 'epoch': 3} {'type': 'loss', 'content': 0.031106160953640938, 'timestamp': '2025-09-10 02:50:51.451553', 'step': 21143, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 3322488817984}, 'timestamp': '2025-09-10 02:50:51.483524', 'step': 21143, 'epoch': 3} {'type': 'loss', 'content': 0.04822712019085884, 'timestamp': '2025-09-10 02:50:51.509960', 'step': 21144, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 2847885091968}, 'timestamp': '2025-09-10 02:50:51.545289', 'step': 21144, 'epoch': 3} {'type': 'loss', 'content': 0.015547187067568302, 'timestamp': '2025-09-10 02:50:51.550924', 'step': 21145, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 2847885091968}, 'timestamp': '2025-09-10 02:50:51.590947', 'step': 21145, 'epoch': 3} {'type': 'loss', 'content': 0.029034771025180817, 'timestamp': '2025-09-10 02:50:51.597065', 'step': 21146, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 3322488817984}, 'timestamp': '2025-09-10 02:50:51.635259', 'step': 21146, 'epoch': 3} {'type': 'loss', 'content': 0.09488711506128311, 'timestamp': '2025-09-10 02:50:51.639913', 'step': 21147, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 2847885091968}, 'timestamp': '2025-09-10 02:50:51.680912', 'step': 21147, 'epoch': 3} {'type': 'loss', 'content': 0.08808360993862152, 'timestamp': '2025-09-10 02:50:51.723633', 'step': 21148, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 3322488817984}, 'timestamp': '2025-09-10 02:50:51.809127', 'step': 21148, 'epoch': 3} {'type': 'loss', 'content': 0.03995150700211525, 'timestamp': '2025-09-10 02:50:51.829166', 'step': 21149, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 3322488817984}, 'timestamp': '2025-09-10 02:50:51.909769', 'step': 21149, 'epoch': 3} {'type': 'loss', 'content': 0.07299385964870453, 'timestamp': '2025-09-10 02:50:51.916720', 'step': 21150, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 128], 'flops': 3797092544000}, 'timestamp': '2025-09-10 02:50:51.976548', 'step': 21150, 'epoch': 3} {'type': 'loss', 'content': 0.03462282195687294, 'timestamp': '2025-09-10 02:50:51.995859', 'step': 21151, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 2847885091968}, 'timestamp': '2025-09-10 02:50:52.079126', 'step': 21151, 'epoch': 3} {'type': 'loss', 'content': 0.13019339740276337, 'timestamp': '2025-09-10 02:50:52.104199', 'step': 21152, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 3322488817984}, 'timestamp': '2025-09-10 02:50:52.196916', 'step': 21152, 'epoch': 3} {'type': 'loss', 'content': 0.04313105717301369, 'timestamp': '2025-09-10 02:50:52.203451', 'step': 21153, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 3322488817984}, 'timestamp': '2025-09-10 02:50:52.258792', 'step': 21153, 'epoch': 3} {'type': 'loss', 'content': 0.06928940862417221, 'timestamp': '2025-09-10 02:50:52.277792', 'step': 21154, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 144], 'flops': 4271696270016}, 'timestamp': '2025-09-10 02:50:52.363630', 'step': 21154, 'epoch': 3} {'type': 'loss', 'content': 0.05763823911547661, 'timestamp': '2025-09-10 02:50:52.366777', 'step': 21155, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 3322488817984}, 'timestamp': '2025-09-10 02:50:52.415625', 'step': 21155, 'epoch': 3} {'type': 'loss', 'content': 0.026230989024043083, 'timestamp': '2025-09-10 02:50:52.443137', 'step': 21156, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 2847885091968}, 'timestamp': '2025-09-10 02:50:52.478643', 'step': 21156, 'epoch': 3} {'type': 'loss', 'content': 0.08395808190107346, 'timestamp': '2025-09-10 02:50:52.500428', 'step': 21157, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 144], 'flops': 4271696270016}, 'timestamp': '2025-09-10 02:50:52.552430', 'step': 21157, 'epoch': 3} {'type': 'loss', 'content': 0.05668999254703522, 'timestamp': '2025-09-10 02:50:52.572267', 'step': 21158, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 3322488817984}, 'timestamp': '2025-09-10 02:50:52.653911', 'step': 21158, 'epoch': 3} {'type': 'loss', 'content': 0.068165123462677, 'timestamp': '2025-09-10 02:50:52.656620', 'step': 21159, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 2847885091968}, 'timestamp': '2025-09-10 02:50:52.703512', 'step': 21159, 'epoch': 3} {'type': 'loss', 'content': 0.09588611125946045, 'timestamp': '2025-09-10 02:50:52.743783', 'step': 21160, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 144], 'flops': 4271696270016}, 'timestamp': '2025-09-10 02:50:52.831189', 'step': 21160, 'epoch': 3} {'type': 'loss', 'content': 0.048944711685180664, 'timestamp': '2025-09-10 02:50:52.851064', 'step': 21161, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 2847885091968}, 'timestamp': '2025-09-10 02:50:52.933674', 'step': 21161, 'epoch': 3} {'type': 'loss', 'content': 0.14207957684993744, 'timestamp': '2025-09-10 02:50:52.947209', 'step': 21162, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 128], 'flops': 3797092544000}, 'timestamp': '2025-09-10 02:50:52.989951', 'step': 21162, 'epoch': 3} {'type': 'loss', 'content': 0.06848740577697754, 'timestamp': '2025-09-10 02:50:52.991997', 'step': 21163, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 144], 'flops': 4271696270016}, 'timestamp': '2025-09-10 02:50:53.023938', 'step': 21163, 'epoch': 3} {'type': 'loss', 'content': 0.04962877184152603, 'timestamp': '2025-09-10 02:50:53.047782', 'step': 21164, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 2847885091968}, 'timestamp': '2025-09-10 02:50:53.085736', 'step': 21164, 'epoch': 3} {'type': 'loss', 'content': 0.02520603872835636, 'timestamp': '2025-09-10 02:50:53.091647', 'step': 21165, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 144], 'flops': 4271696270016}, 'timestamp': '2025-09-10 02:50:53.133480', 'step': 21165, 'epoch': 3} {'type': 'loss', 'content': 0.04652230069041252, 'timestamp': '2025-09-10 02:50:53.139198', 'step': 21166, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 144], 'flops': 4271696270016}, 'timestamp': '2025-09-10 02:50:53.180524', 'step': 21166, 'epoch': 3} {'type': 'loss', 'content': 0.05714750662446022, 'timestamp': '2025-09-10 02:50:53.185963', 'step': 21167, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 3322488817984}, 'timestamp': '2025-09-10 02:50:53.226970', 'step': 21167, 'epoch': 3} {'type': 'loss', 'content': 0.022127782925963402, 'timestamp': '2025-09-10 02:50:53.253098', 'step': 21168, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 128], 'flops': 3797092544000}, 'timestamp': '2025-09-10 02:50:53.286053', 'step': 21168, 'epoch': 3} {'type': 'loss', 'content': 0.0840330645442009, 'timestamp': '2025-09-10 02:50:53.288737', 'step': 21169, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 3322488817984}, 'timestamp': '2025-09-10 02:50:53.320784', 'step': 21169, 'epoch': 3} {'type': 'loss', 'content': 0.09281253069639206, 'timestamp': '2025-09-10 02:50:53.324508', 'step': 21170, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 3322488817984}, 'timestamp': '2025-09-10 02:50:53.355919', 'step': 21170, 'epoch': 3} {'type': 'loss', 'content': 0.03790145367383957, 'timestamp': '2025-09-10 02:50:53.358384', 'step': 21171, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 128], 'flops': 3797092544000}, 'timestamp': '2025-09-10 02:50:53.391690', 'step': 21171, 'epoch': 3} {'type': 'loss', 'content': 0.04784673452377319, 'timestamp': '2025-09-10 02:50:53.417577', 'step': 21172, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 2847885091968}, 'timestamp': '2025-09-10 02:50:53.450163', 'step': 21172, 'epoch': 3} {'type': 'loss', 'content': 0.03576301783323288, 'timestamp': '2025-09-10 02:50:53.452542', 'step': 21173, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 3322488817984}, 'timestamp': '2025-09-10 02:50:53.485206', 'step': 21173, 'epoch': 3} {'type': 'loss', 'content': 0.14860378205776215, 'timestamp': '2025-09-10 02:50:53.488290', 'step': 21174, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 2847885091968}, 'timestamp': '2025-09-10 02:50:53.519564', 'step': 21174, 'epoch': 3} {'type': 'loss', 'content': 0.03607549890875816, 'timestamp': '2025-09-10 02:50:53.522234', 'step': 21175, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 176], 'flops': 5220903722048}, 'timestamp': '2025-09-10 02:50:53.553457', 'step': 21175, 'epoch': 3} {'type': 'loss', 'content': 0.03996986895799637, 'timestamp': '2025-09-10 02:50:53.578416', 'step': 21176, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 128], 'flops': 3797092544000}, 'timestamp': '2025-09-10 02:50:53.632329', 'step': 21176, 'epoch': 3} {'type': 'loss', 'content': 0.05570419877767563, 'timestamp': '2025-09-10 02:50:53.634909', 'step': 21177, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 128], 'flops': 3797092544000}, 'timestamp': '2025-09-10 02:50:53.665638', 'step': 21177, 'epoch': 3} {'type': 'loss', 'content': 0.07501810044050217, 'timestamp': '2025-09-10 02:50:53.668543', 'step': 21178, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 176], 'flops': 5220903722048}, 'timestamp': '2025-09-10 02:50:53.700029', 'step': 21178, 'epoch': 3} {'type': 'loss', 'content': 0.09865469485521317, 'timestamp': '2025-09-10 02:50:53.704504', 'step': 21179, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 3322488817984}, 'timestamp': '2025-09-10 02:50:53.738083', 'step': 21179, 'epoch': 3} {'type': 'loss', 'content': 0.0915127545595169, 'timestamp': '2025-09-10 02:50:53.763932', 'step': 21180, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 3322488817984}, 'timestamp': '2025-09-10 02:50:53.796126', 'step': 21180, 'epoch': 3} {'type': 'loss', 'content': 0.020841678604483604, 'timestamp': '2025-09-10 02:50:53.798903', 'step': 21181, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 3322488817984}, 'timestamp': '2025-09-10 02:50:53.831930', 'step': 21181, 'epoch': 3} {'type': 'loss', 'content': 0.025932276621460915, 'timestamp': '2025-09-10 02:50:53.835533', 'step': 21182, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 2847885091968}, 'timestamp': '2025-09-10 02:50:53.866684', 'step': 21182, 'epoch': 3} {'type': 'loss', 'content': 0.0352223664522171, 'timestamp': '2025-09-10 02:50:53.869543', 'step': 21183, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 128], 'flops': 3797092544000}, 'timestamp': '2025-09-10 02:50:53.900383', 'step': 21183, 'epoch': 3} {'type': 'loss', 'content': 0.09271612018346786, 'timestamp': '2025-09-10 02:50:53.925411', 'step': 21184, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 3322488817984}, 'timestamp': '2025-09-10 02:50:53.960840', 'step': 21184, 'epoch': 3} {'type': 'loss', 'content': 0.052383940666913986, 'timestamp': '2025-09-10 02:50:53.963550', 'step': 21185, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 2847885091968}, 'timestamp': '2025-09-10 02:50:53.994590', 'step': 21185, 'epoch': 3} {'type': 'loss', 'content': 0.11205419898033142, 'timestamp': '2025-09-10 02:50:53.997006', 'step': 21186, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 3322488817984}, 'timestamp': '2025-09-10 02:50:54.027952', 'step': 21186, 'epoch': 3} {'type': 'loss', 'content': 0.09220245480537415, 'timestamp': '2025-09-10 02:50:54.030646', 'step': 21187, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 3322488817984}, 'timestamp': '2025-09-10 02:50:54.061606', 'step': 21187, 'epoch': 3} {'type': 'loss', 'content': 0.0519004724919796, 'timestamp': '2025-09-10 02:50:54.085313', 'step': 21188, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 3322488817984}, 'timestamp': '2025-09-10 02:50:54.115635', 'step': 21188, 'epoch': 3} {'type': 'loss', 'content': 0.09760402143001556, 'timestamp': '2025-09-10 02:50:54.118329', 'step': 21189, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 144], 'flops': 4271696270016}, 'timestamp': '2025-09-10 02:50:54.152049', 'step': 21189, 'epoch': 3} {'type': 'loss', 'content': 0.06130913272500038, 'timestamp': '2025-09-10 02:50:54.157531', 'step': 21190, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 144], 'flops': 4271696270016}, 'timestamp': '2025-09-10 02:50:54.188426', 'step': 21190, 'epoch': 3} {'type': 'loss', 'content': 0.056463148444890976, 'timestamp': '2025-09-10 02:50:54.190902', 'step': 21191, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 3322488817984}, 'timestamp': '2025-09-10 02:50:54.220403', 'step': 21191, 'epoch': 3} {'type': 'loss', 'content': 0.06053371727466583, 'timestamp': '2025-09-10 02:50:54.244998', 'step': 21192, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 2847885091968}, 'timestamp': '2025-09-10 02:50:54.277110', 'step': 21192, 'epoch': 3} {'type': 'loss', 'content': 0.040198080241680145, 'timestamp': '2025-09-10 02:50:54.281579', 'step': 21193, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 144], 'flops': 4271696270016}, 'timestamp': '2025-09-10 02:50:54.313540', 'step': 21193, 'epoch': 3} {'type': 'loss', 'content': 0.11146550625562668, 'timestamp': '2025-09-10 02:50:54.316186', 'step': 21194, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 128], 'flops': 3797092544000}, 'timestamp': '2025-09-10 02:50:54.346302', 'step': 21194, 'epoch': 3} {'type': 'loss', 'content': 0.05754856392741203, 'timestamp': '2025-09-10 02:50:54.352947', 'step': 21195, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 3322488817984}, 'timestamp': '2025-09-10 02:50:54.386720', 'step': 21195, 'epoch': 3} {'type': 'loss', 'content': 0.15671585500240326, 'timestamp': '2025-09-10 02:50:54.410690', 'step': 21196, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 2847885091968}, 'timestamp': '2025-09-10 02:50:54.441130', 'step': 21196, 'epoch': 3} {'type': 'loss', 'content': 0.03038012981414795, 'timestamp': '2025-09-10 02:50:54.447932', 'step': 21197, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 3322488817984}, 'timestamp': '2025-09-10 02:50:54.483539', 'step': 21197, 'epoch': 3} {'type': 'loss', 'content': 0.10184502601623535, 'timestamp': '2025-09-10 02:50:54.485990', 'step': 21198, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 2847885091968}, 'timestamp': '2025-09-10 02:50:54.518403', 'step': 21198, 'epoch': 3} {'type': 'loss', 'content': 0.08592940121889114, 'timestamp': '2025-09-10 02:50:54.521480', 'step': 21199, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 160], 'flops': 4746299996032}, 'timestamp': '2025-09-10 02:50:54.562493', 'step': 21199, 'epoch': 3} {'type': 'loss', 'content': 0.05520893260836601, 'timestamp': '2025-09-10 02:50:54.586652', 'step': 21200, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 2847885091968}, 'timestamp': '2025-09-10 02:50:54.620621', 'step': 21200, 'epoch': 3} {'type': 'loss', 'content': 0.08232130110263824, 'timestamp': '2025-09-10 02:50:54.624443', 'step': 21201, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 3322488817984}, 'timestamp': '2025-09-10 02:50:54.654801', 'step': 21201, 'epoch': 3} {'type': 'loss', 'content': 0.02603227086365223, 'timestamp': '2025-09-10 02:50:54.658082', 'step': 21202, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 2847885091968}, 'timestamp': '2025-09-10 02:50:54.688940', 'step': 21202, 'epoch': 3} {'type': 'loss', 'content': 0.11338286101818085, 'timestamp': '2025-09-10 02:50:54.691766', 'step': 21203, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 128], 'flops': 3797092544000}, 'timestamp': '2025-09-10 02:50:54.722031', 'step': 21203, 'epoch': 3} {'type': 'loss', 'content': 0.05766213312745094, 'timestamp': '2025-09-10 02:50:54.746046', 'step': 21204, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 2847885091968}, 'timestamp': '2025-09-10 02:50:54.778961', 'step': 21204, 'epoch': 3} {'type': 'loss', 'content': 0.09536609053611755, 'timestamp': '2025-09-10 02:50:54.781476', 'step': 21205, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 2847885091968}, 'timestamp': '2025-09-10 02:50:54.817336', 'step': 21205, 'epoch': 3} {'type': 'loss', 'content': 0.041362375020980835, 'timestamp': '2025-09-10 02:50:54.823310', 'step': 21206, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 3322488817984}, 'timestamp': '2025-09-10 02:50:54.860585', 'step': 21206, 'epoch': 3} {'type': 'loss', 'content': 0.06372889131307602, 'timestamp': '2025-09-10 02:50:54.863108', 'step': 21207, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 144], 'flops': 4271696270016}, 'timestamp': '2025-09-10 02:50:54.893494', 'step': 21207, 'epoch': 3} {'type': 'loss', 'content': 0.04103371500968933, 'timestamp': '2025-09-10 02:50:54.916795', 'step': 21208, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 3322488817984}, 'timestamp': '2025-09-10 02:50:54.947371', 'step': 21208, 'epoch': 3} {'type': 'loss', 'content': 0.03920237720012665, 'timestamp': '2025-09-10 02:50:54.950376', 'step': 21209, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 3322488817984}, 'timestamp': '2025-09-10 02:50:54.981122', 'step': 21209, 'epoch': 3} {'type': 'loss', 'content': 0.06891334801912308, 'timestamp': '2025-09-10 02:50:54.984000', 'step': 21210, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 144], 'flops': 4271696270016}, 'timestamp': '2025-09-10 02:50:55.014970', 'step': 21210, 'epoch': 3} {'type': 'loss', 'content': 0.08607002347707748, 'timestamp': '2025-09-10 02:50:55.017717', 'step': 21211, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 3322488817984}, 'timestamp': '2025-09-10 02:50:55.053031', 'step': 21211, 'epoch': 3} {'type': 'loss', 'content': 0.13440611958503723, 'timestamp': '2025-09-10 02:50:55.076892', 'step': 21212, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 80], 'flops': 2373281365952}, 'timestamp': '2025-09-10 02:50:55.107214', 'step': 21212, 'epoch': 3} {'type': 'loss', 'content': 0.03165603429079056, 'timestamp': '2025-09-10 02:50:55.109624', 'step': 21213, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 3322488817984}, 'timestamp': '2025-09-10 02:50:55.140616', 'step': 21213, 'epoch': 3} {'type': 'loss', 'content': 0.18321940302848816, 'timestamp': '2025-09-10 02:50:55.143137', 'step': 21214, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 144], 'flops': 4271696270016}, 'timestamp': '2025-09-10 02:50:55.174341', 'step': 21214, 'epoch': 3} {'type': 'loss', 'content': 0.13693876564502716, 'timestamp': '2025-09-10 02:50:55.177277', 'step': 21215, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 128], 'flops': 3797092544000}, 'timestamp': '2025-09-10 02:50:55.210569', 'step': 21215, 'epoch': 3} {'type': 'loss', 'content': 0.05367845669388771, 'timestamp': '2025-09-10 02:50:55.234107', 'step': 21216, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 144], 'flops': 4271696270016}, 'timestamp': '2025-09-10 02:50:55.265269', 'step': 21216, 'epoch': 3} {'type': 'loss', 'content': 0.044938087463378906, 'timestamp': '2025-09-10 02:50:55.267545', 'step': 21217, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 128], 'flops': 3797092544000}, 'timestamp': '2025-09-10 02:50:55.297772', 'step': 21217, 'epoch': 3} {'type': 'loss', 'content': 0.10834275186061859, 'timestamp': '2025-09-10 02:50:55.299767', 'step': 21218, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 144], 'flops': 4271696270016}, 'timestamp': '2025-09-10 02:50:55.332252', 'step': 21218, 'epoch': 3} {'type': 'loss', 'content': 0.0972171425819397, 'timestamp': '2025-09-10 02:50:55.335590', 'step': 21219, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 2847885091968}, 'timestamp': '2025-09-10 02:50:55.366738', 'step': 21219, 'epoch': 3} {'type': 'loss', 'content': 0.16933666169643402, 'timestamp': '2025-09-10 02:50:55.390605', 'step': 21220, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 2847885091968}, 'timestamp': '2025-09-10 02:50:55.420300', 'step': 21220, 'epoch': 3} {'type': 'loss', 'content': 0.07969420403242111, 'timestamp': '2025-09-10 02:50:55.424272', 'step': 21221, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 2847885091968}, 'timestamp': '2025-09-10 02:50:55.454210', 'step': 21221, 'epoch': 3} {'type': 'loss', 'content': 0.10250721126794815, 'timestamp': '2025-09-10 02:50:55.456868', 'step': 21222, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 144], 'flops': 4271696270016}, 'timestamp': '2025-09-10 02:50:55.488447', 'step': 21222, 'epoch': 3} {'type': 'loss', 'content': 0.027313122525811195, 'timestamp': '2025-09-10 02:50:55.491279', 'step': 21223, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 128], 'flops': 3797092544000}, 'timestamp': '2025-09-10 02:50:55.522413', 'step': 21223, 'epoch': 3} {'type': 'loss', 'content': 0.01923656091094017, 'timestamp': '2025-09-10 02:50:55.548669', 'step': 21224, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 160], 'flops': 4746299996032}, 'timestamp': '2025-09-10 02:50:55.580528', 'step': 21224, 'epoch': 3} {'type': 'loss', 'content': 0.07287495583295822, 'timestamp': '2025-09-10 02:50:55.583125', 'step': 21225, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 2847885091968}, 'timestamp': '2025-09-10 02:50:55.613480', 'step': 21225, 'epoch': 3} {'type': 'loss', 'content': 0.06744207441806793, 'timestamp': '2025-09-10 02:50:55.615575', 'step': 21226, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 2847885091968}, 'timestamp': '2025-09-10 02:50:55.645881', 'step': 21226, 'epoch': 3} {'type': 'loss', 'content': 0.16735060513019562, 'timestamp': '2025-09-10 02:50:55.648468', 'step': 21227, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 192], 'flops': 5695507448064}, 'timestamp': '2025-09-10 02:50:55.679297', 'step': 21227, 'epoch': 3} {'type': 'loss', 'content': 0.03174607828259468, 'timestamp': '2025-09-10 02:50:55.704715', 'step': 21228, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 2847885091968}, 'timestamp': '2025-09-10 02:50:55.734892', 'step': 21228, 'epoch': 3} {'type': 'loss', 'content': 0.04438324645161629, 'timestamp': '2025-09-10 02:50:55.737098', 'step': 21229, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 128], 'flops': 3797092544000}, 'timestamp': '2025-09-10 02:50:55.766686', 'step': 21229, 'epoch': 3} {'type': 'loss', 'content': 0.0640692412853241, 'timestamp': '2025-09-10 02:50:55.769649', 'step': 21230, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 3322488817984}, 'timestamp': '2025-09-10 02:50:55.800537', 'step': 21230, 'epoch': 3} {'type': 'loss', 'content': 0.07908784598112106, 'timestamp': '2025-09-10 02:50:55.802800', 'step': 21231, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 3322488817984}, 'timestamp': '2025-09-10 02:50:55.833932', 'step': 21231, 'epoch': 3} {'type': 'loss', 'content': 0.011200924403965473, 'timestamp': '2025-09-10 02:50:55.857735', 'step': 21232, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 128], 'flops': 3797092544000}, 'timestamp': '2025-09-10 02:50:55.889125', 'step': 21232, 'epoch': 3} {'type': 'loss', 'content': 0.10717926174402237, 'timestamp': '2025-09-10 02:50:55.891558', 'step': 21233, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 128], 'flops': 3797092544000}, 'timestamp': '2025-09-10 02:50:55.921473', 'step': 21233, 'epoch': 3} {'type': 'loss', 'content': 0.03949359059333801, 'timestamp': '2025-09-10 02:50:55.924283', 'step': 21234, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 3322488817984}, 'timestamp': '2025-09-10 02:50:55.956500', 'step': 21234, 'epoch': 3} {'type': 'loss', 'content': 0.12302734702825546, 'timestamp': '2025-09-10 02:50:55.960508', 'step': 21235, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 2847885091968}, 'timestamp': '2025-09-10 02:50:55.993402', 'step': 21235, 'epoch': 3} {'type': 'loss', 'content': 0.12134986370801926, 'timestamp': '2025-09-10 02:50:56.016867', 'step': 21236, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 3322488817984}, 'timestamp': '2025-09-10 02:50:56.048956', 'step': 21236, 'epoch': 3} {'type': 'loss', 'content': 0.1103939414024353, 'timestamp': '2025-09-10 02:50:56.051474', 'step': 21237, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 2847885091968}, 'timestamp': '2025-09-10 02:50:56.082043', 'step': 21237, 'epoch': 3} {'type': 'loss', 'content': 0.10546206682920456, 'timestamp': '2025-09-10 02:50:56.084513', 'step': 21238, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 3322488817984}, 'timestamp': '2025-09-10 02:50:56.115359', 'step': 21238, 'epoch': 3} {'type': 'loss', 'content': 0.12249263375997543, 'timestamp': '2025-09-10 02:50:56.118126', 'step': 21239, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 144], 'flops': 4271696270016}, 'timestamp': '2025-09-10 02:50:56.148966', 'step': 21239, 'epoch': 3} {'type': 'loss', 'content': 0.049602244049310684, 'timestamp': '2025-09-10 02:50:56.172755', 'step': 21240, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 3322488817984}, 'timestamp': '2025-09-10 02:50:56.204767', 'step': 21240, 'epoch': 3} {'type': 'loss', 'content': 0.06839946657419205, 'timestamp': '2025-09-10 02:50:56.208353', 'step': 21241, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 128], 'flops': 3797092544000}, 'timestamp': '2025-09-10 02:50:56.242337', 'step': 21241, 'epoch': 3} {'type': 'loss', 'content': 0.1501551866531372, 'timestamp': '2025-09-10 02:50:56.245269', 'step': 21242, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 144], 'flops': 4271696270016}, 'timestamp': '2025-09-10 02:50:56.277526', 'step': 21242, 'epoch': 3} {'type': 'loss', 'content': 0.07499720901250839, 'timestamp': '2025-09-10 02:50:56.280492', 'step': 21243, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 3322488817984}, 'timestamp': '2025-09-10 02:50:56.314909', 'step': 21243, 'epoch': 3} {'type': 'loss', 'content': 0.057907357811927795, 'timestamp': '2025-09-10 02:50:56.339158', 'step': 21244, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 2847885091968}, 'timestamp': '2025-09-10 02:50:56.371922', 'step': 21244, 'epoch': 3} {'type': 'loss', 'content': 0.04157336428761482, 'timestamp': '2025-09-10 02:50:56.374981', 'step': 21245, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 144], 'flops': 4271696270016}, 'timestamp': '2025-09-10 02:50:56.409613', 'step': 21245, 'epoch': 3} {'type': 'loss', 'content': 0.0413370244204998, 'timestamp': '2025-09-10 02:50:56.412147', 'step': 21246, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 128], 'flops': 3797092544000}, 'timestamp': '2025-09-10 02:50:56.443849', 'step': 21246, 'epoch': 3} {'type': 'loss', 'content': 0.05433963984251022, 'timestamp': '2025-09-10 02:50:56.446205', 'step': 21247, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 3322488817984}, 'timestamp': '2025-09-10 02:50:56.476854', 'step': 21247, 'epoch': 3} {'type': 'loss', 'content': 0.05753767490386963, 'timestamp': '2025-09-10 02:50:56.503897', 'step': 21248, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 128], 'flops': 3797092544000}, 'timestamp': '2025-09-10 02:50:56.538524', 'step': 21248, 'epoch': 3} {'type': 'loss', 'content': 0.10666053742170334, 'timestamp': '2025-09-10 02:50:56.540996', 'step': 21249, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 3322488817984}, 'timestamp': '2025-09-10 02:50:56.572090', 'step': 21249, 'epoch': 3} {'type': 'loss', 'content': 0.06452729552984238, 'timestamp': '2025-09-10 02:50:56.574620', 'step': 21250, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 3322488817984}, 'timestamp': '2025-09-10 02:50:56.609755', 'step': 21250, 'epoch': 3} {'type': 'loss', 'content': 0.1033102348446846, 'timestamp': '2025-09-10 02:50:56.612084', 'step': 21251, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 3322488817984}, 'timestamp': '2025-09-10 02:50:56.642510', 'step': 21251, 'epoch': 3} {'type': 'loss', 'content': 0.038710035383701324, 'timestamp': '2025-09-10 02:50:56.666011', 'step': 21252, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 160], 'flops': 4746299996032}, 'timestamp': '2025-09-10 02:50:56.695716', 'step': 21252, 'epoch': 3} {'type': 'loss', 'content': 0.03760465234518051, 'timestamp': '2025-09-10 02:50:56.697885', 'step': 21253, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 2847885091968}, 'timestamp': '2025-09-10 02:50:56.729888', 'step': 21253, 'epoch': 3} {'type': 'loss', 'content': 0.08183994144201279, 'timestamp': '2025-09-10 02:50:56.732314', 'step': 21254, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 3322488817984}, 'timestamp': '2025-09-10 02:50:56.765231', 'step': 21254, 'epoch': 3} {'type': 'loss', 'content': 0.06972431391477585, 'timestamp': '2025-09-10 02:50:56.767910', 'step': 21255, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 3322488817984}, 'timestamp': '2025-09-10 02:50:56.798341', 'step': 21255, 'epoch': 3} {'type': 'loss', 'content': 0.13958659768104553, 'timestamp': '2025-09-10 02:50:56.821581', 'step': 21256, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 144], 'flops': 4271696270016}, 'timestamp': '2025-09-10 02:50:56.857115', 'step': 21256, 'epoch': 3} {'type': 'loss', 'content': 0.09779992699623108, 'timestamp': '2025-09-10 02:50:56.859943', 'step': 21257, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 144], 'flops': 4271696270016}, 'timestamp': '2025-09-10 02:50:56.890380', 'step': 21257, 'epoch': 3} {'type': 'loss', 'content': 0.08570557087659836, 'timestamp': '2025-09-10 02:50:56.895082', 'step': 21258, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 144], 'flops': 4271696270016}, 'timestamp': '2025-09-10 02:50:56.933417', 'step': 21258, 'epoch': 3} {'type': 'loss', 'content': 0.07606829702854156, 'timestamp': '2025-09-10 02:50:56.935870', 'step': 21259, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 3322488817984}, 'timestamp': '2025-09-10 02:50:56.973485', 'step': 21259, 'epoch': 3} {'type': 'loss', 'content': 0.07194498181343079, 'timestamp': '2025-09-10 02:50:56.998210', 'step': 21260, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 3322488817984}, 'timestamp': '2025-09-10 02:50:57.029594', 'step': 21260, 'epoch': 3} {'type': 'loss', 'content': 0.04899028688669205, 'timestamp': '2025-09-10 02:50:57.032341', 'step': 21261, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 128], 'flops': 3797092544000}, 'timestamp': '2025-09-10 02:50:57.065032', 'step': 21261, 'epoch': 3} {'type': 'loss', 'content': 0.11550077050924301, 'timestamp': '2025-09-10 02:50:57.067828', 'step': 21262, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 2847885091968}, 'timestamp': '2025-09-10 02:50:57.098868', 'step': 21262, 'epoch': 3} {'type': 'loss', 'content': 0.14031393826007843, 'timestamp': '2025-09-10 02:50:57.101465', 'step': 21263, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 2847885091968}, 'timestamp': '2025-09-10 02:50:57.131267', 'step': 21263, 'epoch': 3} {'type': 'loss', 'content': 0.053955744951963425, 'timestamp': '2025-09-10 02:50:57.155529', 'step': 21264, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 128], 'flops': 3797092544000}, 'timestamp': '2025-09-10 02:50:57.186568', 'step': 21264, 'epoch': 3} {'type': 'loss', 'content': 0.09210171550512314, 'timestamp': '2025-09-10 02:50:57.194856', 'step': 21265, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 2847885091968}, 'timestamp': '2025-09-10 02:50:57.225723', 'step': 21265, 'epoch': 3} {'type': 'loss', 'content': 0.0376020111143589, 'timestamp': '2025-09-10 02:50:57.227996', 'step': 21266, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 2847885091968}, 'timestamp': '2025-09-10 02:50:57.262600', 'step': 21266, 'epoch': 3} {'type': 'loss', 'content': 0.09386500716209412, 'timestamp': '2025-09-10 02:50:57.265189', 'step': 21267, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 128], 'flops': 3797092544000}, 'timestamp': '2025-09-10 02:50:57.296382', 'step': 21267, 'epoch': 3} {'type': 'loss', 'content': 0.0761772096157074, 'timestamp': '2025-09-10 02:50:57.320138', 'step': 21268, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 3322488817984}, 'timestamp': '2025-09-10 02:50:57.352903', 'step': 21268, 'epoch': 3} {'type': 'loss', 'content': 0.04895356670022011, 'timestamp': '2025-09-10 02:50:57.355327', 'step': 21269, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 2847885091968}, 'timestamp': '2025-09-10 02:50:57.385801', 'step': 21269, 'epoch': 3} {'type': 'loss', 'content': 0.09134162217378616, 'timestamp': '2025-09-10 02:50:57.388320', 'step': 21270, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 128], 'flops': 3797092544000}, 'timestamp': '2025-09-10 02:50:57.419358', 'step': 21270, 'epoch': 3} {'type': 'loss', 'content': 0.046392980962991714, 'timestamp': '2025-09-10 02:50:57.421985', 'step': 21271, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 160], 'flops': 4746299996032}, 'timestamp': '2025-09-10 02:50:57.454424', 'step': 21271, 'epoch': 3} {'type': 'loss', 'content': 0.05463014915585518, 'timestamp': '2025-09-10 02:50:57.478025', 'step': 21272, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 2847885091968}, 'timestamp': '2025-09-10 02:50:57.508721', 'step': 21272, 'epoch': 3} {'type': 'loss', 'content': 0.06821063160896301, 'timestamp': '2025-09-10 02:50:57.511460', 'step': 21273, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 128], 'flops': 3797092544000}, 'timestamp': '2025-09-10 02:50:57.542324', 'step': 21273, 'epoch': 3} {'type': 'loss', 'content': 0.07470165938138962, 'timestamp': '2025-09-10 02:50:57.544963', 'step': 21274, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 128], 'flops': 3797092544000}, 'timestamp': '2025-09-10 02:50:57.576821', 'step': 21274, 'epoch': 3} {'type': 'loss', 'content': 0.13936319947242737, 'timestamp': '2025-09-10 02:50:57.579130', 'step': 21275, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 2847885091968}, 'timestamp': '2025-09-10 02:50:57.608984', 'step': 21275, 'epoch': 3} {'type': 'loss', 'content': 0.139158695936203, 'timestamp': '2025-09-10 02:50:57.634128', 'step': 21276, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 160], 'flops': 4746299996032}, 'timestamp': '2025-09-10 02:50:57.664927', 'step': 21276, 'epoch': 3} {'type': 'loss', 'content': 0.03500977158546448, 'timestamp': '2025-09-10 02:50:57.667320', 'step': 21277, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 2847885091968}, 'timestamp': '2025-09-10 02:50:57.697277', 'step': 21277, 'epoch': 3} {'type': 'loss', 'content': 0.11288929730653763, 'timestamp': '2025-09-10 02:50:57.699603', 'step': 21278, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 144], 'flops': 4271696270016}, 'timestamp': '2025-09-10 02:50:57.729386', 'step': 21278, 'epoch': 3} {'type': 'loss', 'content': 0.07713653892278671, 'timestamp': '2025-09-10 02:50:57.731970', 'step': 21279, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 2847885091968}, 'timestamp': '2025-09-10 02:50:57.761943', 'step': 21279, 'epoch': 3} {'type': 'loss', 'content': 0.12656748294830322, 'timestamp': '2025-09-10 02:50:57.785600', 'step': 21280, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 128], 'flops': 3797092544000}, 'timestamp': '2025-09-10 02:50:57.817286', 'step': 21280, 'epoch': 3} {'type': 'loss', 'content': 0.046540044248104095, 'timestamp': '2025-09-10 02:50:57.819677', 'step': 21281, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 2847885091968}, 'timestamp': '2025-09-10 02:50:57.850841', 'step': 21281, 'epoch': 3} {'type': 'loss', 'content': 0.07597164809703827, 'timestamp': '2025-09-10 02:50:57.853393', 'step': 21282, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 2847885091968}, 'timestamp': '2025-09-10 02:50:57.884120', 'step': 21282, 'epoch': 3} {'type': 'loss', 'content': 0.059137869626283646, 'timestamp': '2025-09-10 02:50:57.886538', 'step': 21283, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 128], 'flops': 3797092544000}, 'timestamp': '2025-09-10 02:50:57.916506', 'step': 21283, 'epoch': 3} {'type': 'loss', 'content': 0.13500860333442688, 'timestamp': '2025-09-10 02:50:57.939981', 'step': 21284, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 3322488817984}, 'timestamp': '2025-09-10 02:50:57.972014', 'step': 21284, 'epoch': 3} {'type': 'loss', 'content': 0.06733886897563934, 'timestamp': '2025-09-10 02:50:57.974249', 'step': 21285, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 3322488817984}, 'timestamp': '2025-09-10 02:50:58.005371', 'step': 21285, 'epoch': 3} {'type': 'loss', 'content': 0.03125215694308281, 'timestamp': '2025-09-10 02:50:58.007630', 'step': 21286, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 2847885091968}, 'timestamp': '2025-09-10 02:50:58.039507', 'step': 21286, 'epoch': 3} {'type': 'loss', 'content': 0.07569675147533417, 'timestamp': '2025-09-10 02:50:58.041777', 'step': 21287, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 144], 'flops': 4271696270016}, 'timestamp': '2025-09-10 02:50:58.072714', 'step': 21287, 'epoch': 3} {'type': 'loss', 'content': 0.06200416013598442, 'timestamp': '2025-09-10 02:50:58.096667', 'step': 21288, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 80], 'flops': 2373281365952}, 'timestamp': '2025-09-10 02:50:58.125724', 'step': 21288, 'epoch': 3} {'type': 'loss', 'content': 0.05692365765571594, 'timestamp': '2025-09-10 02:50:58.128399', 'step': 21289, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 128], 'flops': 3797092544000}, 'timestamp': '2025-09-10 02:50:58.159338', 'step': 21289, 'epoch': 3} {'type': 'loss', 'content': 0.12504272162914276, 'timestamp': '2025-09-10 02:50:58.161797', 'step': 21290, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 3322488817984}, 'timestamp': '2025-09-10 02:50:58.192633', 'step': 21290, 'epoch': 3} {'type': 'loss', 'content': 0.07512503117322922, 'timestamp': '2025-09-10 02:50:58.195413', 'step': 21291, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 3322488817984}, 'timestamp': '2025-09-10 02:50:58.227141', 'step': 21291, 'epoch': 3} {'type': 'loss', 'content': 0.057168468832969666, 'timestamp': '2025-09-10 02:50:58.251321', 'step': 21292, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 3322488817984}, 'timestamp': '2025-09-10 02:50:58.282298', 'step': 21292, 'epoch': 3} {'type': 'loss', 'content': 0.09942417591810226, 'timestamp': '2025-09-10 02:50:58.284789', 'step': 21293, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 2847885091968}, 'timestamp': '2025-09-10 02:50:58.315456', 'step': 21293, 'epoch': 3} {'type': 'loss', 'content': 0.0548289529979229, 'timestamp': '2025-09-10 02:50:58.317901', 'step': 21294, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 2847885091968}, 'timestamp': '2025-09-10 02:50:58.349323', 'step': 21294, 'epoch': 3} {'type': 'loss', 'content': 0.03158213570713997, 'timestamp': '2025-09-10 02:50:58.351970', 'step': 21295, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 2847885091968}, 'timestamp': '2025-09-10 02:50:58.382182', 'step': 21295, 'epoch': 3} {'type': 'loss', 'content': 0.10576218366622925, 'timestamp': '2025-09-10 02:50:58.405543', 'step': 21296, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 128], 'flops': 3797092544000}, 'timestamp': '2025-09-10 02:50:58.435507', 'step': 21296, 'epoch': 3} {'type': 'loss', 'content': 0.035578612238168716, 'timestamp': '2025-09-10 02:50:58.438080', 'step': 21297, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 3322488817984}, 'timestamp': '2025-09-10 02:50:58.467908', 'step': 21297, 'epoch': 3} {'type': 'loss', 'content': 0.07811903953552246, 'timestamp': '2025-09-10 02:50:58.470422', 'step': 21298, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 80], 'flops': 2373281365952}, 'timestamp': '2025-09-10 02:50:58.501725', 'step': 21298, 'epoch': 3} {'type': 'loss', 'content': 0.09781250357627869, 'timestamp': '2025-09-10 02:50:58.503868', 'step': 21299, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 2847885091968}, 'timestamp': '2025-09-10 02:50:58.535237', 'step': 21299, 'epoch': 3} {'type': 'loss', 'content': 0.06116388365626335, 'timestamp': '2025-09-10 02:50:58.559306', 'step': 21300, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 3322488817984}, 'timestamp': '2025-09-10 02:50:58.590126', 'step': 21300, 'epoch': 3} {'type': 'loss', 'content': 0.05933371186256409, 'timestamp': '2025-09-10 02:50:58.592538', 'step': 21301, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 160], 'flops': 4746299996032}, 'timestamp': '2025-09-10 02:50:58.623703', 'step': 21301, 'epoch': 3} {'type': 'loss', 'content': 0.0637427493929863, 'timestamp': '2025-09-10 02:50:58.626328', 'step': 21302, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 2847885091968}, 'timestamp': '2025-09-10 02:50:58.668170', 'step': 21302, 'epoch': 3} {'type': 'loss', 'content': 0.03766096755862236, 'timestamp': '2025-09-10 02:50:58.670641', 'step': 21303, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 128], 'flops': 3797092544000}, 'timestamp': '2025-09-10 02:50:58.701283', 'step': 21303, 'epoch': 3} {'type': 'loss', 'content': 0.05922826752066612, 'timestamp': '2025-09-10 02:50:58.726858', 'step': 21304, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 3322488817984}, 'timestamp': '2025-09-10 02:50:58.759134', 'step': 21304, 'epoch': 3} {'type': 'loss', 'content': 0.06607555598020554, 'timestamp': '2025-09-10 02:50:58.761343', 'step': 21305, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 3322488817984}, 'timestamp': '2025-09-10 02:50:58.792526', 'step': 21305, 'epoch': 3} {'type': 'loss', 'content': 0.1180620938539505, 'timestamp': '2025-09-10 02:50:58.795158', 'step': 21306, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 144], 'flops': 4271696270016}, 'timestamp': '2025-09-10 02:50:58.826171', 'step': 21306, 'epoch': 3} {'type': 'loss', 'content': 0.12289539724588394, 'timestamp': '2025-09-10 02:50:58.830037', 'step': 21307, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 3322488817984}, 'timestamp': '2025-09-10 02:50:58.861271', 'step': 21307, 'epoch': 3} {'type': 'loss', 'content': 0.14907211065292358, 'timestamp': '2025-09-10 02:50:58.884504', 'step': 21308, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 2847885091968}, 'timestamp': '2025-09-10 02:50:58.917473', 'step': 21308, 'epoch': 3} {'type': 'loss', 'content': 0.10029852390289307, 'timestamp': '2025-09-10 02:50:58.919445', 'step': 21309, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 144], 'flops': 4271696270016}, 'timestamp': '2025-09-10 02:50:58.949737', 'step': 21309, 'epoch': 3} {'type': 'loss', 'content': 0.060627471655607224, 'timestamp': '2025-09-10 02:50:58.952431', 'step': 21310, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 2847885091968}, 'timestamp': '2025-09-10 02:50:58.983898', 'step': 21310, 'epoch': 3} {'type': 'loss', 'content': 0.08083374053239822, 'timestamp': '2025-09-10 02:50:58.986333', 'step': 21311, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 2847885091968}, 'timestamp': '2025-09-10 02:50:59.017036', 'step': 21311, 'epoch': 3} {'type': 'loss', 'content': 0.0695299580693245, 'timestamp': '2025-09-10 02:50:59.040814', 'step': 21312, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 3322488817984}, 'timestamp': '2025-09-10 02:50:59.072125', 'step': 21312, 'epoch': 3} {'type': 'loss', 'content': 0.07186034321784973, 'timestamp': '2025-09-10 02:50:59.075008', 'step': 21313, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 2847885091968}, 'timestamp': '2025-09-10 02:50:59.105507', 'step': 21313, 'epoch': 3} {'type': 'loss', 'content': 0.11957821995019913, 'timestamp': '2025-09-10 02:50:59.108179', 'step': 21314, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 2847885091968}, 'timestamp': '2025-09-10 02:50:59.145536', 'step': 21314, 'epoch': 3} {'type': 'loss', 'content': 0.039437226951122284, 'timestamp': '2025-09-10 02:50:59.148098', 'step': 21315, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 144], 'flops': 4271696270016}, 'timestamp': '2025-09-10 02:50:59.178766', 'step': 21315, 'epoch': 3} {'type': 'loss', 'content': 0.056281231343746185, 'timestamp': '2025-09-10 02:50:59.202479', 'step': 21316, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 2847885091968}, 'timestamp': '2025-09-10 02:50:59.232450', 'step': 21316, 'epoch': 3} {'type': 'loss', 'content': 0.023847483098506927, 'timestamp': '2025-09-10 02:50:59.235868', 'step': 21317, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 3322488817984}, 'timestamp': '2025-09-10 02:50:59.267796', 'step': 21317, 'epoch': 3} {'type': 'loss', 'content': 0.11489042639732361, 'timestamp': '2025-09-10 02:50:59.269994', 'step': 21318, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 2847885091968}, 'timestamp': '2025-09-10 02:50:59.301143', 'step': 21318, 'epoch': 3} {'type': 'loss', 'content': 0.07236439734697342, 'timestamp': '2025-09-10 02:50:59.303491', 'step': 21319, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 3322488817984}, 'timestamp': '2025-09-10 02:50:59.333509', 'step': 21319, 'epoch': 3} {'type': 'loss', 'content': 0.08658203482627869, 'timestamp': '2025-09-10 02:50:59.357177', 'step': 21320, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 3322488817984}, 'timestamp': '2025-09-10 02:50:59.387936', 'step': 21320, 'epoch': 3} {'type': 'loss', 'content': 0.06045382842421532, 'timestamp': '2025-09-10 02:50:59.390418', 'step': 21321, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 3322488817984}, 'timestamp': '2025-09-10 02:50:59.420573', 'step': 21321, 'epoch': 3} {'type': 'loss', 'content': 0.14943423867225647, 'timestamp': '2025-09-10 02:50:59.423050', 'step': 21322, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 3322488817984}, 'timestamp': '2025-09-10 02:50:59.453122', 'step': 21322, 'epoch': 3} {'type': 'loss', 'content': 0.06449077278375626, 'timestamp': '2025-09-10 02:50:59.455769', 'step': 21323, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 128], 'flops': 3797092544000}, 'timestamp': '2025-09-10 02:50:59.488008', 'step': 21323, 'epoch': 3} {'type': 'loss', 'content': 0.06750407069921494, 'timestamp': '2025-09-10 02:50:59.511366', 'step': 21324, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 2847885091968}, 'timestamp': '2025-09-10 02:50:59.541168', 'step': 21324, 'epoch': 3} {'type': 'loss', 'content': 0.12056028097867966, 'timestamp': '2025-09-10 02:50:59.543872', 'step': 21325, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 3322488817984}, 'timestamp': '2025-09-10 02:50:59.574234', 'step': 21325, 'epoch': 3} {'type': 'loss', 'content': 0.03023587167263031, 'timestamp': '2025-09-10 02:50:59.578488', 'step': 21326, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 80], 'flops': 2373281365952}, 'timestamp': '2025-09-10 02:50:59.609242', 'step': 21326, 'epoch': 3} {'type': 'loss', 'content': 0.12436439841985703, 'timestamp': '2025-09-10 02:50:59.612043', 'step': 21327, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 128], 'flops': 3797092544000}, 'timestamp': '2025-09-10 02:50:59.643683', 'step': 21327, 'epoch': 3} {'type': 'loss', 'content': 0.030816182494163513, 'timestamp': '2025-09-10 02:50:59.667862', 'step': 21328, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 3322488817984}, 'timestamp': '2025-09-10 02:50:59.703643', 'step': 21328, 'epoch': 3} {'type': 'loss', 'content': 0.06424180418252945, 'timestamp': '2025-09-10 02:50:59.706074', 'step': 21329, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 3322488817984}, 'timestamp': '2025-09-10 02:50:59.736134', 'step': 21329, 'epoch': 3} {'type': 'loss', 'content': 0.06738646328449249, 'timestamp': '2025-09-10 02:50:59.738627', 'step': 21330, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 3322488817984}, 'timestamp': '2025-09-10 02:50:59.773660', 'step': 21330, 'epoch': 3} {'type': 'loss', 'content': 0.032147571444511414, 'timestamp': '2025-09-10 02:50:59.776256', 'step': 21331, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 2847885091968}, 'timestamp': '2025-09-10 02:50:59.808370', 'step': 21331, 'epoch': 3} {'type': 'loss', 'content': 0.028802882879972458, 'timestamp': '2025-09-10 02:50:59.832340', 'step': 21332, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 176], 'flops': 5220903722048}, 'timestamp': '2025-09-10 02:50:59.863344', 'step': 21332, 'epoch': 3} {'type': 'loss', 'content': 0.05637137591838837, 'timestamp': '2025-09-10 02:50:59.866125', 'step': 21333, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 144], 'flops': 4271696270016}, 'timestamp': '2025-09-10 02:50:59.897512', 'step': 21333, 'epoch': 3} {'type': 'loss', 'content': 0.15134942531585693, 'timestamp': '2025-09-10 02:50:59.899915', 'step': 21334, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 128], 'flops': 3797092544000}, 'timestamp': '2025-09-10 02:50:59.929723', 'step': 21334, 'epoch': 3} {'type': 'loss', 'content': 0.05690056085586548, 'timestamp': '2025-09-10 02:50:59.932322', 'step': 21335, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 3322488817984}, 'timestamp': '2025-09-10 02:50:59.962367', 'step': 21335, 'epoch': 3} {'type': 'loss', 'content': 0.056626636534929276, 'timestamp': '2025-09-10 02:50:59.985685', 'step': 21336, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 2847885091968}, 'timestamp': '2025-09-10 02:51:00.016634', 'step': 21336, 'epoch': 3} {'type': 'loss', 'content': 0.03529850021004677, 'timestamp': '2025-09-10 02:51:00.018533', 'step': 21337, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 3322488817984}, 'timestamp': '2025-09-10 02:51:00.048407', 'step': 21337, 'epoch': 3} {'type': 'loss', 'content': 0.041824888437986374, 'timestamp': '2025-09-10 02:51:00.050859', 'step': 21338, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 3322488817984}, 'timestamp': '2025-09-10 02:51:00.081768', 'step': 21338, 'epoch': 3} {'type': 'loss', 'content': 0.11475756019353867, 'timestamp': '2025-09-10 02:51:00.088188', 'step': 21339, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 3322488817984}, 'timestamp': '2025-09-10 02:51:00.127608', 'step': 21339, 'epoch': 3} {'type': 'loss', 'content': 0.06532857567071915, 'timestamp': '2025-09-10 02:51:00.154342', 'step': 21340, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 3322488817984}, 'timestamp': '2025-09-10 02:51:00.194150', 'step': 21340, 'epoch': 3} {'type': 'loss', 'content': 0.03859128803014755, 'timestamp': '2025-09-10 02:51:00.198115', 'step': 21341, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 3322488817984}, 'timestamp': '2025-09-10 02:51:00.231194', 'step': 21341, 'epoch': 3} {'type': 'loss', 'content': 0.11155258119106293, 'timestamp': '2025-09-10 02:51:00.237263', 'step': 21342, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 2847885091968}, 'timestamp': '2025-09-10 02:51:00.278324', 'step': 21342, 'epoch': 3} {'type': 'loss', 'content': 0.07360558956861496, 'timestamp': '2025-09-10 02:51:00.283304', 'step': 21343, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 144], 'flops': 4271696270016}, 'timestamp': '2025-09-10 02:51:00.322210', 'step': 21343, 'epoch': 3} {'type': 'loss', 'content': 0.06745453923940659, 'timestamp': '2025-09-10 02:51:00.345930', 'step': 21344, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 2847885091968}, 'timestamp': '2025-09-10 02:51:00.376016', 'step': 21344, 'epoch': 3} {'type': 'loss', 'content': 0.02970792166888714, 'timestamp': '2025-09-10 02:51:00.378490', 'step': 21345, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 2847885091968}, 'timestamp': '2025-09-10 02:51:00.409181', 'step': 21345, 'epoch': 3} {'type': 'loss', 'content': 0.05320041999220848, 'timestamp': '2025-09-10 02:51:00.411610', 'step': 21346, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 2847885091968}, 'timestamp': '2025-09-10 02:51:00.443737', 'step': 21346, 'epoch': 3} {'type': 'loss', 'content': 0.054223280400037766, 'timestamp': '2025-09-10 02:51:00.446553', 'step': 21347, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 160], 'flops': 4746299996032}, 'timestamp': '2025-09-10 02:51:00.478947', 'step': 21347, 'epoch': 3} {'type': 'loss', 'content': 0.03325105831027031, 'timestamp': '2025-09-10 02:51:00.502619', 'step': 21348, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 3322488817984}, 'timestamp': '2025-09-10 02:51:00.532892', 'step': 21348, 'epoch': 3} {'type': 'loss', 'content': 0.028383582830429077, 'timestamp': '2025-09-10 02:51:00.537081', 'step': 21349, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 160], 'flops': 4746299996032}, 'timestamp': '2025-09-10 02:51:00.567407', 'step': 21349, 'epoch': 3} {'type': 'loss', 'content': 0.09579916298389435, 'timestamp': '2025-09-10 02:51:00.570284', 'step': 21350, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 3322488817984}, 'timestamp': '2025-09-10 02:51:00.600766', 'step': 21350, 'epoch': 3} {'type': 'loss', 'content': 0.0507192388176918, 'timestamp': '2025-09-10 02:51:00.603235', 'step': 21351, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 128], 'flops': 3797092544000}, 'timestamp': '2025-09-10 02:51:00.633880', 'step': 21351, 'epoch': 3} {'type': 'loss', 'content': 0.037124987691640854, 'timestamp': '2025-09-10 02:51:00.663437', 'step': 21352, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 2847885091968}, 'timestamp': '2025-09-10 02:51:00.695978', 'step': 21352, 'epoch': 3} {'type': 'loss', 'content': 0.05633364990353584, 'timestamp': '2025-09-10 02:51:00.698670', 'step': 21353, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 128], 'flops': 3797092544000}, 'timestamp': '2025-09-10 02:51:00.729400', 'step': 21353, 'epoch': 3} {'type': 'loss', 'content': 0.024337468668818474, 'timestamp': '2025-09-10 02:51:00.731881', 'step': 21354, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 2847885091968}, 'timestamp': '2025-09-10 02:51:00.763166', 'step': 21354, 'epoch': 3} {'type': 'loss', 'content': 0.09512145817279816, 'timestamp': '2025-09-10 02:51:00.767154', 'step': 21355, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 3322488817984}, 'timestamp': '2025-09-10 02:51:00.800298', 'step': 21355, 'epoch': 3} {'type': 'loss', 'content': 0.09422946721315384, 'timestamp': '2025-09-10 02:51:00.824074', 'step': 21356, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 2847885091968}, 'timestamp': '2025-09-10 02:51:00.853826', 'step': 21356, 'epoch': 3} {'type': 'loss', 'content': 0.049147605895996094, 'timestamp': '2025-09-10 02:51:00.856086', 'step': 21357, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 3322488817984}, 'timestamp': '2025-09-10 02:51:00.886917', 'step': 21357, 'epoch': 3} {'type': 'loss', 'content': 0.03261949494481087, 'timestamp': '2025-09-10 02:51:00.889363', 'step': 21358, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 3322488817984}, 'timestamp': '2025-09-10 02:51:00.920665', 'step': 21358, 'epoch': 3} {'type': 'loss', 'content': 0.09030350297689438, 'timestamp': '2025-09-10 02:51:00.922916', 'step': 21359, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 3322488817984}, 'timestamp': '2025-09-10 02:51:00.951933', 'step': 21359, 'epoch': 3} {'type': 'loss', 'content': 0.019734758883714676, 'timestamp': '2025-09-10 02:51:00.975936', 'step': 21360, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 2847885091968}, 'timestamp': '2025-09-10 02:51:01.007718', 'step': 21360, 'epoch': 3} {'type': 'loss', 'content': 0.05784593150019646, 'timestamp': '2025-09-10 02:51:01.010098', 'step': 21361, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 2847885091968}, 'timestamp': '2025-09-10 02:51:01.040612', 'step': 21361, 'epoch': 3} {'type': 'loss', 'content': 0.10344165563583374, 'timestamp': '2025-09-10 02:51:01.043208', 'step': 21362, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 2847885091968}, 'timestamp': '2025-09-10 02:51:01.073455', 'step': 21362, 'epoch': 3} {'type': 'loss', 'content': 0.024554548785090446, 'timestamp': '2025-09-10 02:51:01.076459', 'step': 21363, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 3322488817984}, 'timestamp': '2025-09-10 02:51:01.108236', 'step': 21363, 'epoch': 3} {'type': 'loss', 'content': 0.07847138494253159, 'timestamp': '2025-09-10 02:51:01.131983', 'step': 21364, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 128], 'flops': 3797092544000}, 'timestamp': '2025-09-10 02:51:01.164840', 'step': 21364, 'epoch': 3} {'type': 'loss', 'content': 0.09975714981555939, 'timestamp': '2025-09-10 02:51:01.167261', 'step': 21365, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 3322488817984}, 'timestamp': '2025-09-10 02:51:01.197609', 'step': 21365, 'epoch': 3} {'type': 'loss', 'content': 0.09095953404903412, 'timestamp': '2025-09-10 02:51:01.200087', 'step': 21366, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 128], 'flops': 3797092544000}, 'timestamp': '2025-09-10 02:51:01.231201', 'step': 21366, 'epoch': 3} {'type': 'loss', 'content': 0.12904641032218933, 'timestamp': '2025-09-10 02:51:01.233668', 'step': 21367, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 2847885091968}, 'timestamp': '2025-09-10 02:51:01.268185', 'step': 21367, 'epoch': 3} {'type': 'loss', 'content': 0.07477905601263046, 'timestamp': '2025-09-10 02:51:01.293561', 'step': 21368, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 3322488817984}, 'timestamp': '2025-09-10 02:51:01.332187', 'step': 21368, 'epoch': 3} {'type': 'loss', 'content': 0.03876103088259697, 'timestamp': '2025-09-10 02:51:01.338912', 'step': 21369, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 160], 'flops': 4746299996032}, 'timestamp': '2025-09-10 02:51:01.376513', 'step': 21369, 'epoch': 3} {'type': 'loss', 'content': 0.043234683573246, 'timestamp': '2025-09-10 02:51:01.380151', 'step': 21370, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 3322488817984}, 'timestamp': '2025-09-10 02:51:01.414633', 'step': 21370, 'epoch': 3} {'type': 'loss', 'content': 0.02933511883020401, 'timestamp': '2025-09-10 02:51:01.419836', 'step': 21371, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 2847885091968}, 'timestamp': '2025-09-10 02:51:01.456655', 'step': 21371, 'epoch': 3} {'type': 'loss', 'content': 0.03224354609847069, 'timestamp': '2025-09-10 02:51:01.482768', 'step': 21372, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 3322488817984}, 'timestamp': '2025-09-10 02:51:01.517608', 'step': 21372, 'epoch': 3} {'type': 'loss', 'content': 0.08164989948272705, 'timestamp': '2025-09-10 02:51:01.522372', 'step': 21373, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 3322488817984}, 'timestamp': '2025-09-10 02:51:01.557888', 'step': 21373, 'epoch': 3} {'type': 'loss', 'content': 0.08755374699831009, 'timestamp': '2025-09-10 02:51:01.560607', 'step': 21374, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 160], 'flops': 4746299996032}, 'timestamp': '2025-09-10 02:51:01.592025', 'step': 21374, 'epoch': 3} {'type': 'loss', 'content': 0.06976457685232162, 'timestamp': '2025-09-10 02:51:01.595159', 'step': 21375, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 80], 'flops': 2373281365952}, 'timestamp': '2025-09-10 02:51:01.625582', 'step': 21375, 'epoch': 3} {'type': 'loss', 'content': 0.07242444157600403, 'timestamp': '2025-09-10 02:51:01.649179', 'step': 21376, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 2847885091968}, 'timestamp': '2025-09-10 02:51:01.680504', 'step': 21376, 'epoch': 3} {'type': 'loss', 'content': 0.09265267848968506, 'timestamp': '2025-09-10 02:51:01.683252', 'step': 21377, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 128], 'flops': 3797092544000}, 'timestamp': '2025-09-10 02:51:01.714743', 'step': 21377, 'epoch': 3} {'type': 'loss', 'content': 0.0565849132835865, 'timestamp': '2025-09-10 02:51:01.717163', 'step': 21378, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 3322488817984}, 'timestamp': '2025-09-10 02:51:01.747598', 'step': 21378, 'epoch': 3} {'type': 'loss', 'content': 0.12354057282209396, 'timestamp': '2025-09-10 02:51:01.751487', 'step': 21379, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 128], 'flops': 3797092544000}, 'timestamp': '2025-09-10 02:51:01.784215', 'step': 21379, 'epoch': 3} {'type': 'loss', 'content': 0.05457542836666107, 'timestamp': '2025-09-10 02:51:01.808409', 'step': 21380, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 3322488817984}, 'timestamp': '2025-09-10 02:51:01.838995', 'step': 21380, 'epoch': 3} {'type': 'loss', 'content': 0.049291014671325684, 'timestamp': '2025-09-10 02:51:01.841557', 'step': 21381, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 144], 'flops': 4271696270016}, 'timestamp': '2025-09-10 02:51:01.872783', 'step': 21381, 'epoch': 3} {'type': 'loss', 'content': 0.026545735076069832, 'timestamp': '2025-09-10 02:51:01.875257', 'step': 21382, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 144], 'flops': 4271696270016}, 'timestamp': '2025-09-10 02:51:01.906428', 'step': 21382, 'epoch': 3} {'type': 'loss', 'content': 0.10438894480466843, 'timestamp': '2025-09-10 02:51:01.910512', 'step': 21383, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 128], 'flops': 3797092544000}, 'timestamp': '2025-09-10 02:51:01.940951', 'step': 21383, 'epoch': 3} {'type': 'loss', 'content': 0.10063493251800537, 'timestamp': '2025-09-10 02:51:01.964820', 'step': 21384, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 3322488817984}, 'timestamp': '2025-09-10 02:51:01.996960', 'step': 21384, 'epoch': 3} {'type': 'loss', 'content': 0.100472092628479, 'timestamp': '2025-09-10 02:51:01.999839', 'step': 21385, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 3322488817984}, 'timestamp': '2025-09-10 02:51:02.030601', 'step': 21385, 'epoch': 3} {'type': 'loss', 'content': 0.06776866316795349, 'timestamp': '2025-09-10 02:51:02.033094', 'step': 21386, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 2847885091968}, 'timestamp': '2025-09-10 02:51:02.063405', 'step': 21386, 'epoch': 3} {'type': 'loss', 'content': 0.04520628601312637, 'timestamp': '2025-09-10 02:51:02.066439', 'step': 21387, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 3322488817984}, 'timestamp': '2025-09-10 02:51:02.097133', 'step': 21387, 'epoch': 3} {'type': 'loss', 'content': 0.12686863541603088, 'timestamp': '2025-09-10 02:51:02.121214', 'step': 21388, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 3322488817984}, 'timestamp': '2025-09-10 02:51:02.152842', 'step': 21388, 'epoch': 3} {'type': 'loss', 'content': 0.060192227363586426, 'timestamp': '2025-09-10 02:51:02.158667', 'step': 21389, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 3322488817984}, 'timestamp': '2025-09-10 02:51:02.193392', 'step': 21389, 'epoch': 3} {'type': 'loss', 'content': 0.02565649524331093, 'timestamp': '2025-09-10 02:51:02.198314', 'step': 21390, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 2847885091968}, 'timestamp': '2025-09-10 02:51:02.239876', 'step': 21390, 'epoch': 3} {'type': 'loss', 'content': 0.0652146264910698, 'timestamp': '2025-09-10 02:51:02.245599', 'step': 21391, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 3322488817984}, 'timestamp': '2025-09-10 02:51:02.286988', 'step': 21391, 'epoch': 3} {'type': 'loss', 'content': 0.04831286892294884, 'timestamp': '2025-09-10 02:51:02.311097', 'step': 21392, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 3322488817984}, 'timestamp': '2025-09-10 02:51:02.344865', 'step': 21392, 'epoch': 3} {'type': 'loss', 'content': 0.08630049228668213, 'timestamp': '2025-09-10 02:51:02.347349', 'step': 21393, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 2847885091968}, 'timestamp': '2025-09-10 02:51:02.377585', 'step': 21393, 'epoch': 3} {'type': 'loss', 'content': 0.061571113765239716, 'timestamp': '2025-09-10 02:51:02.380094', 'step': 21394, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 144], 'flops': 4271696270016}, 'timestamp': '2025-09-10 02:51:02.410019', 'step': 21394, 'epoch': 3} {'type': 'loss', 'content': 0.07218895852565765, 'timestamp': '2025-09-10 02:51:02.412703', 'step': 21395, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 2847885091968}, 'timestamp': '2025-09-10 02:51:02.445265', 'step': 21395, 'epoch': 3} {'type': 'loss', 'content': 0.023283017799258232, 'timestamp': '2025-09-10 02:51:02.469103', 'step': 21396, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 3322488817984}, 'timestamp': '2025-09-10 02:51:02.499306', 'step': 21396, 'epoch': 3} {'type': 'loss', 'content': 0.0794239342212677, 'timestamp': '2025-09-10 02:51:02.501724', 'step': 21397, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 3322488817984}, 'timestamp': '2025-09-10 02:51:02.532594', 'step': 21397, 'epoch': 3} {'type': 'loss', 'content': 0.043681930750608444, 'timestamp': '2025-09-10 02:51:02.535289', 'step': 21398, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 3322488817984}, 'timestamp': '2025-09-10 02:51:02.566176', 'step': 21398, 'epoch': 3} {'type': 'loss', 'content': 0.0612761564552784, 'timestamp': '2025-09-10 02:51:02.568901', 'step': 21399, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 3322488817984}, 'timestamp': '2025-09-10 02:51:02.599137', 'step': 21399, 'epoch': 3} {'type': 'loss', 'content': 0.1093774363398552, 'timestamp': '2025-09-10 02:51:02.622825', 'step': 21400, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 3322488817984}, 'timestamp': '2025-09-10 02:51:02.653568', 'step': 21400, 'epoch': 3} {'type': 'loss', 'content': 0.08354175090789795, 'timestamp': '2025-09-10 02:51:02.656086', 'step': 21401, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 2847885091968}, 'timestamp': '2025-09-10 02:51:02.685851', 'step': 21401, 'epoch': 3} {'type': 'loss', 'content': 0.05473707243800163, 'timestamp': '2025-09-10 02:51:02.688428', 'step': 21402, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 128], 'flops': 3797092544000}, 'timestamp': '2025-09-10 02:51:02.719191', 'step': 21402, 'epoch': 3} {'type': 'loss', 'content': 0.08096938580274582, 'timestamp': '2025-09-10 02:51:02.721162', 'step': 21403, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 3322488817984}, 'timestamp': '2025-09-10 02:51:02.751929', 'step': 21403, 'epoch': 3} {'type': 'loss', 'content': 0.07874861359596252, 'timestamp': '2025-09-10 02:51:02.775368', 'step': 21404, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 2847885091968}, 'timestamp': '2025-09-10 02:51:02.805815', 'step': 21404, 'epoch': 3} {'type': 'loss', 'content': 0.0761835128068924, 'timestamp': '2025-09-10 02:51:02.808237', 'step': 21405, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 160], 'flops': 4746299996032}, 'timestamp': '2025-09-10 02:51:02.838604', 'step': 21405, 'epoch': 3} {'type': 'loss', 'content': 0.07781845331192017, 'timestamp': '2025-09-10 02:51:02.843271', 'step': 21406, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 3322488817984}, 'timestamp': '2025-09-10 02:51:02.876517', 'step': 21406, 'epoch': 3} {'type': 'loss', 'content': 0.08499743044376373, 'timestamp': '2025-09-10 02:51:02.879037', 'step': 21407, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 80], 'flops': 2373281365952}, 'timestamp': '2025-09-10 02:51:02.909353', 'step': 21407, 'epoch': 3} {'type': 'loss', 'content': 0.053579844534397125, 'timestamp': '2025-09-10 02:51:02.933318', 'step': 21408, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 128], 'flops': 3797092544000}, 'timestamp': '2025-09-10 02:51:02.964500', 'step': 21408, 'epoch': 3} {'type': 'loss', 'content': 0.10700040310621262, 'timestamp': '2025-09-10 02:51:02.967108', 'step': 21409, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 2847885091968}, 'timestamp': '2025-09-10 02:51:02.998658', 'step': 21409, 'epoch': 3} {'type': 'loss', 'content': 0.0667879581451416, 'timestamp': '2025-09-10 02:51:03.000912', 'step': 21410, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 2847885091968}, 'timestamp': '2025-09-10 02:51:03.031823', 'step': 21410, 'epoch': 3} {'type': 'loss', 'content': 0.07243447750806808, 'timestamp': '2025-09-10 02:51:03.034251', 'step': 21411, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 144], 'flops': 4271696270016}, 'timestamp': '2025-09-10 02:51:03.064629', 'step': 21411, 'epoch': 3} {'type': 'loss', 'content': 0.08429578691720963, 'timestamp': '2025-09-10 02:51:03.088485', 'step': 21412, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 3322488817984}, 'timestamp': '2025-09-10 02:51:03.119428', 'step': 21412, 'epoch': 3} {'type': 'loss', 'content': 0.1303514540195465, 'timestamp': '2025-09-10 02:51:03.121802', 'step': 21413, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 2847885091968}, 'timestamp': '2025-09-10 02:51:03.157306', 'step': 21413, 'epoch': 3} {'type': 'loss', 'content': 0.04031726345419884, 'timestamp': '2025-09-10 02:51:03.159960', 'step': 21414, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 128], 'flops': 3797092544000}, 'timestamp': '2025-09-10 02:51:03.190468', 'step': 21414, 'epoch': 3} {'type': 'loss', 'content': 0.08356502652168274, 'timestamp': '2025-09-10 02:51:03.194518', 'step': 21415, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 3322488817984}, 'timestamp': '2025-09-10 02:51:03.228522', 'step': 21415, 'epoch': 3} {'type': 'loss', 'content': 0.07038770616054535, 'timestamp': '2025-09-10 02:51:03.252378', 'step': 21416, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 2847885091968}, 'timestamp': '2025-09-10 02:51:03.283575', 'step': 21416, 'epoch': 3} {'type': 'loss', 'content': 0.027972953394055367, 'timestamp': '2025-09-10 02:51:03.286329', 'step': 21417, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 128], 'flops': 3797092544000}, 'timestamp': '2025-09-10 02:51:03.316707', 'step': 21417, 'epoch': 3} {'type': 'loss', 'content': 0.08019670099020004, 'timestamp': '2025-09-10 02:51:03.319246', 'step': 21418, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 2847885091968}, 'timestamp': '2025-09-10 02:51:03.351313', 'step': 21418, 'epoch': 3} {'type': 'loss', 'content': 0.07616350054740906, 'timestamp': '2025-09-10 02:51:03.353498', 'step': 21419, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 144], 'flops': 4271696270016}, 'timestamp': '2025-09-10 02:51:03.383941', 'step': 21419, 'epoch': 3} {'type': 'loss', 'content': 0.0862906202673912, 'timestamp': '2025-09-10 02:51:03.407823', 'step': 21420, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 3322488817984}, 'timestamp': '2025-09-10 02:51:03.438433', 'step': 21420, 'epoch': 3} {'type': 'loss', 'content': 0.061927665024995804, 'timestamp': '2025-09-10 02:51:03.440961', 'step': 21421, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 144], 'flops': 4271696270016}, 'timestamp': '2025-09-10 02:51:03.471676', 'step': 21421, 'epoch': 3} {'type': 'loss', 'content': 0.07760512828826904, 'timestamp': '2025-09-10 02:51:03.474692', 'step': 21422, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 2847885091968}, 'timestamp': '2025-09-10 02:51:03.505822', 'step': 21422, 'epoch': 3} {'type': 'loss', 'content': 0.09843391180038452, 'timestamp': '2025-09-10 02:51:03.508989', 'step': 21423, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 128], 'flops': 3797092544000}, 'timestamp': '2025-09-10 02:51:03.540160', 'step': 21423, 'epoch': 3} {'type': 'loss', 'content': 0.07865074276924133, 'timestamp': '2025-09-10 02:51:03.564845', 'step': 21424, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 2847885091968}, 'timestamp': '2025-09-10 02:51:03.595178', 'step': 21424, 'epoch': 3} {'type': 'loss', 'content': 0.03362813964486122, 'timestamp': '2025-09-10 02:51:03.597775', 'step': 21425, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 2847885091968}, 'timestamp': '2025-09-10 02:51:03.629317', 'step': 21425, 'epoch': 3} {'type': 'loss', 'content': 0.0830456018447876, 'timestamp': '2025-09-10 02:51:03.633506', 'step': 21426, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 3322488817984}, 'timestamp': '2025-09-10 02:51:03.665345', 'step': 21426, 'epoch': 3} {'type': 'loss', 'content': 0.08931685239076614, 'timestamp': '2025-09-10 02:51:03.671231', 'step': 21427, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 3322488817984}, 'timestamp': '2025-09-10 02:51:03.708783', 'step': 21427, 'epoch': 3} {'type': 'loss', 'content': 0.07391534745693207, 'timestamp': '2025-09-10 02:51:03.733740', 'step': 21428, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 3322488817984}, 'timestamp': '2025-09-10 02:51:03.767899', 'step': 21428, 'epoch': 3} {'type': 'loss', 'content': 0.0640869215130806, 'timestamp': '2025-09-10 02:51:03.770331', 'step': 21429, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 2847885091968}, 'timestamp': '2025-09-10 02:51:03.802786', 'step': 21429, 'epoch': 3} {'type': 'loss', 'content': 0.0740167424082756, 'timestamp': '2025-09-10 02:51:03.805404', 'step': 21430, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 2847885091968}, 'timestamp': '2025-09-10 02:51:03.835930', 'step': 21430, 'epoch': 3} {'type': 'loss', 'content': 0.05687520653009415, 'timestamp': '2025-09-10 02:51:03.838976', 'step': 21431, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 2847885091968}, 'timestamp': '2025-09-10 02:51:03.872047', 'step': 21431, 'epoch': 3} {'type': 'loss', 'content': 0.09762407094240189, 'timestamp': '2025-09-10 02:51:03.895736', 'step': 21432, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 128], 'flops': 3797092544000}, 'timestamp': '2025-09-10 02:51:03.926794', 'step': 21432, 'epoch': 3} {'type': 'loss', 'content': 0.09257480502128601, 'timestamp': '2025-09-10 02:51:03.928669', 'step': 21433, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 2847885091968}, 'timestamp': '2025-09-10 02:51:03.961822', 'step': 21433, 'epoch': 3} {'type': 'loss', 'content': 0.07382841408252716, 'timestamp': '2025-09-10 02:51:03.964405', 'step': 21434, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 128], 'flops': 3797092544000}, 'timestamp': '2025-09-10 02:51:04.002562', 'step': 21434, 'epoch': 3} {'type': 'loss', 'content': 0.061811815947294235, 'timestamp': '2025-09-10 02:51:04.008705', 'step': 21435, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 2847885091968}, 'timestamp': '2025-09-10 02:51:04.050382', 'step': 21435, 'epoch': 3} {'type': 'loss', 'content': 0.0915006548166275, 'timestamp': '2025-09-10 02:51:04.073994', 'step': 21436, 'epoch': 3} {'type': 'flops', 'content': [{'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1582003754624}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1898404492032}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1898404492032}, {'type': 'perplexity', 'in_batch_dim': [8, 144], 'batch_size': 8, 'flops': 2847606704256}, {'type': 'perplexity', 'in_batch_dim': [8, 64], 'batch_size': 8, 'flops': 1265603017216}, {'type': 'perplexity', 'in_batch_dim': [8, 112], 'batch_size': 8, 'flops': 2214805229440}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1898404492032}, {'type': 'perplexity', 'in_batch_dim': [8, 112], 'batch_size': 8, 'flops': 2214805229440}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1898404492032}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1898404492032}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1898404492032}, {'type': 'perplexity', 'in_batch_dim': [8, 112], 'batch_size': 8, 'flops': 2214805229440}, {'type': 'perplexity', 'in_batch_dim': [8, 112], 'batch_size': 8, 'flops': 2214805229440}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1582003754624}, {'type': 'perplexity', 'in_batch_dim': [8, 144], 'batch_size': 8, 'flops': 2847606704256}, {'type': 'perplexity', 'in_batch_dim': [8, 112], 'batch_size': 8, 'flops': 2214805229440}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1582003754624}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1898404492032}, {'type': 'perplexity', 'in_batch_dim': [8, 64], 'batch_size': 8, 'flops': 1265603017216}, {'type': 'perplexity', 'in_batch_dim': [8, 64], 'batch_size': 8, 'flops': 1265603017216}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1898404492032}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1582003754624}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1582003754624}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1582003754624}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1582003754624}, {'type': 'perplexity', 'in_batch_dim': [8, 64], 'batch_size': 8, 'flops': 1265603017216}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1898404492032}, {'type': 'perplexity', 'in_batch_dim': [8, 64], 'batch_size': 8, 'flops': 1265603017216}, {'type': 'perplexity', 'in_batch_dim': [8, 64], 'batch_size': 8, 'flops': 1265603017216}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1582003754624}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1582003754624}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1582003754624}, {'type': 'perplexity', 'in_batch_dim': [8, 112], 'batch_size': 8, 'flops': 2214805229440}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1582003754624}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1582003754624}, {'type': 'perplexity', 'in_batch_dim': [8, 160], 'batch_size': 8, 'flops': 3164007441664}, {'type': 'perplexity', 'in_batch_dim': [8, 64], 'batch_size': 8, 'flops': 1265603017216}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1898404492032}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1898404492032}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1898404492032}, {'type': 'perplexity', 'in_batch_dim': [8, 64], 'batch_size': 8, 'flops': 1265603017216}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1582003754624}, {'type': 'perplexity', 'in_batch_dim': [8, 64], 'batch_size': 8, 'flops': 1265603017216}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1582003754624}, {'type': 'perplexity', 'in_batch_dim': [8, 64], 'batch_size': 8, 'flops': 1265603017216}, {'type': 'perplexity', 'in_batch_dim': [8, 112], 'batch_size': 8, 'flops': 2214805229440}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1582003754624}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1898404492032}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1898404492032}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1898404492032}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1898404492032}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1898404492032}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1898404492032}, {'type': 'perplexity', 'in_batch_dim': [8, 64], 'batch_size': 8, 'flops': 1265603017216}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1898404492032}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1582003754624}, {'type': 'perplexity', 'in_batch_dim': [8, 64], 'batch_size': 8, 'flops': 1265603017216}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1898404492032}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1898404492032}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1898404492032}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1898404492032}, {'type': 'perplexity', 'in_batch_dim': [8, 128], 'batch_size': 8, 'flops': 2531205966848}, {'type': 'perplexity', 'in_batch_dim': [8, 112], 'batch_size': 8, 'flops': 2214805229440}, {'type': 'perplexity', 'in_batch_dim': [8, 64], 'batch_size': 8, 'flops': 1265603017216}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1582003754624}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1898404492032}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1582003754624}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1898404492032}, {'type': 'perplexity', 'in_batch_dim': [8, 64], 'batch_size': 8, 'flops': 1265603017216}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1898404492032}, {'type': 'perplexity', 'in_batch_dim': [8, 112], 'batch_size': 8, 'flops': 2214805229440}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1898404492032}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1582003754624}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1582003754624}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1582003754624}, {'type': 'perplexity', 'in_batch_dim': [8, 64], 'batch_size': 8, 'flops': 1265603017216}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1582003754624}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1898404492032}, {'type': 'perplexity', 'in_batch_dim': [8, 64], 'batch_size': 8, 'flops': 1265603017216}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1582003754624}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1898404492032}, {'type': 'perplexity', 'in_batch_dim': [8, 64], 'batch_size': 8, 'flops': 1265603017216}, {'type': 'perplexity', 'in_batch_dim': [8, 112], 'batch_size': 8, 'flops': 2214805229440}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1582003754624}, {'type': 'perplexity', 'in_batch_dim': [8, 144], 'batch_size': 8, 'flops': 2847606704256}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1582003754624}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1582003754624}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1582003754624}, {'type': 'perplexity', 'in_batch_dim': [8, 64], 'batch_size': 8, 'flops': 1265603017216}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1898404492032}, {'type': 'perplexity', 'in_batch_dim': [8, 112], 'batch_size': 8, 'flops': 2214805229440}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1898404492032}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1582003754624}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1582003754624}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1582003754624}, {'type': 'perplexity', 'in_batch_dim': [8, 112], 'batch_size': 8, 'flops': 2214805229440}, {'type': 'perplexity', 'in_batch_dim': [8, 64], 'batch_size': 8, 'flops': 1265603017216}, {'type': 'perplexity', 'in_batch_dim': [8, 112], 'batch_size': 8, 'flops': 2214805229440}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1582003754624}, {'type': 'perplexity', 'in_batch_dim': [8, 64], 'batch_size': 8, 'flops': 1265603017216}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1582003754624}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1582003754624}, {'type': 'perplexity', 'in_batch_dim': [8, 64], 'batch_size': 8, 'flops': 1265603017216}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1582003754624}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1582003754624}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1582003754624}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1898404492032}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1582003754624}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1582003754624}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1898404492032}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1898404492032}, {'type': 'perplexity', 'in_batch_dim': [8, 112], 'batch_size': 8, 'flops': 2214805229440}, {'type': 'perplexity', 'in_batch_dim': [8, 64], 'batch_size': 8, 'flops': 1265603017216}, {'type': 'perplexity', 'in_batch_dim': [8, 112], 'batch_size': 8, 'flops': 2214805229440}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1898404492032}, {'type': 'perplexity', 'in_batch_dim': [8, 112], 'batch_size': 8, 'flops': 2214805229440}, {'type': 'perplexity', 'in_batch_dim': [8, 128], 'batch_size': 8, 'flops': 2531205966848}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1582003754624}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1898404492032}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1898404492032}, {'type': 'perplexity', 'in_batch_dim': [8, 192], 'batch_size': 8, 'flops': 3796808916480}, {'type': 'perplexity', 'in_batch_dim': [8, 64], 'batch_size': 8, 'flops': 1265603017216}, {'type': 'perplexity', 'in_batch_dim': [8, 144], 'batch_size': 8, 'flops': 2847606704256}, {'type': 'perplexity', 'in_batch_dim': [8, 64], 'batch_size': 8, 'flops': 1265603017216}, {'type': 'perplexity', 'in_batch_dim': [8, 144], 'batch_size': 8, 'flops': 2847606704256}, {'type': 'perplexity', 'in_batch_dim': [8, 64], 'batch_size': 8, 'flops': 1265603017216}, {'type': 'perplexity', 'in_batch_dim': [8, 64], 'batch_size': 8, 'flops': 1265603017216}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1898404492032}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1898404492032}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1582003754624}, {'type': 'perplexity', 'in_batch_dim': [8, 128], 'batch_size': 8, 'flops': 2531205966848}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1898404492032}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1898404492032}, {'type': 'perplexity', 'in_batch_dim': [8, 112], 'batch_size': 8, 'flops': 2214805229440}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1582003754624}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1582003754624}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1898404492032}, {'type': 'perplexity', 'in_batch_dim': [8, 64], 'batch_size': 8, 'flops': 1265603017216}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1898404492032}, {'type': 'perplexity', 'in_batch_dim': [8, 112], 'batch_size': 8, 'flops': 2214805229440}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1898404492032}, {'type': 'perplexity', 'in_batch_dim': [8, 64], 'batch_size': 8, 'flops': 1265603017216}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1582003754624}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1898404492032}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1898404492032}, {'type': 'perplexity', 'in_batch_dim': [8, 64], 'batch_size': 8, 'flops': 1265603017216}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1898404492032}, {'type': 'perplexity', 'in_batch_dim': [8, 64], 'batch_size': 8, 'flops': 1265603017216}, {'type': 'perplexity', 'in_batch_dim': [8, 112], 'batch_size': 8, 'flops': 2214805229440}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1582003754624}, {'type': 'perplexity', 'in_batch_dim': [8, 128], 'batch_size': 8, 'flops': 2531205966848}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1582003754624}, {'type': 'perplexity', 'in_batch_dim': [8, 112], 'batch_size': 8, 'flops': 2214805229440}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1898404492032}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1582003754624}, {'type': 'perplexity', 'in_batch_dim': [8, 112], 'batch_size': 8, 'flops': 2214805229440}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1582003754624}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1898404492032}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1898404492032}, {'type': 'perplexity', 'in_batch_dim': [8, 128], 'batch_size': 8, 'flops': 2531205966848}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1898404492032}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1898404492032}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1582003754624}, {'type': 'perplexity', 'in_batch_dim': [8, 112], 'batch_size': 8, 'flops': 2214805229440}, {'type': 'perplexity', 'in_batch_dim': [8, 128], 'batch_size': 8, 'flops': 2531205966848}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1898404492032}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1582003754624}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1582003754624}, {'type': 'perplexity', 'in_batch_dim': [8, 128], 'batch_size': 8, 'flops': 2531205966848}, {'type': 'perplexity', 'in_batch_dim': [8, 112], 'batch_size': 8, 'flops': 2214805229440}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1582003754624}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1582003754624}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1898404492032}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1582003754624}, {'type': 'perplexity', 'in_batch_dim': [8, 64], 'batch_size': 8, 'flops': 1265603017216}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1582003754624}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1582003754624}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1582003754624}, {'type': 'perplexity', 'in_batch_dim': [8, 112], 'batch_size': 8, 'flops': 2214805229440}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1582003754624}, {'type': 'perplexity', 'in_batch_dim': [8, 64], 'batch_size': 8, 'flops': 1265603017216}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1582003754624}, {'type': 'perplexity', 'in_batch_dim': [8, 128], 'batch_size': 8, 'flops': 2531205966848}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1898404492032}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1898404492032}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1582003754624}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1898404492032}, {'type': 'perplexity', 'in_batch_dim': [8, 64], 'batch_size': 8, 'flops': 1265603017216}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1898404492032}, {'type': 'perplexity', 'in_batch_dim': [8, 112], 'batch_size': 8, 'flops': 2214805229440}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1898404492032}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1582003754624}, {'type': 'perplexity', 'in_batch_dim': [8, 112], 'batch_size': 8, 'flops': 2214805229440}, {'type': 'perplexity', 'in_batch_dim': [8, 112], 'batch_size': 8, 'flops': 2214805229440}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1898404492032}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1898404492032}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1898404492032}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1898404492032}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1582003754624}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1582003754624}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1582003754624}, {'type': 'perplexity', 'in_batch_dim': [8, 112], 'batch_size': 8, 'flops': 2214805229440}, {'type': 'perplexity', 'in_batch_dim': [8, 64], 'batch_size': 8, 'flops': 1265603017216}, {'type': 'perplexity', 'in_batch_dim': [8, 144], 'batch_size': 8, 'flops': 2847606704256}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1898404492032}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1898404492032}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1898404492032}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1898404492032}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1898404492032}, {'type': 'perplexity', 'in_batch_dim': [8, 112], 'batch_size': 8, 'flops': 2214805229440}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1582003754624}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1582003754624}, {'type': 'perplexity', 'in_batch_dim': [8, 64], 'batch_size': 8, 'flops': 1265603017216}, {'type': 'perplexity', 'in_batch_dim': [8, 112], 'batch_size': 8, 'flops': 2214805229440}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1582003754624}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1898404492032}, {'type': 'perplexity', 'in_batch_dim': [8, 64], 'batch_size': 8, 'flops': 1265603017216}, {'type': 'perplexity', 'in_batch_dim': [8, 112], 'batch_size': 8, 'flops': 2214805229440}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1898404492032}, {'type': 'perplexity', 'in_batch_dim': [8, 64], 'batch_size': 8, 'flops': 1265603017216}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1582003754624}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1582003754624}, {'type': 'perplexity', 'in_batch_dim': [8, 64], 'batch_size': 8, 'flops': 1265603017216}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1898404492032}, {'type': 'perplexity', 'in_batch_dim': [8, 64], 'batch_size': 8, 'flops': 1265603017216}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1582003754624}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1898404492032}, {'type': 'perplexity', 'in_batch_dim': [8, 112], 'batch_size': 8, 'flops': 2214805229440}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1582003754624}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1898404492032}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1582003754624}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1898404492032}, {'type': 'perplexity', 'in_batch_dim': [8, 112], 'batch_size': 8, 'flops': 2214805229440}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1898404492032}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1898404492032}, {'type': 'perplexity', 'in_batch_dim': [8, 64], 'batch_size': 8, 'flops': 1265603017216}, {'type': 'perplexity', 'in_batch_dim': [8, 64], 'batch_size': 8, 'flops': 1265603017216}, {'type': 'perplexity', 'in_batch_dim': [8, 128], 'batch_size': 8, 'flops': 2531205966848}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1582003754624}, {'type': 'perplexity', 'in_batch_dim': [8, 64], 'batch_size': 8, 'flops': 1265603017216}, {'type': 'perplexity', 'in_batch_dim': [8, 112], 'batch_size': 8, 'flops': 2214805229440}, {'type': 'perplexity', 'in_batch_dim': [8, 112], 'batch_size': 8, 'flops': 2214805229440}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1898404492032}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1582003754624}, {'type': 'perplexity', 'in_batch_dim': [8, 128], 'batch_size': 8, 'flops': 2531205966848}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1898404492032}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1582003754624}, {'type': 'perplexity', 'in_batch_dim': [8, 112], 'batch_size': 8, 'flops': 2214805229440}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1898404492032}, {'type': 'perplexity', 'in_batch_dim': [8, 64], 'batch_size': 8, 'flops': 1265603017216}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1582003754624}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1898404492032}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1898404492032}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1582003754624}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1582003754624}, {'type': 'perplexity', 'in_batch_dim': [8, 64], 'batch_size': 8, 'flops': 1265603017216}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1898404492032}, {'type': 'perplexity', 'in_batch_dim': [8, 112], 'batch_size': 8, 'flops': 2214805229440}, {'type': 'perplexity', 'in_batch_dim': [8, 64], 'batch_size': 8, 'flops': 1265603017216}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1898404492032}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1582003754624}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1582003754624}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1898404492032}, {'type': 'perplexity', 'in_batch_dim': [8, 112], 'batch_size': 8, 'flops': 2214805229440}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1898404492032}, {'type': 'perplexity', 'in_batch_dim': [8, 112], 'batch_size': 8, 'flops': 2214805229440}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1582003754624}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1582003754624}, {'type': 'perplexity', 'in_batch_dim': [8, 64], 'batch_size': 8, 'flops': 1265603017216}, {'type': 'perplexity', 'in_batch_dim': [8, 112], 'batch_size': 8, 'flops': 2214805229440}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1582003754624}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1898404492032}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1582003754624}, {'type': 'perplexity', 'in_batch_dim': [8, 112], 'batch_size': 8, 'flops': 2214805229440}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1898404492032}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1898404492032}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1582003754624}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1898404492032}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1582003754624}, {'type': 'perplexity', 'in_batch_dim': [8, 112], 'batch_size': 8, 'flops': 2214805229440}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1582003754624}, {'type': 'perplexity', 'in_batch_dim': [8, 64], 'batch_size': 8, 'flops': 1265603017216}, {'type': 'perplexity', 'in_batch_dim': [8, 64], 'batch_size': 8, 'flops': 1265603017216}, {'type': 'perplexity', 'in_batch_dim': [8, 48], 'batch_size': 8, 'flops': 949202279808}, {'type': 'perplexity', 'in_batch_dim': [8, 112], 'batch_size': 8, 'flops': 2214805229440}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1582003754624}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1898404492032}, {'type': 'perplexity', 'in_batch_dim': [8, 128], 'batch_size': 8, 'flops': 2531205966848}, {'type': 'perplexity', 'in_batch_dim': [8, 112], 'batch_size': 8, 'flops': 2214805229440}, {'type': 'perplexity', 'in_batch_dim': [8, 128], 'batch_size': 8, 'flops': 2531205966848}, {'type': 'perplexity', 'in_batch_dim': [8, 112], 'batch_size': 8, 'flops': 2214805229440}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1582003754624}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1898404492032}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1582003754624}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1582003754624}, {'type': 'perplexity', 'in_batch_dim': [8, 112], 'batch_size': 8, 'flops': 2214805229440}, {'type': 'perplexity', 'in_batch_dim': [8, 112], 'batch_size': 8, 'flops': 2214805229440}, {'type': 'perplexity', 'in_batch_dim': [8, 64], 'batch_size': 8, 'flops': 1265603017216}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1898404492032}, {'type': 'perplexity', 'in_batch_dim': [8, 112], 'batch_size': 8, 'flops': 2214805229440}, {'type': 'perplexity', 'in_batch_dim': [8, 64], 'batch_size': 8, 'flops': 1265603017216}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1898404492032}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1582003754624}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1898404492032}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1898404492032}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1898404492032}, {'type': 'perplexity', 'in_batch_dim': [8, 48], 'batch_size': 8, 'flops': 949202279808}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1898404492032}, {'type': 'perplexity', 'in_batch_dim': [8, 112], 'batch_size': 8, 'flops': 2214805229440}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1582003754624}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1898404492032}, {'type': 'perplexity', 'in_batch_dim': [8, 64], 'batch_size': 8, 'flops': 1265603017216}, {'type': 'perplexity', 'in_batch_dim': [8, 112], 'batch_size': 8, 'flops': 2214805229440}, {'type': 'perplexity', 'in_batch_dim': [8, 64], 'batch_size': 8, 'flops': 1265603017216}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1582003754624}, {'type': 'perplexity', 'in_batch_dim': [8, 64], 'batch_size': 8, 'flops': 1265603017216}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1582003754624}, {'type': 'perplexity', 'in_batch_dim': [8, 112], 'batch_size': 8, 'flops': 2214805229440}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1582003754624}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1582003754624}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1582003754624}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1582003754624}, {'type': 'perplexity', 'in_batch_dim': [8, 112], 'batch_size': 8, 'flops': 2214805229440}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1582003754624}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1898404492032}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1898404492032}, {'type': 'perplexity', 'in_batch_dim': [8, 112], 'batch_size': 8, 'flops': 2214805229440}, {'type': 'perplexity', 'in_batch_dim': [8, 144], 'batch_size': 8, 'flops': 2847606704256}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1582003754624}, {'type': 'perplexity', 'in_batch_dim': [8, 64], 'batch_size': 8, 'flops': 1265603017216}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1898404492032}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1898404492032}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1582003754624}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1898404492032}, {'type': 'perplexity', 'in_batch_dim': [8, 128], 'batch_size': 8, 'flops': 2531205966848}, {'type': 'perplexity', 'in_batch_dim': [8, 112], 'batch_size': 8, 'flops': 2214805229440}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1898404492032}, {'type': 'perplexity', 'in_batch_dim': [8, 64], 'batch_size': 8, 'flops': 1265603017216}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1898404492032}, {'type': 'perplexity', 'in_batch_dim': [8, 160], 'batch_size': 8, 'flops': 3164007441664}, {'type': 'perplexity', 'in_batch_dim': [8, 64], 'batch_size': 8, 'flops': 1265603017216}, {'type': 'perplexity', 'in_batch_dim': [8, 112], 'batch_size': 8, 'flops': 2214805229440}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1898404492032}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1898404492032}, {'type': 'perplexity', 'in_batch_dim': [8, 144], 'batch_size': 8, 'flops': 2847606704256}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1582003754624}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1898404492032}, {'type': 'perplexity', 'in_batch_dim': [8, 64], 'batch_size': 8, 'flops': 1265603017216}, {'type': 'perplexity', 'in_batch_dim': [8, 64], 'batch_size': 8, 'flops': 1265603017216}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1898404492032}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1898404492032}, {'type': 'perplexity', 'in_batch_dim': [8, 112], 'batch_size': 8, 'flops': 2214805229440}, {'type': 'perplexity', 'in_batch_dim': [8, 112], 'batch_size': 8, 'flops': 2214805229440}, {'type': 'perplexity', 'in_batch_dim': [8, 128], 'batch_size': 8, 'flops': 2531205966848}, {'type': 'perplexity', 'in_batch_dim': [8, 112], 'batch_size': 8, 'flops': 2214805229440}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1582003754624}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1582003754624}, {'type': 'perplexity', 'in_batch_dim': [8, 112], 'batch_size': 8, 'flops': 2214805229440}, {'type': 'perplexity', 'in_batch_dim': [8, 128], 'batch_size': 8, 'flops': 2531205966848}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1582003754624}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1582003754624}, {'type': 'perplexity', 'in_batch_dim': [8, 64], 'batch_size': 8, 'flops': 1265603017216}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1898404492032}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1582003754624}, {'type': 'perplexity', 'in_batch_dim': [8, 112], 'batch_size': 8, 'flops': 2214805229440}, {'type': 'perplexity', 'in_batch_dim': [8, 64], 'batch_size': 8, 'flops': 1265603017216}, {'type': 'perplexity', 'in_batch_dim': [8, 64], 'batch_size': 8, 'flops': 1265603017216}, {'type': 'perplexity', 'in_batch_dim': [8, 128], 'batch_size': 8, 'flops': 2531205966848}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1898404492032}, {'type': 'perplexity', 'in_batch_dim': [8, 112], 'batch_size': 8, 'flops': 2214805229440}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1582003754624}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1582003754624}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1582003754624}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1582003754624}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1582003754624}, {'type': 'perplexity', 'in_batch_dim': [8, 112], 'batch_size': 8, 'flops': 2214805229440}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1898404492032}, {'type': 'perplexity', 'in_batch_dim': [8, 112], 'batch_size': 8, 'flops': 2214805229440}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1898404492032}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1582003754624}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1582003754624}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1898404492032}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1898404492032}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1582003754624}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1582003754624}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1582003754624}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1582003754624}, {'type': 'perplexity', 'in_batch_dim': [8, 48], 'batch_size': 8, 'flops': 949202279808}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1582003754624}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1898404492032}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1582003754624}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1898404492032}, {'type': 'perplexity', 'in_batch_dim': [8, 112], 'batch_size': 8, 'flops': 2214805229440}, {'type': 'perplexity', 'in_batch_dim': [8, 128], 'batch_size': 8, 'flops': 2531205966848}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1898404492032}, {'type': 'perplexity', 'in_batch_dim': [8, 64], 'batch_size': 8, 'flops': 1265603017216}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1898404492032}, {'type': 'perplexity', 'in_batch_dim': [8, 64], 'batch_size': 8, 'flops': 1265603017216}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1582003754624}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1582003754624}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1582003754624}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1582003754624}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1898404492032}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1582003754624}, {'type': 'perplexity', 'in_batch_dim': [8, 112], 'batch_size': 8, 'flops': 2214805229440}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1898404492032}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1582003754624}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1898404492032}, {'type': 'perplexity', 'in_batch_dim': [8, 64], 'batch_size': 8, 'flops': 1265603017216}, {'type': 'perplexity', 'in_batch_dim': [8, 64], 'batch_size': 8, 'flops': 1265603017216}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1898404492032}, {'type': 'perplexity', 'in_batch_dim': [8, 64], 'batch_size': 8, 'flops': 1265603017216}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1582003754624}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1582003754624}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1582003754624}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1582003754624}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1582003754624}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1582003754624}, {'type': 'perplexity', 'in_batch_dim': [8, 64], 'batch_size': 8, 'flops': 1265603017216}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1582003754624}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1582003754624}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1898404492032}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1898404492032}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1898404492032}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1898404492032}, {'type': 'perplexity', 'in_batch_dim': [8, 128], 'batch_size': 8, 'flops': 2531205966848}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1582003754624}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1898404492032}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1898404492032}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1898404492032}, {'type': 'perplexity', 'in_batch_dim': [8, 64], 'batch_size': 8, 'flops': 1265603017216}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1898404492032}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1898404492032}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1582003754624}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1898404492032}, {'type': 'perplexity', 'in_batch_dim': [8, 112], 'batch_size': 8, 'flops': 2214805229440}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1898404492032}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1582003754624}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1898404492032}, {'type': 'perplexity', 'in_batch_dim': [8, 64], 'batch_size': 8, 'flops': 1265603017216}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1898404492032}, {'type': 'perplexity', 'in_batch_dim': [8, 112], 'batch_size': 8, 'flops': 2214805229440}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1898404492032}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1898404492032}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1582003754624}, {'type': 'perplexity', 'in_batch_dim': [8, 64], 'batch_size': 8, 'flops': 1265603017216}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1582003754624}, {'type': 'perplexity', 'in_batch_dim': [8, 128], 'batch_size': 8, 'flops': 2531205966848}, {'type': 'perplexity', 'in_batch_dim': [8, 64], 'batch_size': 8, 'flops': 1265603017216}, {'type': 'perplexity', 'in_batch_dim': [8, 128], 'batch_size': 8, 'flops': 2531205966848}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1898404492032}, {'type': 'perplexity', 'in_batch_dim': [8, 112], 'batch_size': 8, 'flops': 2214805229440}, {'type': 'perplexity', 'in_batch_dim': [8, 112], 'batch_size': 8, 'flops': 2214805229440}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1898404492032}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1898404492032}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1582003754624}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1582003754624}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1582003754624}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1898404492032}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1582003754624}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1582003754624}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1898404492032}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1582003754624}, {'type': 'perplexity', 'in_batch_dim': [8, 64], 'batch_size': 8, 'flops': 1265603017216}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1898404492032}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1898404492032}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1898404492032}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1898404492032}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1582003754624}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1898404492032}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1582003754624}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1582003754624}, {'type': 'perplexity', 'in_batch_dim': [8, 112], 'batch_size': 8, 'flops': 2214805229440}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1582003754624}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1898404492032}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1582003754624}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1582003754624}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1582003754624}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1898404492032}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1582003754624}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1898404492032}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1582003754624}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1582003754624}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1898404492032}, {'type': 'perplexity', 'in_batch_dim': [8, 112], 'batch_size': 8, 'flops': 2214805229440}, {'type': 'perplexity', 'in_batch_dim': [8, 128], 'batch_size': 8, 'flops': 2531205966848}, {'type': 'perplexity', 'in_batch_dim': [8, 112], 'batch_size': 8, 'flops': 2214805229440}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1582003754624}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1898404492032}, {'type': 'perplexity', 'in_batch_dim': [8, 64], 'batch_size': 8, 'flops': 1265603017216}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1898404492032}, {'type': 'perplexity', 'in_batch_dim': [8, 64], 'batch_size': 8, 'flops': 1265603017216}, {'type': 'perplexity', 'in_batch_dim': [8, 144], 'batch_size': 8, 'flops': 2847606704256}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1582003754624}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1898404492032}, {'type': 'perplexity', 'in_batch_dim': [8, 112], 'batch_size': 8, 'flops': 2214805229440}, {'type': 'perplexity', 'in_batch_dim': [8, 144], 'batch_size': 8, 'flops': 2847606704256}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1582003754624}, {'type': 'perplexity', 'in_batch_dim': [8, 112], 'batch_size': 8, 'flops': 2214805229440}, {'type': 'perplexity', 'in_batch_dim': [8, 64], 'batch_size': 8, 'flops': 1265603017216}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1582003754624}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1898404492032}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1898404492032}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1582003754624}, {'type': 'perplexity', 'in_batch_dim': [8, 112], 'batch_size': 8, 'flops': 2214805229440}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1582003754624}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1582003754624}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1582003754624}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1582003754624}, {'type': 'perplexity', 'in_batch_dim': [8, 112], 'batch_size': 8, 'flops': 2214805229440}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1898404492032}, {'type': 'perplexity', 'in_batch_dim': [8, 128], 'batch_size': 8, 'flops': 2531205966848}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1582003754624}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1582003754624}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1898404492032}, {'type': 'perplexity', 'in_batch_dim': [8, 64], 'batch_size': 8, 'flops': 1265603017216}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1898404492032}, {'type': 'perplexity', 'in_batch_dim': [8, 64], 'batch_size': 8, 'flops': 1265603017216}, {'type': 'perplexity', 'in_batch_dim': [8, 112], 'batch_size': 8, 'flops': 2214805229440}, {'type': 'perplexity', 'in_batch_dim': [8, 112], 'batch_size': 8, 'flops': 2214805229440}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1582003754624}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1898404492032}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1582003754624}, {'type': 'perplexity', 'in_batch_dim': [8, 64], 'batch_size': 8, 'flops': 1265603017216}, {'type': 'perplexity', 'in_batch_dim': [8, 64], 'batch_size': 8, 'flops': 1265603017216}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1898404492032}, {'type': 'perplexity', 'in_batch_dim': [8, 64], 'batch_size': 8, 'flops': 1265603017216}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1898404492032}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1898404492032}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1582003754624}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1898404492032}, {'type': 'perplexity', 'in_batch_dim': [8, 144], 'batch_size': 8, 'flops': 2847606704256}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1898404492032}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1582003754624}, {'type': 'perplexity', 'in_batch_dim': [8, 128], 'batch_size': 8, 'flops': 2531205966848}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1898404492032}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1898404492032}, {'type': 'perplexity', 'in_batch_dim': [8, 112], 'batch_size': 8, 'flops': 2214805229440}, {'type': 'perplexity', 'in_batch_dim': [8, 112], 'batch_size': 8, 'flops': 2214805229440}, {'type': 'perplexity', 'in_batch_dim': [8, 64], 'batch_size': 8, 'flops': 1265603017216}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1582003754624}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1898404492032}, {'type': 'perplexity', 'in_batch_dim': [8, 128], 'batch_size': 8, 'flops': 2531205966848}, {'type': 'perplexity', 'in_batch_dim': [8, 144], 'batch_size': 8, 'flops': 2847606704256}, {'type': 'perplexity', 'in_batch_dim': [8, 112], 'batch_size': 8, 'flops': 2214805229440}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1582003754624}, {'type': 'perplexity', 'in_batch_dim': [8, 64], 'batch_size': 8, 'flops': 1265603017216}, {'type': 'perplexity', 'in_batch_dim': [8, 64], 'batch_size': 8, 'flops': 1265603017216}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1898404492032}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1582003754624}, {'type': 'perplexity', 'in_batch_dim': [8, 64], 'batch_size': 8, 'flops': 1265603017216}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1898404492032}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1582003754624}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1898404492032}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1898404492032}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1582003754624}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1582003754624}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1582003754624}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1582003754624}, {'type': 'perplexity', 'in_batch_dim': [3, 48], 'batch_size': 8, 'flops': 949202279808}], 'timestamp': '2025-09-10 02:51:12.428708', 'step': 21436, 'epoch': 3} {'type': 'pplx', 'content': 8669.759782891722, 'timestamp': '2025-09-10 02:51:12.432272', 'step': 21436, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 2847885091968}, 'timestamp': '2025-09-10 02:51:12.462650', 'step': 21436, 'epoch': 3} {'type': 'loss', 'content': 0.032434552907943726, 'timestamp': '2025-09-10 02:51:12.466880', 'step': 21437, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 3322488817984}, 'timestamp': '2025-09-10 02:51:12.497684', 'step': 21437, 'epoch': 3} {'type': 'loss', 'content': 0.07569286227226257, 'timestamp': '2025-09-10 02:51:12.500250', 'step': 21438, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 2847885091968}, 'timestamp': '2025-09-10 02:51:12.531462', 'step': 21438, 'epoch': 3} {'type': 'loss', 'content': 0.14535649120807648, 'timestamp': '2025-09-10 02:51:12.534440', 'step': 21439, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 3322488817984}, 'timestamp': '2025-09-10 02:51:12.565476', 'step': 21439, 'epoch': 3} {'type': 'loss', 'content': 0.036472469568252563, 'timestamp': '2025-09-10 02:51:12.589611', 'step': 21440, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 3322488817984}, 'timestamp': '2025-09-10 02:51:12.620298', 'step': 21440, 'epoch': 3} {'type': 'loss', 'content': 0.02754337154328823, 'timestamp': '2025-09-10 02:51:12.623589', 'step': 21441, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 128], 'flops': 3797092544000}, 'timestamp': '2025-09-10 02:51:12.655993', 'step': 21441, 'epoch': 3} {'type': 'loss', 'content': 0.05750912427902222, 'timestamp': '2025-09-10 02:51:12.659234', 'step': 21442, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 2847885091968}, 'timestamp': '2025-09-10 02:51:12.691653', 'step': 21442, 'epoch': 3} {'type': 'loss', 'content': 0.0450347401201725, 'timestamp': '2025-09-10 02:51:12.694241', 'step': 21443, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 3322488817984}, 'timestamp': '2025-09-10 02:51:12.727154', 'step': 21443, 'epoch': 3} {'type': 'loss', 'content': 0.05324319005012512, 'timestamp': '2025-09-10 02:51:12.751207', 'step': 21444, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 3322488817984}, 'timestamp': '2025-09-10 02:51:12.783233', 'step': 21444, 'epoch': 3} {'type': 'loss', 'content': 0.08496424555778503, 'timestamp': '2025-09-10 02:51:12.785648', 'step': 21445, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 3322488817984}, 'timestamp': '2025-09-10 02:51:12.816555', 'step': 21445, 'epoch': 3} {'type': 'loss', 'content': 0.03125873580574989, 'timestamp': '2025-09-10 02:51:12.819311', 'step': 21446, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 128], 'flops': 3797092544000}, 'timestamp': '2025-09-10 02:51:12.850390', 'step': 21446, 'epoch': 3} {'type': 'loss', 'content': 0.1647171527147293, 'timestamp': '2025-09-10 02:51:12.854554', 'step': 21447, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 3322488817984}, 'timestamp': '2025-09-10 02:51:12.886423', 'step': 21447, 'epoch': 3} {'type': 'loss', 'content': 0.04831966012716293, 'timestamp': '2025-09-10 02:51:12.910711', 'step': 21448, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 2847885091968}, 'timestamp': '2025-09-10 02:51:12.941306', 'step': 21448, 'epoch': 3} {'type': 'loss', 'content': 0.04058980196714401, 'timestamp': '2025-09-10 02:51:12.943844', 'step': 21449, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 3322488817984}, 'timestamp': '2025-09-10 02:51:12.979027', 'step': 21449, 'epoch': 3} {'type': 'loss', 'content': 0.09930326044559479, 'timestamp': '2025-09-10 02:51:12.982133', 'step': 21450, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 128], 'flops': 3797092544000}, 'timestamp': '2025-09-10 02:51:13.013131', 'step': 21450, 'epoch': 3} {'type': 'loss', 'content': 0.09798451513051987, 'timestamp': '2025-09-10 02:51:13.015865', 'step': 21451, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 2847885091968}, 'timestamp': '2025-09-10 02:51:13.046778', 'step': 21451, 'epoch': 3} {'type': 'loss', 'content': 0.08938244730234146, 'timestamp': '2025-09-10 02:51:13.070901', 'step': 21452, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 144], 'flops': 4271696270016}, 'timestamp': '2025-09-10 02:51:13.104960', 'step': 21452, 'epoch': 3} {'type': 'loss', 'content': 0.04927755147218704, 'timestamp': '2025-09-10 02:51:13.107921', 'step': 21453, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 3322488817984}, 'timestamp': '2025-09-10 02:51:13.139557', 'step': 21453, 'epoch': 3} {'type': 'loss', 'content': 0.11497550457715988, 'timestamp': '2025-09-10 02:51:13.143085', 'step': 21454, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 2847885091968}, 'timestamp': '2025-09-10 02:51:13.178793', 'step': 21454, 'epoch': 3} {'type': 'loss', 'content': 0.007642976474016905, 'timestamp': '2025-09-10 02:51:13.181266', 'step': 21455, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 3322488817984}, 'timestamp': '2025-09-10 02:51:13.213418', 'step': 21455, 'epoch': 3} {'type': 'loss', 'content': 0.012623919174075127, 'timestamp': '2025-09-10 02:51:13.237016', 'step': 21456, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 2847885091968}, 'timestamp': '2025-09-10 02:51:13.266540', 'step': 21456, 'epoch': 3} {'type': 'loss', 'content': 0.004314114339649677, 'timestamp': '2025-09-10 02:51:13.269319', 'step': 21457, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 3322488817984}, 'timestamp': '2025-09-10 02:51:13.300125', 'step': 21457, 'epoch': 3} {'type': 'loss', 'content': 0.0591898076236248, 'timestamp': '2025-09-10 02:51:13.302443', 'step': 21458, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 3322488817984}, 'timestamp': '2025-09-10 02:51:13.333663', 'step': 21458, 'epoch': 3} {'type': 'loss', 'content': 0.05929800868034363, 'timestamp': '2025-09-10 02:51:13.336244', 'step': 21459, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 3322488817984}, 'timestamp': '2025-09-10 02:51:13.366631', 'step': 21459, 'epoch': 3} {'type': 'loss', 'content': 0.0689241960644722, 'timestamp': '2025-09-10 02:51:13.390680', 'step': 21460, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 3322488817984}, 'timestamp': '2025-09-10 02:51:13.421867', 'step': 21460, 'epoch': 3} {'type': 'loss', 'content': 0.024359026923775673, 'timestamp': '2025-09-10 02:51:13.424196', 'step': 21461, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 128], 'flops': 3797092544000}, 'timestamp': '2025-09-10 02:51:13.455411', 'step': 21461, 'epoch': 3} {'type': 'loss', 'content': 0.05373620614409447, 'timestamp': '2025-09-10 02:51:13.458203', 'step': 21462, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 80], 'flops': 2373281365952}, 'timestamp': '2025-09-10 02:51:13.490207', 'step': 21462, 'epoch': 3} {'type': 'loss', 'content': 0.06893480569124222, 'timestamp': '2025-09-10 02:51:13.492965', 'step': 21463, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 2847885091968}, 'timestamp': '2025-09-10 02:51:13.524442', 'step': 21463, 'epoch': 3} {'type': 'loss', 'content': 0.036742549389600754, 'timestamp': '2025-09-10 02:51:13.548259', 'step': 21464, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 2847885091968}, 'timestamp': '2025-09-10 02:51:13.580133', 'step': 21464, 'epoch': 3} {'type': 'loss', 'content': 0.01900697499513626, 'timestamp': '2025-09-10 02:51:13.582730', 'step': 21465, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 144], 'flops': 4271696270016}, 'timestamp': '2025-09-10 02:51:13.614359', 'step': 21465, 'epoch': 3} {'type': 'loss', 'content': 0.030806880444288254, 'timestamp': '2025-09-10 02:51:13.617082', 'step': 21466, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 2847885091968}, 'timestamp': '2025-09-10 02:51:13.648118', 'step': 21466, 'epoch': 3} {'type': 'loss', 'content': 0.02950936369597912, 'timestamp': '2025-09-10 02:51:13.650586', 'step': 21467, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 3322488817984}, 'timestamp': '2025-09-10 02:51:13.681618', 'step': 21467, 'epoch': 3} {'type': 'loss', 'content': 0.05318853259086609, 'timestamp': '2025-09-10 02:51:13.705257', 'step': 21468, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 3322488817984}, 'timestamp': '2025-09-10 02:51:13.742640', 'step': 21468, 'epoch': 3} {'type': 'loss', 'content': 0.04518526792526245, 'timestamp': '2025-09-10 02:51:13.747202', 'step': 21469, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 2847885091968}, 'timestamp': '2025-09-10 02:51:13.780116', 'step': 21469, 'epoch': 3} {'type': 'loss', 'content': 0.09075218439102173, 'timestamp': '2025-09-10 02:51:13.782824', 'step': 21470, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 128], 'flops': 3797092544000}, 'timestamp': '2025-09-10 02:51:13.812967', 'step': 21470, 'epoch': 3} {'type': 'loss', 'content': 0.1135907769203186, 'timestamp': '2025-09-10 02:51:13.815497', 'step': 21471, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 128], 'flops': 3797092544000}, 'timestamp': '2025-09-10 02:51:13.846302', 'step': 21471, 'epoch': 3} {'type': 'loss', 'content': 0.07779289036989212, 'timestamp': '2025-09-10 02:51:13.869924', 'step': 21472, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 128], 'flops': 3797092544000}, 'timestamp': '2025-09-10 02:51:13.901111', 'step': 21472, 'epoch': 3} {'type': 'loss', 'content': 0.07669619470834732, 'timestamp': '2025-09-10 02:51:13.903438', 'step': 21473, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 128], 'flops': 3797092544000}, 'timestamp': '2025-09-10 02:51:13.934110', 'step': 21473, 'epoch': 3} {'type': 'loss', 'content': 0.01699378900229931, 'timestamp': '2025-09-10 02:51:13.936545', 'step': 21474, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 3322488817984}, 'timestamp': '2025-09-10 02:51:13.967533', 'step': 21474, 'epoch': 3} {'type': 'loss', 'content': 0.12567006051540375, 'timestamp': '2025-09-10 02:51:13.969849', 'step': 21475, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 144], 'flops': 4271696270016}, 'timestamp': '2025-09-10 02:51:14.000952', 'step': 21475, 'epoch': 3} {'type': 'loss', 'content': 0.1692477911710739, 'timestamp': '2025-09-10 02:51:14.024504', 'step': 21476, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 144], 'flops': 4271696270016}, 'timestamp': '2025-09-10 02:51:14.055395', 'step': 21476, 'epoch': 3} {'type': 'loss', 'content': 0.0766473039984703, 'timestamp': '2025-09-10 02:51:14.057843', 'step': 21477, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 176], 'flops': 5220903722048}, 'timestamp': '2025-09-10 02:51:14.090314', 'step': 21477, 'epoch': 3} {'type': 'loss', 'content': 0.047109175473451614, 'timestamp': '2025-09-10 02:51:14.094401', 'step': 21478, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 128], 'flops': 3797092544000}, 'timestamp': '2025-09-10 02:51:14.125431', 'step': 21478, 'epoch': 3} {'type': 'loss', 'content': 0.05937454104423523, 'timestamp': '2025-09-10 02:51:14.130373', 'step': 21479, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 3322488817984}, 'timestamp': '2025-09-10 02:51:14.164432', 'step': 21479, 'epoch': 3} {'type': 'loss', 'content': 0.04604542627930641, 'timestamp': '2025-09-10 02:51:14.188704', 'step': 21480, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 3322488817984}, 'timestamp': '2025-09-10 02:51:14.218799', 'step': 21480, 'epoch': 3} {'type': 'loss', 'content': 0.05023342743515968, 'timestamp': '2025-09-10 02:51:14.221234', 'step': 21481, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 3322488817984}, 'timestamp': '2025-09-10 02:51:14.251579', 'step': 21481, 'epoch': 3} {'type': 'loss', 'content': 0.05368306487798691, 'timestamp': '2025-09-10 02:51:14.253927', 'step': 21482, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 3322488817984}, 'timestamp': '2025-09-10 02:51:14.284609', 'step': 21482, 'epoch': 3} {'type': 'loss', 'content': 0.08236127346754074, 'timestamp': '2025-09-10 02:51:14.286903', 'step': 21483, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 128], 'flops': 3797092544000}, 'timestamp': '2025-09-10 02:51:14.317373', 'step': 21483, 'epoch': 3} {'type': 'loss', 'content': 0.0766364261507988, 'timestamp': '2025-09-10 02:51:14.340792', 'step': 21484, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 80], 'flops': 2373281365952}, 'timestamp': '2025-09-10 02:51:14.370994', 'step': 21484, 'epoch': 3} {'type': 'loss', 'content': 0.041114430874586105, 'timestamp': '2025-09-10 02:51:14.374339', 'step': 21485, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 3322488817984}, 'timestamp': '2025-09-10 02:51:14.404754', 'step': 21485, 'epoch': 3} {'type': 'loss', 'content': 0.08517667651176453, 'timestamp': '2025-09-10 02:51:14.406929', 'step': 21486, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 3322488817984}, 'timestamp': '2025-09-10 02:51:14.437476', 'step': 21486, 'epoch': 3} {'type': 'loss', 'content': 0.04867100715637207, 'timestamp': '2025-09-10 02:51:14.439686', 'step': 21487, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 3322488817984}, 'timestamp': '2025-09-10 02:51:14.470357', 'step': 21487, 'epoch': 3} {'type': 'loss', 'content': 0.09476860612630844, 'timestamp': '2025-09-10 02:51:14.497521', 'step': 21488, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 3322488817984}, 'timestamp': '2025-09-10 02:51:14.527928', 'step': 21488, 'epoch': 3} {'type': 'loss', 'content': 0.09270505607128143, 'timestamp': '2025-09-10 02:51:14.533219', 'step': 21489, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 160], 'flops': 4746299996032}, 'timestamp': '2025-09-10 02:51:14.564691', 'step': 21489, 'epoch': 3} {'type': 'loss', 'content': 0.05990825966000557, 'timestamp': '2025-09-10 02:51:14.567676', 'step': 21490, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 144], 'flops': 4271696270016}, 'timestamp': '2025-09-10 02:51:14.602634', 'step': 21490, 'epoch': 3} {'type': 'loss', 'content': 0.050229284912347794, 'timestamp': '2025-09-10 02:51:14.605192', 'step': 21491, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 144], 'flops': 4271696270016}, 'timestamp': '2025-09-10 02:51:14.635561', 'step': 21491, 'epoch': 3} {'type': 'loss', 'content': 0.06149635836482048, 'timestamp': '2025-09-10 02:51:14.658959', 'step': 21492, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 3322488817984}, 'timestamp': '2025-09-10 02:51:14.689968', 'step': 21492, 'epoch': 3} {'type': 'loss', 'content': 0.018810341134667397, 'timestamp': '2025-09-10 02:51:14.692283', 'step': 21493, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 128], 'flops': 3797092544000}, 'timestamp': '2025-09-10 02:51:14.723276', 'step': 21493, 'epoch': 3} {'type': 'loss', 'content': 0.06032775714993477, 'timestamp': '2025-09-10 02:51:14.728950', 'step': 21494, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 3322488817984}, 'timestamp': '2025-09-10 02:51:14.763464', 'step': 21494, 'epoch': 3} {'type': 'loss', 'content': 0.1160859689116478, 'timestamp': '2025-09-10 02:51:14.765779', 'step': 21495, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 144], 'flops': 4271696270016}, 'timestamp': '2025-09-10 02:51:14.796730', 'step': 21495, 'epoch': 3} {'type': 'loss', 'content': 0.05328034609556198, 'timestamp': '2025-09-10 02:51:14.823546', 'step': 21496, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 80], 'flops': 2373281365952}, 'timestamp': '2025-09-10 02:51:14.858107', 'step': 21496, 'epoch': 3} {'type': 'loss', 'content': 0.06358620524406433, 'timestamp': '2025-09-10 02:51:14.860687', 'step': 21497, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 3322488817984}, 'timestamp': '2025-09-10 02:51:14.893449', 'step': 21497, 'epoch': 3} {'type': 'loss', 'content': 0.03416056931018829, 'timestamp': '2025-09-10 02:51:14.896340', 'step': 21498, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 2847885091968}, 'timestamp': '2025-09-10 02:51:14.927461', 'step': 21498, 'epoch': 3} {'type': 'loss', 'content': 0.04897122085094452, 'timestamp': '2025-09-10 02:51:14.929801', 'step': 21499, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 2847885091968}, 'timestamp': '2025-09-10 02:51:14.960413', 'step': 21499, 'epoch': 3} {'type': 'loss', 'content': 0.09625419229269028, 'timestamp': '2025-09-10 02:51:14.985029', 'step': 21500, 'epoch': 3} {'type': 'info', 'content': 'Checkpoint saved at step 21500', 'timestamp': '2025-09-10 02:51:19.877547', 'step': 21500, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 3322488817984}, 'timestamp': '2025-09-10 02:51:19.909889', 'step': 21500, 'epoch': 3} {'type': 'loss', 'content': 0.08382822573184967, 'timestamp': '2025-09-10 02:51:19.912294', 'step': 21501, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 128], 'flops': 3797092544000}, 'timestamp': '2025-09-10 02:51:19.944672', 'step': 21501, 'epoch': 3} {'type': 'loss', 'content': 0.04002945125102997, 'timestamp': '2025-09-10 02:51:19.946749', 'step': 21502, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 2847885091968}, 'timestamp': '2025-09-10 02:51:19.977933', 'step': 21502, 'epoch': 3} {'type': 'loss', 'content': 0.024420147761702538, 'timestamp': '2025-09-10 02:51:19.980198', 'step': 21503, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 128], 'flops': 3797092544000}, 'timestamp': '2025-09-10 02:51:20.011216', 'step': 21503, 'epoch': 3} {'type': 'loss', 'content': 0.053074609488248825, 'timestamp': '2025-09-10 02:51:20.035118', 'step': 21504, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 144], 'flops': 4271696270016}, 'timestamp': '2025-09-10 02:51:20.067647', 'step': 21504, 'epoch': 3} {'type': 'loss', 'content': 0.12298709154129028, 'timestamp': '2025-09-10 02:51:20.069521', 'step': 21505, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 3322488817984}, 'timestamp': '2025-09-10 02:51:20.100138', 'step': 21505, 'epoch': 3} {'type': 'loss', 'content': 0.022932741791009903, 'timestamp': '2025-09-10 02:51:20.102731', 'step': 21506, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 80], 'flops': 2373281365952}, 'timestamp': '2025-09-10 02:51:20.133953', 'step': 21506, 'epoch': 3} {'type': 'loss', 'content': 0.07010143995285034, 'timestamp': '2025-09-10 02:51:20.136710', 'step': 21507, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 3322488817984}, 'timestamp': '2025-09-10 02:51:20.169021', 'step': 21507, 'epoch': 3} {'type': 'loss', 'content': 0.06603194773197174, 'timestamp': '2025-09-10 02:51:20.192977', 'step': 21508, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 2847885091968}, 'timestamp': '2025-09-10 02:51:20.224319', 'step': 21508, 'epoch': 3} {'type': 'loss', 'content': 0.06537895649671555, 'timestamp': '2025-09-10 02:51:20.226904', 'step': 21509, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 144], 'flops': 4271696270016}, 'timestamp': '2025-09-10 02:51:20.257721', 'step': 21509, 'epoch': 3} {'type': 'loss', 'content': 0.08930942416191101, 'timestamp': '2025-09-10 02:51:20.260352', 'step': 21510, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 80], 'flops': 2373281365952}, 'timestamp': '2025-09-10 02:51:20.291248', 'step': 21510, 'epoch': 3} {'type': 'loss', 'content': 0.06240239366889, 'timestamp': '2025-09-10 02:51:20.293640', 'step': 21511, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 144], 'flops': 4271696270016}, 'timestamp': '2025-09-10 02:51:20.325563', 'step': 21511, 'epoch': 3} {'type': 'loss', 'content': 0.10858628898859024, 'timestamp': '2025-09-10 02:51:20.349090', 'step': 21512, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 3322488817984}, 'timestamp': '2025-09-10 02:51:20.382332', 'step': 21512, 'epoch': 3} {'type': 'loss', 'content': 0.040123552083969116, 'timestamp': '2025-09-10 02:51:20.384184', 'step': 21513, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 3322488817984}, 'timestamp': '2025-09-10 02:51:20.414326', 'step': 21513, 'epoch': 3} {'type': 'loss', 'content': 0.06374838203191757, 'timestamp': '2025-09-10 02:51:20.417008', 'step': 21514, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 128], 'flops': 3797092544000}, 'timestamp': '2025-09-10 02:51:20.447702', 'step': 21514, 'epoch': 3} {'type': 'loss', 'content': 0.04110579937696457, 'timestamp': '2025-09-10 02:51:20.451636', 'step': 21515, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 144], 'flops': 4271696270016}, 'timestamp': '2025-09-10 02:51:20.482958', 'step': 21515, 'epoch': 3} {'type': 'loss', 'content': 0.06393854320049286, 'timestamp': '2025-09-10 02:51:20.506282', 'step': 21516, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 2847885091968}, 'timestamp': '2025-09-10 02:51:20.537011', 'step': 21516, 'epoch': 3} {'type': 'loss', 'content': 0.04855887591838837, 'timestamp': '2025-09-10 02:51:20.539362', 'step': 21517, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 128], 'flops': 3797092544000}, 'timestamp': '2025-09-10 02:51:20.569787', 'step': 21517, 'epoch': 3} {'type': 'loss', 'content': 0.015029986388981342, 'timestamp': '2025-09-10 02:51:20.572298', 'step': 21518, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 80], 'flops': 2373281365952}, 'timestamp': '2025-09-10 02:51:20.603442', 'step': 21518, 'epoch': 3} {'type': 'loss', 'content': 0.022892143577337265, 'timestamp': '2025-09-10 02:51:20.606311', 'step': 21519, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 2847885091968}, 'timestamp': '2025-09-10 02:51:20.636971', 'step': 21519, 'epoch': 3} {'type': 'loss', 'content': 0.04162809997797012, 'timestamp': '2025-09-10 02:51:20.660413', 'step': 21520, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 128], 'flops': 3797092544000}, 'timestamp': '2025-09-10 02:51:20.691591', 'step': 21520, 'epoch': 3} {'type': 'loss', 'content': 0.031929317861795425, 'timestamp': '2025-09-10 02:51:20.693452', 'step': 21521, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 3322488817984}, 'timestamp': '2025-09-10 02:51:20.723437', 'step': 21521, 'epoch': 3} {'type': 'loss', 'content': 0.04689488559961319, 'timestamp': '2025-09-10 02:51:20.725852', 'step': 21522, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 144], 'flops': 4271696270016}, 'timestamp': '2025-09-10 02:51:20.757612', 'step': 21522, 'epoch': 3} {'type': 'loss', 'content': 0.10245160758495331, 'timestamp': '2025-09-10 02:51:20.760445', 'step': 21523, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 3322488817984}, 'timestamp': '2025-09-10 02:51:20.793419', 'step': 21523, 'epoch': 3} {'type': 'loss', 'content': 0.10270147025585175, 'timestamp': '2025-09-10 02:51:20.817040', 'step': 21524, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 128], 'flops': 3797092544000}, 'timestamp': '2025-09-10 02:51:20.849251', 'step': 21524, 'epoch': 3} {'type': 'loss', 'content': 0.017513152211904526, 'timestamp': '2025-09-10 02:51:20.851617', 'step': 21525, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 3322488817984}, 'timestamp': '2025-09-10 02:51:20.883478', 'step': 21525, 'epoch': 3} {'type': 'loss', 'content': 0.027548450976610184, 'timestamp': '2025-09-10 02:51:20.886196', 'step': 21526, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 160], 'flops': 4746299996032}, 'timestamp': '2025-09-10 02:51:20.916907', 'step': 21526, 'epoch': 3} {'type': 'loss', 'content': 0.024224424734711647, 'timestamp': '2025-09-10 02:51:20.919701', 'step': 21527, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 3322488817984}, 'timestamp': '2025-09-10 02:51:20.950050', 'step': 21527, 'epoch': 3} {'type': 'loss', 'content': 0.059944216161966324, 'timestamp': '2025-09-10 02:51:20.973556', 'step': 21528, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 2847885091968}, 'timestamp': '2025-09-10 02:51:21.003946', 'step': 21528, 'epoch': 3} {'type': 'loss', 'content': 0.013426178134977818, 'timestamp': '2025-09-10 02:51:21.007101', 'step': 21529, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 2847885091968}, 'timestamp': '2025-09-10 02:51:21.038792', 'step': 21529, 'epoch': 3} {'type': 'loss', 'content': 0.08851220458745956, 'timestamp': '2025-09-10 02:51:21.041172', 'step': 21530, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 2847885091968}, 'timestamp': '2025-09-10 02:51:21.071281', 'step': 21530, 'epoch': 3} {'type': 'loss', 'content': 0.05710062012076378, 'timestamp': '2025-09-10 02:51:21.073315', 'step': 21531, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 3322488817984}, 'timestamp': '2025-09-10 02:51:21.103746', 'step': 21531, 'epoch': 3} {'type': 'loss', 'content': 0.06627603620290756, 'timestamp': '2025-09-10 02:51:21.127576', 'step': 21532, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 3322488817984}, 'timestamp': '2025-09-10 02:51:21.160675', 'step': 21532, 'epoch': 3} {'type': 'loss', 'content': 0.04254990443587303, 'timestamp': '2025-09-10 02:51:21.162757', 'step': 21533, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 3322488817984}, 'timestamp': '2025-09-10 02:51:21.194998', 'step': 21533, 'epoch': 3} {'type': 'loss', 'content': 0.059106599539518356, 'timestamp': '2025-09-10 02:51:21.197202', 'step': 21534, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 3322488817984}, 'timestamp': '2025-09-10 02:51:21.227405', 'step': 21534, 'epoch': 3} {'type': 'loss', 'content': 0.020713750272989273, 'timestamp': '2025-09-10 02:51:21.230023', 'step': 21535, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 3322488817984}, 'timestamp': '2025-09-10 02:51:21.261459', 'step': 21535, 'epoch': 3} {'type': 'loss', 'content': 0.08429858833551407, 'timestamp': '2025-09-10 02:51:21.285138', 'step': 21536, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 128], 'flops': 3797092544000}, 'timestamp': '2025-09-10 02:51:21.315947', 'step': 21536, 'epoch': 3} {'type': 'loss', 'content': 0.06430812925100327, 'timestamp': '2025-09-10 02:51:21.318152', 'step': 21537, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 128], 'flops': 3797092544000}, 'timestamp': '2025-09-10 02:51:21.348812', 'step': 21537, 'epoch': 3} {'type': 'loss', 'content': 0.0033852311316877604, 'timestamp': '2025-09-10 02:51:21.351278', 'step': 21538, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 3322488817984}, 'timestamp': '2025-09-10 02:51:21.382961', 'step': 21538, 'epoch': 3} {'type': 'loss', 'content': 0.09408413618803024, 'timestamp': '2025-09-10 02:51:21.385249', 'step': 21539, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 3322488817984}, 'timestamp': '2025-09-10 02:51:21.415128', 'step': 21539, 'epoch': 3} {'type': 'loss', 'content': 0.010173952206969261, 'timestamp': '2025-09-10 02:51:21.438555', 'step': 21540, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 2847885091968}, 'timestamp': '2025-09-10 02:51:21.469587', 'step': 21540, 'epoch': 3} {'type': 'loss', 'content': 0.09485634416341782, 'timestamp': '2025-09-10 02:51:21.472326', 'step': 21541, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 128], 'flops': 3797092544000}, 'timestamp': '2025-09-10 02:51:21.502694', 'step': 21541, 'epoch': 3} {'type': 'loss', 'content': 0.043939437717199326, 'timestamp': '2025-09-10 02:51:21.504728', 'step': 21542, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 3322488817984}, 'timestamp': '2025-09-10 02:51:21.535788', 'step': 21542, 'epoch': 3} {'type': 'loss', 'content': 0.06136240437626839, 'timestamp': '2025-09-10 02:51:21.538098', 'step': 21543, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 3322488817984}, 'timestamp': '2025-09-10 02:51:21.569604', 'step': 21543, 'epoch': 3} {'type': 'loss', 'content': 0.0255836620926857, 'timestamp': '2025-09-10 02:51:21.593469', 'step': 21544, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 128], 'flops': 3797092544000}, 'timestamp': '2025-09-10 02:51:21.623974', 'step': 21544, 'epoch': 3} {'type': 'loss', 'content': 0.14013145864009857, 'timestamp': '2025-09-10 02:51:21.627083', 'step': 21545, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 3322488817984}, 'timestamp': '2025-09-10 02:51:21.657584', 'step': 21545, 'epoch': 3} {'type': 'loss', 'content': 0.057115714997053146, 'timestamp': '2025-09-10 02:51:21.660691', 'step': 21546, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 2847885091968}, 'timestamp': '2025-09-10 02:51:21.694568', 'step': 21546, 'epoch': 3} {'type': 'loss', 'content': 0.023859556764364243, 'timestamp': '2025-09-10 02:51:21.696934', 'step': 21547, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 2847885091968}, 'timestamp': '2025-09-10 02:51:21.727935', 'step': 21547, 'epoch': 3} {'type': 'loss', 'content': 0.10174305737018585, 'timestamp': '2025-09-10 02:51:21.751239', 'step': 21548, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 3322488817984}, 'timestamp': '2025-09-10 02:51:21.787483', 'step': 21548, 'epoch': 3} {'type': 'loss', 'content': 0.08883332461118698, 'timestamp': '2025-09-10 02:51:21.790293', 'step': 21549, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 3322488817984}, 'timestamp': '2025-09-10 02:51:21.820527', 'step': 21549, 'epoch': 3} {'type': 'loss', 'content': 0.07885124534368515, 'timestamp': '2025-09-10 02:51:21.822842', 'step': 21550, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 2847885091968}, 'timestamp': '2025-09-10 02:51:21.853661', 'step': 21550, 'epoch': 3} {'type': 'loss', 'content': 0.07723954319953918, 'timestamp': '2025-09-10 02:51:21.856156', 'step': 21551, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 3322488817984}, 'timestamp': '2025-09-10 02:51:21.886460', 'step': 21551, 'epoch': 3} {'type': 'loss', 'content': 0.04138026386499405, 'timestamp': '2025-09-10 02:51:21.910770', 'step': 21552, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 2847885091968}, 'timestamp': '2025-09-10 02:51:21.940768', 'step': 21552, 'epoch': 3} {'type': 'loss', 'content': 0.06485195457935333, 'timestamp': '2025-09-10 02:51:21.943092', 'step': 21553, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 128], 'flops': 3797092544000}, 'timestamp': '2025-09-10 02:51:21.973725', 'step': 21553, 'epoch': 3} {'type': 'loss', 'content': 0.05771767348051071, 'timestamp': '2025-09-10 02:51:21.975638', 'step': 21554, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 3322488817984}, 'timestamp': '2025-09-10 02:51:22.006004', 'step': 21554, 'epoch': 3} {'type': 'loss', 'content': 0.06996848434209824, 'timestamp': '2025-09-10 02:51:22.008460', 'step': 21555, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 3322488817984}, 'timestamp': '2025-09-10 02:51:22.039745', 'step': 21555, 'epoch': 3} {'type': 'loss', 'content': 0.13759782910346985, 'timestamp': '2025-09-10 02:51:22.063193', 'step': 21556, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 2847885091968}, 'timestamp': '2025-09-10 02:51:22.094673', 'step': 21556, 'epoch': 3} {'type': 'loss', 'content': 0.015835564583539963, 'timestamp': '2025-09-10 02:51:22.097142', 'step': 21557, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 128], 'flops': 3797092544000}, 'timestamp': '2025-09-10 02:51:22.127962', 'step': 21557, 'epoch': 3} {'type': 'loss', 'content': 0.06987139582633972, 'timestamp': '2025-09-10 02:51:22.131138', 'step': 21558, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 128], 'flops': 3797092544000}, 'timestamp': '2025-09-10 02:51:22.162473', 'step': 21558, 'epoch': 3} {'type': 'loss', 'content': 0.09829637408256531, 'timestamp': '2025-09-10 02:51:22.165049', 'step': 21559, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 2847885091968}, 'timestamp': '2025-09-10 02:51:22.198362', 'step': 21559, 'epoch': 3} {'type': 'loss', 'content': 0.11855552345514297, 'timestamp': '2025-09-10 02:51:22.222591', 'step': 21560, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 128], 'flops': 3797092544000}, 'timestamp': '2025-09-10 02:51:22.254645', 'step': 21560, 'epoch': 3} {'type': 'loss', 'content': 0.052791643887758255, 'timestamp': '2025-09-10 02:51:22.257075', 'step': 21561, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 3322488817984}, 'timestamp': '2025-09-10 02:51:22.287595', 'step': 21561, 'epoch': 3} {'type': 'loss', 'content': 0.023426219820976257, 'timestamp': '2025-09-10 02:51:22.289906', 'step': 21562, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 176], 'flops': 5220903722048}, 'timestamp': '2025-09-10 02:51:22.320586', 'step': 21562, 'epoch': 3} {'type': 'loss', 'content': 0.08915656805038452, 'timestamp': '2025-09-10 02:51:22.327985', 'step': 21563, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 3322488817984}, 'timestamp': '2025-09-10 02:51:22.362677', 'step': 21563, 'epoch': 3} {'type': 'loss', 'content': 0.07181162387132645, 'timestamp': '2025-09-10 02:51:22.388693', 'step': 21564, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 2847885091968}, 'timestamp': '2025-09-10 02:51:22.429316', 'step': 21564, 'epoch': 3} {'type': 'loss', 'content': 0.06355801224708557, 'timestamp': '2025-09-10 02:51:22.432580', 'step': 21565, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 2847885091968}, 'timestamp': '2025-09-10 02:51:22.468021', 'step': 21565, 'epoch': 3} {'type': 'loss', 'content': 0.04565965756773949, 'timestamp': '2025-09-10 02:51:22.474401', 'step': 21566, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 80], 'flops': 2373281365952}, 'timestamp': '2025-09-10 02:51:22.513653', 'step': 21566, 'epoch': 3} {'type': 'loss', 'content': 0.003068357240408659, 'timestamp': '2025-09-10 02:51:22.518811', 'step': 21567, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 2847885091968}, 'timestamp': '2025-09-10 02:51:22.557491', 'step': 21567, 'epoch': 3} {'type': 'loss', 'content': 0.02726609818637371, 'timestamp': '2025-09-10 02:51:22.582137', 'step': 21568, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 160], 'flops': 4746299996032}, 'timestamp': '2025-09-10 02:51:22.623026', 'step': 21568, 'epoch': 3} {'type': 'loss', 'content': 0.11724957078695297, 'timestamp': '2025-09-10 02:51:22.625330', 'step': 21569, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 3322488817984}, 'timestamp': '2025-09-10 02:51:22.655848', 'step': 21569, 'epoch': 3} {'type': 'loss', 'content': 0.020014259964227676, 'timestamp': '2025-09-10 02:51:22.658310', 'step': 21570, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 2847885091968}, 'timestamp': '2025-09-10 02:51:22.689624', 'step': 21570, 'epoch': 3} {'type': 'loss', 'content': 0.02101433090865612, 'timestamp': '2025-09-10 02:51:22.691943', 'step': 21571, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 80], 'flops': 2373281365952}, 'timestamp': '2025-09-10 02:51:22.722564', 'step': 21571, 'epoch': 3} {'type': 'loss', 'content': 0.06655280292034149, 'timestamp': '2025-09-10 02:51:22.746256', 'step': 21572, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 3322488817984}, 'timestamp': '2025-09-10 02:51:22.779576', 'step': 21572, 'epoch': 3} {'type': 'loss', 'content': 0.06439950317144394, 'timestamp': '2025-09-10 02:51:22.783173', 'step': 21573, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 128], 'flops': 3797092544000}, 'timestamp': '2025-09-10 02:51:22.813674', 'step': 21573, 'epoch': 3} {'type': 'loss', 'content': 0.0392494760453701, 'timestamp': '2025-09-10 02:51:22.816162', 'step': 21574, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 2847885091968}, 'timestamp': '2025-09-10 02:51:22.846583', 'step': 21574, 'epoch': 3} {'type': 'loss', 'content': 0.07919186353683472, 'timestamp': '2025-09-10 02:51:22.848721', 'step': 21575, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 3322488817984}, 'timestamp': '2025-09-10 02:51:22.879320', 'step': 21575, 'epoch': 3} {'type': 'loss', 'content': 0.022400222718715668, 'timestamp': '2025-09-10 02:51:22.904375', 'step': 21576, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 128], 'flops': 3797092544000}, 'timestamp': '2025-09-10 02:51:22.934543', 'step': 21576, 'epoch': 3} {'type': 'loss', 'content': 0.05296894162893295, 'timestamp': '2025-09-10 02:51:22.936760', 'step': 21577, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 144], 'flops': 4271696270016}, 'timestamp': '2025-09-10 02:51:22.967395', 'step': 21577, 'epoch': 3} {'type': 'loss', 'content': 0.121210478246212, 'timestamp': '2025-09-10 02:51:22.969856', 'step': 21578, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 2847885091968}, 'timestamp': '2025-09-10 02:51:23.000003', 'step': 21578, 'epoch': 3} {'type': 'loss', 'content': 0.018673790618777275, 'timestamp': '2025-09-10 02:51:23.002142', 'step': 21579, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 3322488817984}, 'timestamp': '2025-09-10 02:51:23.032248', 'step': 21579, 'epoch': 3} {'type': 'loss', 'content': 0.04832149296998978, 'timestamp': '2025-09-10 02:51:23.055865', 'step': 21580, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 2847885091968}, 'timestamp': '2025-09-10 02:51:23.086718', 'step': 21580, 'epoch': 3} {'type': 'loss', 'content': 0.01831667497754097, 'timestamp': '2025-09-10 02:51:23.089694', 'step': 21581, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 144], 'flops': 4271696270016}, 'timestamp': '2025-09-10 02:51:23.121763', 'step': 21581, 'epoch': 3} {'type': 'loss', 'content': 0.03456970304250717, 'timestamp': '2025-09-10 02:51:23.124266', 'step': 21582, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 2847885091968}, 'timestamp': '2025-09-10 02:51:23.155668', 'step': 21582, 'epoch': 3} {'type': 'loss', 'content': 0.011293060146272182, 'timestamp': '2025-09-10 02:51:23.158000', 'step': 21583, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 3322488817984}, 'timestamp': '2025-09-10 02:51:23.188026', 'step': 21583, 'epoch': 3} {'type': 'loss', 'content': 0.05115964263677597, 'timestamp': '2025-09-10 02:51:23.211769', 'step': 21584, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 2847885091968}, 'timestamp': '2025-09-10 02:51:23.242539', 'step': 21584, 'epoch': 3} {'type': 'loss', 'content': 0.024087265133857727, 'timestamp': '2025-09-10 02:51:23.244807', 'step': 21585, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 128], 'flops': 3797092544000}, 'timestamp': '2025-09-10 02:51:23.275767', 'step': 21585, 'epoch': 3} {'type': 'loss', 'content': 0.010087797418236732, 'timestamp': '2025-09-10 02:51:23.278249', 'step': 21586, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 128], 'flops': 3797092544000}, 'timestamp': '2025-09-10 02:51:23.308365', 'step': 21586, 'epoch': 3} {'type': 'loss', 'content': 0.1917307823896408, 'timestamp': '2025-09-10 02:51:23.311085', 'step': 21587, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 3322488817984}, 'timestamp': '2025-09-10 02:51:23.344716', 'step': 21587, 'epoch': 3} {'type': 'loss', 'content': 0.0713605135679245, 'timestamp': '2025-09-10 02:51:23.368100', 'step': 21588, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 128], 'flops': 3797092544000}, 'timestamp': '2025-09-10 02:51:23.399637', 'step': 21588, 'epoch': 3} {'type': 'loss', 'content': 0.04030710086226463, 'timestamp': '2025-09-10 02:51:23.403841', 'step': 21589, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 3322488817984}, 'timestamp': '2025-09-10 02:51:23.436141', 'step': 21589, 'epoch': 3} {'type': 'loss', 'content': 0.0309202391654253, 'timestamp': '2025-09-10 02:51:23.438541', 'step': 21590, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 2847885091968}, 'timestamp': '2025-09-10 02:51:23.469026', 'step': 21590, 'epoch': 3} {'type': 'loss', 'content': 0.009429289028048515, 'timestamp': '2025-09-10 02:51:23.472097', 'step': 21591, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 3322488817984}, 'timestamp': '2025-09-10 02:51:23.503134', 'step': 21591, 'epoch': 3} {'type': 'loss', 'content': 0.007454622071236372, 'timestamp': '2025-09-10 02:51:23.526608', 'step': 21592, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 128], 'flops': 3797092544000}, 'timestamp': '2025-09-10 02:51:23.558185', 'step': 21592, 'epoch': 3} {'type': 'loss', 'content': 0.03508685901761055, 'timestamp': '2025-09-10 02:51:23.560890', 'step': 21593, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 128], 'flops': 3797092544000}, 'timestamp': '2025-09-10 02:51:23.591181', 'step': 21593, 'epoch': 3} {'type': 'loss', 'content': 0.1077265813946724, 'timestamp': '2025-09-10 02:51:23.594590', 'step': 21594, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 128], 'flops': 3797092544000}, 'timestamp': '2025-09-10 02:51:23.625347', 'step': 21594, 'epoch': 3} {'type': 'loss', 'content': 0.05438747629523277, 'timestamp': '2025-09-10 02:51:23.628281', 'step': 21595, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 128], 'flops': 3797092544000}, 'timestamp': '2025-09-10 02:51:23.659038', 'step': 21595, 'epoch': 3} {'type': 'loss', 'content': 0.029227156192064285, 'timestamp': '2025-09-10 02:51:23.682999', 'step': 21596, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 80], 'flops': 2373281365952}, 'timestamp': '2025-09-10 02:51:23.713730', 'step': 21596, 'epoch': 3} {'type': 'loss', 'content': 0.040834613144397736, 'timestamp': '2025-09-10 02:51:23.715950', 'step': 21597, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 3322488817984}, 'timestamp': '2025-09-10 02:51:23.745881', 'step': 21597, 'epoch': 3} {'type': 'loss', 'content': 0.03452720120549202, 'timestamp': '2025-09-10 02:51:23.748112', 'step': 21598, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 3322488817984}, 'timestamp': '2025-09-10 02:51:23.779467', 'step': 21598, 'epoch': 3} {'type': 'loss', 'content': 0.08491165935993195, 'timestamp': '2025-09-10 02:51:23.783938', 'step': 21599, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 144], 'flops': 4271696270016}, 'timestamp': '2025-09-10 02:51:23.814879', 'step': 21599, 'epoch': 3} {'type': 'loss', 'content': 0.04979832470417023, 'timestamp': '2025-09-10 02:51:23.838196', 'step': 21600, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 128], 'flops': 3797092544000}, 'timestamp': '2025-09-10 02:51:23.870626', 'step': 21600, 'epoch': 3} {'type': 'loss', 'content': 0.017506595700979233, 'timestamp': '2025-09-10 02:51:23.873113', 'step': 21601, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 3322488817984}, 'timestamp': '2025-09-10 02:51:23.903534', 'step': 21601, 'epoch': 3} {'type': 'loss', 'content': 0.07711584866046906, 'timestamp': '2025-09-10 02:51:23.905722', 'step': 21602, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 2847885091968}, 'timestamp': '2025-09-10 02:51:23.935915', 'step': 21602, 'epoch': 3} {'type': 'loss', 'content': 0.037141624838113785, 'timestamp': '2025-09-10 02:51:23.937915', 'step': 21603, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 128], 'flops': 3797092544000}, 'timestamp': '2025-09-10 02:51:23.968439', 'step': 21603, 'epoch': 3} {'type': 'loss', 'content': 0.04630935192108154, 'timestamp': '2025-09-10 02:51:23.992423', 'step': 21604, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 80], 'flops': 2373281365952}, 'timestamp': '2025-09-10 02:51:24.028574', 'step': 21604, 'epoch': 3} {'type': 'loss', 'content': 0.047116007655858994, 'timestamp': '2025-09-10 02:51:24.034522', 'step': 21605, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 3322488817984}, 'timestamp': '2025-09-10 02:51:24.074407', 'step': 21605, 'epoch': 3} {'type': 'loss', 'content': 0.07707918435335159, 'timestamp': '2025-09-10 02:51:24.077494', 'step': 21606, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 3322488817984}, 'timestamp': '2025-09-10 02:51:24.109513', 'step': 21606, 'epoch': 3} {'type': 'loss', 'content': 0.02381550706923008, 'timestamp': '2025-09-10 02:51:24.111656', 'step': 21607, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 3322488817984}, 'timestamp': '2025-09-10 02:51:24.142679', 'step': 21607, 'epoch': 3} {'type': 'loss', 'content': 0.11738843470811844, 'timestamp': '2025-09-10 02:51:24.167863', 'step': 21608, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 3322488817984}, 'timestamp': '2025-09-10 02:51:24.200147', 'step': 21608, 'epoch': 3} {'type': 'loss', 'content': 0.10877388715744019, 'timestamp': '2025-09-10 02:51:24.202875', 'step': 21609, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 144], 'flops': 4271696270016}, 'timestamp': '2025-09-10 02:51:24.233919', 'step': 21609, 'epoch': 3} {'type': 'loss', 'content': 0.0956200435757637, 'timestamp': '2025-09-10 02:51:24.236640', 'step': 21610, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 2847885091968}, 'timestamp': '2025-09-10 02:51:24.267606', 'step': 21610, 'epoch': 3} {'type': 'loss', 'content': 0.06596671789884567, 'timestamp': '2025-09-10 02:51:24.269893', 'step': 21611, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 3322488817984}, 'timestamp': '2025-09-10 02:51:24.300742', 'step': 21611, 'epoch': 3} {'type': 'loss', 'content': 0.015290480107069016, 'timestamp': '2025-09-10 02:51:24.324736', 'step': 21612, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 3322488817984}, 'timestamp': '2025-09-10 02:51:24.355016', 'step': 21612, 'epoch': 3} {'type': 'loss', 'content': 0.06577987968921661, 'timestamp': '2025-09-10 02:51:24.357586', 'step': 21613, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 128], 'flops': 3797092544000}, 'timestamp': '2025-09-10 02:51:24.388051', 'step': 21613, 'epoch': 3} {'type': 'loss', 'content': 0.04970064014196396, 'timestamp': '2025-09-10 02:51:24.390543', 'step': 21614, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 2847885091968}, 'timestamp': '2025-09-10 02:51:24.421572', 'step': 21614, 'epoch': 3} {'type': 'loss', 'content': 0.028314340859651566, 'timestamp': '2025-09-10 02:51:24.424199', 'step': 21615, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 80], 'flops': 2373281365952}, 'timestamp': '2025-09-10 02:51:24.454378', 'step': 21615, 'epoch': 3} {'type': 'loss', 'content': 0.07951323688030243, 'timestamp': '2025-09-10 02:51:24.477885', 'step': 21616, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 2847885091968}, 'timestamp': '2025-09-10 02:51:24.508925', 'step': 21616, 'epoch': 3} {'type': 'loss', 'content': 0.07343136519193649, 'timestamp': '2025-09-10 02:51:24.511613', 'step': 21617, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 128], 'flops': 3797092544000}, 'timestamp': '2025-09-10 02:51:24.543132', 'step': 21617, 'epoch': 3} {'type': 'loss', 'content': 0.08425062894821167, 'timestamp': '2025-09-10 02:51:24.545963', 'step': 21618, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 144], 'flops': 4271696270016}, 'timestamp': '2025-09-10 02:51:24.578611', 'step': 21618, 'epoch': 3} {'type': 'loss', 'content': 0.07192463427782059, 'timestamp': '2025-09-10 02:51:24.581168', 'step': 21619, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 128], 'flops': 3797092544000}, 'timestamp': '2025-09-10 02:51:24.611662', 'step': 21619, 'epoch': 3} {'type': 'loss', 'content': 0.01732158474624157, 'timestamp': '2025-09-10 02:51:24.635472', 'step': 21620, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 2847885091968}, 'timestamp': '2025-09-10 02:51:24.666573', 'step': 21620, 'epoch': 3} {'type': 'loss', 'content': 0.07546144723892212, 'timestamp': '2025-09-10 02:51:24.668873', 'step': 21621, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 128], 'flops': 3797092544000}, 'timestamp': '2025-09-10 02:51:24.700791', 'step': 21621, 'epoch': 3} {'type': 'loss', 'content': 0.062271274626255035, 'timestamp': '2025-09-10 02:51:24.702877', 'step': 21622, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 144], 'flops': 4271696270016}, 'timestamp': '2025-09-10 02:51:24.735863', 'step': 21622, 'epoch': 3} {'type': 'loss', 'content': 0.019214317202568054, 'timestamp': '2025-09-10 02:51:24.738824', 'step': 21623, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 3322488817984}, 'timestamp': '2025-09-10 02:51:24.769284', 'step': 21623, 'epoch': 3} {'type': 'loss', 'content': 0.07203122973442078, 'timestamp': '2025-09-10 02:51:24.794282', 'step': 21624, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 2847885091968}, 'timestamp': '2025-09-10 02:51:24.825161', 'step': 21624, 'epoch': 3} {'type': 'loss', 'content': 0.10469004511833191, 'timestamp': '2025-09-10 02:51:24.827387', 'step': 21625, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 176], 'flops': 5220903722048}, 'timestamp': '2025-09-10 02:51:24.858570', 'step': 21625, 'epoch': 3} {'type': 'loss', 'content': 0.06301205605268478, 'timestamp': '2025-09-10 02:51:24.863036', 'step': 21626, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 2847885091968}, 'timestamp': '2025-09-10 02:51:24.893273', 'step': 21626, 'epoch': 3} {'type': 'loss', 'content': 0.09831475466489792, 'timestamp': '2025-09-10 02:51:24.895680', 'step': 21627, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 128], 'flops': 3797092544000}, 'timestamp': '2025-09-10 02:51:24.926487', 'step': 21627, 'epoch': 3} {'type': 'loss', 'content': 0.06669405847787857, 'timestamp': '2025-09-10 02:51:24.951179', 'step': 21628, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 128], 'flops': 3797092544000}, 'timestamp': '2025-09-10 02:51:24.983198', 'step': 21628, 'epoch': 3} {'type': 'loss', 'content': 0.015380287542939186, 'timestamp': '2025-09-10 02:51:24.985443', 'step': 21629, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 128], 'flops': 3797092544000}, 'timestamp': '2025-09-10 02:51:25.016903', 'step': 21629, 'epoch': 3} {'type': 'loss', 'content': 0.045580215752124786, 'timestamp': '2025-09-10 02:51:25.019282', 'step': 21630, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 80], 'flops': 2373281365952}, 'timestamp': '2025-09-10 02:51:25.049487', 'step': 21630, 'epoch': 3} {'type': 'loss', 'content': 0.12669385969638824, 'timestamp': '2025-09-10 02:51:25.051827', 'step': 21631, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 3322488817984}, 'timestamp': '2025-09-10 02:51:25.082224', 'step': 21631, 'epoch': 3} {'type': 'loss', 'content': 0.058108799159526825, 'timestamp': '2025-09-10 02:51:25.105866', 'step': 21632, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 128], 'flops': 3797092544000}, 'timestamp': '2025-09-10 02:51:25.137751', 'step': 21632, 'epoch': 3} {'type': 'loss', 'content': 0.04587608575820923, 'timestamp': '2025-09-10 02:51:25.140236', 'step': 21633, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 2847885091968}, 'timestamp': '2025-09-10 02:51:25.171191', 'step': 21633, 'epoch': 3} {'type': 'loss', 'content': 0.07385800033807755, 'timestamp': '2025-09-10 02:51:25.173498', 'step': 21634, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 144], 'flops': 4271696270016}, 'timestamp': '2025-09-10 02:51:25.205163', 'step': 21634, 'epoch': 3} {'type': 'loss', 'content': 0.06642628461122513, 'timestamp': '2025-09-10 02:51:25.207547', 'step': 21635, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 3322488817984}, 'timestamp': '2025-09-10 02:51:25.240752', 'step': 21635, 'epoch': 3} {'type': 'loss', 'content': 0.10010155290365219, 'timestamp': '2025-09-10 02:51:25.264615', 'step': 21636, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 224], 'flops': 6644714900096}, 'timestamp': '2025-09-10 02:51:25.295974', 'step': 21636, 'epoch': 3} {'type': 'loss', 'content': 0.07526078820228577, 'timestamp': '2025-09-10 02:51:25.300912', 'step': 21637, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 3322488817984}, 'timestamp': '2025-09-10 02:51:25.332940', 'step': 21637, 'epoch': 3} {'type': 'loss', 'content': 0.06194455549120903, 'timestamp': '2025-09-10 02:51:25.340001', 'step': 21638, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 144], 'flops': 4271696270016}, 'timestamp': '2025-09-10 02:51:25.382474', 'step': 21638, 'epoch': 3} {'type': 'loss', 'content': 0.06087091937661171, 'timestamp': '2025-09-10 02:51:25.385351', 'step': 21639, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 2847885091968}, 'timestamp': '2025-09-10 02:51:25.420148', 'step': 21639, 'epoch': 3} {'type': 'loss', 'content': 0.06490488350391388, 'timestamp': '2025-09-10 02:51:25.443719', 'step': 21640, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 2847885091968}, 'timestamp': '2025-09-10 02:51:25.473875', 'step': 21640, 'epoch': 3} {'type': 'loss', 'content': 0.05874927341938019, 'timestamp': '2025-09-10 02:51:25.476385', 'step': 21641, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 128], 'flops': 3797092544000}, 'timestamp': '2025-09-10 02:51:25.507577', 'step': 21641, 'epoch': 3} {'type': 'loss', 'content': 0.0644691213965416, 'timestamp': '2025-09-10 02:51:25.510901', 'step': 21642, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 3322488817984}, 'timestamp': '2025-09-10 02:51:25.544853', 'step': 21642, 'epoch': 3} {'type': 'loss', 'content': 0.06104940176010132, 'timestamp': '2025-09-10 02:51:25.547200', 'step': 21643, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 2847885091968}, 'timestamp': '2025-09-10 02:51:25.580247', 'step': 21643, 'epoch': 3} {'type': 'loss', 'content': 0.04589148238301277, 'timestamp': '2025-09-10 02:51:25.603798', 'step': 21644, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 2847885091968}, 'timestamp': '2025-09-10 02:51:25.634635', 'step': 21644, 'epoch': 3} {'type': 'loss', 'content': 0.009501808322966099, 'timestamp': '2025-09-10 02:51:25.636927', 'step': 21645, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 3322488817984}, 'timestamp': '2025-09-10 02:51:25.667424', 'step': 21645, 'epoch': 3} {'type': 'loss', 'content': 0.043451715260744095, 'timestamp': '2025-09-10 02:51:25.669621', 'step': 21646, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 128], 'flops': 3797092544000}, 'timestamp': '2025-09-10 02:51:25.702172', 'step': 21646, 'epoch': 3} {'type': 'loss', 'content': 0.033061105757951736, 'timestamp': '2025-09-10 02:51:25.705711', 'step': 21647, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 2847885091968}, 'timestamp': '2025-09-10 02:51:25.738623', 'step': 21647, 'epoch': 3} {'type': 'loss', 'content': 0.08718004077672958, 'timestamp': '2025-09-10 02:51:25.762799', 'step': 21648, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 128], 'flops': 3797092544000}, 'timestamp': '2025-09-10 02:51:25.803266', 'step': 21648, 'epoch': 3} {'type': 'loss', 'content': 0.051614467054605484, 'timestamp': '2025-09-10 02:51:25.806006', 'step': 21649, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 2847885091968}, 'timestamp': '2025-09-10 02:51:25.836990', 'step': 21649, 'epoch': 3} {'type': 'loss', 'content': 0.08124235272407532, 'timestamp': '2025-09-10 02:51:25.839217', 'step': 21650, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 128], 'flops': 3797092544000}, 'timestamp': '2025-09-10 02:51:25.870576', 'step': 21650, 'epoch': 3} {'type': 'loss', 'content': 0.0919744074344635, 'timestamp': '2025-09-10 02:51:25.873192', 'step': 21651, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 2847885091968}, 'timestamp': '2025-09-10 02:51:25.903849', 'step': 21651, 'epoch': 3} {'type': 'loss', 'content': 0.06737059354782104, 'timestamp': '2025-09-10 02:51:25.927619', 'step': 21652, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 2847885091968}, 'timestamp': '2025-09-10 02:51:25.957980', 'step': 21652, 'epoch': 3} {'type': 'loss', 'content': 0.04802495613694191, 'timestamp': '2025-09-10 02:51:25.960736', 'step': 21653, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 3322488817984}, 'timestamp': '2025-09-10 02:51:25.990487', 'step': 21653, 'epoch': 3} {'type': 'loss', 'content': 0.013170788995921612, 'timestamp': '2025-09-10 02:51:25.992753', 'step': 21654, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 3322488817984}, 'timestamp': '2025-09-10 02:51:26.023164', 'step': 21654, 'epoch': 3} {'type': 'loss', 'content': 0.019786907359957695, 'timestamp': '2025-09-10 02:51:26.025191', 'step': 21655, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 3322488817984}, 'timestamp': '2025-09-10 02:51:26.054860', 'step': 21655, 'epoch': 3} {'type': 'loss', 'content': 0.10880804061889648, 'timestamp': '2025-09-10 02:51:26.078401', 'step': 21656, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 3322488817984}, 'timestamp': '2025-09-10 02:51:26.108784', 'step': 21656, 'epoch': 3} {'type': 'loss', 'content': 0.07294358313083649, 'timestamp': '2025-09-10 02:51:26.111343', 'step': 21657, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 3322488817984}, 'timestamp': '2025-09-10 02:51:26.143122', 'step': 21657, 'epoch': 3} {'type': 'loss', 'content': 0.11158633232116699, 'timestamp': '2025-09-10 02:51:26.145644', 'step': 21658, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 2847885091968}, 'timestamp': '2025-09-10 02:51:26.175811', 'step': 21658, 'epoch': 3} {'type': 'loss', 'content': 0.02965579740703106, 'timestamp': '2025-09-10 02:51:26.178267', 'step': 21659, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 128], 'flops': 3797092544000}, 'timestamp': '2025-09-10 02:51:26.209625', 'step': 21659, 'epoch': 3} {'type': 'loss', 'content': 0.08490727096796036, 'timestamp': '2025-09-10 02:51:26.233438', 'step': 21660, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 3322488817984}, 'timestamp': '2025-09-10 02:51:26.265467', 'step': 21660, 'epoch': 3} {'type': 'loss', 'content': 0.08628007024526596, 'timestamp': '2025-09-10 02:51:26.267552', 'step': 21661, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 160], 'flops': 4746299996032}, 'timestamp': '2025-09-10 02:51:26.299759', 'step': 21661, 'epoch': 3} {'type': 'loss', 'content': 0.03356103599071503, 'timestamp': '2025-09-10 02:51:26.302839', 'step': 21662, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 128], 'flops': 3797092544000}, 'timestamp': '2025-09-10 02:51:26.333385', 'step': 21662, 'epoch': 3} {'type': 'loss', 'content': 0.0619715116918087, 'timestamp': '2025-09-10 02:51:26.337732', 'step': 21663, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 3322488817984}, 'timestamp': '2025-09-10 02:51:26.379070', 'step': 21663, 'epoch': 3} {'type': 'loss', 'content': 0.04368964955210686, 'timestamp': '2025-09-10 02:51:26.402703', 'step': 21664, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 128], 'flops': 3797092544000}, 'timestamp': '2025-09-10 02:51:26.448005', 'step': 21664, 'epoch': 3} {'type': 'loss', 'content': 0.07636338472366333, 'timestamp': '2025-09-10 02:51:26.450350', 'step': 21665, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 3322488817984}, 'timestamp': '2025-09-10 02:51:26.494880', 'step': 21665, 'epoch': 3} {'type': 'loss', 'content': 0.07941316068172455, 'timestamp': '2025-09-10 02:51:26.505183', 'step': 21666, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 128], 'flops': 3797092544000}, 'timestamp': '2025-09-10 02:51:26.558902', 'step': 21666, 'epoch': 3} {'type': 'loss', 'content': 0.09021187573671341, 'timestamp': '2025-09-10 02:51:26.567015', 'step': 21667, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 128], 'flops': 3797092544000}, 'timestamp': '2025-09-10 02:51:26.598598', 'step': 21667, 'epoch': 3} {'type': 'loss', 'content': 0.07133529335260391, 'timestamp': '2025-09-10 02:51:26.629587', 'step': 21668, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 3322488817984}, 'timestamp': '2025-09-10 02:51:26.666882', 'step': 21668, 'epoch': 3} {'type': 'loss', 'content': 0.044187624007463455, 'timestamp': '2025-09-10 02:51:26.671693', 'step': 21669, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 128], 'flops': 3797092544000}, 'timestamp': '2025-09-10 02:51:26.707563', 'step': 21669, 'epoch': 3} {'type': 'loss', 'content': 0.09571034461259842, 'timestamp': '2025-09-10 02:51:26.709809', 'step': 21670, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 3322488817984}, 'timestamp': '2025-09-10 02:51:26.740122', 'step': 21670, 'epoch': 3} {'type': 'loss', 'content': 0.025523558259010315, 'timestamp': '2025-09-10 02:51:26.742658', 'step': 21671, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 3322488817984}, 'timestamp': '2025-09-10 02:51:26.773080', 'step': 21671, 'epoch': 3} {'type': 'loss', 'content': 0.08231423795223236, 'timestamp': '2025-09-10 02:51:26.797941', 'step': 21672, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 128], 'flops': 3797092544000}, 'timestamp': '2025-09-10 02:51:26.829719', 'step': 21672, 'epoch': 3} {'type': 'loss', 'content': 0.0913163498044014, 'timestamp': '2025-09-10 02:51:26.832932', 'step': 21673, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 128], 'flops': 3797092544000}, 'timestamp': '2025-09-10 02:51:26.864365', 'step': 21673, 'epoch': 3} {'type': 'loss', 'content': 0.03221016377210617, 'timestamp': '2025-09-10 02:51:26.866498', 'step': 21674, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 3322488817984}, 'timestamp': '2025-09-10 02:51:26.896069', 'step': 21674, 'epoch': 3} {'type': 'loss', 'content': 0.05444793403148651, 'timestamp': '2025-09-10 02:51:26.898661', 'step': 21675, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 80], 'flops': 2373281365952}, 'timestamp': '2025-09-10 02:51:26.929431', 'step': 21675, 'epoch': 3} {'type': 'loss', 'content': 0.06495451927185059, 'timestamp': '2025-09-10 02:51:26.953432', 'step': 21676, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 2847885091968}, 'timestamp': '2025-09-10 02:51:26.984974', 'step': 21676, 'epoch': 3} {'type': 'loss', 'content': 0.06778132915496826, 'timestamp': '2025-09-10 02:51:26.987744', 'step': 21677, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 2847885091968}, 'timestamp': '2025-09-10 02:51:27.018161', 'step': 21677, 'epoch': 3} {'type': 'loss', 'content': 0.025036919862031937, 'timestamp': '2025-09-10 02:51:27.020842', 'step': 21678, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 2847885091968}, 'timestamp': '2025-09-10 02:51:27.052478', 'step': 21678, 'epoch': 3} {'type': 'loss', 'content': 0.07240018248558044, 'timestamp': '2025-09-10 02:51:27.055363', 'step': 21679, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 2847885091968}, 'timestamp': '2025-09-10 02:51:27.086825', 'step': 21679, 'epoch': 3} {'type': 'loss', 'content': 0.055598970502614975, 'timestamp': '2025-09-10 02:51:27.110726', 'step': 21680, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 3322488817984}, 'timestamp': '2025-09-10 02:51:27.141871', 'step': 21680, 'epoch': 3} {'type': 'loss', 'content': 0.11116964370012283, 'timestamp': '2025-09-10 02:51:27.144887', 'step': 21681, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 2847885091968}, 'timestamp': '2025-09-10 02:51:27.175727', 'step': 21681, 'epoch': 3} {'type': 'loss', 'content': 0.07712364196777344, 'timestamp': '2025-09-10 02:51:27.178508', 'step': 21682, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 144], 'flops': 4271696270016}, 'timestamp': '2025-09-10 02:51:27.209960', 'step': 21682, 'epoch': 3} {'type': 'loss', 'content': 0.03391354903578758, 'timestamp': '2025-09-10 02:51:27.212575', 'step': 21683, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 3322488817984}, 'timestamp': '2025-09-10 02:51:27.244115', 'step': 21683, 'epoch': 3} {'type': 'loss', 'content': 0.05035346746444702, 'timestamp': '2025-09-10 02:51:27.268310', 'step': 21684, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 3322488817984}, 'timestamp': '2025-09-10 02:51:27.301581', 'step': 21684, 'epoch': 3} {'type': 'loss', 'content': 0.09039410948753357, 'timestamp': '2025-09-10 02:51:27.303858', 'step': 21685, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 128], 'flops': 3797092544000}, 'timestamp': '2025-09-10 02:51:27.334617', 'step': 21685, 'epoch': 3} {'type': 'loss', 'content': 0.0948733538389206, 'timestamp': '2025-09-10 02:51:27.337279', 'step': 21686, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 3322488817984}, 'timestamp': '2025-09-10 02:51:27.369648', 'step': 21686, 'epoch': 3} {'type': 'loss', 'content': 0.0664607584476471, 'timestamp': '2025-09-10 02:51:27.372125', 'step': 21687, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 3322488817984}, 'timestamp': '2025-09-10 02:51:27.402300', 'step': 21687, 'epoch': 3} {'type': 'loss', 'content': 0.08002371340990067, 'timestamp': '2025-09-10 02:51:27.432586', 'step': 21688, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 3322488817984}, 'timestamp': '2025-09-10 02:51:27.482209', 'step': 21688, 'epoch': 3} {'type': 'loss', 'content': 0.034799687564373016, 'timestamp': '2025-09-10 02:51:27.490631', 'step': 21689, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 3322488817984}, 'timestamp': '2025-09-10 02:51:27.522264', 'step': 21689, 'epoch': 3} {'type': 'loss', 'content': 0.07585006952285767, 'timestamp': '2025-09-10 02:51:27.525615', 'step': 21690, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 144], 'flops': 4271696270016}, 'timestamp': '2025-09-10 02:51:27.556538', 'step': 21690, 'epoch': 3} {'type': 'loss', 'content': 0.08262954652309418, 'timestamp': '2025-09-10 02:51:27.560460', 'step': 21691, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 2847885091968}, 'timestamp': '2025-09-10 02:51:27.592909', 'step': 21691, 'epoch': 3} {'type': 'loss', 'content': 0.08085479587316513, 'timestamp': '2025-09-10 02:51:27.616484', 'step': 21692, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 160], 'flops': 4746299996032}, 'timestamp': '2025-09-10 02:51:27.647616', 'step': 21692, 'epoch': 3} {'type': 'loss', 'content': 0.031786467880010605, 'timestamp': '2025-09-10 02:51:27.650646', 'step': 21693, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 128], 'flops': 3797092544000}, 'timestamp': '2025-09-10 02:51:27.681500', 'step': 21693, 'epoch': 3} {'type': 'loss', 'content': 0.06319157034158707, 'timestamp': '2025-09-10 02:51:27.684848', 'step': 21694, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 128], 'flops': 3797092544000}, 'timestamp': '2025-09-10 02:51:27.716005', 'step': 21694, 'epoch': 3} {'type': 'loss', 'content': 0.03937844932079315, 'timestamp': '2025-09-10 02:51:27.718692', 'step': 21695, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 128], 'flops': 3797092544000}, 'timestamp': '2025-09-10 02:51:27.750264', 'step': 21695, 'epoch': 3} {'type': 'loss', 'content': 0.06939608603715897, 'timestamp': '2025-09-10 02:51:27.774507', 'step': 21696, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 3322488817984}, 'timestamp': '2025-09-10 02:51:27.809058', 'step': 21696, 'epoch': 3} {'type': 'loss', 'content': 0.039027560502290726, 'timestamp': '2025-09-10 02:51:27.812761', 'step': 21697, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 3322488817984}, 'timestamp': '2025-09-10 02:51:27.843822', 'step': 21697, 'epoch': 3} {'type': 'loss', 'content': 0.047782063484191895, 'timestamp': '2025-09-10 02:51:27.848371', 'step': 21698, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 128], 'flops': 3797092544000}, 'timestamp': '2025-09-10 02:51:27.882322', 'step': 21698, 'epoch': 3} {'type': 'loss', 'content': 0.04227843135595322, 'timestamp': '2025-09-10 02:51:27.884908', 'step': 21699, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 3322488817984}, 'timestamp': '2025-09-10 02:51:27.914543', 'step': 21699, 'epoch': 3} {'type': 'loss', 'content': 0.053870126605033875, 'timestamp': '2025-09-10 02:51:27.938646', 'step': 21700, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 2847885091968}, 'timestamp': '2025-09-10 02:51:27.971618', 'step': 21700, 'epoch': 3} {'type': 'loss', 'content': 0.012874701991677284, 'timestamp': '2025-09-10 02:51:27.975796', 'step': 21701, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 2847885091968}, 'timestamp': '2025-09-10 02:51:28.013871', 'step': 21701, 'epoch': 3} {'type': 'loss', 'content': 0.03465503081679344, 'timestamp': '2025-09-10 02:51:28.016394', 'step': 21702, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 128], 'flops': 3797092544000}, 'timestamp': '2025-09-10 02:51:28.047277', 'step': 21702, 'epoch': 3} {'type': 'loss', 'content': 0.12355723232030869, 'timestamp': '2025-09-10 02:51:28.051741', 'step': 21703, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 3322488817984}, 'timestamp': '2025-09-10 02:51:28.082677', 'step': 21703, 'epoch': 3} {'type': 'loss', 'content': 0.05384272336959839, 'timestamp': '2025-09-10 02:51:28.106429', 'step': 21704, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 2847885091968}, 'timestamp': '2025-09-10 02:51:28.136789', 'step': 21704, 'epoch': 3} {'type': 'loss', 'content': 0.048048749566078186, 'timestamp': '2025-09-10 02:51:28.139982', 'step': 21705, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 2847885091968}, 'timestamp': '2025-09-10 02:51:28.169954', 'step': 21705, 'epoch': 3} {'type': 'loss', 'content': 0.027314510196447372, 'timestamp': '2025-09-10 02:51:28.172674', 'step': 21706, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 128], 'flops': 3797092544000}, 'timestamp': '2025-09-10 02:51:28.203704', 'step': 21706, 'epoch': 3} {'type': 'loss', 'content': 0.11534029245376587, 'timestamp': '2025-09-10 02:51:28.206230', 'step': 21707, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 3322488817984}, 'timestamp': '2025-09-10 02:51:28.236777', 'step': 21707, 'epoch': 3} {'type': 'loss', 'content': 0.0736636146903038, 'timestamp': '2025-09-10 02:51:28.261207', 'step': 21708, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 2847885091968}, 'timestamp': '2025-09-10 02:51:28.292554', 'step': 21708, 'epoch': 3} {'type': 'loss', 'content': 0.05523936450481415, 'timestamp': '2025-09-10 02:51:28.295115', 'step': 21709, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 3322488817984}, 'timestamp': '2025-09-10 02:51:28.326604', 'step': 21709, 'epoch': 3} {'type': 'loss', 'content': 0.07689300179481506, 'timestamp': '2025-09-10 02:51:28.330700', 'step': 21710, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 3322488817984}, 'timestamp': '2025-09-10 02:51:28.362398', 'step': 21710, 'epoch': 3} {'type': 'loss', 'content': 0.09799575060606003, 'timestamp': '2025-09-10 02:51:28.364593', 'step': 21711, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 128], 'flops': 3797092544000}, 'timestamp': '2025-09-10 02:51:28.394675', 'step': 21711, 'epoch': 3} {'type': 'loss', 'content': 0.05736472085118294, 'timestamp': '2025-09-10 02:51:28.418183', 'step': 21712, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 144], 'flops': 4271696270016}, 'timestamp': '2025-09-10 02:51:28.448144', 'step': 21712, 'epoch': 3} {'type': 'loss', 'content': 0.06791869550943375, 'timestamp': '2025-09-10 02:51:28.450597', 'step': 21713, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 80], 'flops': 2373281365952}, 'timestamp': '2025-09-10 02:51:28.482703', 'step': 21713, 'epoch': 3} {'type': 'loss', 'content': 0.09695272892713547, 'timestamp': '2025-09-10 02:51:28.484943', 'step': 21714, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 2847885091968}, 'timestamp': '2025-09-10 02:51:28.514968', 'step': 21714, 'epoch': 3} {'type': 'loss', 'content': 0.024576999247074127, 'timestamp': '2025-09-10 02:51:28.517460', 'step': 21715, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 3322488817984}, 'timestamp': '2025-09-10 02:51:28.547739', 'step': 21715, 'epoch': 3} {'type': 'loss', 'content': 0.0803164690732956, 'timestamp': '2025-09-10 02:51:28.571749', 'step': 21716, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 2847885091968}, 'timestamp': '2025-09-10 02:51:28.602364', 'step': 21716, 'epoch': 3} {'type': 'loss', 'content': 0.07025618106126785, 'timestamp': '2025-09-10 02:51:28.604669', 'step': 21717, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 3322488817984}, 'timestamp': '2025-09-10 02:51:28.635280', 'step': 21717, 'epoch': 3} {'type': 'loss', 'content': 0.11667211353778839, 'timestamp': '2025-09-10 02:51:28.637982', 'step': 21718, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 3322488817984}, 'timestamp': '2025-09-10 02:51:28.667834', 'step': 21718, 'epoch': 3} {'type': 'loss', 'content': 0.05178951472043991, 'timestamp': '2025-09-10 02:51:28.670164', 'step': 21719, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 128], 'flops': 3797092544000}, 'timestamp': '2025-09-10 02:51:28.699716', 'step': 21719, 'epoch': 3} {'type': 'loss', 'content': 0.09588586539030075, 'timestamp': '2025-09-10 02:51:28.723603', 'step': 21720, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 3322488817984}, 'timestamp': '2025-09-10 02:51:28.753376', 'step': 21720, 'epoch': 3} {'type': 'loss', 'content': 0.09742243587970734, 'timestamp': '2025-09-10 02:51:28.755530', 'step': 21721, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 2847885091968}, 'timestamp': '2025-09-10 02:51:28.787340', 'step': 21721, 'epoch': 3} {'type': 'loss', 'content': 0.1177469789981842, 'timestamp': '2025-09-10 02:51:28.789733', 'step': 21722, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 3322488817984}, 'timestamp': '2025-09-10 02:51:28.828718', 'step': 21722, 'epoch': 3} {'type': 'loss', 'content': 0.13557273149490356, 'timestamp': '2025-09-10 02:51:28.831331', 'step': 21723, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 3322488817984}, 'timestamp': '2025-09-10 02:51:28.861625', 'step': 21723, 'epoch': 3} {'type': 'loss', 'content': 0.06552041321992874, 'timestamp': '2025-09-10 02:51:28.886779', 'step': 21724, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 2847885091968}, 'timestamp': '2025-09-10 02:51:28.918247', 'step': 21724, 'epoch': 3} {'type': 'loss', 'content': 0.12347747385501862, 'timestamp': '2025-09-10 02:51:28.921023', 'step': 21725, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 128], 'flops': 3797092544000}, 'timestamp': '2025-09-10 02:51:28.951661', 'step': 21725, 'epoch': 3} {'type': 'loss', 'content': 0.06647340953350067, 'timestamp': '2025-09-10 02:51:28.954109', 'step': 21726, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 3322488817984}, 'timestamp': '2025-09-10 02:51:28.987040', 'step': 21726, 'epoch': 3} {'type': 'loss', 'content': 0.016660798341035843, 'timestamp': '2025-09-10 02:51:28.989575', 'step': 21727, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 128], 'flops': 3797092544000}, 'timestamp': '2025-09-10 02:51:29.019815', 'step': 21727, 'epoch': 3} {'type': 'loss', 'content': 0.03807805851101875, 'timestamp': '2025-09-10 02:51:29.043456', 'step': 21728, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 128], 'flops': 3797092544000}, 'timestamp': '2025-09-10 02:51:29.074897', 'step': 21728, 'epoch': 3} {'type': 'loss', 'content': 0.08136728405952454, 'timestamp': '2025-09-10 02:51:29.077530', 'step': 21729, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 2847885091968}, 'timestamp': '2025-09-10 02:51:29.107909', 'step': 21729, 'epoch': 3} {'type': 'loss', 'content': 0.054060641676187515, 'timestamp': '2025-09-10 02:51:29.110437', 'step': 21730, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 2847885091968}, 'timestamp': '2025-09-10 02:51:29.140470', 'step': 21730, 'epoch': 3} {'type': 'loss', 'content': 0.01866690255701542, 'timestamp': '2025-09-10 02:51:29.142936', 'step': 21731, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 128], 'flops': 3797092544000}, 'timestamp': '2025-09-10 02:51:29.173223', 'step': 21731, 'epoch': 3} {'type': 'loss', 'content': 0.11824367940425873, 'timestamp': '2025-09-10 02:51:29.196656', 'step': 21732, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 3322488817984}, 'timestamp': '2025-09-10 02:51:29.226953', 'step': 21732, 'epoch': 3} {'type': 'loss', 'content': 0.028407299891114235, 'timestamp': '2025-09-10 02:51:29.229684', 'step': 21733, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 3322488817984}, 'timestamp': '2025-09-10 02:51:29.260117', 'step': 21733, 'epoch': 3} {'type': 'loss', 'content': 0.018367262557148933, 'timestamp': '2025-09-10 02:51:29.262903', 'step': 21734, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 3322488817984}, 'timestamp': '2025-09-10 02:51:29.295503', 'step': 21734, 'epoch': 3} {'type': 'loss', 'content': 0.05543728545308113, 'timestamp': '2025-09-10 02:51:29.297836', 'step': 21735, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 3322488817984}, 'timestamp': '2025-09-10 02:51:29.327271', 'step': 21735, 'epoch': 3} {'type': 'loss', 'content': 0.048834219574928284, 'timestamp': '2025-09-10 02:51:29.350742', 'step': 21736, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 3322488817984}, 'timestamp': '2025-09-10 02:51:29.385126', 'step': 21736, 'epoch': 3} {'type': 'loss', 'content': 0.041069090366363525, 'timestamp': '2025-09-10 02:51:29.390327', 'step': 21737, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 3322488817984}, 'timestamp': '2025-09-10 02:51:29.431800', 'step': 21737, 'epoch': 3} {'type': 'loss', 'content': 0.059406429529190063, 'timestamp': '2025-09-10 02:51:29.434642', 'step': 21738, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 2847885091968}, 'timestamp': '2025-09-10 02:51:29.465132', 'step': 21738, 'epoch': 3} {'type': 'loss', 'content': 0.07566330581903458, 'timestamp': '2025-09-10 02:51:29.469410', 'step': 21739, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 3322488817984}, 'timestamp': '2025-09-10 02:51:29.500268', 'step': 21739, 'epoch': 3} {'type': 'loss', 'content': 0.06514588743448257, 'timestamp': '2025-09-10 02:51:29.523629', 'step': 21740, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 3322488817984}, 'timestamp': '2025-09-10 02:51:29.554024', 'step': 21740, 'epoch': 3} {'type': 'loss', 'content': 0.08005222678184509, 'timestamp': '2025-09-10 02:51:29.556517', 'step': 21741, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 128], 'flops': 3797092544000}, 'timestamp': '2025-09-10 02:51:29.587134', 'step': 21741, 'epoch': 3} {'type': 'loss', 'content': 0.08533160388469696, 'timestamp': '2025-09-10 02:51:29.589208', 'step': 21742, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 160], 'flops': 4746299996032}, 'timestamp': '2025-09-10 02:51:29.619690', 'step': 21742, 'epoch': 3} {'type': 'loss', 'content': 0.06495233625173569, 'timestamp': '2025-09-10 02:51:29.622818', 'step': 21743, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 144], 'flops': 4271696270016}, 'timestamp': '2025-09-10 02:51:29.653688', 'step': 21743, 'epoch': 3} {'type': 'loss', 'content': 0.0837368592619896, 'timestamp': '2025-09-10 02:51:29.677241', 'step': 21744, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 2847885091968}, 'timestamp': '2025-09-10 02:51:29.707767', 'step': 21744, 'epoch': 3} {'type': 'loss', 'content': 0.05261179804801941, 'timestamp': '2025-09-10 02:51:29.710032', 'step': 21745, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 128], 'flops': 3797092544000}, 'timestamp': '2025-09-10 02:51:29.739783', 'step': 21745, 'epoch': 3} {'type': 'loss', 'content': 0.12078598886728287, 'timestamp': '2025-09-10 02:51:29.742326', 'step': 21746, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 3322488817984}, 'timestamp': '2025-09-10 02:51:29.773037', 'step': 21746, 'epoch': 3} {'type': 'loss', 'content': 0.10895467549562454, 'timestamp': '2025-09-10 02:51:29.775396', 'step': 21747, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 144], 'flops': 4271696270016}, 'timestamp': '2025-09-10 02:51:29.809863', 'step': 21747, 'epoch': 3} {'type': 'loss', 'content': 0.05415678024291992, 'timestamp': '2025-09-10 02:51:29.833749', 'step': 21748, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 3322488817984}, 'timestamp': '2025-09-10 02:51:29.863889', 'step': 21748, 'epoch': 3} {'type': 'loss', 'content': 0.07091784477233887, 'timestamp': '2025-09-10 02:51:29.866486', 'step': 21749, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 160], 'flops': 4746299996032}, 'timestamp': '2025-09-10 02:51:29.898502', 'step': 21749, 'epoch': 3} {'type': 'loss', 'content': 0.07067817449569702, 'timestamp': '2025-09-10 02:51:29.901554', 'step': 21750, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 2847885091968}, 'timestamp': '2025-09-10 02:51:29.931329', 'step': 21750, 'epoch': 3} {'type': 'loss', 'content': 0.08712770789861679, 'timestamp': '2025-09-10 02:51:29.933883', 'step': 21751, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 3322488817984}, 'timestamp': '2025-09-10 02:51:29.963721', 'step': 21751, 'epoch': 3} {'type': 'loss', 'content': 0.03547175973653793, 'timestamp': '2025-09-10 02:51:29.987593', 'step': 21752, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 128], 'flops': 3797092544000}, 'timestamp': '2025-09-10 02:51:30.018559', 'step': 21752, 'epoch': 3} {'type': 'loss', 'content': 0.0815921425819397, 'timestamp': '2025-09-10 02:51:30.020977', 'step': 21753, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 144], 'flops': 4271696270016}, 'timestamp': '2025-09-10 02:51:30.051420', 'step': 21753, 'epoch': 3} {'type': 'loss', 'content': 0.05236315727233887, 'timestamp': '2025-09-10 02:51:30.054077', 'step': 21754, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 128], 'flops': 3797092544000}, 'timestamp': '2025-09-10 02:51:30.084023', 'step': 21754, 'epoch': 3} {'type': 'loss', 'content': 0.037371061742305756, 'timestamp': '2025-09-10 02:51:30.086392', 'step': 21755, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 2847885091968}, 'timestamp': '2025-09-10 02:51:30.116251', 'step': 21755, 'epoch': 3} {'type': 'loss', 'content': 0.05361630767583847, 'timestamp': '2025-09-10 02:51:30.139767', 'step': 21756, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 160], 'flops': 4746299996032}, 'timestamp': '2025-09-10 02:51:30.169977', 'step': 21756, 'epoch': 3} {'type': 'loss', 'content': 0.06209331005811691, 'timestamp': '2025-09-10 02:51:30.172626', 'step': 21757, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 128], 'flops': 3797092544000}, 'timestamp': '2025-09-10 02:51:30.206116', 'step': 21757, 'epoch': 3} {'type': 'loss', 'content': 0.07345245778560638, 'timestamp': '2025-09-10 02:51:30.208398', 'step': 21758, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 144], 'flops': 4271696270016}, 'timestamp': '2025-09-10 02:51:30.238821', 'step': 21758, 'epoch': 3} {'type': 'loss', 'content': 0.07659631222486496, 'timestamp': '2025-09-10 02:51:30.241534', 'step': 21759, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 128], 'flops': 3797092544000}, 'timestamp': '2025-09-10 02:51:30.272045', 'step': 21759, 'epoch': 3} {'type': 'loss', 'content': 0.04284633323550224, 'timestamp': '2025-09-10 02:51:30.295682', 'step': 21760, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 144], 'flops': 4271696270016}, 'timestamp': '2025-09-10 02:51:30.325264', 'step': 21760, 'epoch': 3} {'type': 'loss', 'content': 0.08819238841533661, 'timestamp': '2025-09-10 02:51:30.327856', 'step': 21761, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 3322488817984}, 'timestamp': '2025-09-10 02:51:30.358235', 'step': 21761, 'epoch': 3} {'type': 'loss', 'content': 0.034224748611450195, 'timestamp': '2025-09-10 02:51:30.360719', 'step': 21762, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 128], 'flops': 3797092544000}, 'timestamp': '2025-09-10 02:51:30.390807', 'step': 21762, 'epoch': 3} {'type': 'loss', 'content': 0.06425691395998001, 'timestamp': '2025-09-10 02:51:30.393339', 'step': 21763, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 3322488817984}, 'timestamp': '2025-09-10 02:51:30.423687', 'step': 21763, 'epoch': 3} {'type': 'loss', 'content': 0.05215063318610191, 'timestamp': '2025-09-10 02:51:30.446988', 'step': 21764, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 2847885091968}, 'timestamp': '2025-09-10 02:51:30.478013', 'step': 21764, 'epoch': 3} {'type': 'loss', 'content': 0.06722637265920639, 'timestamp': '2025-09-10 02:51:30.480977', 'step': 21765, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 128], 'flops': 3797092544000}, 'timestamp': '2025-09-10 02:51:30.511199', 'step': 21765, 'epoch': 3} {'type': 'loss', 'content': 0.046814605593681335, 'timestamp': '2025-09-10 02:51:30.513987', 'step': 21766, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 2847885091968}, 'timestamp': '2025-09-10 02:51:30.543735', 'step': 21766, 'epoch': 3} {'type': 'loss', 'content': 0.04030359163880348, 'timestamp': '2025-09-10 02:51:30.546315', 'step': 21767, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 3322488817984}, 'timestamp': '2025-09-10 02:51:30.576081', 'step': 21767, 'epoch': 3} {'type': 'loss', 'content': 0.05783716216683388, 'timestamp': '2025-09-10 02:51:30.599837', 'step': 21768, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 3322488817984}, 'timestamp': '2025-09-10 02:51:30.630455', 'step': 21768, 'epoch': 3} {'type': 'loss', 'content': 0.059746719896793365, 'timestamp': '2025-09-10 02:51:30.632722', 'step': 21769, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 2847885091968}, 'timestamp': '2025-09-10 02:51:30.663955', 'step': 21769, 'epoch': 3} {'type': 'loss', 'content': 0.03195231035351753, 'timestamp': '2025-09-10 02:51:30.666450', 'step': 21770, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 3322488817984}, 'timestamp': '2025-09-10 02:51:30.697013', 'step': 21770, 'epoch': 3} {'type': 'loss', 'content': 0.014360923320055008, 'timestamp': '2025-09-10 02:51:30.699366', 'step': 21771, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 3322488817984}, 'timestamp': '2025-09-10 02:51:30.731150', 'step': 21771, 'epoch': 3} {'type': 'loss', 'content': 0.02717728540301323, 'timestamp': '2025-09-10 02:51:30.754680', 'step': 21772, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 3322488817984}, 'timestamp': '2025-09-10 02:51:30.784978', 'step': 21772, 'epoch': 3} {'type': 'loss', 'content': 0.03857501968741417, 'timestamp': '2025-09-10 02:51:30.791941', 'step': 21773, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 144], 'flops': 4271696270016}, 'timestamp': '2025-09-10 02:51:30.827050', 'step': 21773, 'epoch': 3} {'type': 'loss', 'content': 0.03161337971687317, 'timestamp': '2025-09-10 02:51:30.829737', 'step': 21774, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 3322488817984}, 'timestamp': '2025-09-10 02:51:30.860497', 'step': 21774, 'epoch': 3} {'type': 'loss', 'content': 0.06272094696760178, 'timestamp': '2025-09-10 02:51:30.864598', 'step': 21775, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 2847885091968}, 'timestamp': '2025-09-10 02:51:30.898211', 'step': 21775, 'epoch': 3} {'type': 'loss', 'content': 0.03469042852520943, 'timestamp': '2025-09-10 02:51:30.922061', 'step': 21776, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 2847885091968}, 'timestamp': '2025-09-10 02:51:30.952076', 'step': 21776, 'epoch': 3} {'type': 'loss', 'content': 0.03515751659870148, 'timestamp': '2025-09-10 02:51:30.954186', 'step': 21777, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 128], 'flops': 3797092544000}, 'timestamp': '2025-09-10 02:51:30.984023', 'step': 21777, 'epoch': 3} {'type': 'loss', 'content': 0.05154392868280411, 'timestamp': '2025-09-10 02:51:30.987844', 'step': 21778, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 80], 'flops': 2373281365952}, 'timestamp': '2025-09-10 02:51:31.017376', 'step': 21778, 'epoch': 3} {'type': 'loss', 'content': 0.0943678542971611, 'timestamp': '2025-09-10 02:51:31.019808', 'step': 21779, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 3322488817984}, 'timestamp': '2025-09-10 02:51:31.050146', 'step': 21779, 'epoch': 3} {'type': 'loss', 'content': 0.05878864601254463, 'timestamp': '2025-09-10 02:51:31.073916', 'step': 21780, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 2847885091968}, 'timestamp': '2025-09-10 02:51:31.105295', 'step': 21780, 'epoch': 3} {'type': 'loss', 'content': 0.08382904529571533, 'timestamp': '2025-09-10 02:51:31.109630', 'step': 21781, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 2847885091968}, 'timestamp': '2025-09-10 02:51:31.140171', 'step': 21781, 'epoch': 3} {'type': 'loss', 'content': 0.04096049815416336, 'timestamp': '2025-09-10 02:51:31.142948', 'step': 21782, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 128], 'flops': 3797092544000}, 'timestamp': '2025-09-10 02:51:31.173074', 'step': 21782, 'epoch': 3} {'type': 'loss', 'content': 0.14243926107883453, 'timestamp': '2025-09-10 02:51:31.177742', 'step': 21783, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 128], 'flops': 3797092544000}, 'timestamp': '2025-09-10 02:51:31.207749', 'step': 21783, 'epoch': 3} {'type': 'loss', 'content': 0.12291539460420609, 'timestamp': '2025-09-10 02:51:31.231822', 'step': 21784, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 3322488817984}, 'timestamp': '2025-09-10 02:51:31.262168', 'step': 21784, 'epoch': 3} {'type': 'loss', 'content': 0.04331882670521736, 'timestamp': '2025-09-10 02:51:31.266089', 'step': 21785, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 3322488817984}, 'timestamp': '2025-09-10 02:51:31.297834', 'step': 21785, 'epoch': 3} {'type': 'loss', 'content': 0.04720056429505348, 'timestamp': '2025-09-10 02:51:31.300660', 'step': 21786, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 3322488817984}, 'timestamp': '2025-09-10 02:51:31.330885', 'step': 21786, 'epoch': 3} {'type': 'loss', 'content': 0.075328029692173, 'timestamp': '2025-09-10 02:51:31.333206', 'step': 21787, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 2847885091968}, 'timestamp': '2025-09-10 02:51:31.363949', 'step': 21787, 'epoch': 3} {'type': 'loss', 'content': 0.052206214517354965, 'timestamp': '2025-09-10 02:51:31.387679', 'step': 21788, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 2847885091968}, 'timestamp': '2025-09-10 02:51:31.419659', 'step': 21788, 'epoch': 3} {'type': 'loss', 'content': 0.07064922153949738, 'timestamp': '2025-09-10 02:51:31.422312', 'step': 21789, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 2847885091968}, 'timestamp': '2025-09-10 02:51:31.451953', 'step': 21789, 'epoch': 3} {'type': 'loss', 'content': 0.03586176782846451, 'timestamp': '2025-09-10 02:51:31.454354', 'step': 21790, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 128], 'flops': 3797092544000}, 'timestamp': '2025-09-10 02:51:31.484532', 'step': 21790, 'epoch': 3} {'type': 'loss', 'content': 0.11730733513832092, 'timestamp': '2025-09-10 02:51:31.487123', 'step': 21791, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 128], 'flops': 3797092544000}, 'timestamp': '2025-09-10 02:51:31.518892', 'step': 21791, 'epoch': 3} {'type': 'loss', 'content': 0.11283977329730988, 'timestamp': '2025-09-10 02:51:31.542505', 'step': 21792, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 2847885091968}, 'timestamp': '2025-09-10 02:51:31.572625', 'step': 21792, 'epoch': 3} {'type': 'loss', 'content': 0.13215923309326172, 'timestamp': '2025-09-10 02:51:31.575670', 'step': 21793, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 144], 'flops': 4271696270016}, 'timestamp': '2025-09-10 02:51:31.605644', 'step': 21793, 'epoch': 3} {'type': 'loss', 'content': 0.10542909055948257, 'timestamp': '2025-09-10 02:51:31.608564', 'step': 21794, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 3322488817984}, 'timestamp': '2025-09-10 02:51:31.639703', 'step': 21794, 'epoch': 3} {'type': 'loss', 'content': 0.11161468178033829, 'timestamp': '2025-09-10 02:51:31.642115', 'step': 21795, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 144], 'flops': 4271696270016}, 'timestamp': '2025-09-10 02:51:31.672340', 'step': 21795, 'epoch': 3} {'type': 'loss', 'content': 0.05329134315252304, 'timestamp': '2025-09-10 02:51:31.696468', 'step': 21796, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 128], 'flops': 3797092544000}, 'timestamp': '2025-09-10 02:51:31.727086', 'step': 21796, 'epoch': 3} {'type': 'loss', 'content': 0.08631668239831924, 'timestamp': '2025-09-10 02:51:31.729630', 'step': 21797, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 3322488817984}, 'timestamp': '2025-09-10 02:51:31.759885', 'step': 21797, 'epoch': 3} {'type': 'loss', 'content': 0.04709946736693382, 'timestamp': '2025-09-10 02:51:31.762203', 'step': 21798, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 2847885091968}, 'timestamp': '2025-09-10 02:51:31.792014', 'step': 21798, 'epoch': 3} {'type': 'loss', 'content': 0.06537478417158127, 'timestamp': '2025-09-10 02:51:31.795242', 'step': 21799, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 2847885091968}, 'timestamp': '2025-09-10 02:51:31.828499', 'step': 21799, 'epoch': 3} {'type': 'loss', 'content': 0.01506250910460949, 'timestamp': '2025-09-10 02:51:31.852517', 'step': 21800, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 160], 'flops': 4746299996032}, 'timestamp': '2025-09-10 02:51:31.883814', 'step': 21800, 'epoch': 3} {'type': 'loss', 'content': 0.06548448652029037, 'timestamp': '2025-09-10 02:51:31.886824', 'step': 21801, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 128], 'flops': 3797092544000}, 'timestamp': '2025-09-10 02:51:31.917469', 'step': 21801, 'epoch': 3} {'type': 'loss', 'content': 0.12195999175310135, 'timestamp': '2025-09-10 02:51:31.920979', 'step': 21802, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 3322488817984}, 'timestamp': '2025-09-10 02:51:31.952748', 'step': 21802, 'epoch': 3} {'type': 'loss', 'content': 0.036077406257390976, 'timestamp': '2025-09-10 02:51:31.955475', 'step': 21803, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 160], 'flops': 4746299996032}, 'timestamp': '2025-09-10 02:51:31.986372', 'step': 21803, 'epoch': 3} {'type': 'loss', 'content': 0.09071700274944305, 'timestamp': '2025-09-10 02:51:32.010232', 'step': 21804, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 2847885091968}, 'timestamp': '2025-09-10 02:51:32.041279', 'step': 21804, 'epoch': 3} {'type': 'loss', 'content': 0.05922522395849228, 'timestamp': '2025-09-10 02:51:32.043547', 'step': 21805, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 3322488817984}, 'timestamp': '2025-09-10 02:51:32.073945', 'step': 21805, 'epoch': 3} {'type': 'loss', 'content': 0.06761696189641953, 'timestamp': '2025-09-10 02:51:32.076665', 'step': 21806, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 2847885091968}, 'timestamp': '2025-09-10 02:51:32.107622', 'step': 21806, 'epoch': 3} {'type': 'loss', 'content': 0.02202010340988636, 'timestamp': '2025-09-10 02:51:32.110760', 'step': 21807, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 176], 'flops': 5220903722048}, 'timestamp': '2025-09-10 02:51:32.142197', 'step': 21807, 'epoch': 3} {'type': 'loss', 'content': 0.03281373903155327, 'timestamp': '2025-09-10 02:51:32.167496', 'step': 21808, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 128], 'flops': 3797092544000}, 'timestamp': '2025-09-10 02:51:32.197973', 'step': 21808, 'epoch': 3} {'type': 'loss', 'content': 0.019716063514351845, 'timestamp': '2025-09-10 02:51:32.200237', 'step': 21809, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 128], 'flops': 3797092544000}, 'timestamp': '2025-09-10 02:51:32.233350', 'step': 21809, 'epoch': 3} {'type': 'loss', 'content': 0.06189938262104988, 'timestamp': '2025-09-10 02:51:32.236022', 'step': 21810, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 3322488817984}, 'timestamp': '2025-09-10 02:51:32.266294', 'step': 21810, 'epoch': 3} {'type': 'loss', 'content': 0.06692478060722351, 'timestamp': '2025-09-10 02:51:32.268532', 'step': 21811, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 2847885091968}, 'timestamp': '2025-09-10 02:51:32.298819', 'step': 21811, 'epoch': 3} {'type': 'loss', 'content': 0.0798199474811554, 'timestamp': '2025-09-10 02:51:32.322914', 'step': 21812, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 128], 'flops': 3797092544000}, 'timestamp': '2025-09-10 02:51:32.353279', 'step': 21812, 'epoch': 3} {'type': 'loss', 'content': 0.12983210384845734, 'timestamp': '2025-09-10 02:51:32.356023', 'step': 21813, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 3322488817984}, 'timestamp': '2025-09-10 02:51:32.386880', 'step': 21813, 'epoch': 3} {'type': 'loss', 'content': 0.032266765832901, 'timestamp': '2025-09-10 02:51:32.389404', 'step': 21814, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 2847885091968}, 'timestamp': '2025-09-10 02:51:32.420608', 'step': 21814, 'epoch': 3} {'type': 'loss', 'content': 0.05048685148358345, 'timestamp': '2025-09-10 02:51:32.423180', 'step': 21815, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 128], 'flops': 3797092544000}, 'timestamp': '2025-09-10 02:51:32.453216', 'step': 21815, 'epoch': 3} {'type': 'loss', 'content': 0.04301122575998306, 'timestamp': '2025-09-10 02:51:32.476787', 'step': 21816, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 3322488817984}, 'timestamp': '2025-09-10 02:51:32.507065', 'step': 21816, 'epoch': 3} {'type': 'loss', 'content': 0.08953505754470825, 'timestamp': '2025-09-10 02:51:32.509334', 'step': 21817, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 3322488817984}, 'timestamp': '2025-09-10 02:51:32.539949', 'step': 21817, 'epoch': 3} {'type': 'loss', 'content': 0.06178548187017441, 'timestamp': '2025-09-10 02:51:32.542559', 'step': 21818, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 128], 'flops': 3797092544000}, 'timestamp': '2025-09-10 02:51:32.572406', 'step': 21818, 'epoch': 3} {'type': 'loss', 'content': 0.06625448912382126, 'timestamp': '2025-09-10 02:51:32.575350', 'step': 21819, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 144], 'flops': 4271696270016}, 'timestamp': '2025-09-10 02:51:32.605610', 'step': 21819, 'epoch': 3} {'type': 'loss', 'content': 0.03846060857176781, 'timestamp': '2025-09-10 02:51:32.630742', 'step': 21820, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 80], 'flops': 2373281365952}, 'timestamp': '2025-09-10 02:51:32.661533', 'step': 21820, 'epoch': 3} {'type': 'loss', 'content': 0.0267277080565691, 'timestamp': '2025-09-10 02:51:32.664181', 'step': 21821, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 128], 'flops': 3797092544000}, 'timestamp': '2025-09-10 02:51:32.693897', 'step': 21821, 'epoch': 3} {'type': 'loss', 'content': 0.12603241205215454, 'timestamp': '2025-09-10 02:51:32.696427', 'step': 21822, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 160], 'flops': 4746299996032}, 'timestamp': '2025-09-10 02:51:32.728272', 'step': 21822, 'epoch': 3} {'type': 'loss', 'content': 0.09993153065443039, 'timestamp': '2025-09-10 02:51:32.731315', 'step': 21823, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 128], 'flops': 3797092544000}, 'timestamp': '2025-09-10 02:51:32.761699', 'step': 21823, 'epoch': 3} {'type': 'loss', 'content': 0.03196767345070839, 'timestamp': '2025-09-10 02:51:32.785372', 'step': 21824, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 2847885091968}, 'timestamp': '2025-09-10 02:51:32.819132', 'step': 21824, 'epoch': 3} {'type': 'loss', 'content': 0.0843978300690651, 'timestamp': '2025-09-10 02:51:32.821780', 'step': 21825, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 128], 'flops': 3797092544000}, 'timestamp': '2025-09-10 02:51:32.851784', 'step': 21825, 'epoch': 3} {'type': 'loss', 'content': 0.07983539253473282, 'timestamp': '2025-09-10 02:51:32.854585', 'step': 21826, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 160], 'flops': 4746299996032}, 'timestamp': '2025-09-10 02:51:32.885109', 'step': 21826, 'epoch': 3} {'type': 'loss', 'content': 0.046186354011297226, 'timestamp': '2025-09-10 02:51:32.888322', 'step': 21827, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 2847885091968}, 'timestamp': '2025-09-10 02:51:32.918479', 'step': 21827, 'epoch': 3} {'type': 'loss', 'content': 0.0244282279163599, 'timestamp': '2025-09-10 02:51:32.942031', 'step': 21828, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 3322488817984}, 'timestamp': '2025-09-10 02:51:32.973532', 'step': 21828, 'epoch': 3} {'type': 'loss', 'content': 0.039344027638435364, 'timestamp': '2025-09-10 02:51:32.975989', 'step': 21829, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 128], 'flops': 3797092544000}, 'timestamp': '2025-09-10 02:51:33.005565', 'step': 21829, 'epoch': 3} {'type': 'loss', 'content': 0.12311585247516632, 'timestamp': '2025-09-10 02:51:33.008199', 'step': 21830, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 3322488817984}, 'timestamp': '2025-09-10 02:51:33.042049', 'step': 21830, 'epoch': 3} {'type': 'loss', 'content': 0.05246430262923241, 'timestamp': '2025-09-10 02:51:33.045398', 'step': 21831, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 3322488817984}, 'timestamp': '2025-09-10 02:51:33.076780', 'step': 21831, 'epoch': 3} {'type': 'loss', 'content': 0.059353068470954895, 'timestamp': '2025-09-10 02:51:33.101198', 'step': 21832, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 3322488817984}, 'timestamp': '2025-09-10 02:51:33.134898', 'step': 21832, 'epoch': 3} {'type': 'loss', 'content': 0.03390612080693245, 'timestamp': '2025-09-10 02:51:33.137469', 'step': 21833, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 128], 'flops': 3797092544000}, 'timestamp': '2025-09-10 02:51:33.167847', 'step': 21833, 'epoch': 3} {'type': 'loss', 'content': 0.046762265264987946, 'timestamp': '2025-09-10 02:51:33.170588', 'step': 21834, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 128], 'flops': 3797092544000}, 'timestamp': '2025-09-10 02:51:33.202079', 'step': 21834, 'epoch': 3} {'type': 'loss', 'content': 0.09301510453224182, 'timestamp': '2025-09-10 02:51:33.204792', 'step': 21835, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 3322488817984}, 'timestamp': '2025-09-10 02:51:33.237371', 'step': 21835, 'epoch': 3} {'type': 'loss', 'content': 0.05817576125264168, 'timestamp': '2025-09-10 02:51:33.260891', 'step': 21836, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 144], 'flops': 4271696270016}, 'timestamp': '2025-09-10 02:51:33.292242', 'step': 21836, 'epoch': 3} {'type': 'loss', 'content': 0.06828583031892776, 'timestamp': '2025-09-10 02:51:33.295194', 'step': 21837, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 2847885091968}, 'timestamp': '2025-09-10 02:51:33.326263', 'step': 21837, 'epoch': 3} {'type': 'loss', 'content': 0.028770387172698975, 'timestamp': '2025-09-10 02:51:33.328755', 'step': 21838, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 80], 'flops': 2373281365952}, 'timestamp': '2025-09-10 02:51:33.359276', 'step': 21838, 'epoch': 3} {'type': 'loss', 'content': 0.09734642505645752, 'timestamp': '2025-09-10 02:51:33.361369', 'step': 21839, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 3322488817984}, 'timestamp': '2025-09-10 02:51:33.392346', 'step': 21839, 'epoch': 3} {'type': 'loss', 'content': 0.04803350567817688, 'timestamp': '2025-09-10 02:51:33.416232', 'step': 21840, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 2847885091968}, 'timestamp': '2025-09-10 02:51:33.447658', 'step': 21840, 'epoch': 3} {'type': 'loss', 'content': 0.006555842235684395, 'timestamp': '2025-09-10 02:51:33.450071', 'step': 21841, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 3322488817984}, 'timestamp': '2025-09-10 02:51:33.480565', 'step': 21841, 'epoch': 3} {'type': 'loss', 'content': 0.04062884300947189, 'timestamp': '2025-09-10 02:51:33.483201', 'step': 21842, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 2847885091968}, 'timestamp': '2025-09-10 02:51:33.512922', 'step': 21842, 'epoch': 3} {'type': 'loss', 'content': 0.06830890476703644, 'timestamp': '2025-09-10 02:51:33.515409', 'step': 21843, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 128], 'flops': 3797092544000}, 'timestamp': '2025-09-10 02:51:33.547621', 'step': 21843, 'epoch': 3} {'type': 'loss', 'content': 0.06359478086233139, 'timestamp': '2025-09-10 02:51:33.571604', 'step': 21844, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 128], 'flops': 3797092544000}, 'timestamp': '2025-09-10 02:51:33.602633', 'step': 21844, 'epoch': 3} {'type': 'loss', 'content': 0.05851895362138748, 'timestamp': '2025-09-10 02:51:33.605235', 'step': 21845, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 176], 'flops': 5220903722048}, 'timestamp': '2025-09-10 02:51:33.636089', 'step': 21845, 'epoch': 3} {'type': 'loss', 'content': 0.0804998055100441, 'timestamp': '2025-09-10 02:51:33.640619', 'step': 21846, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 2847885091968}, 'timestamp': '2025-09-10 02:51:33.671259', 'step': 21846, 'epoch': 3} {'type': 'loss', 'content': 0.031411007046699524, 'timestamp': '2025-09-10 02:51:33.673615', 'step': 21847, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 128], 'flops': 3797092544000}, 'timestamp': '2025-09-10 02:51:33.703822', 'step': 21847, 'epoch': 3} {'type': 'loss', 'content': 0.06626345217227936, 'timestamp': '2025-09-10 02:51:33.727101', 'step': 21848, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 128], 'flops': 3797092544000}, 'timestamp': '2025-09-10 02:51:33.757233', 'step': 21848, 'epoch': 3} {'type': 'loss', 'content': 0.08619300276041031, 'timestamp': '2025-09-10 02:51:33.759564', 'step': 21849, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 128], 'flops': 3797092544000}, 'timestamp': '2025-09-10 02:51:33.789989', 'step': 21849, 'epoch': 3} {'type': 'loss', 'content': 0.0719529464840889, 'timestamp': '2025-09-10 02:51:33.792378', 'step': 21850, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 128], 'flops': 3797092544000}, 'timestamp': '2025-09-10 02:51:33.843743', 'step': 21850, 'epoch': 3} {'type': 'loss', 'content': 0.0473310612142086, 'timestamp': '2025-09-10 02:51:33.858966', 'step': 21851, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 2847885091968}, 'timestamp': '2025-09-10 02:51:33.894187', 'step': 21851, 'epoch': 3} {'type': 'loss', 'content': 0.024200651794672012, 'timestamp': '2025-09-10 02:51:33.918685', 'step': 21852, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 144], 'flops': 4271696270016}, 'timestamp': '2025-09-10 02:51:33.952815', 'step': 21852, 'epoch': 3} {'type': 'loss', 'content': 0.025423236191272736, 'timestamp': '2025-09-10 02:51:33.955596', 'step': 21853, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 2847885091968}, 'timestamp': '2025-09-10 02:51:33.987400', 'step': 21853, 'epoch': 3} {'type': 'loss', 'content': 0.07401464134454727, 'timestamp': '2025-09-10 02:51:33.989922', 'step': 21854, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 128], 'flops': 3797092544000}, 'timestamp': '2025-09-10 02:51:34.019902', 'step': 21854, 'epoch': 3} {'type': 'loss', 'content': 0.045490678399801254, 'timestamp': '2025-09-10 02:51:34.022319', 'step': 21855, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 2847885091968}, 'timestamp': '2025-09-10 02:51:34.053492', 'step': 21855, 'epoch': 3} {'type': 'loss', 'content': 0.06625373661518097, 'timestamp': '2025-09-10 02:51:34.077022', 'step': 21856, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 3322488817984}, 'timestamp': '2025-09-10 02:51:34.107285', 'step': 21856, 'epoch': 3} {'type': 'loss', 'content': 0.020241813734173775, 'timestamp': '2025-09-10 02:51:34.109494', 'step': 21857, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 128], 'flops': 3797092544000}, 'timestamp': '2025-09-10 02:51:34.139361', 'step': 21857, 'epoch': 3} {'type': 'loss', 'content': 0.0840764045715332, 'timestamp': '2025-09-10 02:51:34.142168', 'step': 21858, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 192], 'flops': 5695507448064}, 'timestamp': '2025-09-10 02:51:34.172933', 'step': 21858, 'epoch': 3} {'type': 'loss', 'content': 0.03466011956334114, 'timestamp': '2025-09-10 02:51:34.177923', 'step': 21859, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 3322488817984}, 'timestamp': '2025-09-10 02:51:34.208569', 'step': 21859, 'epoch': 3} {'type': 'loss', 'content': 0.041360508650541306, 'timestamp': '2025-09-10 02:51:34.232184', 'step': 21860, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 3322488817984}, 'timestamp': '2025-09-10 02:51:34.262350', 'step': 21860, 'epoch': 3} {'type': 'loss', 'content': 0.09180279076099396, 'timestamp': '2025-09-10 02:51:34.264870', 'step': 21861, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 3322488817984}, 'timestamp': '2025-09-10 02:51:34.295298', 'step': 21861, 'epoch': 3} {'type': 'loss', 'content': 0.036750469356775284, 'timestamp': '2025-09-10 02:51:34.297610', 'step': 21862, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 3322488817984}, 'timestamp': '2025-09-10 02:51:34.328699', 'step': 21862, 'epoch': 3} {'type': 'loss', 'content': 0.11185698956251144, 'timestamp': '2025-09-10 02:51:34.333631', 'step': 21863, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 128], 'flops': 3797092544000}, 'timestamp': '2025-09-10 02:51:34.366524', 'step': 21863, 'epoch': 3} {'type': 'loss', 'content': 0.04353202506899834, 'timestamp': '2025-09-10 02:51:34.392105', 'step': 21864, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 3322488817984}, 'timestamp': '2025-09-10 02:51:34.423092', 'step': 21864, 'epoch': 3} {'type': 'loss', 'content': 0.027375098317861557, 'timestamp': '2025-09-10 02:51:34.426510', 'step': 21865, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 3322488817984}, 'timestamp': '2025-09-10 02:51:34.459123', 'step': 21865, 'epoch': 3} {'type': 'loss', 'content': 0.028214508667588234, 'timestamp': '2025-09-10 02:51:34.461840', 'step': 21866, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 3322488817984}, 'timestamp': '2025-09-10 02:51:34.492352', 'step': 21866, 'epoch': 3} {'type': 'loss', 'content': 0.04412531107664108, 'timestamp': '2025-09-10 02:51:34.495419', 'step': 21867, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 144], 'flops': 4271696270016}, 'timestamp': '2025-09-10 02:51:34.526419', 'step': 21867, 'epoch': 3} {'type': 'loss', 'content': 0.009788628667593002, 'timestamp': '2025-09-10 02:51:34.551039', 'step': 21868, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 3322488817984}, 'timestamp': '2025-09-10 02:51:34.582956', 'step': 21868, 'epoch': 3} {'type': 'loss', 'content': 0.07143702358007431, 'timestamp': '2025-09-10 02:51:34.585982', 'step': 21869, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 80], 'flops': 2373281365952}, 'timestamp': '2025-09-10 02:51:34.617031', 'step': 21869, 'epoch': 3} {'type': 'loss', 'content': 0.07776399701833725, 'timestamp': '2025-09-10 02:51:34.619364', 'step': 21870, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 128], 'flops': 3797092544000}, 'timestamp': '2025-09-10 02:51:34.649149', 'step': 21870, 'epoch': 3} {'type': 'loss', 'content': 0.06953456997871399, 'timestamp': '2025-09-10 02:51:34.651667', 'step': 21871, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 2847885091968}, 'timestamp': '2025-09-10 02:51:34.681625', 'step': 21871, 'epoch': 3} {'type': 'loss', 'content': 0.07218705117702484, 'timestamp': '2025-09-10 02:51:34.705556', 'step': 21872, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 3322488817984}, 'timestamp': '2025-09-10 02:51:34.735328', 'step': 21872, 'epoch': 3} {'type': 'loss', 'content': 0.09113845974206924, 'timestamp': '2025-09-10 02:51:34.738239', 'step': 21873, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 2847885091968}, 'timestamp': '2025-09-10 02:51:34.768959', 'step': 21873, 'epoch': 3} {'type': 'loss', 'content': 0.05717834085226059, 'timestamp': '2025-09-10 02:51:34.771248', 'step': 21874, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 144], 'flops': 4271696270016}, 'timestamp': '2025-09-10 02:51:34.801015', 'step': 21874, 'epoch': 3} {'type': 'loss', 'content': 0.17167432606220245, 'timestamp': '2025-09-10 02:51:34.803792', 'step': 21875, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 2847885091968}, 'timestamp': '2025-09-10 02:51:34.838577', 'step': 21875, 'epoch': 3} {'type': 'loss', 'content': 0.037022847682237625, 'timestamp': '2025-09-10 02:51:34.867580', 'step': 21876, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 3322488817984}, 'timestamp': '2025-09-10 02:51:34.899277', 'step': 21876, 'epoch': 3} {'type': 'loss', 'content': 0.15621232986450195, 'timestamp': '2025-09-10 02:51:34.901618', 'step': 21877, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 2847885091968}, 'timestamp': '2025-09-10 02:51:34.931609', 'step': 21877, 'epoch': 3} {'type': 'loss', 'content': 0.04084455221891403, 'timestamp': '2025-09-10 02:51:34.934247', 'step': 21878, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 3322488817984}, 'timestamp': '2025-09-10 02:51:34.970222', 'step': 21878, 'epoch': 3} {'type': 'loss', 'content': 0.07922258973121643, 'timestamp': '2025-09-10 02:51:34.972758', 'step': 21879, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 144], 'flops': 4271696270016}, 'timestamp': '2025-09-10 02:51:35.008065', 'step': 21879, 'epoch': 3} {'type': 'loss', 'content': 0.06825519353151321, 'timestamp': '2025-09-10 02:51:35.032141', 'step': 21880, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 2847885091968}, 'timestamp': '2025-09-10 02:51:35.072167', 'step': 21880, 'epoch': 3} {'type': 'loss', 'content': 0.08675069361925125, 'timestamp': '2025-09-10 02:51:35.074596', 'step': 21881, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 3322488817984}, 'timestamp': '2025-09-10 02:51:35.105441', 'step': 21881, 'epoch': 3} {'type': 'loss', 'content': 0.09475212544202805, 'timestamp': '2025-09-10 02:51:35.107565', 'step': 21882, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 2847885091968}, 'timestamp': '2025-09-10 02:51:35.137150', 'step': 21882, 'epoch': 3} {'type': 'loss', 'content': 0.05868116766214371, 'timestamp': '2025-09-10 02:51:35.139635', 'step': 21883, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 2847885091968}, 'timestamp': '2025-09-10 02:51:35.170643', 'step': 21883, 'epoch': 3} {'type': 'loss', 'content': 0.016868988052010536, 'timestamp': '2025-09-10 02:51:35.196329', 'step': 21884, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 128], 'flops': 3797092544000}, 'timestamp': '2025-09-10 02:51:35.241161', 'step': 21884, 'epoch': 3} {'type': 'loss', 'content': 0.1294499635696411, 'timestamp': '2025-09-10 02:51:35.244615', 'step': 21885, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 2847885091968}, 'timestamp': '2025-09-10 02:51:35.275539', 'step': 21885, 'epoch': 3} {'type': 'loss', 'content': 0.010755090042948723, 'timestamp': '2025-09-10 02:51:35.277782', 'step': 21886, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 128], 'flops': 3797092544000}, 'timestamp': '2025-09-10 02:51:35.307738', 'step': 21886, 'epoch': 3} {'type': 'loss', 'content': 0.08824599534273148, 'timestamp': '2025-09-10 02:51:35.310140', 'step': 21887, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 3322488817984}, 'timestamp': '2025-09-10 02:51:35.339612', 'step': 21887, 'epoch': 3} {'type': 'loss', 'content': 0.07674263417720795, 'timestamp': '2025-09-10 02:51:35.363509', 'step': 21888, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 128], 'flops': 3797092544000}, 'timestamp': '2025-09-10 02:51:35.393809', 'step': 21888, 'epoch': 3} {'type': 'loss', 'content': 0.018380070105195045, 'timestamp': '2025-09-10 02:51:35.396791', 'step': 21889, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 2847885091968}, 'timestamp': '2025-09-10 02:51:35.433233', 'step': 21889, 'epoch': 3} {'type': 'loss', 'content': 0.05195728689432144, 'timestamp': '2025-09-10 02:51:35.435749', 'step': 21890, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 128], 'flops': 3797092544000}, 'timestamp': '2025-09-10 02:51:35.466011', 'step': 21890, 'epoch': 3} {'type': 'loss', 'content': 0.08698713034391403, 'timestamp': '2025-09-10 02:51:35.468451', 'step': 21891, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 2847885091968}, 'timestamp': '2025-09-10 02:51:35.498606', 'step': 21891, 'epoch': 3} {'type': 'loss', 'content': 0.09845040738582611, 'timestamp': '2025-09-10 02:51:35.522285', 'step': 21892, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 144], 'flops': 4271696270016}, 'timestamp': '2025-09-10 02:51:35.552936', 'step': 21892, 'epoch': 3} {'type': 'loss', 'content': 0.09739906340837479, 'timestamp': '2025-09-10 02:51:35.555543', 'step': 21893, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 3322488817984}, 'timestamp': '2025-09-10 02:51:35.588133', 'step': 21893, 'epoch': 3} {'type': 'loss', 'content': 0.08423153311014175, 'timestamp': '2025-09-10 02:51:35.592513', 'step': 21894, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 3322488817984}, 'timestamp': '2025-09-10 02:51:35.627219', 'step': 21894, 'epoch': 3} {'type': 'loss', 'content': 0.050907768309116364, 'timestamp': '2025-09-10 02:51:35.629598', 'step': 21895, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 2847885091968}, 'timestamp': '2025-09-10 02:51:35.661667', 'step': 21895, 'epoch': 3} {'type': 'loss', 'content': 0.034685682505369186, 'timestamp': '2025-09-10 02:51:35.685232', 'step': 21896, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 2847885091968}, 'timestamp': '2025-09-10 02:51:35.717404', 'step': 21896, 'epoch': 3} {'type': 'loss', 'content': 0.057521749287843704, 'timestamp': '2025-09-10 02:51:35.720520', 'step': 21897, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 2847885091968}, 'timestamp': '2025-09-10 02:51:35.751454', 'step': 21897, 'epoch': 3} {'type': 'loss', 'content': 0.13652803003787994, 'timestamp': '2025-09-10 02:51:35.754125', 'step': 21898, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 2847885091968}, 'timestamp': '2025-09-10 02:51:35.784212', 'step': 21898, 'epoch': 3} {'type': 'loss', 'content': 0.05435195192694664, 'timestamp': '2025-09-10 02:51:35.786950', 'step': 21899, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 128], 'flops': 3797092544000}, 'timestamp': '2025-09-10 02:51:35.818207', 'step': 21899, 'epoch': 3} {'type': 'loss', 'content': 0.0883500948548317, 'timestamp': '2025-09-10 02:51:35.842269', 'step': 21900, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 128], 'flops': 3797092544000}, 'timestamp': '2025-09-10 02:51:35.875024', 'step': 21900, 'epoch': 3} {'type': 'loss', 'content': 0.06233435124158859, 'timestamp': '2025-09-10 02:51:35.877300', 'step': 21901, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 128], 'flops': 3797092544000}, 'timestamp': '2025-09-10 02:51:35.907665', 'step': 21901, 'epoch': 3} {'type': 'loss', 'content': 0.020380442962050438, 'timestamp': '2025-09-10 02:51:35.910246', 'step': 21902, 'epoch': 3} {'type': 'flops', 'content': [{'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1582003754624}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1898404492032}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1898404492032}, {'type': 'perplexity', 'in_batch_dim': [8, 144], 'batch_size': 8, 'flops': 2847606704256}, {'type': 'perplexity', 'in_batch_dim': [8, 64], 'batch_size': 8, 'flops': 1265603017216}, {'type': 'perplexity', 'in_batch_dim': [8, 112], 'batch_size': 8, 'flops': 2214805229440}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1898404492032}, {'type': 'perplexity', 'in_batch_dim': [8, 112], 'batch_size': 8, 'flops': 2214805229440}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1898404492032}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1898404492032}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1898404492032}, {'type': 'perplexity', 'in_batch_dim': [8, 112], 'batch_size': 8, 'flops': 2214805229440}, {'type': 'perplexity', 'in_batch_dim': [8, 112], 'batch_size': 8, 'flops': 2214805229440}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1582003754624}, {'type': 'perplexity', 'in_batch_dim': [8, 144], 'batch_size': 8, 'flops': 2847606704256}, {'type': 'perplexity', 'in_batch_dim': [8, 112], 'batch_size': 8, 'flops': 2214805229440}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1582003754624}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1898404492032}, {'type': 'perplexity', 'in_batch_dim': [8, 64], 'batch_size': 8, 'flops': 1265603017216}, {'type': 'perplexity', 'in_batch_dim': [8, 64], 'batch_size': 8, 'flops': 1265603017216}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1898404492032}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1582003754624}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1582003754624}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1582003754624}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1582003754624}, {'type': 'perplexity', 'in_batch_dim': [8, 64], 'batch_size': 8, 'flops': 1265603017216}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1898404492032}, {'type': 'perplexity', 'in_batch_dim': [8, 64], 'batch_size': 8, 'flops': 1265603017216}, {'type': 'perplexity', 'in_batch_dim': [8, 64], 'batch_size': 8, 'flops': 1265603017216}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1582003754624}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1582003754624}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1582003754624}, {'type': 'perplexity', 'in_batch_dim': [8, 112], 'batch_size': 8, 'flops': 2214805229440}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1582003754624}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1582003754624}, {'type': 'perplexity', 'in_batch_dim': [8, 160], 'batch_size': 8, 'flops': 3164007441664}, {'type': 'perplexity', 'in_batch_dim': [8, 64], 'batch_size': 8, 'flops': 1265603017216}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1898404492032}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1898404492032}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1898404492032}, {'type': 'perplexity', 'in_batch_dim': [8, 64], 'batch_size': 8, 'flops': 1265603017216}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1582003754624}, {'type': 'perplexity', 'in_batch_dim': [8, 64], 'batch_size': 8, 'flops': 1265603017216}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1582003754624}, {'type': 'perplexity', 'in_batch_dim': [8, 64], 'batch_size': 8, 'flops': 1265603017216}, {'type': 'perplexity', 'in_batch_dim': [8, 112], 'batch_size': 8, 'flops': 2214805229440}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1582003754624}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1898404492032}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1898404492032}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1898404492032}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1898404492032}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1898404492032}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1898404492032}, {'type': 'perplexity', 'in_batch_dim': [8, 64], 'batch_size': 8, 'flops': 1265603017216}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1898404492032}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1582003754624}, {'type': 'perplexity', 'in_batch_dim': [8, 64], 'batch_size': 8, 'flops': 1265603017216}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1898404492032}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1898404492032}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1898404492032}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1898404492032}, {'type': 'perplexity', 'in_batch_dim': [8, 128], 'batch_size': 8, 'flops': 2531205966848}, {'type': 'perplexity', 'in_batch_dim': [8, 112], 'batch_size': 8, 'flops': 2214805229440}, {'type': 'perplexity', 'in_batch_dim': [8, 64], 'batch_size': 8, 'flops': 1265603017216}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1582003754624}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1898404492032}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1582003754624}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1898404492032}, {'type': 'perplexity', 'in_batch_dim': [8, 64], 'batch_size': 8, 'flops': 1265603017216}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1898404492032}, {'type': 'perplexity', 'in_batch_dim': [8, 112], 'batch_size': 8, 'flops': 2214805229440}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1898404492032}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1582003754624}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1582003754624}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1582003754624}, {'type': 'perplexity', 'in_batch_dim': [8, 64], 'batch_size': 8, 'flops': 1265603017216}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1582003754624}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1898404492032}, {'type': 'perplexity', 'in_batch_dim': [8, 64], 'batch_size': 8, 'flops': 1265603017216}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1582003754624}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1898404492032}, {'type': 'perplexity', 'in_batch_dim': [8, 64], 'batch_size': 8, 'flops': 1265603017216}, {'type': 'perplexity', 'in_batch_dim': [8, 112], 'batch_size': 8, 'flops': 2214805229440}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1582003754624}, {'type': 'perplexity', 'in_batch_dim': [8, 144], 'batch_size': 8, 'flops': 2847606704256}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1582003754624}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1582003754624}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1582003754624}, {'type': 'perplexity', 'in_batch_dim': [8, 64], 'batch_size': 8, 'flops': 1265603017216}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1898404492032}, {'type': 'perplexity', 'in_batch_dim': [8, 112], 'batch_size': 8, 'flops': 2214805229440}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1898404492032}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1582003754624}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1582003754624}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1582003754624}, {'type': 'perplexity', 'in_batch_dim': [8, 112], 'batch_size': 8, 'flops': 2214805229440}, {'type': 'perplexity', 'in_batch_dim': [8, 64], 'batch_size': 8, 'flops': 1265603017216}, {'type': 'perplexity', 'in_batch_dim': [8, 112], 'batch_size': 8, 'flops': 2214805229440}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1582003754624}, {'type': 'perplexity', 'in_batch_dim': [8, 64], 'batch_size': 8, 'flops': 1265603017216}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1582003754624}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1582003754624}, {'type': 'perplexity', 'in_batch_dim': [8, 64], 'batch_size': 8, 'flops': 1265603017216}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1582003754624}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1582003754624}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1582003754624}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1898404492032}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1582003754624}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1582003754624}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1898404492032}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1898404492032}, {'type': 'perplexity', 'in_batch_dim': [8, 112], 'batch_size': 8, 'flops': 2214805229440}, {'type': 'perplexity', 'in_batch_dim': [8, 64], 'batch_size': 8, 'flops': 1265603017216}, {'type': 'perplexity', 'in_batch_dim': [8, 112], 'batch_size': 8, 'flops': 2214805229440}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1898404492032}, {'type': 'perplexity', 'in_batch_dim': [8, 112], 'batch_size': 8, 'flops': 2214805229440}, {'type': 'perplexity', 'in_batch_dim': [8, 128], 'batch_size': 8, 'flops': 2531205966848}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1582003754624}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1898404492032}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1898404492032}, {'type': 'perplexity', 'in_batch_dim': [8, 192], 'batch_size': 8, 'flops': 3796808916480}, {'type': 'perplexity', 'in_batch_dim': [8, 64], 'batch_size': 8, 'flops': 1265603017216}, {'type': 'perplexity', 'in_batch_dim': [8, 144], 'batch_size': 8, 'flops': 2847606704256}, {'type': 'perplexity', 'in_batch_dim': [8, 64], 'batch_size': 8, 'flops': 1265603017216}, {'type': 'perplexity', 'in_batch_dim': [8, 144], 'batch_size': 8, 'flops': 2847606704256}, {'type': 'perplexity', 'in_batch_dim': [8, 64], 'batch_size': 8, 'flops': 1265603017216}, {'type': 'perplexity', 'in_batch_dim': [8, 64], 'batch_size': 8, 'flops': 1265603017216}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1898404492032}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1898404492032}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1582003754624}, {'type': 'perplexity', 'in_batch_dim': [8, 128], 'batch_size': 8, 'flops': 2531205966848}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1898404492032}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1898404492032}, {'type': 'perplexity', 'in_batch_dim': [8, 112], 'batch_size': 8, 'flops': 2214805229440}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1582003754624}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1582003754624}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1898404492032}, {'type': 'perplexity', 'in_batch_dim': [8, 64], 'batch_size': 8, 'flops': 1265603017216}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1898404492032}, {'type': 'perplexity', 'in_batch_dim': [8, 112], 'batch_size': 8, 'flops': 2214805229440}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1898404492032}, {'type': 'perplexity', 'in_batch_dim': [8, 64], 'batch_size': 8, 'flops': 1265603017216}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1582003754624}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1898404492032}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1898404492032}, {'type': 'perplexity', 'in_batch_dim': [8, 64], 'batch_size': 8, 'flops': 1265603017216}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1898404492032}, {'type': 'perplexity', 'in_batch_dim': [8, 64], 'batch_size': 8, 'flops': 1265603017216}, {'type': 'perplexity', 'in_batch_dim': [8, 112], 'batch_size': 8, 'flops': 2214805229440}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1582003754624}, {'type': 'perplexity', 'in_batch_dim': [8, 128], 'batch_size': 8, 'flops': 2531205966848}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1582003754624}, {'type': 'perplexity', 'in_batch_dim': [8, 112], 'batch_size': 8, 'flops': 2214805229440}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1898404492032}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1582003754624}, {'type': 'perplexity', 'in_batch_dim': [8, 112], 'batch_size': 8, 'flops': 2214805229440}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1582003754624}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1898404492032}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1898404492032}, {'type': 'perplexity', 'in_batch_dim': [8, 128], 'batch_size': 8, 'flops': 2531205966848}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1898404492032}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1898404492032}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1582003754624}, {'type': 'perplexity', 'in_batch_dim': [8, 112], 'batch_size': 8, 'flops': 2214805229440}, {'type': 'perplexity', 'in_batch_dim': [8, 128], 'batch_size': 8, 'flops': 2531205966848}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1898404492032}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1582003754624}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1582003754624}, {'type': 'perplexity', 'in_batch_dim': [8, 128], 'batch_size': 8, 'flops': 2531205966848}, {'type': 'perplexity', 'in_batch_dim': [8, 112], 'batch_size': 8, 'flops': 2214805229440}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1582003754624}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1582003754624}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1898404492032}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1582003754624}, {'type': 'perplexity', 'in_batch_dim': [8, 64], 'batch_size': 8, 'flops': 1265603017216}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1582003754624}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1582003754624}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1582003754624}, {'type': 'perplexity', 'in_batch_dim': [8, 112], 'batch_size': 8, 'flops': 2214805229440}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1582003754624}, {'type': 'perplexity', 'in_batch_dim': [8, 64], 'batch_size': 8, 'flops': 1265603017216}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1582003754624}, {'type': 'perplexity', 'in_batch_dim': [8, 128], 'batch_size': 8, 'flops': 2531205966848}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1898404492032}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1898404492032}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1582003754624}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1898404492032}, {'type': 'perplexity', 'in_batch_dim': [8, 64], 'batch_size': 8, 'flops': 1265603017216}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1898404492032}, {'type': 'perplexity', 'in_batch_dim': [8, 112], 'batch_size': 8, 'flops': 2214805229440}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1898404492032}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1582003754624}, {'type': 'perplexity', 'in_batch_dim': [8, 112], 'batch_size': 8, 'flops': 2214805229440}, {'type': 'perplexity', 'in_batch_dim': [8, 112], 'batch_size': 8, 'flops': 2214805229440}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1898404492032}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1898404492032}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1898404492032}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1898404492032}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1582003754624}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1582003754624}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1582003754624}, {'type': 'perplexity', 'in_batch_dim': [8, 112], 'batch_size': 8, 'flops': 2214805229440}, {'type': 'perplexity', 'in_batch_dim': [8, 64], 'batch_size': 8, 'flops': 1265603017216}, {'type': 'perplexity', 'in_batch_dim': [8, 144], 'batch_size': 8, 'flops': 2847606704256}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1898404492032}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1898404492032}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1898404492032}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1898404492032}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1898404492032}, {'type': 'perplexity', 'in_batch_dim': [8, 112], 'batch_size': 8, 'flops': 2214805229440}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1582003754624}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1582003754624}, {'type': 'perplexity', 'in_batch_dim': [8, 64], 'batch_size': 8, 'flops': 1265603017216}, {'type': 'perplexity', 'in_batch_dim': [8, 112], 'batch_size': 8, 'flops': 2214805229440}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1582003754624}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1898404492032}, {'type': 'perplexity', 'in_batch_dim': [8, 64], 'batch_size': 8, 'flops': 1265603017216}, {'type': 'perplexity', 'in_batch_dim': [8, 112], 'batch_size': 8, 'flops': 2214805229440}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1898404492032}, {'type': 'perplexity', 'in_batch_dim': [8, 64], 'batch_size': 8, 'flops': 1265603017216}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1582003754624}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1582003754624}, {'type': 'perplexity', 'in_batch_dim': [8, 64], 'batch_size': 8, 'flops': 1265603017216}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1898404492032}, {'type': 'perplexity', 'in_batch_dim': [8, 64], 'batch_size': 8, 'flops': 1265603017216}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1582003754624}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1898404492032}, {'type': 'perplexity', 'in_batch_dim': [8, 112], 'batch_size': 8, 'flops': 2214805229440}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1582003754624}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1898404492032}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1582003754624}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1898404492032}, {'type': 'perplexity', 'in_batch_dim': [8, 112], 'batch_size': 8, 'flops': 2214805229440}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1898404492032}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1898404492032}, {'type': 'perplexity', 'in_batch_dim': [8, 64], 'batch_size': 8, 'flops': 1265603017216}, {'type': 'perplexity', 'in_batch_dim': [8, 64], 'batch_size': 8, 'flops': 1265603017216}, {'type': 'perplexity', 'in_batch_dim': [8, 128], 'batch_size': 8, 'flops': 2531205966848}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1582003754624}, {'type': 'perplexity', 'in_batch_dim': [8, 64], 'batch_size': 8, 'flops': 1265603017216}, {'type': 'perplexity', 'in_batch_dim': [8, 112], 'batch_size': 8, 'flops': 2214805229440}, {'type': 'perplexity', 'in_batch_dim': [8, 112], 'batch_size': 8, 'flops': 2214805229440}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1898404492032}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1582003754624}, {'type': 'perplexity', 'in_batch_dim': [8, 128], 'batch_size': 8, 'flops': 2531205966848}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1898404492032}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1582003754624}, {'type': 'perplexity', 'in_batch_dim': [8, 112], 'batch_size': 8, 'flops': 2214805229440}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1898404492032}, {'type': 'perplexity', 'in_batch_dim': [8, 64], 'batch_size': 8, 'flops': 1265603017216}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1582003754624}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1898404492032}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1898404492032}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1582003754624}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1582003754624}, {'type': 'perplexity', 'in_batch_dim': [8, 64], 'batch_size': 8, 'flops': 1265603017216}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1898404492032}, {'type': 'perplexity', 'in_batch_dim': [8, 112], 'batch_size': 8, 'flops': 2214805229440}, {'type': 'perplexity', 'in_batch_dim': [8, 64], 'batch_size': 8, 'flops': 1265603017216}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1898404492032}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1582003754624}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1582003754624}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1898404492032}, {'type': 'perplexity', 'in_batch_dim': [8, 112], 'batch_size': 8, 'flops': 2214805229440}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1898404492032}, {'type': 'perplexity', 'in_batch_dim': [8, 112], 'batch_size': 8, 'flops': 2214805229440}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1582003754624}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1582003754624}, {'type': 'perplexity', 'in_batch_dim': [8, 64], 'batch_size': 8, 'flops': 1265603017216}, {'type': 'perplexity', 'in_batch_dim': [8, 112], 'batch_size': 8, 'flops': 2214805229440}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1582003754624}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1898404492032}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1582003754624}, {'type': 'perplexity', 'in_batch_dim': [8, 112], 'batch_size': 8, 'flops': 2214805229440}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1898404492032}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1898404492032}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1582003754624}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1898404492032}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1582003754624}, {'type': 'perplexity', 'in_batch_dim': [8, 112], 'batch_size': 8, 'flops': 2214805229440}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1582003754624}, {'type': 'perplexity', 'in_batch_dim': [8, 64], 'batch_size': 8, 'flops': 1265603017216}, {'type': 'perplexity', 'in_batch_dim': [8, 64], 'batch_size': 8, 'flops': 1265603017216}, {'type': 'perplexity', 'in_batch_dim': [8, 48], 'batch_size': 8, 'flops': 949202279808}, {'type': 'perplexity', 'in_batch_dim': [8, 112], 'batch_size': 8, 'flops': 2214805229440}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1582003754624}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1898404492032}, {'type': 'perplexity', 'in_batch_dim': [8, 128], 'batch_size': 8, 'flops': 2531205966848}, {'type': 'perplexity', 'in_batch_dim': [8, 112], 'batch_size': 8, 'flops': 2214805229440}, {'type': 'perplexity', 'in_batch_dim': [8, 128], 'batch_size': 8, 'flops': 2531205966848}, {'type': 'perplexity', 'in_batch_dim': [8, 112], 'batch_size': 8, 'flops': 2214805229440}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1582003754624}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1898404492032}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1582003754624}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1582003754624}, {'type': 'perplexity', 'in_batch_dim': [8, 112], 'batch_size': 8, 'flops': 2214805229440}, {'type': 'perplexity', 'in_batch_dim': [8, 112], 'batch_size': 8, 'flops': 2214805229440}, {'type': 'perplexity', 'in_batch_dim': [8, 64], 'batch_size': 8, 'flops': 1265603017216}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1898404492032}, {'type': 'perplexity', 'in_batch_dim': [8, 112], 'batch_size': 8, 'flops': 2214805229440}, {'type': 'perplexity', 'in_batch_dim': [8, 64], 'batch_size': 8, 'flops': 1265603017216}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1898404492032}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1582003754624}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1898404492032}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1898404492032}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1898404492032}, {'type': 'perplexity', 'in_batch_dim': [8, 48], 'batch_size': 8, 'flops': 949202279808}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1898404492032}, {'type': 'perplexity', 'in_batch_dim': [8, 112], 'batch_size': 8, 'flops': 2214805229440}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1582003754624}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1898404492032}, {'type': 'perplexity', 'in_batch_dim': [8, 64], 'batch_size': 8, 'flops': 1265603017216}, {'type': 'perplexity', 'in_batch_dim': [8, 112], 'batch_size': 8, 'flops': 2214805229440}, {'type': 'perplexity', 'in_batch_dim': [8, 64], 'batch_size': 8, 'flops': 1265603017216}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1582003754624}, {'type': 'perplexity', 'in_batch_dim': [8, 64], 'batch_size': 8, 'flops': 1265603017216}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1582003754624}, {'type': 'perplexity', 'in_batch_dim': [8, 112], 'batch_size': 8, 'flops': 2214805229440}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1582003754624}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1582003754624}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1582003754624}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1582003754624}, {'type': 'perplexity', 'in_batch_dim': [8, 112], 'batch_size': 8, 'flops': 2214805229440}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1582003754624}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1898404492032}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1898404492032}, {'type': 'perplexity', 'in_batch_dim': [8, 112], 'batch_size': 8, 'flops': 2214805229440}, {'type': 'perplexity', 'in_batch_dim': [8, 144], 'batch_size': 8, 'flops': 2847606704256}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1582003754624}, {'type': 'perplexity', 'in_batch_dim': [8, 64], 'batch_size': 8, 'flops': 1265603017216}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1898404492032}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1898404492032}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1582003754624}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1898404492032}, {'type': 'perplexity', 'in_batch_dim': [8, 128], 'batch_size': 8, 'flops': 2531205966848}, {'type': 'perplexity', 'in_batch_dim': [8, 112], 'batch_size': 8, 'flops': 2214805229440}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1898404492032}, {'type': 'perplexity', 'in_batch_dim': [8, 64], 'batch_size': 8, 'flops': 1265603017216}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1898404492032}, {'type': 'perplexity', 'in_batch_dim': [8, 160], 'batch_size': 8, 'flops': 3164007441664}, {'type': 'perplexity', 'in_batch_dim': [8, 64], 'batch_size': 8, 'flops': 1265603017216}, {'type': 'perplexity', 'in_batch_dim': [8, 112], 'batch_size': 8, 'flops': 2214805229440}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1898404492032}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1898404492032}, {'type': 'perplexity', 'in_batch_dim': [8, 144], 'batch_size': 8, 'flops': 2847606704256}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1582003754624}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1898404492032}, {'type': 'perplexity', 'in_batch_dim': [8, 64], 'batch_size': 8, 'flops': 1265603017216}, {'type': 'perplexity', 'in_batch_dim': [8, 64], 'batch_size': 8, 'flops': 1265603017216}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1898404492032}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1898404492032}, {'type': 'perplexity', 'in_batch_dim': [8, 112], 'batch_size': 8, 'flops': 2214805229440}, {'type': 'perplexity', 'in_batch_dim': [8, 112], 'batch_size': 8, 'flops': 2214805229440}, {'type': 'perplexity', 'in_batch_dim': [8, 128], 'batch_size': 8, 'flops': 2531205966848}, {'type': 'perplexity', 'in_batch_dim': [8, 112], 'batch_size': 8, 'flops': 2214805229440}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1582003754624}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1582003754624}, {'type': 'perplexity', 'in_batch_dim': [8, 112], 'batch_size': 8, 'flops': 2214805229440}, {'type': 'perplexity', 'in_batch_dim': [8, 128], 'batch_size': 8, 'flops': 2531205966848}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1582003754624}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1582003754624}, {'type': 'perplexity', 'in_batch_dim': [8, 64], 'batch_size': 8, 'flops': 1265603017216}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1898404492032}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1582003754624}, {'type': 'perplexity', 'in_batch_dim': [8, 112], 'batch_size': 8, 'flops': 2214805229440}, {'type': 'perplexity', 'in_batch_dim': [8, 64], 'batch_size': 8, 'flops': 1265603017216}, {'type': 'perplexity', 'in_batch_dim': [8, 64], 'batch_size': 8, 'flops': 1265603017216}, {'type': 'perplexity', 'in_batch_dim': [8, 128], 'batch_size': 8, 'flops': 2531205966848}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1898404492032}, {'type': 'perplexity', 'in_batch_dim': [8, 112], 'batch_size': 8, 'flops': 2214805229440}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1582003754624}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1582003754624}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1582003754624}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1582003754624}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1582003754624}, {'type': 'perplexity', 'in_batch_dim': [8, 112], 'batch_size': 8, 'flops': 2214805229440}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1898404492032}, {'type': 'perplexity', 'in_batch_dim': [8, 112], 'batch_size': 8, 'flops': 2214805229440}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1898404492032}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1582003754624}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1582003754624}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1898404492032}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1898404492032}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1582003754624}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1582003754624}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1582003754624}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1582003754624}, {'type': 'perplexity', 'in_batch_dim': [8, 48], 'batch_size': 8, 'flops': 949202279808}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1582003754624}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1898404492032}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1582003754624}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1898404492032}, {'type': 'perplexity', 'in_batch_dim': [8, 112], 'batch_size': 8, 'flops': 2214805229440}, {'type': 'perplexity', 'in_batch_dim': [8, 128], 'batch_size': 8, 'flops': 2531205966848}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1898404492032}, {'type': 'perplexity', 'in_batch_dim': [8, 64], 'batch_size': 8, 'flops': 1265603017216}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1898404492032}, {'type': 'perplexity', 'in_batch_dim': [8, 64], 'batch_size': 8, 'flops': 1265603017216}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1582003754624}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1582003754624}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1582003754624}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1582003754624}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1898404492032}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1582003754624}, {'type': 'perplexity', 'in_batch_dim': [8, 112], 'batch_size': 8, 'flops': 2214805229440}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1898404492032}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1582003754624}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1898404492032}, {'type': 'perplexity', 'in_batch_dim': [8, 64], 'batch_size': 8, 'flops': 1265603017216}, {'type': 'perplexity', 'in_batch_dim': [8, 64], 'batch_size': 8, 'flops': 1265603017216}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1898404492032}, {'type': 'perplexity', 'in_batch_dim': [8, 64], 'batch_size': 8, 'flops': 1265603017216}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1582003754624}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1582003754624}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1582003754624}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1582003754624}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1582003754624}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1582003754624}, {'type': 'perplexity', 'in_batch_dim': [8, 64], 'batch_size': 8, 'flops': 1265603017216}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1582003754624}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1582003754624}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1898404492032}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1898404492032}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1898404492032}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1898404492032}, {'type': 'perplexity', 'in_batch_dim': [8, 128], 'batch_size': 8, 'flops': 2531205966848}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1582003754624}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1898404492032}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1898404492032}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1898404492032}, {'type': 'perplexity', 'in_batch_dim': [8, 64], 'batch_size': 8, 'flops': 1265603017216}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1898404492032}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1898404492032}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1582003754624}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1898404492032}, {'type': 'perplexity', 'in_batch_dim': [8, 112], 'batch_size': 8, 'flops': 2214805229440}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1898404492032}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1582003754624}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1898404492032}, {'type': 'perplexity', 'in_batch_dim': [8, 64], 'batch_size': 8, 'flops': 1265603017216}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1898404492032}, {'type': 'perplexity', 'in_batch_dim': [8, 112], 'batch_size': 8, 'flops': 2214805229440}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1898404492032}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1898404492032}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1582003754624}, {'type': 'perplexity', 'in_batch_dim': [8, 64], 'batch_size': 8, 'flops': 1265603017216}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1582003754624}, {'type': 'perplexity', 'in_batch_dim': [8, 128], 'batch_size': 8, 'flops': 2531205966848}, {'type': 'perplexity', 'in_batch_dim': [8, 64], 'batch_size': 8, 'flops': 1265603017216}, {'type': 'perplexity', 'in_batch_dim': [8, 128], 'batch_size': 8, 'flops': 2531205966848}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1898404492032}, {'type': 'perplexity', 'in_batch_dim': [8, 112], 'batch_size': 8, 'flops': 2214805229440}, {'type': 'perplexity', 'in_batch_dim': [8, 112], 'batch_size': 8, 'flops': 2214805229440}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1898404492032}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1898404492032}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1582003754624}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1582003754624}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1582003754624}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1898404492032}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1582003754624}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1582003754624}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1898404492032}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1582003754624}, {'type': 'perplexity', 'in_batch_dim': [8, 64], 'batch_size': 8, 'flops': 1265603017216}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1898404492032}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1898404492032}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1898404492032}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1898404492032}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1582003754624}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1898404492032}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1582003754624}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1582003754624}, {'type': 'perplexity', 'in_batch_dim': [8, 112], 'batch_size': 8, 'flops': 2214805229440}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1582003754624}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1898404492032}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1582003754624}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1582003754624}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1582003754624}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1898404492032}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1582003754624}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1898404492032}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1582003754624}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1582003754624}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1898404492032}, {'type': 'perplexity', 'in_batch_dim': [8, 112], 'batch_size': 8, 'flops': 2214805229440}, {'type': 'perplexity', 'in_batch_dim': [8, 128], 'batch_size': 8, 'flops': 2531205966848}, {'type': 'perplexity', 'in_batch_dim': [8, 112], 'batch_size': 8, 'flops': 2214805229440}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1582003754624}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1898404492032}, {'type': 'perplexity', 'in_batch_dim': [8, 64], 'batch_size': 8, 'flops': 1265603017216}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1898404492032}, {'type': 'perplexity', 'in_batch_dim': [8, 64], 'batch_size': 8, 'flops': 1265603017216}, {'type': 'perplexity', 'in_batch_dim': [8, 144], 'batch_size': 8, 'flops': 2847606704256}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1582003754624}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1898404492032}, {'type': 'perplexity', 'in_batch_dim': [8, 112], 'batch_size': 8, 'flops': 2214805229440}, {'type': 'perplexity', 'in_batch_dim': [8, 144], 'batch_size': 8, 'flops': 2847606704256}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1582003754624}, {'type': 'perplexity', 'in_batch_dim': [8, 112], 'batch_size': 8, 'flops': 2214805229440}, {'type': 'perplexity', 'in_batch_dim': [8, 64], 'batch_size': 8, 'flops': 1265603017216}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1582003754624}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1898404492032}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1898404492032}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1582003754624}, {'type': 'perplexity', 'in_batch_dim': [8, 112], 'batch_size': 8, 'flops': 2214805229440}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1582003754624}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1582003754624}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1582003754624}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1582003754624}, {'type': 'perplexity', 'in_batch_dim': [8, 112], 'batch_size': 8, 'flops': 2214805229440}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1898404492032}, {'type': 'perplexity', 'in_batch_dim': [8, 128], 'batch_size': 8, 'flops': 2531205966848}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1582003754624}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1582003754624}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1898404492032}, {'type': 'perplexity', 'in_batch_dim': [8, 64], 'batch_size': 8, 'flops': 1265603017216}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1898404492032}, {'type': 'perplexity', 'in_batch_dim': [8, 64], 'batch_size': 8, 'flops': 1265603017216}, {'type': 'perplexity', 'in_batch_dim': [8, 112], 'batch_size': 8, 'flops': 2214805229440}, {'type': 'perplexity', 'in_batch_dim': [8, 112], 'batch_size': 8, 'flops': 2214805229440}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1582003754624}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1898404492032}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1582003754624}, {'type': 'perplexity', 'in_batch_dim': [8, 64], 'batch_size': 8, 'flops': 1265603017216}, {'type': 'perplexity', 'in_batch_dim': [8, 64], 'batch_size': 8, 'flops': 1265603017216}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1898404492032}, {'type': 'perplexity', 'in_batch_dim': [8, 64], 'batch_size': 8, 'flops': 1265603017216}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1898404492032}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1898404492032}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1582003754624}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1898404492032}, {'type': 'perplexity', 'in_batch_dim': [8, 144], 'batch_size': 8, 'flops': 2847606704256}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1898404492032}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1582003754624}, {'type': 'perplexity', 'in_batch_dim': [8, 128], 'batch_size': 8, 'flops': 2531205966848}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1898404492032}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1898404492032}, {'type': 'perplexity', 'in_batch_dim': [8, 112], 'batch_size': 8, 'flops': 2214805229440}, {'type': 'perplexity', 'in_batch_dim': [8, 112], 'batch_size': 8, 'flops': 2214805229440}, {'type': 'perplexity', 'in_batch_dim': [8, 64], 'batch_size': 8, 'flops': 1265603017216}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1582003754624}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1898404492032}, {'type': 'perplexity', 'in_batch_dim': [8, 128], 'batch_size': 8, 'flops': 2531205966848}, {'type': 'perplexity', 'in_batch_dim': [8, 144], 'batch_size': 8, 'flops': 2847606704256}, {'type': 'perplexity', 'in_batch_dim': [8, 112], 'batch_size': 8, 'flops': 2214805229440}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1582003754624}, {'type': 'perplexity', 'in_batch_dim': [8, 64], 'batch_size': 8, 'flops': 1265603017216}, {'type': 'perplexity', 'in_batch_dim': [8, 64], 'batch_size': 8, 'flops': 1265603017216}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1898404492032}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1582003754624}, {'type': 'perplexity', 'in_batch_dim': [8, 64], 'batch_size': 8, 'flops': 1265603017216}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1898404492032}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1582003754624}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1898404492032}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1898404492032}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1582003754624}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1582003754624}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1582003754624}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1582003754624}, {'type': 'perplexity', 'in_batch_dim': [3, 48], 'batch_size': 8, 'flops': 949202279808}], 'timestamp': '2025-09-10 02:51:43.979956', 'step': 21902, 'epoch': 3} {'type': 'pplx', 'content': 9517.601019934196, 'timestamp': '2025-09-10 02:51:43.983368', 'step': 21902, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 2847885091968}, 'timestamp': '2025-09-10 02:51:44.014045', 'step': 21902, 'epoch': 3} {'type': 'loss', 'content': 0.10512462258338928, 'timestamp': '2025-09-10 02:51:44.016909', 'step': 21903, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 128], 'flops': 3797092544000}, 'timestamp': '2025-09-10 02:51:44.047729', 'step': 21903, 'epoch': 3} {'type': 'loss', 'content': 0.012626130133867264, 'timestamp': '2025-09-10 02:51:44.071870', 'step': 21904, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 128], 'flops': 3797092544000}, 'timestamp': '2025-09-10 02:51:44.101987', 'step': 21904, 'epoch': 3} {'type': 'loss', 'content': 0.028651997447013855, 'timestamp': '2025-09-10 02:51:44.104348', 'step': 21905, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 144], 'flops': 4271696270016}, 'timestamp': '2025-09-10 02:51:44.135132', 'step': 21905, 'epoch': 3} {'type': 'loss', 'content': 0.10069539397954941, 'timestamp': '2025-09-10 02:51:44.137656', 'step': 21906, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 2847885091968}, 'timestamp': '2025-09-10 02:51:44.168294', 'step': 21906, 'epoch': 3} {'type': 'loss', 'content': 0.10789051651954651, 'timestamp': '2025-09-10 02:51:44.171154', 'step': 21907, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 128], 'flops': 3797092544000}, 'timestamp': '2025-09-10 02:51:44.202379', 'step': 21907, 'epoch': 3} {'type': 'loss', 'content': 0.06280583888292313, 'timestamp': '2025-09-10 02:51:44.226105', 'step': 21908, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 128], 'flops': 3797092544000}, 'timestamp': '2025-09-10 02:51:44.256968', 'step': 21908, 'epoch': 3} {'type': 'loss', 'content': 0.027331266552209854, 'timestamp': '2025-09-10 02:51:44.260492', 'step': 21909, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 2847885091968}, 'timestamp': '2025-09-10 02:51:44.290456', 'step': 21909, 'epoch': 3} {'type': 'loss', 'content': 0.06187411770224571, 'timestamp': '2025-09-10 02:51:44.292541', 'step': 21910, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 2847885091968}, 'timestamp': '2025-09-10 02:51:44.322951', 'step': 21910, 'epoch': 3} {'type': 'loss', 'content': 0.08835982531309128, 'timestamp': '2025-09-10 02:51:44.325327', 'step': 21911, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 2847885091968}, 'timestamp': '2025-09-10 02:51:44.356087', 'step': 21911, 'epoch': 3} {'type': 'loss', 'content': 0.052156370133161545, 'timestamp': '2025-09-10 02:51:44.381534', 'step': 21912, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 3322488817984}, 'timestamp': '2025-09-10 02:51:44.414014', 'step': 21912, 'epoch': 3} {'type': 'loss', 'content': 0.08420375734567642, 'timestamp': '2025-09-10 02:51:44.416453', 'step': 21913, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 144], 'flops': 4271696270016}, 'timestamp': '2025-09-10 02:51:44.446572', 'step': 21913, 'epoch': 3} {'type': 'loss', 'content': 0.01990821398794651, 'timestamp': '2025-09-10 02:51:44.449198', 'step': 21914, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 3322488817984}, 'timestamp': '2025-09-10 02:51:44.479899', 'step': 21914, 'epoch': 3} {'type': 'loss', 'content': 0.12428222596645355, 'timestamp': '2025-09-10 02:51:44.482351', 'step': 21915, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 2847885091968}, 'timestamp': '2025-09-10 02:51:44.512164', 'step': 21915, 'epoch': 3} {'type': 'loss', 'content': 0.04897972568869591, 'timestamp': '2025-09-10 02:51:44.535850', 'step': 21916, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 144], 'flops': 4271696270016}, 'timestamp': '2025-09-10 02:51:44.566279', 'step': 21916, 'epoch': 3} {'type': 'loss', 'content': 0.044754307717084885, 'timestamp': '2025-09-10 02:51:44.568988', 'step': 21917, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 128], 'flops': 3797092544000}, 'timestamp': '2025-09-10 02:51:44.599433', 'step': 21917, 'epoch': 3} {'type': 'loss', 'content': 0.10040029138326645, 'timestamp': '2025-09-10 02:51:44.601755', 'step': 21918, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 128], 'flops': 3797092544000}, 'timestamp': '2025-09-10 02:51:44.632207', 'step': 21918, 'epoch': 3} {'type': 'loss', 'content': 0.047812722623348236, 'timestamp': '2025-09-10 02:51:44.634569', 'step': 21919, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 2847885091968}, 'timestamp': '2025-09-10 02:51:44.664698', 'step': 21919, 'epoch': 3} {'type': 'loss', 'content': 0.07340185344219208, 'timestamp': '2025-09-10 02:51:44.689394', 'step': 21920, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 2847885091968}, 'timestamp': '2025-09-10 02:51:44.719574', 'step': 21920, 'epoch': 3} {'type': 'loss', 'content': 0.012658457271754742, 'timestamp': '2025-09-10 02:51:44.722179', 'step': 21921, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 3322488817984}, 'timestamp': '2025-09-10 02:51:44.752118', 'step': 21921, 'epoch': 3} {'type': 'loss', 'content': 0.04879777878522873, 'timestamp': '2025-09-10 02:51:44.754118', 'step': 21922, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 3322488817984}, 'timestamp': '2025-09-10 02:51:44.784186', 'step': 21922, 'epoch': 3} {'type': 'loss', 'content': 0.0862298458814621, 'timestamp': '2025-09-10 02:51:44.786838', 'step': 21923, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 128], 'flops': 3797092544000}, 'timestamp': '2025-09-10 02:51:44.817209', 'step': 21923, 'epoch': 3} {'type': 'loss', 'content': 0.07628273963928223, 'timestamp': '2025-09-10 02:51:44.840883', 'step': 21924, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 2847885091968}, 'timestamp': '2025-09-10 02:51:44.878542', 'step': 21924, 'epoch': 3} {'type': 'loss', 'content': 0.05344904586672783, 'timestamp': '2025-09-10 02:51:44.881661', 'step': 21925, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 2847885091968}, 'timestamp': '2025-09-10 02:51:44.913039', 'step': 21925, 'epoch': 3} {'type': 'loss', 'content': 0.05544475466012955, 'timestamp': '2025-09-10 02:51:44.917254', 'step': 21926, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 2847885091968}, 'timestamp': '2025-09-10 02:51:44.948331', 'step': 21926, 'epoch': 3} {'type': 'loss', 'content': 0.031938858330249786, 'timestamp': '2025-09-10 02:51:44.950823', 'step': 21927, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 128], 'flops': 3797092544000}, 'timestamp': '2025-09-10 02:51:44.981198', 'step': 21927, 'epoch': 3} {'type': 'loss', 'content': 0.03822816535830498, 'timestamp': '2025-09-10 02:51:45.005134', 'step': 21928, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 128], 'flops': 3797092544000}, 'timestamp': '2025-09-10 02:51:45.035352', 'step': 21928, 'epoch': 3} {'type': 'loss', 'content': 0.08445654064416885, 'timestamp': '2025-09-10 02:51:45.038381', 'step': 21929, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 128], 'flops': 3797092544000}, 'timestamp': '2025-09-10 02:51:45.068796', 'step': 21929, 'epoch': 3} {'type': 'loss', 'content': 0.009395075030624866, 'timestamp': '2025-09-10 02:51:45.071811', 'step': 21930, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 2847885091968}, 'timestamp': '2025-09-10 02:51:45.102012', 'step': 21930, 'epoch': 3} {'type': 'loss', 'content': 0.05017726868391037, 'timestamp': '2025-09-10 02:51:45.104771', 'step': 21931, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 3322488817984}, 'timestamp': '2025-09-10 02:51:45.136087', 'step': 21931, 'epoch': 3} {'type': 'loss', 'content': 0.11109241843223572, 'timestamp': '2025-09-10 02:51:45.159743', 'step': 21932, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 128], 'flops': 3797092544000}, 'timestamp': '2025-09-10 02:51:45.191050', 'step': 21932, 'epoch': 3} {'type': 'loss', 'content': 0.1028016209602356, 'timestamp': '2025-09-10 02:51:45.193463', 'step': 21933, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 2847885091968}, 'timestamp': '2025-09-10 02:51:45.224585', 'step': 21933, 'epoch': 3} {'type': 'loss', 'content': 0.07070815563201904, 'timestamp': '2025-09-10 02:51:45.227102', 'step': 21934, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 128], 'flops': 3797092544000}, 'timestamp': '2025-09-10 02:51:45.257552', 'step': 21934, 'epoch': 3} {'type': 'loss', 'content': 0.05877655744552612, 'timestamp': '2025-09-10 02:51:45.260168', 'step': 21935, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 2847885091968}, 'timestamp': '2025-09-10 02:51:45.290564', 'step': 21935, 'epoch': 3} {'type': 'loss', 'content': 0.08384577929973602, 'timestamp': '2025-09-10 02:51:45.314275', 'step': 21936, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 3322488817984}, 'timestamp': '2025-09-10 02:51:45.346212', 'step': 21936, 'epoch': 3} {'type': 'loss', 'content': 0.09705030173063278, 'timestamp': '2025-09-10 02:51:45.348412', 'step': 21937, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 3322488817984}, 'timestamp': '2025-09-10 02:51:45.378292', 'step': 21937, 'epoch': 3} {'type': 'loss', 'content': 0.10069376230239868, 'timestamp': '2025-09-10 02:51:45.380711', 'step': 21938, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 3322488817984}, 'timestamp': '2025-09-10 02:51:45.412504', 'step': 21938, 'epoch': 3} {'type': 'loss', 'content': 0.05445318669080734, 'timestamp': '2025-09-10 02:51:45.414894', 'step': 21939, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 2847885091968}, 'timestamp': '2025-09-10 02:51:45.447613', 'step': 21939, 'epoch': 3} {'type': 'loss', 'content': 0.013596453703939915, 'timestamp': '2025-09-10 02:51:45.471661', 'step': 21940, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 128], 'flops': 3797092544000}, 'timestamp': '2025-09-10 02:51:45.504655', 'step': 21940, 'epoch': 3} {'type': 'loss', 'content': 0.09982555359601974, 'timestamp': '2025-09-10 02:51:45.507188', 'step': 21941, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 128], 'flops': 3797092544000}, 'timestamp': '2025-09-10 02:51:45.537258', 'step': 21941, 'epoch': 3} {'type': 'loss', 'content': 0.041800227016210556, 'timestamp': '2025-09-10 02:51:45.539605', 'step': 21942, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 144], 'flops': 4271696270016}, 'timestamp': '2025-09-10 02:51:45.571199', 'step': 21942, 'epoch': 3} {'type': 'loss', 'content': 0.052515339106321335, 'timestamp': '2025-09-10 02:51:45.573410', 'step': 21943, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 3322488817984}, 'timestamp': '2025-09-10 02:51:45.605220', 'step': 21943, 'epoch': 3} {'type': 'loss', 'content': 0.020750219002366066, 'timestamp': '2025-09-10 02:51:45.628986', 'step': 21944, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 3322488817984}, 'timestamp': '2025-09-10 02:51:45.660356', 'step': 21944, 'epoch': 3} {'type': 'loss', 'content': 0.09638185799121857, 'timestamp': '2025-09-10 02:51:45.678091', 'step': 21945, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 2847885091968}, 'timestamp': '2025-09-10 02:51:45.712948', 'step': 21945, 'epoch': 3} {'type': 'loss', 'content': 0.021987712010741234, 'timestamp': '2025-09-10 02:51:45.715376', 'step': 21946, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 3322488817984}, 'timestamp': '2025-09-10 02:51:45.745979', 'step': 21946, 'epoch': 3} {'type': 'loss', 'content': 0.06043664738535881, 'timestamp': '2025-09-10 02:51:45.748250', 'step': 21947, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 3322488817984}, 'timestamp': '2025-09-10 02:51:45.778801', 'step': 21947, 'epoch': 3} {'type': 'loss', 'content': 0.03444678708910942, 'timestamp': '2025-09-10 02:51:45.802604', 'step': 21948, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 176], 'flops': 5220903722048}, 'timestamp': '2025-09-10 02:51:45.833530', 'step': 21948, 'epoch': 3} {'type': 'loss', 'content': 0.05052249878644943, 'timestamp': '2025-09-10 02:51:45.840283', 'step': 21949, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 2847885091968}, 'timestamp': '2025-09-10 02:51:45.874771', 'step': 21949, 'epoch': 3} {'type': 'loss', 'content': 0.07434272021055222, 'timestamp': '2025-09-10 02:51:45.877454', 'step': 21950, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 2847885091968}, 'timestamp': '2025-09-10 02:51:45.909970', 'step': 21950, 'epoch': 3} {'type': 'loss', 'content': 0.013125399127602577, 'timestamp': '2025-09-10 02:51:45.912532', 'step': 21951, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 144], 'flops': 4271696270016}, 'timestamp': '2025-09-10 02:51:45.945408', 'step': 21951, 'epoch': 3} {'type': 'loss', 'content': 0.04481877386569977, 'timestamp': '2025-09-10 02:51:45.969397', 'step': 21952, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 2847885091968}, 'timestamp': '2025-09-10 02:51:46.000422', 'step': 21952, 'epoch': 3} {'type': 'loss', 'content': 0.04317472130060196, 'timestamp': '2025-09-10 02:51:46.003159', 'step': 21953, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 3322488817984}, 'timestamp': '2025-09-10 02:51:46.033547', 'step': 21953, 'epoch': 3} {'type': 'loss', 'content': 0.06447470188140869, 'timestamp': '2025-09-10 02:51:46.035889', 'step': 21954, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 2847885091968}, 'timestamp': '2025-09-10 02:51:46.066860', 'step': 21954, 'epoch': 3} {'type': 'loss', 'content': 0.059573642909526825, 'timestamp': '2025-09-10 02:51:46.070123', 'step': 21955, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 2847885091968}, 'timestamp': '2025-09-10 02:51:46.100886', 'step': 21955, 'epoch': 3} {'type': 'loss', 'content': 0.03537241742014885, 'timestamp': '2025-09-10 02:51:46.124379', 'step': 21956, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 160], 'flops': 4746299996032}, 'timestamp': '2025-09-10 02:51:46.155350', 'step': 21956, 'epoch': 3} {'type': 'loss', 'content': 0.10204726457595825, 'timestamp': '2025-09-10 02:51:46.157638', 'step': 21957, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 3322488817984}, 'timestamp': '2025-09-10 02:51:46.188096', 'step': 21957, 'epoch': 3} {'type': 'loss', 'content': 0.05626342073082924, 'timestamp': '2025-09-10 02:51:46.190712', 'step': 21958, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 144], 'flops': 4271696270016}, 'timestamp': '2025-09-10 02:51:46.222066', 'step': 21958, 'epoch': 3} {'type': 'loss', 'content': 0.059390828013420105, 'timestamp': '2025-09-10 02:51:46.224385', 'step': 21959, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 3322488817984}, 'timestamp': '2025-09-10 02:51:46.254754', 'step': 21959, 'epoch': 3} {'type': 'loss', 'content': 0.05863770470023155, 'timestamp': '2025-09-10 02:51:46.278587', 'step': 21960, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 3322488817984}, 'timestamp': '2025-09-10 02:51:46.311805', 'step': 21960, 'epoch': 3} {'type': 'loss', 'content': 0.07741416245698929, 'timestamp': '2025-09-10 02:51:46.314295', 'step': 21961, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 3322488817984}, 'timestamp': '2025-09-10 02:51:46.344745', 'step': 21961, 'epoch': 3} {'type': 'loss', 'content': 0.048654668033123016, 'timestamp': '2025-09-10 02:51:46.347441', 'step': 21962, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 3322488817984}, 'timestamp': '2025-09-10 02:51:46.378032', 'step': 21962, 'epoch': 3} {'type': 'loss', 'content': 0.02530672401189804, 'timestamp': '2025-09-10 02:51:46.380436', 'step': 21963, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 2847885091968}, 'timestamp': '2025-09-10 02:51:46.412262', 'step': 21963, 'epoch': 3} {'type': 'loss', 'content': 0.029958415776491165, 'timestamp': '2025-09-10 02:51:46.435997', 'step': 21964, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 2847885091968}, 'timestamp': '2025-09-10 02:51:46.465977', 'step': 21964, 'epoch': 3} {'type': 'loss', 'content': 0.05111118406057358, 'timestamp': '2025-09-10 02:51:46.468110', 'step': 21965, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 3322488817984}, 'timestamp': '2025-09-10 02:51:46.497931', 'step': 21965, 'epoch': 3} {'type': 'loss', 'content': 0.054158300161361694, 'timestamp': '2025-09-10 02:51:46.500549', 'step': 21966, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 3322488817984}, 'timestamp': '2025-09-10 02:51:46.532304', 'step': 21966, 'epoch': 3} {'type': 'loss', 'content': 0.05304035171866417, 'timestamp': '2025-09-10 02:51:46.535492', 'step': 21967, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 3322488817984}, 'timestamp': '2025-09-10 02:51:46.566165', 'step': 21967, 'epoch': 3} {'type': 'loss', 'content': 0.06767118722200394, 'timestamp': '2025-09-10 02:51:46.589801', 'step': 21968, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 3322488817984}, 'timestamp': '2025-09-10 02:51:46.619781', 'step': 21968, 'epoch': 3} {'type': 'loss', 'content': 0.051904935389757156, 'timestamp': '2025-09-10 02:51:46.622134', 'step': 21969, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 3322488817984}, 'timestamp': '2025-09-10 02:51:46.652986', 'step': 21969, 'epoch': 3} {'type': 'loss', 'content': 0.01814335212111473, 'timestamp': '2025-09-10 02:51:46.655127', 'step': 21970, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 3322488817984}, 'timestamp': '2025-09-10 02:51:46.684904', 'step': 21970, 'epoch': 3} {'type': 'loss', 'content': 0.09976515918970108, 'timestamp': '2025-09-10 02:51:46.687246', 'step': 21971, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 144], 'flops': 4271696270016}, 'timestamp': '2025-09-10 02:51:46.719035', 'step': 21971, 'epoch': 3} {'type': 'loss', 'content': 0.10342683643102646, 'timestamp': '2025-09-10 02:51:46.742436', 'step': 21972, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 3322488817984}, 'timestamp': '2025-09-10 02:51:46.774555', 'step': 21972, 'epoch': 3} {'type': 'loss', 'content': 0.11495492607355118, 'timestamp': '2025-09-10 02:51:46.777011', 'step': 21973, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 144], 'flops': 4271696270016}, 'timestamp': '2025-09-10 02:51:46.807455', 'step': 21973, 'epoch': 3} {'type': 'loss', 'content': 0.037881214171648026, 'timestamp': '2025-09-10 02:51:46.810010', 'step': 21974, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 2847885091968}, 'timestamp': '2025-09-10 02:51:46.839734', 'step': 21974, 'epoch': 3} {'type': 'loss', 'content': 0.10166388005018234, 'timestamp': '2025-09-10 02:51:46.842213', 'step': 21975, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 3322488817984}, 'timestamp': '2025-09-10 02:51:46.878320', 'step': 21975, 'epoch': 3} {'type': 'loss', 'content': 0.06390227377414703, 'timestamp': '2025-09-10 02:51:46.901964', 'step': 21976, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 128], 'flops': 3797092544000}, 'timestamp': '2025-09-10 02:51:46.933063', 'step': 21976, 'epoch': 3} {'type': 'loss', 'content': 0.12716631591320038, 'timestamp': '2025-09-10 02:51:46.935745', 'step': 21977, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 144], 'flops': 4271696270016}, 'timestamp': '2025-09-10 02:51:46.970729', 'step': 21977, 'epoch': 3} {'type': 'loss', 'content': 0.10155420005321503, 'timestamp': '2025-09-10 02:51:46.973526', 'step': 21978, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 160], 'flops': 4746299996032}, 'timestamp': '2025-09-10 02:51:47.003947', 'step': 21978, 'epoch': 3} {'type': 'loss', 'content': 0.1318223774433136, 'timestamp': '2025-09-10 02:51:47.007072', 'step': 21979, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 80], 'flops': 2373281365952}, 'timestamp': '2025-09-10 02:51:47.037663', 'step': 21979, 'epoch': 3} {'type': 'loss', 'content': 0.1718980222940445, 'timestamp': '2025-09-10 02:51:47.061207', 'step': 21980, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 144], 'flops': 4271696270016}, 'timestamp': '2025-09-10 02:51:47.092182', 'step': 21980, 'epoch': 3} {'type': 'loss', 'content': 0.045181307941675186, 'timestamp': '2025-09-10 02:51:47.096433', 'step': 21981, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 128], 'flops': 3797092544000}, 'timestamp': '2025-09-10 02:51:47.127328', 'step': 21981, 'epoch': 3} {'type': 'loss', 'content': 0.1125045195221901, 'timestamp': '2025-09-10 02:51:47.129623', 'step': 21982, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 3322488817984}, 'timestamp': '2025-09-10 02:51:47.160137', 'step': 21982, 'epoch': 3} {'type': 'loss', 'content': 0.14493224024772644, 'timestamp': '2025-09-10 02:51:47.162664', 'step': 21983, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 144], 'flops': 4271696270016}, 'timestamp': '2025-09-10 02:51:47.192557', 'step': 21983, 'epoch': 3} {'type': 'loss', 'content': 0.07927881181240082, 'timestamp': '2025-09-10 02:51:47.216390', 'step': 21984, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 3322488817984}, 'timestamp': '2025-09-10 02:51:47.246493', 'step': 21984, 'epoch': 3} {'type': 'loss', 'content': 0.07059475034475327, 'timestamp': '2025-09-10 02:51:47.249490', 'step': 21985, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 3322488817984}, 'timestamp': '2025-09-10 02:51:47.278873', 'step': 21985, 'epoch': 3} {'type': 'loss', 'content': 0.1276375651359558, 'timestamp': '2025-09-10 02:51:47.281732', 'step': 21986, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 176], 'flops': 5220903722048}, 'timestamp': '2025-09-10 02:51:47.312693', 'step': 21986, 'epoch': 3} {'type': 'loss', 'content': 0.11939626187086105, 'timestamp': '2025-09-10 02:51:47.317348', 'step': 21987, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 3322488817984}, 'timestamp': '2025-09-10 02:51:47.346907', 'step': 21987, 'epoch': 3} {'type': 'loss', 'content': 0.028693901374936104, 'timestamp': '2025-09-10 02:51:47.370684', 'step': 21988, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 3322488817984}, 'timestamp': '2025-09-10 02:51:47.400701', 'step': 21988, 'epoch': 3} {'type': 'loss', 'content': 0.05510260909795761, 'timestamp': '2025-09-10 02:51:47.403314', 'step': 21989, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 2847885091968}, 'timestamp': '2025-09-10 02:51:47.433145', 'step': 21989, 'epoch': 3} {'type': 'loss', 'content': 0.07055305689573288, 'timestamp': '2025-09-10 02:51:47.435609', 'step': 21990, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 2847885091968}, 'timestamp': '2025-09-10 02:51:47.465797', 'step': 21990, 'epoch': 3} {'type': 'loss', 'content': 0.07140496373176575, 'timestamp': '2025-09-10 02:51:47.468262', 'step': 21991, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 2847885091968}, 'timestamp': '2025-09-10 02:51:47.498542', 'step': 21991, 'epoch': 3} {'type': 'loss', 'content': 0.0669826939702034, 'timestamp': '2025-09-10 02:51:47.523207', 'step': 21992, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 2847885091968}, 'timestamp': '2025-09-10 02:51:47.552958', 'step': 21992, 'epoch': 3} {'type': 'loss', 'content': 0.031033650040626526, 'timestamp': '2025-09-10 02:51:47.556654', 'step': 21993, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 3322488817984}, 'timestamp': '2025-09-10 02:51:47.587676', 'step': 21993, 'epoch': 3} {'type': 'loss', 'content': 0.10836031287908554, 'timestamp': '2025-09-10 02:51:47.590125', 'step': 21994, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 2847885091968}, 'timestamp': '2025-09-10 02:51:47.620485', 'step': 21994, 'epoch': 3} {'type': 'loss', 'content': 0.039610352367162704, 'timestamp': '2025-09-10 02:51:47.622816', 'step': 21995, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 3322488817984}, 'timestamp': '2025-09-10 02:51:47.652759', 'step': 21995, 'epoch': 3} {'type': 'loss', 'content': 0.05940196290612221, 'timestamp': '2025-09-10 02:51:47.676450', 'step': 21996, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 2847885091968}, 'timestamp': '2025-09-10 02:51:47.706475', 'step': 21996, 'epoch': 3} {'type': 'loss', 'content': 0.033589623868465424, 'timestamp': '2025-09-10 02:51:47.709048', 'step': 21997, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 80], 'flops': 2373281365952}, 'timestamp': '2025-09-10 02:51:47.737929', 'step': 21997, 'epoch': 3} {'type': 'loss', 'content': 0.11177320778369904, 'timestamp': '2025-09-10 02:51:47.740158', 'step': 21998, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 3322488817984}, 'timestamp': '2025-09-10 02:51:47.770472', 'step': 21998, 'epoch': 3} {'type': 'loss', 'content': 0.06515859812498093, 'timestamp': '2025-09-10 02:51:47.772788', 'step': 21999, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 2847885091968}, 'timestamp': '2025-09-10 02:51:47.802959', 'step': 21999, 'epoch': 3} {'type': 'loss', 'content': 0.029237661510705948, 'timestamp': '2025-09-10 02:51:47.828030', 'step': 22000, 'epoch': 3} {'type': 'info', 'content': 'Checkpoint saved at step 22000', 'timestamp': '2025-09-10 02:51:52.567581', 'step': 22000, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 3322488817984}, 'timestamp': '2025-09-10 02:51:52.601153', 'step': 22000, 'epoch': 3} {'type': 'loss', 'content': 0.09304770827293396, 'timestamp': '2025-09-10 02:51:52.604091', 'step': 22001, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 3322488817984}, 'timestamp': '2025-09-10 02:51:52.636665', 'step': 22001, 'epoch': 3} {'type': 'loss', 'content': 0.02575220726430416, 'timestamp': '2025-09-10 02:51:52.639570', 'step': 22002, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 2847885091968}, 'timestamp': '2025-09-10 02:51:52.671331', 'step': 22002, 'epoch': 3} {'type': 'loss', 'content': 0.07708899676799774, 'timestamp': '2025-09-10 02:51:52.673551', 'step': 22003, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 3322488817984}, 'timestamp': '2025-09-10 02:51:52.704097', 'step': 22003, 'epoch': 3} {'type': 'loss', 'content': 0.07356996834278107, 'timestamp': '2025-09-10 02:51:52.728143', 'step': 22004, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 128], 'flops': 3797092544000}, 'timestamp': '2025-09-10 02:51:52.759843', 'step': 22004, 'epoch': 3} {'type': 'loss', 'content': 0.11842591315507889, 'timestamp': '2025-09-10 02:51:52.762184', 'step': 22005, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 3322488817984}, 'timestamp': '2025-09-10 02:51:52.793391', 'step': 22005, 'epoch': 3} {'type': 'loss', 'content': 0.08068397641181946, 'timestamp': '2025-09-10 02:51:52.795607', 'step': 22006, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 3322488817984}, 'timestamp': '2025-09-10 02:51:52.827065', 'step': 22006, 'epoch': 3} {'type': 'loss', 'content': 0.06766559183597565, 'timestamp': '2025-09-10 02:51:52.829828', 'step': 22007, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 3322488817984}, 'timestamp': '2025-09-10 02:51:52.860751', 'step': 22007, 'epoch': 3} {'type': 'loss', 'content': 0.0810583308339119, 'timestamp': '2025-09-10 02:51:52.886465', 'step': 22008, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 3322488817984}, 'timestamp': '2025-09-10 02:51:52.917487', 'step': 22008, 'epoch': 3} {'type': 'loss', 'content': 0.0520886592566967, 'timestamp': '2025-09-10 02:51:52.919927', 'step': 22009, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 128], 'flops': 3797092544000}, 'timestamp': '2025-09-10 02:51:52.953140', 'step': 22009, 'epoch': 3} {'type': 'loss', 'content': 0.0033588295336812735, 'timestamp': '2025-09-10 02:51:52.955754', 'step': 22010, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 3322488817984}, 'timestamp': '2025-09-10 02:51:52.986457', 'step': 22010, 'epoch': 3} {'type': 'loss', 'content': 0.05818010866641998, 'timestamp': '2025-09-10 02:51:52.988913', 'step': 22011, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 2847885091968}, 'timestamp': '2025-09-10 02:51:53.018517', 'step': 22011, 'epoch': 3} {'type': 'loss', 'content': 0.048839420080184937, 'timestamp': '2025-09-10 02:51:53.042313', 'step': 22012, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 80], 'flops': 2373281365952}, 'timestamp': '2025-09-10 02:51:53.073120', 'step': 22012, 'epoch': 3} {'type': 'loss', 'content': 0.07182208448648453, 'timestamp': '2025-09-10 02:51:53.075704', 'step': 22013, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 2847885091968}, 'timestamp': '2025-09-10 02:51:53.106574', 'step': 22013, 'epoch': 3} {'type': 'loss', 'content': 0.011254880577325821, 'timestamp': '2025-09-10 02:51:53.108803', 'step': 22014, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 3322488817984}, 'timestamp': '2025-09-10 02:51:53.139805', 'step': 22014, 'epoch': 3} {'type': 'loss', 'content': 0.08843448013067245, 'timestamp': '2025-09-10 02:51:53.142241', 'step': 22015, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 144], 'flops': 4271696270016}, 'timestamp': '2025-09-10 02:51:53.173373', 'step': 22015, 'epoch': 3} {'type': 'loss', 'content': 0.1049000546336174, 'timestamp': '2025-09-10 02:51:53.196734', 'step': 22016, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 3322488817984}, 'timestamp': '2025-09-10 02:51:53.227261', 'step': 22016, 'epoch': 3} {'type': 'loss', 'content': 0.052786849439144135, 'timestamp': '2025-09-10 02:51:53.229668', 'step': 22017, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 128], 'flops': 3797092544000}, 'timestamp': '2025-09-10 02:51:53.260024', 'step': 22017, 'epoch': 3} {'type': 'loss', 'content': 0.05330590531229973, 'timestamp': '2025-09-10 02:51:53.262678', 'step': 22018, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 2847885091968}, 'timestamp': '2025-09-10 02:51:53.293601', 'step': 22018, 'epoch': 3} {'type': 'loss', 'content': 0.0415361151099205, 'timestamp': '2025-09-10 02:51:53.295958', 'step': 22019, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 2847885091968}, 'timestamp': '2025-09-10 02:51:53.326522', 'step': 22019, 'epoch': 3} {'type': 'loss', 'content': 0.03294305503368378, 'timestamp': '2025-09-10 02:51:53.350201', 'step': 22020, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 3322488817984}, 'timestamp': '2025-09-10 02:51:53.381287', 'step': 22020, 'epoch': 3} {'type': 'loss', 'content': 0.11015286296606064, 'timestamp': '2025-09-10 02:51:53.383808', 'step': 22021, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 128], 'flops': 3797092544000}, 'timestamp': '2025-09-10 02:51:53.414812', 'step': 22021, 'epoch': 3} {'type': 'loss', 'content': 0.11367440223693848, 'timestamp': '2025-09-10 02:51:53.417463', 'step': 22022, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 3322488817984}, 'timestamp': '2025-09-10 02:51:53.448896', 'step': 22022, 'epoch': 3} {'type': 'loss', 'content': 0.06677674502134323, 'timestamp': '2025-09-10 02:51:53.451262', 'step': 22023, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 80], 'flops': 2373281365952}, 'timestamp': '2025-09-10 02:51:53.482011', 'step': 22023, 'epoch': 3} {'type': 'loss', 'content': 0.10871055722236633, 'timestamp': '2025-09-10 02:51:53.505751', 'step': 22024, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 3322488817984}, 'timestamp': '2025-09-10 02:51:53.536306', 'step': 22024, 'epoch': 3} {'type': 'loss', 'content': 0.058936841785907745, 'timestamp': '2025-09-10 02:51:53.538808', 'step': 22025, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 128], 'flops': 3797092544000}, 'timestamp': '2025-09-10 02:51:53.570265', 'step': 22025, 'epoch': 3} {'type': 'loss', 'content': 0.07578998804092407, 'timestamp': '2025-09-10 02:51:53.573081', 'step': 22026, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 2847885091968}, 'timestamp': '2025-09-10 02:51:53.605931', 'step': 22026, 'epoch': 3} {'type': 'loss', 'content': 0.01200998667627573, 'timestamp': '2025-09-10 02:51:53.608496', 'step': 22027, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 176], 'flops': 5220903722048}, 'timestamp': '2025-09-10 02:51:53.640292', 'step': 22027, 'epoch': 3} {'type': 'loss', 'content': 0.027090732008218765, 'timestamp': '2025-09-10 02:51:53.665428', 'step': 22028, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 128], 'flops': 3797092544000}, 'timestamp': '2025-09-10 02:51:53.696410', 'step': 22028, 'epoch': 3} {'type': 'loss', 'content': 0.012108850292861462, 'timestamp': '2025-09-10 02:51:53.700237', 'step': 22029, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 144], 'flops': 4271696270016}, 'timestamp': '2025-09-10 02:51:53.730962', 'step': 22029, 'epoch': 3} {'type': 'loss', 'content': 0.0662168487906456, 'timestamp': '2025-09-10 02:51:53.733160', 'step': 22030, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 144], 'flops': 4271696270016}, 'timestamp': '2025-09-10 02:51:53.763877', 'step': 22030, 'epoch': 3} {'type': 'loss', 'content': 0.061635300517082214, 'timestamp': '2025-09-10 02:51:53.766352', 'step': 22031, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 128], 'flops': 3797092544000}, 'timestamp': '2025-09-10 02:51:53.796662', 'step': 22031, 'epoch': 3} {'type': 'loss', 'content': 0.1046418771147728, 'timestamp': '2025-09-10 02:51:53.820540', 'step': 22032, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 3322488817984}, 'timestamp': '2025-09-10 02:51:53.852543', 'step': 22032, 'epoch': 3} {'type': 'loss', 'content': 0.06695568561553955, 'timestamp': '2025-09-10 02:51:53.855262', 'step': 22033, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 3322488817984}, 'timestamp': '2025-09-10 02:51:53.886459', 'step': 22033, 'epoch': 3} {'type': 'loss', 'content': 0.05094277858734131, 'timestamp': '2025-09-10 02:51:53.889109', 'step': 22034, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 3322488817984}, 'timestamp': '2025-09-10 02:51:53.920897', 'step': 22034, 'epoch': 3} {'type': 'loss', 'content': 0.10095096379518509, 'timestamp': '2025-09-10 02:51:53.923320', 'step': 22035, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 2847885091968}, 'timestamp': '2025-09-10 02:51:53.953239', 'step': 22035, 'epoch': 3} {'type': 'loss', 'content': 0.04350132867693901, 'timestamp': '2025-09-10 02:51:53.976764', 'step': 22036, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 128], 'flops': 3797092544000}, 'timestamp': '2025-09-10 02:51:54.008505', 'step': 22036, 'epoch': 3} {'type': 'loss', 'content': 0.08371999859809875, 'timestamp': '2025-09-10 02:51:54.010770', 'step': 22037, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 128], 'flops': 3797092544000}, 'timestamp': '2025-09-10 02:51:54.040680', 'step': 22037, 'epoch': 3} {'type': 'loss', 'content': 0.12392543256282806, 'timestamp': '2025-09-10 02:51:54.043188', 'step': 22038, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 3322488817984}, 'timestamp': '2025-09-10 02:51:54.074155', 'step': 22038, 'epoch': 3} {'type': 'loss', 'content': 0.028340782970190048, 'timestamp': '2025-09-10 02:51:54.076526', 'step': 22039, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 128], 'flops': 3797092544000}, 'timestamp': '2025-09-10 02:51:54.106934', 'step': 22039, 'epoch': 3} {'type': 'loss', 'content': 0.08701900392770767, 'timestamp': '2025-09-10 02:51:54.131008', 'step': 22040, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 3322488817984}, 'timestamp': '2025-09-10 02:51:54.161328', 'step': 22040, 'epoch': 3} {'type': 'loss', 'content': 0.12310012429952621, 'timestamp': '2025-09-10 02:51:54.163757', 'step': 22041, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 128], 'flops': 3797092544000}, 'timestamp': '2025-09-10 02:51:54.194959', 'step': 22041, 'epoch': 3} {'type': 'loss', 'content': 0.04433072730898857, 'timestamp': '2025-09-10 02:51:54.197283', 'step': 22042, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 2847885091968}, 'timestamp': '2025-09-10 02:51:54.227196', 'step': 22042, 'epoch': 3} {'type': 'loss', 'content': 0.0324128083884716, 'timestamp': '2025-09-10 02:51:54.229655', 'step': 22043, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 2847885091968}, 'timestamp': '2025-09-10 02:51:54.261559', 'step': 22043, 'epoch': 3} {'type': 'loss', 'content': 0.03798981383442879, 'timestamp': '2025-09-10 02:51:54.286464', 'step': 22044, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 2847885091968}, 'timestamp': '2025-09-10 02:51:54.318962', 'step': 22044, 'epoch': 3} {'type': 'loss', 'content': 0.02290247566998005, 'timestamp': '2025-09-10 02:51:54.321243', 'step': 22045, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 128], 'flops': 3797092544000}, 'timestamp': '2025-09-10 02:51:54.351437', 'step': 22045, 'epoch': 3} {'type': 'loss', 'content': 0.07595083117485046, 'timestamp': '2025-09-10 02:51:54.353841', 'step': 22046, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 2847885091968}, 'timestamp': '2025-09-10 02:51:54.384902', 'step': 22046, 'epoch': 3} {'type': 'loss', 'content': 0.035565949976444244, 'timestamp': '2025-09-10 02:51:54.390154', 'step': 22047, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 128], 'flops': 3797092544000}, 'timestamp': '2025-09-10 02:51:54.421023', 'step': 22047, 'epoch': 3} {'type': 'loss', 'content': 0.06249717250466347, 'timestamp': '2025-09-10 02:51:54.444554', 'step': 22048, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 144], 'flops': 4271696270016}, 'timestamp': '2025-09-10 02:51:54.475207', 'step': 22048, 'epoch': 3} {'type': 'loss', 'content': 0.060099150985479355, 'timestamp': '2025-09-10 02:51:54.477603', 'step': 22049, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 2847885091968}, 'timestamp': '2025-09-10 02:51:54.508794', 'step': 22049, 'epoch': 3} {'type': 'loss', 'content': 0.05310109257698059, 'timestamp': '2025-09-10 02:51:54.511100', 'step': 22050, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 2847885091968}, 'timestamp': '2025-09-10 02:51:54.541393', 'step': 22050, 'epoch': 3} {'type': 'loss', 'content': 0.02893408201634884, 'timestamp': '2025-09-10 02:51:54.543972', 'step': 22051, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 2847885091968}, 'timestamp': '2025-09-10 02:51:54.574367', 'step': 22051, 'epoch': 3} {'type': 'loss', 'content': 0.045944951474666595, 'timestamp': '2025-09-10 02:51:54.597863', 'step': 22052, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 3322488817984}, 'timestamp': '2025-09-10 02:51:54.628478', 'step': 22052, 'epoch': 3} {'type': 'loss', 'content': 0.0541098490357399, 'timestamp': '2025-09-10 02:51:54.631190', 'step': 22053, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 128], 'flops': 3797092544000}, 'timestamp': '2025-09-10 02:51:54.661294', 'step': 22053, 'epoch': 3} {'type': 'loss', 'content': 0.029077090322971344, 'timestamp': '2025-09-10 02:51:54.664764', 'step': 22054, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 128], 'flops': 3797092544000}, 'timestamp': '2025-09-10 02:51:54.694778', 'step': 22054, 'epoch': 3} {'type': 'loss', 'content': 0.04704199731349945, 'timestamp': '2025-09-10 02:51:54.697864', 'step': 22055, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 3322488817984}, 'timestamp': '2025-09-10 02:51:54.729344', 'step': 22055, 'epoch': 3} {'type': 'loss', 'content': 0.07154818624258041, 'timestamp': '2025-09-10 02:51:54.753221', 'step': 22056, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 3322488817984}, 'timestamp': '2025-09-10 02:51:54.786633', 'step': 22056, 'epoch': 3} {'type': 'loss', 'content': 0.02810700796544552, 'timestamp': '2025-09-10 02:51:54.789256', 'step': 22057, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 144], 'flops': 4271696270016}, 'timestamp': '2025-09-10 02:51:54.819884', 'step': 22057, 'epoch': 3} {'type': 'loss', 'content': 0.0320722833275795, 'timestamp': '2025-09-10 02:51:54.822743', 'step': 22058, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 80], 'flops': 2373281365952}, 'timestamp': '2025-09-10 02:51:54.853525', 'step': 22058, 'epoch': 3} {'type': 'loss', 'content': 0.02268897369503975, 'timestamp': '2025-09-10 02:51:54.856030', 'step': 22059, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 3322488817984}, 'timestamp': '2025-09-10 02:51:54.886370', 'step': 22059, 'epoch': 3} {'type': 'loss', 'content': 0.03383098542690277, 'timestamp': '2025-09-10 02:51:54.913246', 'step': 22060, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 2847885091968}, 'timestamp': '2025-09-10 02:51:54.944128', 'step': 22060, 'epoch': 3} {'type': 'loss', 'content': 0.07662884145975113, 'timestamp': '2025-09-10 02:51:54.946317', 'step': 22061, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 3322488817984}, 'timestamp': '2025-09-10 02:51:54.978433', 'step': 22061, 'epoch': 3} {'type': 'loss', 'content': 0.014948105439543724, 'timestamp': '2025-09-10 02:51:54.980753', 'step': 22062, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 128], 'flops': 3797092544000}, 'timestamp': '2025-09-10 02:51:55.011656', 'step': 22062, 'epoch': 3} {'type': 'loss', 'content': 0.07454036921262741, 'timestamp': '2025-09-10 02:51:55.014110', 'step': 22063, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 2847885091968}, 'timestamp': '2025-09-10 02:51:55.044165', 'step': 22063, 'epoch': 3} {'type': 'loss', 'content': 0.10739201307296753, 'timestamp': '2025-09-10 02:51:55.068366', 'step': 22064, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 128], 'flops': 3797092544000}, 'timestamp': '2025-09-10 02:51:55.098616', 'step': 22064, 'epoch': 3} {'type': 'loss', 'content': 0.030623283237218857, 'timestamp': '2025-09-10 02:51:55.101513', 'step': 22065, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 144], 'flops': 4271696270016}, 'timestamp': '2025-09-10 02:51:55.132487', 'step': 22065, 'epoch': 3} {'type': 'loss', 'content': 0.04053341597318649, 'timestamp': '2025-09-10 02:51:55.134908', 'step': 22066, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 144], 'flops': 4271696270016}, 'timestamp': '2025-09-10 02:51:55.165355', 'step': 22066, 'epoch': 3} {'type': 'loss', 'content': 0.0404658205807209, 'timestamp': '2025-09-10 02:51:55.167636', 'step': 22067, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 3322488817984}, 'timestamp': '2025-09-10 02:51:55.197332', 'step': 22067, 'epoch': 3} {'type': 'loss', 'content': 0.04689112678170204, 'timestamp': '2025-09-10 02:51:55.222542', 'step': 22068, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 144], 'flops': 4271696270016}, 'timestamp': '2025-09-10 02:51:55.254738', 'step': 22068, 'epoch': 3} {'type': 'loss', 'content': 0.10051658749580383, 'timestamp': '2025-09-10 02:51:55.257461', 'step': 22069, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 3322488817984}, 'timestamp': '2025-09-10 02:51:55.288738', 'step': 22069, 'epoch': 3} {'type': 'loss', 'content': 0.06257625669240952, 'timestamp': '2025-09-10 02:51:55.291242', 'step': 22070, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 3322488817984}, 'timestamp': '2025-09-10 02:51:55.321818', 'step': 22070, 'epoch': 3} {'type': 'loss', 'content': 0.024496523663401604, 'timestamp': '2025-09-10 02:51:55.324253', 'step': 22071, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 2847885091968}, 'timestamp': '2025-09-10 02:51:55.354165', 'step': 22071, 'epoch': 3} {'type': 'loss', 'content': 0.08286543190479279, 'timestamp': '2025-09-10 02:51:55.377548', 'step': 22072, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 3322488817984}, 'timestamp': '2025-09-10 02:51:55.408393', 'step': 22072, 'epoch': 3} {'type': 'loss', 'content': 0.08649364113807678, 'timestamp': '2025-09-10 02:51:55.410611', 'step': 22073, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 144], 'flops': 4271696270016}, 'timestamp': '2025-09-10 02:51:55.441530', 'step': 22073, 'epoch': 3} {'type': 'loss', 'content': 0.07076852768659592, 'timestamp': '2025-09-10 02:51:55.444174', 'step': 22074, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 144], 'flops': 4271696270016}, 'timestamp': '2025-09-10 02:51:55.475022', 'step': 22074, 'epoch': 3} {'type': 'loss', 'content': 0.04873207211494446, 'timestamp': '2025-09-10 02:51:55.477723', 'step': 22075, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 3322488817984}, 'timestamp': '2025-09-10 02:51:55.508510', 'step': 22075, 'epoch': 3} {'type': 'loss', 'content': 0.057214509695768356, 'timestamp': '2025-09-10 02:51:55.532294', 'step': 22076, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 128], 'flops': 3797092544000}, 'timestamp': '2025-09-10 02:51:55.563749', 'step': 22076, 'epoch': 3} {'type': 'loss', 'content': 0.046702463179826736, 'timestamp': '2025-09-10 02:51:55.566184', 'step': 22077, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 192], 'flops': 5695507448064}, 'timestamp': '2025-09-10 02:51:55.596517', 'step': 22077, 'epoch': 3} {'type': 'loss', 'content': 0.042508549988269806, 'timestamp': '2025-09-10 02:51:55.601308', 'step': 22078, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 80], 'flops': 2373281365952}, 'timestamp': '2025-09-10 02:51:55.632524', 'step': 22078, 'epoch': 3} {'type': 'loss', 'content': 0.13030368089675903, 'timestamp': '2025-09-10 02:51:55.635184', 'step': 22079, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 2847885091968}, 'timestamp': '2025-09-10 02:51:55.666748', 'step': 22079, 'epoch': 3} {'type': 'loss', 'content': 0.030251896008849144, 'timestamp': '2025-09-10 02:51:55.691068', 'step': 22080, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 2847885091968}, 'timestamp': '2025-09-10 02:51:55.721878', 'step': 22080, 'epoch': 3} {'type': 'loss', 'content': 0.132343128323555, 'timestamp': '2025-09-10 02:51:55.724481', 'step': 22081, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 128], 'flops': 3797092544000}, 'timestamp': '2025-09-10 02:51:55.754842', 'step': 22081, 'epoch': 3} {'type': 'loss', 'content': 0.08637100458145142, 'timestamp': '2025-09-10 02:51:55.757028', 'step': 22082, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 2847885091968}, 'timestamp': '2025-09-10 02:51:55.788299', 'step': 22082, 'epoch': 3} {'type': 'loss', 'content': 0.10240063816308975, 'timestamp': '2025-09-10 02:51:55.790755', 'step': 22083, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 128], 'flops': 3797092544000}, 'timestamp': '2025-09-10 02:51:55.821264', 'step': 22083, 'epoch': 3} {'type': 'loss', 'content': 0.07988343387842178, 'timestamp': '2025-09-10 02:51:55.845133', 'step': 22084, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 3322488817984}, 'timestamp': '2025-09-10 02:51:55.875927', 'step': 22084, 'epoch': 3} {'type': 'loss', 'content': 0.06478527188301086, 'timestamp': '2025-09-10 02:51:55.879857', 'step': 22085, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 3322488817984}, 'timestamp': '2025-09-10 02:51:55.909850', 'step': 22085, 'epoch': 3} {'type': 'loss', 'content': 0.041291870176792145, 'timestamp': '2025-09-10 02:51:55.912248', 'step': 22086, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 3322488817984}, 'timestamp': '2025-09-10 02:51:55.942455', 'step': 22086, 'epoch': 3} {'type': 'loss', 'content': 0.14017844200134277, 'timestamp': '2025-09-10 02:51:55.945106', 'step': 22087, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 160], 'flops': 4746299996032}, 'timestamp': '2025-09-10 02:51:55.978288', 'step': 22087, 'epoch': 3} {'type': 'loss', 'content': 0.049580685794353485, 'timestamp': '2025-09-10 02:51:56.001853', 'step': 22088, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 144], 'flops': 4271696270016}, 'timestamp': '2025-09-10 02:51:56.034199', 'step': 22088, 'epoch': 3} {'type': 'loss', 'content': 0.11601738631725311, 'timestamp': '2025-09-10 02:51:56.036802', 'step': 22089, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 80], 'flops': 2373281365952}, 'timestamp': '2025-09-10 02:51:56.066440', 'step': 22089, 'epoch': 3} {'type': 'loss', 'content': 0.07895785570144653, 'timestamp': '2025-09-10 02:51:56.069655', 'step': 22090, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 128], 'flops': 3797092544000}, 'timestamp': '2025-09-10 02:51:56.100679', 'step': 22090, 'epoch': 3} {'type': 'loss', 'content': 0.062153033912181854, 'timestamp': '2025-09-10 02:51:56.103182', 'step': 22091, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 128], 'flops': 3797092544000}, 'timestamp': '2025-09-10 02:51:56.135412', 'step': 22091, 'epoch': 3} {'type': 'loss', 'content': 0.051269855350255966, 'timestamp': '2025-09-10 02:51:56.159119', 'step': 22092, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 3322488817984}, 'timestamp': '2025-09-10 02:51:56.190741', 'step': 22092, 'epoch': 3} {'type': 'loss', 'content': 0.025089070200920105, 'timestamp': '2025-09-10 02:51:56.193459', 'step': 22093, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 2847885091968}, 'timestamp': '2025-09-10 02:51:56.224095', 'step': 22093, 'epoch': 3} {'type': 'loss', 'content': 0.0362737737596035, 'timestamp': '2025-09-10 02:51:56.226569', 'step': 22094, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 3322488817984}, 'timestamp': '2025-09-10 02:51:56.258054', 'step': 22094, 'epoch': 3} {'type': 'loss', 'content': 0.06821898370981216, 'timestamp': '2025-09-10 02:51:56.260505', 'step': 22095, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 3322488817984}, 'timestamp': '2025-09-10 02:51:56.291973', 'step': 22095, 'epoch': 3} {'type': 'loss', 'content': 0.07941563427448273, 'timestamp': '2025-09-10 02:51:56.315441', 'step': 22096, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 128], 'flops': 3797092544000}, 'timestamp': '2025-09-10 02:51:56.361627', 'step': 22096, 'epoch': 3} {'type': 'loss', 'content': 0.15809521079063416, 'timestamp': '2025-09-10 02:51:56.366757', 'step': 22097, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 128], 'flops': 3797092544000}, 'timestamp': '2025-09-10 02:51:56.436900', 'step': 22097, 'epoch': 3} {'type': 'loss', 'content': 0.05750645697116852, 'timestamp': '2025-09-10 02:51:56.456391', 'step': 22098, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 3322488817984}, 'timestamp': '2025-09-10 02:51:56.532375', 'step': 22098, 'epoch': 3} {'type': 'loss', 'content': 0.040048450231552124, 'timestamp': '2025-09-10 02:51:56.539244', 'step': 22099, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 128], 'flops': 3797092544000}, 'timestamp': '2025-09-10 02:51:56.579381', 'step': 22099, 'epoch': 3} {'type': 'loss', 'content': 0.06199165806174278, 'timestamp': '2025-09-10 02:51:56.603487', 'step': 22100, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 3322488817984}, 'timestamp': '2025-09-10 02:51:56.635210', 'step': 22100, 'epoch': 3} {'type': 'loss', 'content': 0.11917993426322937, 'timestamp': '2025-09-10 02:51:56.638227', 'step': 22101, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 2847885091968}, 'timestamp': '2025-09-10 02:51:56.669454', 'step': 22101, 'epoch': 3} {'type': 'loss', 'content': 0.08082467317581177, 'timestamp': '2025-09-10 02:51:56.672486', 'step': 22102, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 3322488817984}, 'timestamp': '2025-09-10 02:51:56.703293', 'step': 22102, 'epoch': 3} {'type': 'loss', 'content': 0.054501213133335114, 'timestamp': '2025-09-10 02:51:56.705999', 'step': 22103, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 144], 'flops': 4271696270016}, 'timestamp': '2025-09-10 02:51:56.738190', 'step': 22103, 'epoch': 3} {'type': 'loss', 'content': 0.12030395865440369, 'timestamp': '2025-09-10 02:51:56.761735', 'step': 22104, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 128], 'flops': 3797092544000}, 'timestamp': '2025-09-10 02:51:56.794041', 'step': 22104, 'epoch': 3} {'type': 'loss', 'content': 0.11372553557157516, 'timestamp': '2025-09-10 02:51:56.796951', 'step': 22105, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 128], 'flops': 3797092544000}, 'timestamp': '2025-09-10 02:51:56.827900', 'step': 22105, 'epoch': 3} {'type': 'loss', 'content': 0.03888837993144989, 'timestamp': '2025-09-10 02:51:56.831247', 'step': 22106, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 128], 'flops': 3797092544000}, 'timestamp': '2025-09-10 02:51:56.862738', 'step': 22106, 'epoch': 3} {'type': 'loss', 'content': 0.09665735065937042, 'timestamp': '2025-09-10 02:51:56.866256', 'step': 22107, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 128], 'flops': 3797092544000}, 'timestamp': '2025-09-10 02:51:56.907799', 'step': 22107, 'epoch': 3} {'type': 'loss', 'content': 0.06588267534971237, 'timestamp': '2025-09-10 02:51:56.932409', 'step': 22108, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 3322488817984}, 'timestamp': '2025-09-10 02:51:56.966679', 'step': 22108, 'epoch': 3} {'type': 'loss', 'content': 0.07580086588859558, 'timestamp': '2025-09-10 02:51:56.969440', 'step': 22109, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 2847885091968}, 'timestamp': '2025-09-10 02:51:57.000188', 'step': 22109, 'epoch': 3} {'type': 'loss', 'content': 0.06851960718631744, 'timestamp': '2025-09-10 02:51:57.002430', 'step': 22110, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 128], 'flops': 3797092544000}, 'timestamp': '2025-09-10 02:51:57.034520', 'step': 22110, 'epoch': 3} {'type': 'loss', 'content': 0.056631170213222504, 'timestamp': '2025-09-10 02:51:57.037398', 'step': 22111, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 128], 'flops': 3797092544000}, 'timestamp': '2025-09-10 02:51:57.068739', 'step': 22111, 'epoch': 3} {'type': 'loss', 'content': 0.10254628211259842, 'timestamp': '2025-09-10 02:51:57.092730', 'step': 22112, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 144], 'flops': 4271696270016}, 'timestamp': '2025-09-10 02:51:57.124744', 'step': 22112, 'epoch': 3} {'type': 'loss', 'content': 0.10103090852499008, 'timestamp': '2025-09-10 02:51:57.129281', 'step': 22113, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 128], 'flops': 3797092544000}, 'timestamp': '2025-09-10 02:51:57.163338', 'step': 22113, 'epoch': 3} {'type': 'loss', 'content': 0.06634283065795898, 'timestamp': '2025-09-10 02:51:57.165541', 'step': 22114, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 3322488817984}, 'timestamp': '2025-09-10 02:51:57.195693', 'step': 22114, 'epoch': 3} {'type': 'loss', 'content': 0.10100260376930237, 'timestamp': '2025-09-10 02:51:57.198231', 'step': 22115, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 80], 'flops': 2373281365952}, 'timestamp': '2025-09-10 02:51:57.229126', 'step': 22115, 'epoch': 3} {'type': 'loss', 'content': 0.07073008269071579, 'timestamp': '2025-09-10 02:51:57.253246', 'step': 22116, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 2847885091968}, 'timestamp': '2025-09-10 02:51:57.285966', 'step': 22116, 'epoch': 3} {'type': 'loss', 'content': 0.07370555400848389, 'timestamp': '2025-09-10 02:51:57.288185', 'step': 22117, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 128], 'flops': 3797092544000}, 'timestamp': '2025-09-10 02:51:57.319672', 'step': 22117, 'epoch': 3} {'type': 'loss', 'content': 0.020146507769823074, 'timestamp': '2025-09-10 02:51:57.321876', 'step': 22118, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 144], 'flops': 4271696270016}, 'timestamp': '2025-09-10 02:51:57.356202', 'step': 22118, 'epoch': 3} {'type': 'loss', 'content': 0.05104820057749748, 'timestamp': '2025-09-10 02:51:57.358807', 'step': 22119, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 3322488817984}, 'timestamp': '2025-09-10 02:51:57.395654', 'step': 22119, 'epoch': 3} {'type': 'loss', 'content': 0.04757446423172951, 'timestamp': '2025-09-10 02:51:57.421301', 'step': 22120, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 2847885091968}, 'timestamp': '2025-09-10 02:51:57.454589', 'step': 22120, 'epoch': 3} {'type': 'loss', 'content': 0.03734028711915016, 'timestamp': '2025-09-10 02:51:57.457253', 'step': 22121, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 144], 'flops': 4271696270016}, 'timestamp': '2025-09-10 02:51:57.490119', 'step': 22121, 'epoch': 3} {'type': 'loss', 'content': 0.08965589851140976, 'timestamp': '2025-09-10 02:51:57.492832', 'step': 22122, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 3322488817984}, 'timestamp': '2025-09-10 02:51:57.526397', 'step': 22122, 'epoch': 3} {'type': 'loss', 'content': 0.17546729743480682, 'timestamp': '2025-09-10 02:51:57.529458', 'step': 22123, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 128], 'flops': 3797092544000}, 'timestamp': '2025-09-10 02:51:57.563939', 'step': 22123, 'epoch': 3} {'type': 'loss', 'content': 0.05715806037187576, 'timestamp': '2025-09-10 02:51:57.587899', 'step': 22124, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 2847885091968}, 'timestamp': '2025-09-10 02:51:57.622234', 'step': 22124, 'epoch': 3} {'type': 'loss', 'content': 0.08079181611537933, 'timestamp': '2025-09-10 02:51:57.625322', 'step': 22125, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 2847885091968}, 'timestamp': '2025-09-10 02:51:57.660683', 'step': 22125, 'epoch': 3} {'type': 'loss', 'content': 0.021415894851088524, 'timestamp': '2025-09-10 02:51:57.664000', 'step': 22126, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 3322488817984}, 'timestamp': '2025-09-10 02:51:57.695352', 'step': 22126, 'epoch': 3} {'type': 'loss', 'content': 0.1173483282327652, 'timestamp': '2025-09-10 02:51:57.697852', 'step': 22127, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 3322488817984}, 'timestamp': '2025-09-10 02:51:57.728056', 'step': 22127, 'epoch': 3} {'type': 'loss', 'content': 0.06213172897696495, 'timestamp': '2025-09-10 02:51:57.751762', 'step': 22128, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 144], 'flops': 4271696270016}, 'timestamp': '2025-09-10 02:51:57.783194', 'step': 22128, 'epoch': 3} {'type': 'loss', 'content': 0.03319559618830681, 'timestamp': '2025-09-10 02:51:57.785447', 'step': 22129, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 176], 'flops': 5220903722048}, 'timestamp': '2025-09-10 02:51:57.816258', 'step': 22129, 'epoch': 3} {'type': 'loss', 'content': 0.058282606303691864, 'timestamp': '2025-09-10 02:51:57.820686', 'step': 22130, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 3322488817984}, 'timestamp': '2025-09-10 02:51:57.851388', 'step': 22130, 'epoch': 3} {'type': 'loss', 'content': 0.022813228890299797, 'timestamp': '2025-09-10 02:51:57.853812', 'step': 22131, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 128], 'flops': 3797092544000}, 'timestamp': '2025-09-10 02:51:57.884655', 'step': 22131, 'epoch': 3} {'type': 'loss', 'content': 0.03318553417921066, 'timestamp': '2025-09-10 02:51:57.908104', 'step': 22132, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 3322488817984}, 'timestamp': '2025-09-10 02:51:57.939238', 'step': 22132, 'epoch': 3} {'type': 'loss', 'content': 0.0769626796245575, 'timestamp': '2025-09-10 02:51:57.941665', 'step': 22133, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 2847885091968}, 'timestamp': '2025-09-10 02:51:57.972994', 'step': 22133, 'epoch': 3} {'type': 'loss', 'content': 0.11379409581422806, 'timestamp': '2025-09-10 02:51:57.975912', 'step': 22134, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 3322488817984}, 'timestamp': '2025-09-10 02:51:58.006437', 'step': 22134, 'epoch': 3} {'type': 'loss', 'content': 0.046579305082559586, 'timestamp': '2025-09-10 02:51:58.008824', 'step': 22135, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 3322488817984}, 'timestamp': '2025-09-10 02:51:58.039348', 'step': 22135, 'epoch': 3} {'type': 'loss', 'content': 0.06024990230798721, 'timestamp': '2025-09-10 02:51:58.063593', 'step': 22136, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 2847885091968}, 'timestamp': '2025-09-10 02:51:58.095914', 'step': 22136, 'epoch': 3} {'type': 'loss', 'content': 0.0420508086681366, 'timestamp': '2025-09-10 02:51:58.098521', 'step': 22137, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 3322488817984}, 'timestamp': '2025-09-10 02:51:58.129236', 'step': 22137, 'epoch': 3} {'type': 'loss', 'content': 0.040018048137426376, 'timestamp': '2025-09-10 02:51:58.131680', 'step': 22138, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 2847885091968}, 'timestamp': '2025-09-10 02:51:58.162939', 'step': 22138, 'epoch': 3} {'type': 'loss', 'content': 0.02681858278810978, 'timestamp': '2025-09-10 02:51:58.165483', 'step': 22139, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 160], 'flops': 4746299996032}, 'timestamp': '2025-09-10 02:51:58.197624', 'step': 22139, 'epoch': 3} {'type': 'loss', 'content': 0.09619931876659393, 'timestamp': '2025-09-10 02:51:58.221192', 'step': 22140, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 2847885091968}, 'timestamp': '2025-09-10 02:51:58.252277', 'step': 22140, 'epoch': 3} {'type': 'loss', 'content': 0.042737364768981934, 'timestamp': '2025-09-10 02:51:58.254519', 'step': 22141, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 128], 'flops': 3797092544000}, 'timestamp': '2025-09-10 02:51:58.286479', 'step': 22141, 'epoch': 3} {'type': 'loss', 'content': 0.04999866336584091, 'timestamp': '2025-09-10 02:51:58.288693', 'step': 22142, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 3322488817984}, 'timestamp': '2025-09-10 02:51:58.319264', 'step': 22142, 'epoch': 3} {'type': 'loss', 'content': 0.06517823040485382, 'timestamp': '2025-09-10 02:51:58.321752', 'step': 22143, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 80], 'flops': 2373281365952}, 'timestamp': '2025-09-10 02:51:58.351947', 'step': 22143, 'epoch': 3} {'type': 'loss', 'content': 0.06387463212013245, 'timestamp': '2025-09-10 02:51:58.375725', 'step': 22144, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 128], 'flops': 3797092544000}, 'timestamp': '2025-09-10 02:51:58.406016', 'step': 22144, 'epoch': 3} {'type': 'loss', 'content': 0.09909646958112717, 'timestamp': '2025-09-10 02:51:58.408689', 'step': 22145, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 128], 'flops': 3797092544000}, 'timestamp': '2025-09-10 02:51:58.439549', 'step': 22145, 'epoch': 3} {'type': 'loss', 'content': 0.0958806574344635, 'timestamp': '2025-09-10 02:51:58.441820', 'step': 22146, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 128], 'flops': 3797092544000}, 'timestamp': '2025-09-10 02:51:58.474130', 'step': 22146, 'epoch': 3} {'type': 'loss', 'content': 0.05335201695561409, 'timestamp': '2025-09-10 02:51:58.476270', 'step': 22147, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 2847885091968}, 'timestamp': '2025-09-10 02:51:58.505904', 'step': 22147, 'epoch': 3} {'type': 'loss', 'content': 0.018999867141246796, 'timestamp': '2025-09-10 02:51:58.529623', 'step': 22148, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 176], 'flops': 5220903722048}, 'timestamp': '2025-09-10 02:51:58.560313', 'step': 22148, 'epoch': 3} {'type': 'loss', 'content': 0.05104285106062889, 'timestamp': '2025-09-10 02:51:58.562991', 'step': 22149, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 128], 'flops': 3797092544000}, 'timestamp': '2025-09-10 02:51:58.593886', 'step': 22149, 'epoch': 3} {'type': 'loss', 'content': 0.057187337428331375, 'timestamp': '2025-09-10 02:51:58.596304', 'step': 22150, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 2847885091968}, 'timestamp': '2025-09-10 02:51:58.626028', 'step': 22150, 'epoch': 3} {'type': 'loss', 'content': 0.09317896515130997, 'timestamp': '2025-09-10 02:51:58.629447', 'step': 22151, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 2847885091968}, 'timestamp': '2025-09-10 02:51:58.660269', 'step': 22151, 'epoch': 3} {'type': 'loss', 'content': 0.024149924516677856, 'timestamp': '2025-09-10 02:51:58.683875', 'step': 22152, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 2847885091968}, 'timestamp': '2025-09-10 02:51:58.714124', 'step': 22152, 'epoch': 3} {'type': 'loss', 'content': 0.05393052473664284, 'timestamp': '2025-09-10 02:51:58.716909', 'step': 22153, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 2847885091968}, 'timestamp': '2025-09-10 02:51:58.746771', 'step': 22153, 'epoch': 3} {'type': 'loss', 'content': 0.03864069655537605, 'timestamp': '2025-09-10 02:51:58.750456', 'step': 22154, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 2847885091968}, 'timestamp': '2025-09-10 02:51:58.780734', 'step': 22154, 'epoch': 3} {'type': 'loss', 'content': 0.012937289662659168, 'timestamp': '2025-09-10 02:51:58.782787', 'step': 22155, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 128], 'flops': 3797092544000}, 'timestamp': '2025-09-10 02:51:58.813487', 'step': 22155, 'epoch': 3} {'type': 'loss', 'content': 0.014259591698646545, 'timestamp': '2025-09-10 02:51:58.837111', 'step': 22156, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 2847885091968}, 'timestamp': '2025-09-10 02:51:58.867364', 'step': 22156, 'epoch': 3} {'type': 'loss', 'content': 0.031005552038550377, 'timestamp': '2025-09-10 02:51:58.869672', 'step': 22157, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 2847885091968}, 'timestamp': '2025-09-10 02:51:58.901227', 'step': 22157, 'epoch': 3} {'type': 'loss', 'content': 0.02833903767168522, 'timestamp': '2025-09-10 02:51:58.903670', 'step': 22158, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 3322488817984}, 'timestamp': '2025-09-10 02:51:58.934968', 'step': 22158, 'epoch': 3} {'type': 'loss', 'content': 0.11405602842569351, 'timestamp': '2025-09-10 02:51:58.937520', 'step': 22159, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 144], 'flops': 4271696270016}, 'timestamp': '2025-09-10 02:51:58.968316', 'step': 22159, 'epoch': 3} {'type': 'loss', 'content': 0.13659046590328217, 'timestamp': '2025-09-10 02:51:58.992370', 'step': 22160, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 144], 'flops': 4271696270016}, 'timestamp': '2025-09-10 02:51:59.023553', 'step': 22160, 'epoch': 3} {'type': 'loss', 'content': 0.06660667061805725, 'timestamp': '2025-09-10 02:51:59.026013', 'step': 22161, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 3322488817984}, 'timestamp': '2025-09-10 02:51:59.056545', 'step': 22161, 'epoch': 3} {'type': 'loss', 'content': 0.04736163467168808, 'timestamp': '2025-09-10 02:51:59.060599', 'step': 22162, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 2847885091968}, 'timestamp': '2025-09-10 02:51:59.092546', 'step': 22162, 'epoch': 3} {'type': 'loss', 'content': 0.17031152546405792, 'timestamp': '2025-09-10 02:51:59.095088', 'step': 22163, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 3322488817984}, 'timestamp': '2025-09-10 02:51:59.124947', 'step': 22163, 'epoch': 3} {'type': 'loss', 'content': 0.05862906575202942, 'timestamp': '2025-09-10 02:51:59.148429', 'step': 22164, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 3322488817984}, 'timestamp': '2025-09-10 02:51:59.179648', 'step': 22164, 'epoch': 3} {'type': 'loss', 'content': 0.09199181199073792, 'timestamp': '2025-09-10 02:51:59.181859', 'step': 22165, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 3322488817984}, 'timestamp': '2025-09-10 02:51:59.211573', 'step': 22165, 'epoch': 3} {'type': 'loss', 'content': 0.10043106228113174, 'timestamp': '2025-09-10 02:51:59.214158', 'step': 22166, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 2847885091968}, 'timestamp': '2025-09-10 02:51:59.244543', 'step': 22166, 'epoch': 3} {'type': 'loss', 'content': 0.03862127289175987, 'timestamp': '2025-09-10 02:51:59.246869', 'step': 22167, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 3322488817984}, 'timestamp': '2025-09-10 02:51:59.277218', 'step': 22167, 'epoch': 3} {'type': 'loss', 'content': 0.10112235695123672, 'timestamp': '2025-09-10 02:51:59.301184', 'step': 22168, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 160], 'flops': 4746299996032}, 'timestamp': '2025-09-10 02:51:59.331515', 'step': 22168, 'epoch': 3} {'type': 'loss', 'content': 0.03849316015839577, 'timestamp': '2025-09-10 02:51:59.334181', 'step': 22169, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 2847885091968}, 'timestamp': '2025-09-10 02:51:59.364885', 'step': 22169, 'epoch': 3} {'type': 'loss', 'content': 0.06181098520755768, 'timestamp': '2025-09-10 02:51:59.367385', 'step': 22170, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 3322488817984}, 'timestamp': '2025-09-10 02:51:59.399204', 'step': 22170, 'epoch': 3} {'type': 'loss', 'content': 0.033721987158060074, 'timestamp': '2025-09-10 02:51:59.401421', 'step': 22171, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 3322488817984}, 'timestamp': '2025-09-10 02:51:59.433109', 'step': 22171, 'epoch': 3} {'type': 'loss', 'content': 0.12011799216270447, 'timestamp': '2025-09-10 02:51:59.458858', 'step': 22172, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 2847885091968}, 'timestamp': '2025-09-10 02:51:59.490242', 'step': 22172, 'epoch': 3} {'type': 'loss', 'content': 0.12119185924530029, 'timestamp': '2025-09-10 02:51:59.492611', 'step': 22173, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 160], 'flops': 4746299996032}, 'timestamp': '2025-09-10 02:51:59.524095', 'step': 22173, 'epoch': 3} {'type': 'loss', 'content': 0.07421983778476715, 'timestamp': '2025-09-10 02:51:59.526853', 'step': 22174, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 2847885091968}, 'timestamp': '2025-09-10 02:51:59.557656', 'step': 22174, 'epoch': 3} {'type': 'loss', 'content': 0.03883352503180504, 'timestamp': '2025-09-10 02:51:59.561346', 'step': 22175, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 3322488817984}, 'timestamp': '2025-09-10 02:51:59.591901', 'step': 22175, 'epoch': 3} {'type': 'loss', 'content': 0.0875248834490776, 'timestamp': '2025-09-10 02:51:59.615206', 'step': 22176, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 128], 'flops': 3797092544000}, 'timestamp': '2025-09-10 02:51:59.645283', 'step': 22176, 'epoch': 3} {'type': 'loss', 'content': 0.026221156120300293, 'timestamp': '2025-09-10 02:51:59.648082', 'step': 22177, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 3322488817984}, 'timestamp': '2025-09-10 02:51:59.678407', 'step': 22177, 'epoch': 3} {'type': 'loss', 'content': 0.04361405223608017, 'timestamp': '2025-09-10 02:51:59.681054', 'step': 22178, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 3322488817984}, 'timestamp': '2025-09-10 02:51:59.713403', 'step': 22178, 'epoch': 3} {'type': 'loss', 'content': 0.08741121739149094, 'timestamp': '2025-09-10 02:51:59.716180', 'step': 22179, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 2847885091968}, 'timestamp': '2025-09-10 02:51:59.747696', 'step': 22179, 'epoch': 3} {'type': 'loss', 'content': 0.12443185597658157, 'timestamp': '2025-09-10 02:51:59.771946', 'step': 22180, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 128], 'flops': 3797092544000}, 'timestamp': '2025-09-10 02:51:59.802445', 'step': 22180, 'epoch': 3} {'type': 'loss', 'content': 0.09508330374956131, 'timestamp': '2025-09-10 02:51:59.804969', 'step': 22181, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 128], 'flops': 3797092544000}, 'timestamp': '2025-09-10 02:51:59.849961', 'step': 22181, 'epoch': 3} {'type': 'loss', 'content': 0.026173710823059082, 'timestamp': '2025-09-10 02:51:59.852290', 'step': 22182, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 2847885091968}, 'timestamp': '2025-09-10 02:51:59.881479', 'step': 22182, 'epoch': 3} {'type': 'loss', 'content': 0.030504876747727394, 'timestamp': '2025-09-10 02:51:59.885194', 'step': 22183, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 2847885091968}, 'timestamp': '2025-09-10 02:51:59.919017', 'step': 22183, 'epoch': 3} {'type': 'loss', 'content': 0.07327505946159363, 'timestamp': '2025-09-10 02:51:59.942806', 'step': 22184, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 2847885091968}, 'timestamp': '2025-09-10 02:51:59.973005', 'step': 22184, 'epoch': 3} {'type': 'loss', 'content': 0.09063907712697983, 'timestamp': '2025-09-10 02:51:59.975863', 'step': 22185, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 128], 'flops': 3797092544000}, 'timestamp': '2025-09-10 02:52:00.006695', 'step': 22185, 'epoch': 3} {'type': 'loss', 'content': 0.09288380295038223, 'timestamp': '2025-09-10 02:52:00.008918', 'step': 22186, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 2847885091968}, 'timestamp': '2025-09-10 02:52:00.039695', 'step': 22186, 'epoch': 3} {'type': 'loss', 'content': 0.020698172971606255, 'timestamp': '2025-09-10 02:52:00.042088', 'step': 22187, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 2847885091968}, 'timestamp': '2025-09-10 02:52:00.071814', 'step': 22187, 'epoch': 3} {'type': 'loss', 'content': 0.07687801867723465, 'timestamp': '2025-09-10 02:52:00.095239', 'step': 22188, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 2847885091968}, 'timestamp': '2025-09-10 02:52:00.126969', 'step': 22188, 'epoch': 3} {'type': 'loss', 'content': 0.06087272986769676, 'timestamp': '2025-09-10 02:52:00.129573', 'step': 22189, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 3322488817984}, 'timestamp': '2025-09-10 02:52:00.159462', 'step': 22189, 'epoch': 3} {'type': 'loss', 'content': 0.028003517538309097, 'timestamp': '2025-09-10 02:52:00.161853', 'step': 22190, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 3322488817984}, 'timestamp': '2025-09-10 02:52:00.192869', 'step': 22190, 'epoch': 3} {'type': 'loss', 'content': 0.11709176748991013, 'timestamp': '2025-09-10 02:52:00.195289', 'step': 22191, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 144], 'flops': 4271696270016}, 'timestamp': '2025-09-10 02:52:00.226905', 'step': 22191, 'epoch': 3} {'type': 'loss', 'content': 0.07832345366477966, 'timestamp': '2025-09-10 02:52:00.250420', 'step': 22192, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 3322488817984}, 'timestamp': '2025-09-10 02:52:00.282618', 'step': 22192, 'epoch': 3} {'type': 'loss', 'content': 0.09456484019756317, 'timestamp': '2025-09-10 02:52:00.284982', 'step': 22193, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 128], 'flops': 3797092544000}, 'timestamp': '2025-09-10 02:52:00.314678', 'step': 22193, 'epoch': 3} {'type': 'loss', 'content': 0.05148365721106529, 'timestamp': '2025-09-10 02:52:00.316840', 'step': 22194, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 2847885091968}, 'timestamp': '2025-09-10 02:52:00.347501', 'step': 22194, 'epoch': 3} {'type': 'loss', 'content': 0.02366517297923565, 'timestamp': '2025-09-10 02:52:00.350000', 'step': 22195, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 128], 'flops': 3797092544000}, 'timestamp': '2025-09-10 02:52:00.380149', 'step': 22195, 'epoch': 3} {'type': 'loss', 'content': 0.12146294862031937, 'timestamp': '2025-09-10 02:52:00.403406', 'step': 22196, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 2847885091968}, 'timestamp': '2025-09-10 02:52:00.434177', 'step': 22196, 'epoch': 3} {'type': 'loss', 'content': 0.01959707960486412, 'timestamp': '2025-09-10 02:52:00.436665', 'step': 22197, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 144], 'flops': 4271696270016}, 'timestamp': '2025-09-10 02:52:00.470232', 'step': 22197, 'epoch': 3} {'type': 'loss', 'content': 0.07591113448143005, 'timestamp': '2025-09-10 02:52:00.473639', 'step': 22198, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 3322488817984}, 'timestamp': '2025-09-10 02:52:00.506025', 'step': 22198, 'epoch': 3} {'type': 'loss', 'content': 0.028433209285140038, 'timestamp': '2025-09-10 02:52:00.508582', 'step': 22199, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 3322488817984}, 'timestamp': '2025-09-10 02:52:00.540214', 'step': 22199, 'epoch': 3} {'type': 'loss', 'content': 0.028064876794815063, 'timestamp': '2025-09-10 02:52:00.563705', 'step': 22200, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 2847885091968}, 'timestamp': '2025-09-10 02:52:00.595787', 'step': 22200, 'epoch': 3} {'type': 'loss', 'content': 0.13026729226112366, 'timestamp': '2025-09-10 02:52:00.598289', 'step': 22201, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 3322488817984}, 'timestamp': '2025-09-10 02:52:00.628272', 'step': 22201, 'epoch': 3} {'type': 'loss', 'content': 0.04747450724244118, 'timestamp': '2025-09-10 02:52:00.630595', 'step': 22202, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 2847885091968}, 'timestamp': '2025-09-10 02:52:00.661125', 'step': 22202, 'epoch': 3} {'type': 'loss', 'content': 0.055386580526828766, 'timestamp': '2025-09-10 02:52:00.663452', 'step': 22203, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 2847885091968}, 'timestamp': '2025-09-10 02:52:00.693604', 'step': 22203, 'epoch': 3} {'type': 'loss', 'content': 0.0020672257523983717, 'timestamp': '2025-09-10 02:52:00.717217', 'step': 22204, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 128], 'flops': 3797092544000}, 'timestamp': '2025-09-10 02:52:00.747986', 'step': 22204, 'epoch': 3} {'type': 'loss', 'content': 0.027563130483031273, 'timestamp': '2025-09-10 02:52:00.750372', 'step': 22205, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 160], 'flops': 4746299996032}, 'timestamp': '2025-09-10 02:52:00.781687', 'step': 22205, 'epoch': 3} {'type': 'loss', 'content': 0.0613495297729969, 'timestamp': '2025-09-10 02:52:00.784411', 'step': 22206, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 2847885091968}, 'timestamp': '2025-09-10 02:52:00.815309', 'step': 22206, 'epoch': 3} {'type': 'loss', 'content': 0.06062990427017212, 'timestamp': '2025-09-10 02:52:00.817713', 'step': 22207, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 3322488817984}, 'timestamp': '2025-09-10 02:52:00.848395', 'step': 22207, 'epoch': 3} {'type': 'loss', 'content': 0.04836387187242508, 'timestamp': '2025-09-10 02:52:00.872288', 'step': 22208, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 144], 'flops': 4271696270016}, 'timestamp': '2025-09-10 02:52:00.904431', 'step': 22208, 'epoch': 3} {'type': 'loss', 'content': 0.039009060710668564, 'timestamp': '2025-09-10 02:52:00.907182', 'step': 22209, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 3322488817984}, 'timestamp': '2025-09-10 02:52:00.937671', 'step': 22209, 'epoch': 3} {'type': 'loss', 'content': 0.050898123532533646, 'timestamp': '2025-09-10 02:52:00.940407', 'step': 22210, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 2847885091968}, 'timestamp': '2025-09-10 02:52:00.971034', 'step': 22210, 'epoch': 3} {'type': 'loss', 'content': 0.06723137944936752, 'timestamp': '2025-09-10 02:52:00.973717', 'step': 22211, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 3322488817984}, 'timestamp': '2025-09-10 02:52:01.006465', 'step': 22211, 'epoch': 3} {'type': 'loss', 'content': 0.02107320725917816, 'timestamp': '2025-09-10 02:52:01.029846', 'step': 22212, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 3322488817984}, 'timestamp': '2025-09-10 02:52:01.059931', 'step': 22212, 'epoch': 3} {'type': 'loss', 'content': 0.0743609145283699, 'timestamp': '2025-09-10 02:52:01.062396', 'step': 22213, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 3322488817984}, 'timestamp': '2025-09-10 02:52:01.092972', 'step': 22213, 'epoch': 3} {'type': 'loss', 'content': 0.031216174364089966, 'timestamp': '2025-09-10 02:52:01.095437', 'step': 22214, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 3322488817984}, 'timestamp': '2025-09-10 02:52:01.125298', 'step': 22214, 'epoch': 3} {'type': 'loss', 'content': 0.03097057342529297, 'timestamp': '2025-09-10 02:52:01.127463', 'step': 22215, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 3322488817984}, 'timestamp': '2025-09-10 02:52:01.157882', 'step': 22215, 'epoch': 3} {'type': 'loss', 'content': 0.06797037273645401, 'timestamp': '2025-09-10 02:52:01.181411', 'step': 22216, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 2847885091968}, 'timestamp': '2025-09-10 02:52:01.212769', 'step': 22216, 'epoch': 3} {'type': 'loss', 'content': 0.04306516796350479, 'timestamp': '2025-09-10 02:52:01.215125', 'step': 22217, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 3322488817984}, 'timestamp': '2025-09-10 02:52:01.245466', 'step': 22217, 'epoch': 3} {'type': 'loss', 'content': 0.1097717210650444, 'timestamp': '2025-09-10 02:52:01.247547', 'step': 22218, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 3322488817984}, 'timestamp': '2025-09-10 02:52:01.278112', 'step': 22218, 'epoch': 3} {'type': 'loss', 'content': 0.0776199921965599, 'timestamp': '2025-09-10 02:52:01.282640', 'step': 22219, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 2847885091968}, 'timestamp': '2025-09-10 02:52:01.313617', 'step': 22219, 'epoch': 3} {'type': 'loss', 'content': 0.04063938930630684, 'timestamp': '2025-09-10 02:52:01.336947', 'step': 22220, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 2847885091968}, 'timestamp': '2025-09-10 02:52:01.369031', 'step': 22220, 'epoch': 3} {'type': 'loss', 'content': 0.07413765043020248, 'timestamp': '2025-09-10 02:52:01.371538', 'step': 22221, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 3322488817984}, 'timestamp': '2025-09-10 02:52:01.402432', 'step': 22221, 'epoch': 3} {'type': 'loss', 'content': 0.12205280363559723, 'timestamp': '2025-09-10 02:52:01.404826', 'step': 22222, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 144], 'flops': 4271696270016}, 'timestamp': '2025-09-10 02:52:01.435307', 'step': 22222, 'epoch': 3} {'type': 'loss', 'content': 0.08457358181476593, 'timestamp': '2025-09-10 02:52:01.438092', 'step': 22223, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 2847885091968}, 'timestamp': '2025-09-10 02:52:01.469151', 'step': 22223, 'epoch': 3} {'type': 'loss', 'content': 0.0023326457012444735, 'timestamp': '2025-09-10 02:52:01.492999', 'step': 22224, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 144], 'flops': 4271696270016}, 'timestamp': '2025-09-10 02:52:01.523478', 'step': 22224, 'epoch': 3} {'type': 'loss', 'content': 0.021332193166017532, 'timestamp': '2025-09-10 02:52:01.526230', 'step': 22225, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 3322488817984}, 'timestamp': '2025-09-10 02:52:01.556439', 'step': 22225, 'epoch': 3} {'type': 'loss', 'content': 0.07916621118783951, 'timestamp': '2025-09-10 02:52:01.558868', 'step': 22226, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 128], 'flops': 3797092544000}, 'timestamp': '2025-09-10 02:52:01.589503', 'step': 22226, 'epoch': 3} {'type': 'loss', 'content': 0.03909948468208313, 'timestamp': '2025-09-10 02:52:01.591930', 'step': 22227, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 3322488817984}, 'timestamp': '2025-09-10 02:52:01.623806', 'step': 22227, 'epoch': 3} {'type': 'loss', 'content': 0.07158499956130981, 'timestamp': '2025-09-10 02:52:01.648152', 'step': 22228, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 2847885091968}, 'timestamp': '2025-09-10 02:52:01.678612', 'step': 22228, 'epoch': 3} {'type': 'loss', 'content': 0.04914125055074692, 'timestamp': '2025-09-10 02:52:01.682462', 'step': 22229, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 128], 'flops': 3797092544000}, 'timestamp': '2025-09-10 02:52:01.714799', 'step': 22229, 'epoch': 3} {'type': 'loss', 'content': 0.05118254944682121, 'timestamp': '2025-09-10 02:52:01.717293', 'step': 22230, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 128], 'flops': 3797092544000}, 'timestamp': '2025-09-10 02:52:01.749433', 'step': 22230, 'epoch': 3} {'type': 'loss', 'content': 0.04785056412220001, 'timestamp': '2025-09-10 02:52:01.752463', 'step': 22231, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 2847885091968}, 'timestamp': '2025-09-10 02:52:01.783532', 'step': 22231, 'epoch': 3} {'type': 'loss', 'content': 0.08572326600551605, 'timestamp': '2025-09-10 02:52:01.807205', 'step': 22232, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 160], 'flops': 4746299996032}, 'timestamp': '2025-09-10 02:52:01.838139', 'step': 22232, 'epoch': 3} {'type': 'loss', 'content': 0.09163034707307816, 'timestamp': '2025-09-10 02:52:01.840531', 'step': 22233, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 128], 'flops': 3797092544000}, 'timestamp': '2025-09-10 02:52:01.872267', 'step': 22233, 'epoch': 3} {'type': 'loss', 'content': 0.062492430210113525, 'timestamp': '2025-09-10 02:52:01.875111', 'step': 22234, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 3322488817984}, 'timestamp': '2025-09-10 02:52:01.905786', 'step': 22234, 'epoch': 3} {'type': 'loss', 'content': 0.05012006685137749, 'timestamp': '2025-09-10 02:52:01.908150', 'step': 22235, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 3322488817984}, 'timestamp': '2025-09-10 02:52:01.938790', 'step': 22235, 'epoch': 3} {'type': 'loss', 'content': 0.039848484098911285, 'timestamp': '2025-09-10 02:52:01.962198', 'step': 22236, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 144], 'flops': 4271696270016}, 'timestamp': '2025-09-10 02:52:01.993577', 'step': 22236, 'epoch': 3} {'type': 'loss', 'content': 0.06210678815841675, 'timestamp': '2025-09-10 02:52:01.995594', 'step': 22237, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 144], 'flops': 4271696270016}, 'timestamp': '2025-09-10 02:52:02.025381', 'step': 22237, 'epoch': 3} {'type': 'loss', 'content': 0.059210337698459625, 'timestamp': '2025-09-10 02:52:02.028134', 'step': 22238, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 2847885091968}, 'timestamp': '2025-09-10 02:52:02.058758', 'step': 22238, 'epoch': 3} {'type': 'loss', 'content': 0.04117860272526741, 'timestamp': '2025-09-10 02:52:02.061135', 'step': 22239, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 2847885091968}, 'timestamp': '2025-09-10 02:52:02.092384', 'step': 22239, 'epoch': 3} {'type': 'loss', 'content': 0.09784190356731415, 'timestamp': '2025-09-10 02:52:02.116067', 'step': 22240, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 2847885091968}, 'timestamp': '2025-09-10 02:52:02.147762', 'step': 22240, 'epoch': 3} {'type': 'loss', 'content': 0.1733291894197464, 'timestamp': '2025-09-10 02:52:02.150330', 'step': 22241, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 3322488817984}, 'timestamp': '2025-09-10 02:52:02.180462', 'step': 22241, 'epoch': 3} {'type': 'loss', 'content': 0.07110946625471115, 'timestamp': '2025-09-10 02:52:02.183103', 'step': 22242, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 2847885091968}, 'timestamp': '2025-09-10 02:52:02.213113', 'step': 22242, 'epoch': 3} {'type': 'loss', 'content': 0.06694108247756958, 'timestamp': '2025-09-10 02:52:02.215912', 'step': 22243, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 3322488817984}, 'timestamp': '2025-09-10 02:52:02.245897', 'step': 22243, 'epoch': 3} {'type': 'loss', 'content': 0.03933300822973251, 'timestamp': '2025-09-10 02:52:02.269528', 'step': 22244, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 3322488817984}, 'timestamp': '2025-09-10 02:52:02.300199', 'step': 22244, 'epoch': 3} {'type': 'loss', 'content': 0.049953922629356384, 'timestamp': '2025-09-10 02:52:02.302668', 'step': 22245, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 128], 'flops': 3797092544000}, 'timestamp': '2025-09-10 02:52:02.332277', 'step': 22245, 'epoch': 3} {'type': 'loss', 'content': 0.11035151779651642, 'timestamp': '2025-09-10 02:52:02.334887', 'step': 22246, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 128], 'flops': 3797092544000}, 'timestamp': '2025-09-10 02:52:02.364478', 'step': 22246, 'epoch': 3} {'type': 'loss', 'content': 0.0674256831407547, 'timestamp': '2025-09-10 02:52:02.367153', 'step': 22247, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 2847885091968}, 'timestamp': '2025-09-10 02:52:02.397737', 'step': 22247, 'epoch': 3} {'type': 'loss', 'content': 0.07029089331626892, 'timestamp': '2025-09-10 02:52:02.421060', 'step': 22248, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 2847885091968}, 'timestamp': '2025-09-10 02:52:02.452483', 'step': 22248, 'epoch': 3} {'type': 'loss', 'content': 0.05505291372537613, 'timestamp': '2025-09-10 02:52:02.454778', 'step': 22249, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 3322488817984}, 'timestamp': '2025-09-10 02:52:02.485669', 'step': 22249, 'epoch': 3} {'type': 'loss', 'content': 0.13260316848754883, 'timestamp': '2025-09-10 02:52:02.488368', 'step': 22250, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 3322488817984}, 'timestamp': '2025-09-10 02:52:02.517937', 'step': 22250, 'epoch': 3} {'type': 'loss', 'content': 0.032163433730602264, 'timestamp': '2025-09-10 02:52:02.520449', 'step': 22251, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 128], 'flops': 3797092544000}, 'timestamp': '2025-09-10 02:52:02.550546', 'step': 22251, 'epoch': 3} {'type': 'loss', 'content': 0.06675092875957489, 'timestamp': '2025-09-10 02:52:02.574344', 'step': 22252, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 3322488817984}, 'timestamp': '2025-09-10 02:52:02.605774', 'step': 22252, 'epoch': 3} {'type': 'loss', 'content': 0.07235157489776611, 'timestamp': '2025-09-10 02:52:02.608304', 'step': 22253, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 128], 'flops': 3797092544000}, 'timestamp': '2025-09-10 02:52:02.639840', 'step': 22253, 'epoch': 3} {'type': 'loss', 'content': 0.02608029544353485, 'timestamp': '2025-09-10 02:52:02.642297', 'step': 22254, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 128], 'flops': 3797092544000}, 'timestamp': '2025-09-10 02:52:02.673229', 'step': 22254, 'epoch': 3} {'type': 'loss', 'content': 0.08566093444824219, 'timestamp': '2025-09-10 02:52:02.675273', 'step': 22255, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 128], 'flops': 3797092544000}, 'timestamp': '2025-09-10 02:52:02.706212', 'step': 22255, 'epoch': 3} {'type': 'loss', 'content': 0.05467825382947922, 'timestamp': '2025-09-10 02:52:02.730157', 'step': 22256, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 3322488817984}, 'timestamp': '2025-09-10 02:52:02.761133', 'step': 22256, 'epoch': 3} {'type': 'loss', 'content': 0.06626827269792557, 'timestamp': '2025-09-10 02:52:02.763415', 'step': 22257, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 3322488817984}, 'timestamp': '2025-09-10 02:52:02.793887', 'step': 22257, 'epoch': 3} {'type': 'loss', 'content': 0.014317093417048454, 'timestamp': '2025-09-10 02:52:02.796428', 'step': 22258, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 160], 'flops': 4746299996032}, 'timestamp': '2025-09-10 02:52:02.827728', 'step': 22258, 'epoch': 3} {'type': 'loss', 'content': 0.036850083619356155, 'timestamp': '2025-09-10 02:52:02.830538', 'step': 22259, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 3322488817984}, 'timestamp': '2025-09-10 02:52:02.862357', 'step': 22259, 'epoch': 3} {'type': 'loss', 'content': 0.07202646881341934, 'timestamp': '2025-09-10 02:52:02.886489', 'step': 22260, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 2847885091968}, 'timestamp': '2025-09-10 02:52:02.920730', 'step': 22260, 'epoch': 3} {'type': 'loss', 'content': 0.06244084611535072, 'timestamp': '2025-09-10 02:52:02.922981', 'step': 22261, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 3322488817984}, 'timestamp': '2025-09-10 02:52:02.953742', 'step': 22261, 'epoch': 3} {'type': 'loss', 'content': 0.06973301619291306, 'timestamp': '2025-09-10 02:52:02.956505', 'step': 22262, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 3322488817984}, 'timestamp': '2025-09-10 02:52:02.988583', 'step': 22262, 'epoch': 3} {'type': 'loss', 'content': 0.058353979140520096, 'timestamp': '2025-09-10 02:52:02.990718', 'step': 22263, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 2847885091968}, 'timestamp': '2025-09-10 02:52:03.020297', 'step': 22263, 'epoch': 3} {'type': 'loss', 'content': 0.08961860090494156, 'timestamp': '2025-09-10 02:52:03.044304', 'step': 22264, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 128], 'flops': 3797092544000}, 'timestamp': '2025-09-10 02:52:03.074858', 'step': 22264, 'epoch': 3} {'type': 'loss', 'content': 0.03745308145880699, 'timestamp': '2025-09-10 02:52:03.077847', 'step': 22265, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 128], 'flops': 3797092544000}, 'timestamp': '2025-09-10 02:52:03.111014', 'step': 22265, 'epoch': 3} {'type': 'loss', 'content': 0.04330982640385628, 'timestamp': '2025-09-10 02:52:03.113501', 'step': 22266, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 128], 'flops': 3797092544000}, 'timestamp': '2025-09-10 02:52:03.143799', 'step': 22266, 'epoch': 3} {'type': 'loss', 'content': 0.04210761561989784, 'timestamp': '2025-09-10 02:52:03.146377', 'step': 22267, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 128], 'flops': 3797092544000}, 'timestamp': '2025-09-10 02:52:03.176756', 'step': 22267, 'epoch': 3} {'type': 'loss', 'content': 0.06784028559923172, 'timestamp': '2025-09-10 02:52:03.200856', 'step': 22268, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 2847885091968}, 'timestamp': '2025-09-10 02:52:03.232769', 'step': 22268, 'epoch': 3} {'type': 'loss', 'content': 0.01117510162293911, 'timestamp': '2025-09-10 02:52:03.234938', 'step': 22269, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 3322488817984}, 'timestamp': '2025-09-10 02:52:03.265391', 'step': 22269, 'epoch': 3} {'type': 'loss', 'content': 0.06910506635904312, 'timestamp': '2025-09-10 02:52:03.267579', 'step': 22270, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 128], 'flops': 3797092544000}, 'timestamp': '2025-09-10 02:52:03.299025', 'step': 22270, 'epoch': 3} {'type': 'loss', 'content': 0.06637576222419739, 'timestamp': '2025-09-10 02:52:03.301473', 'step': 22271, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 2847885091968}, 'timestamp': '2025-09-10 02:52:03.332944', 'step': 22271, 'epoch': 3} {'type': 'loss', 'content': 0.09261324256658554, 'timestamp': '2025-09-10 02:52:03.356449', 'step': 22272, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 3322488817984}, 'timestamp': '2025-09-10 02:52:03.387032', 'step': 22272, 'epoch': 3} {'type': 'loss', 'content': 0.03941776230931282, 'timestamp': '2025-09-10 02:52:03.389987', 'step': 22273, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 2847885091968}, 'timestamp': '2025-09-10 02:52:03.420104', 'step': 22273, 'epoch': 3} {'type': 'loss', 'content': 0.0293864905834198, 'timestamp': '2025-09-10 02:52:03.422568', 'step': 22274, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 2847885091968}, 'timestamp': '2025-09-10 02:52:03.453277', 'step': 22274, 'epoch': 3} {'type': 'loss', 'content': 0.04090479388833046, 'timestamp': '2025-09-10 02:52:03.455632', 'step': 22275, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 80], 'flops': 2373281365952}, 'timestamp': '2025-09-10 02:52:03.485768', 'step': 22275, 'epoch': 3} {'type': 'loss', 'content': 0.07046088576316833, 'timestamp': '2025-09-10 02:52:03.509511', 'step': 22276, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 3322488817984}, 'timestamp': '2025-09-10 02:52:03.541568', 'step': 22276, 'epoch': 3} {'type': 'loss', 'content': 0.13573706150054932, 'timestamp': '2025-09-10 02:52:03.543952', 'step': 22277, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 144], 'flops': 4271696270016}, 'timestamp': '2025-09-10 02:52:03.574511', 'step': 22277, 'epoch': 3} {'type': 'loss', 'content': 0.04612931236624718, 'timestamp': '2025-09-10 02:52:03.576965', 'step': 22278, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 2847885091968}, 'timestamp': '2025-09-10 02:52:03.607924', 'step': 22278, 'epoch': 3} {'type': 'loss', 'content': 0.06185740977525711, 'timestamp': '2025-09-10 02:52:03.611725', 'step': 22279, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 128], 'flops': 3797092544000}, 'timestamp': '2025-09-10 02:52:03.643481', 'step': 22279, 'epoch': 3} {'type': 'loss', 'content': 0.06099604070186615, 'timestamp': '2025-09-10 02:52:03.667085', 'step': 22280, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 128], 'flops': 3797092544000}, 'timestamp': '2025-09-10 02:52:03.696794', 'step': 22280, 'epoch': 3} {'type': 'loss', 'content': 0.0760156586766243, 'timestamp': '2025-09-10 02:52:03.699332', 'step': 22281, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 2847885091968}, 'timestamp': '2025-09-10 02:52:03.729882', 'step': 22281, 'epoch': 3} {'type': 'loss', 'content': 0.0735177993774414, 'timestamp': '2025-09-10 02:52:03.733193', 'step': 22282, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 3322488817984}, 'timestamp': '2025-09-10 02:52:03.763848', 'step': 22282, 'epoch': 3} {'type': 'loss', 'content': 0.012961842119693756, 'timestamp': '2025-09-10 02:52:03.766398', 'step': 22283, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 2847885091968}, 'timestamp': '2025-09-10 02:52:03.796422', 'step': 22283, 'epoch': 3} {'type': 'loss', 'content': 0.11089390516281128, 'timestamp': '2025-09-10 02:52:03.819938', 'step': 22284, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 2847885091968}, 'timestamp': '2025-09-10 02:52:03.850422', 'step': 22284, 'epoch': 3} {'type': 'loss', 'content': 0.12514358758926392, 'timestamp': '2025-09-10 02:52:03.852537', 'step': 22285, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 2847885091968}, 'timestamp': '2025-09-10 02:52:03.882585', 'step': 22285, 'epoch': 3} {'type': 'loss', 'content': 0.0186099074780941, 'timestamp': '2025-09-10 02:52:03.885509', 'step': 22286, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 128], 'flops': 3797092544000}, 'timestamp': '2025-09-10 02:52:03.916638', 'step': 22286, 'epoch': 3} {'type': 'loss', 'content': 0.009837034158408642, 'timestamp': '2025-09-10 02:52:03.919791', 'step': 22287, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 2847885091968}, 'timestamp': '2025-09-10 02:52:03.951196', 'step': 22287, 'epoch': 3} {'type': 'loss', 'content': 0.046495452523231506, 'timestamp': '2025-09-10 02:52:03.974918', 'step': 22288, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 3322488817984}, 'timestamp': '2025-09-10 02:52:04.006322', 'step': 22288, 'epoch': 3} {'type': 'loss', 'content': 0.09001349657773972, 'timestamp': '2025-09-10 02:52:04.008820', 'step': 22289, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 3322488817984}, 'timestamp': '2025-09-10 02:52:04.040607', 'step': 22289, 'epoch': 3} {'type': 'loss', 'content': 0.07847865670919418, 'timestamp': '2025-09-10 02:52:04.043510', 'step': 22290, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 176], 'flops': 5220903722048}, 'timestamp': '2025-09-10 02:52:04.075683', 'step': 22290, 'epoch': 3} {'type': 'loss', 'content': 0.022049255669116974, 'timestamp': '2025-09-10 02:52:04.080027', 'step': 22291, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 3322488817984}, 'timestamp': '2025-09-10 02:52:04.110521', 'step': 22291, 'epoch': 3} {'type': 'loss', 'content': 0.10147713124752045, 'timestamp': '2025-09-10 02:52:04.134144', 'step': 22292, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 3322488817984}, 'timestamp': '2025-09-10 02:52:04.165256', 'step': 22292, 'epoch': 3} {'type': 'loss', 'content': 0.1282276213169098, 'timestamp': '2025-09-10 02:52:04.167800', 'step': 22293, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 144], 'flops': 4271696270016}, 'timestamp': '2025-09-10 02:52:04.198359', 'step': 22293, 'epoch': 3} {'type': 'loss', 'content': 0.040805086493492126, 'timestamp': '2025-09-10 02:52:04.200671', 'step': 22294, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 3322488817984}, 'timestamp': '2025-09-10 02:52:04.230541', 'step': 22294, 'epoch': 3} {'type': 'loss', 'content': 0.05799545720219612, 'timestamp': '2025-09-10 02:52:04.234466', 'step': 22295, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 128], 'flops': 3797092544000}, 'timestamp': '2025-09-10 02:52:04.268197', 'step': 22295, 'epoch': 3} {'type': 'loss', 'content': 0.10380394011735916, 'timestamp': '2025-09-10 02:52:04.292951', 'step': 22296, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 128], 'flops': 3797092544000}, 'timestamp': '2025-09-10 02:52:04.322876', 'step': 22296, 'epoch': 3} {'type': 'loss', 'content': 0.0623406246304512, 'timestamp': '2025-09-10 02:52:04.325189', 'step': 22297, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 128], 'flops': 3797092544000}, 'timestamp': '2025-09-10 02:52:04.355441', 'step': 22297, 'epoch': 3} {'type': 'loss', 'content': 0.03406092897057533, 'timestamp': '2025-09-10 02:52:04.358322', 'step': 22298, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 3322488817984}, 'timestamp': '2025-09-10 02:52:04.389638', 'step': 22298, 'epoch': 3} {'type': 'loss', 'content': 0.03950585797429085, 'timestamp': '2025-09-10 02:52:04.392070', 'step': 22299, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 128], 'flops': 3797092544000}, 'timestamp': '2025-09-10 02:52:04.422125', 'step': 22299, 'epoch': 3} {'type': 'loss', 'content': 0.05124843865633011, 'timestamp': '2025-09-10 02:52:04.445391', 'step': 22300, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 3322488817984}, 'timestamp': '2025-09-10 02:52:04.476207', 'step': 22300, 'epoch': 3} {'type': 'loss', 'content': 0.03378034010529518, 'timestamp': '2025-09-10 02:52:04.478796', 'step': 22301, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 2847885091968}, 'timestamp': '2025-09-10 02:52:04.509189', 'step': 22301, 'epoch': 3} {'type': 'loss', 'content': 0.046911027282476425, 'timestamp': '2025-09-10 02:52:04.511499', 'step': 22302, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 3322488817984}, 'timestamp': '2025-09-10 02:52:04.542188', 'step': 22302, 'epoch': 3} {'type': 'loss', 'content': 0.06313959509134293, 'timestamp': '2025-09-10 02:52:04.544349', 'step': 22303, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 3322488817984}, 'timestamp': '2025-09-10 02:52:04.573994', 'step': 22303, 'epoch': 3} {'type': 'loss', 'content': 0.07245853543281555, 'timestamp': '2025-09-10 02:52:04.597599', 'step': 22304, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 3322488817984}, 'timestamp': '2025-09-10 02:52:04.628984', 'step': 22304, 'epoch': 3} {'type': 'loss', 'content': 0.09659962356090546, 'timestamp': '2025-09-10 02:52:04.631433', 'step': 22305, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 128], 'flops': 3797092544000}, 'timestamp': '2025-09-10 02:52:04.661497', 'step': 22305, 'epoch': 3} {'type': 'loss', 'content': 0.02592318505048752, 'timestamp': '2025-09-10 02:52:04.664301', 'step': 22306, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 3322488817984}, 'timestamp': '2025-09-10 02:52:04.695180', 'step': 22306, 'epoch': 3} {'type': 'loss', 'content': 0.015854811295866966, 'timestamp': '2025-09-10 02:52:04.698029', 'step': 22307, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 128], 'flops': 3797092544000}, 'timestamp': '2025-09-10 02:52:04.728908', 'step': 22307, 'epoch': 3} {'type': 'loss', 'content': 0.07214962691068649, 'timestamp': '2025-09-10 02:52:04.753157', 'step': 22308, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 128], 'flops': 3797092544000}, 'timestamp': '2025-09-10 02:52:04.784588', 'step': 22308, 'epoch': 3} {'type': 'loss', 'content': 0.07865837216377258, 'timestamp': '2025-09-10 02:52:04.787465', 'step': 22309, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 128], 'flops': 3797092544000}, 'timestamp': '2025-09-10 02:52:04.818207', 'step': 22309, 'epoch': 3} {'type': 'loss', 'content': 0.02844492718577385, 'timestamp': '2025-09-10 02:52:04.821632', 'step': 22310, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 128], 'flops': 3797092544000}, 'timestamp': '2025-09-10 02:52:04.853008', 'step': 22310, 'epoch': 3} {'type': 'loss', 'content': 0.07436762005090714, 'timestamp': '2025-09-10 02:52:04.855856', 'step': 22311, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 3322488817984}, 'timestamp': '2025-09-10 02:52:04.891261', 'step': 22311, 'epoch': 3} {'type': 'loss', 'content': 0.1016007661819458, 'timestamp': '2025-09-10 02:52:04.915082', 'step': 22312, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 144], 'flops': 4271696270016}, 'timestamp': '2025-09-10 02:52:04.954038', 'step': 22312, 'epoch': 3} {'type': 'loss', 'content': 0.05077631026506424, 'timestamp': '2025-09-10 02:52:04.956390', 'step': 22313, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 128], 'flops': 3797092544000}, 'timestamp': '2025-09-10 02:52:04.986540', 'step': 22313, 'epoch': 3} {'type': 'loss', 'content': 0.06650407612323761, 'timestamp': '2025-09-10 02:52:04.988990', 'step': 22314, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 128], 'flops': 3797092544000}, 'timestamp': '2025-09-10 02:52:05.018951', 'step': 22314, 'epoch': 3} {'type': 'loss', 'content': 0.020263852551579475, 'timestamp': '2025-09-10 02:52:05.022391', 'step': 22315, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 3322488817984}, 'timestamp': '2025-09-10 02:52:05.057207', 'step': 22315, 'epoch': 3} {'type': 'loss', 'content': 0.02883535996079445, 'timestamp': '2025-09-10 02:52:05.082063', 'step': 22316, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 160], 'flops': 4746299996032}, 'timestamp': '2025-09-10 02:52:05.115599', 'step': 22316, 'epoch': 3} {'type': 'loss', 'content': 0.042727209627628326, 'timestamp': '2025-09-10 02:52:05.118097', 'step': 22317, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 2847885091968}, 'timestamp': '2025-09-10 02:52:05.163389', 'step': 22317, 'epoch': 3} {'type': 'loss', 'content': 0.035345617681741714, 'timestamp': '2025-09-10 02:52:05.166579', 'step': 22318, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 3322488817984}, 'timestamp': '2025-09-10 02:52:05.198963', 'step': 22318, 'epoch': 3} {'type': 'loss', 'content': 0.0370713472366333, 'timestamp': '2025-09-10 02:52:05.201879', 'step': 22319, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 3322488817984}, 'timestamp': '2025-09-10 02:52:05.232553', 'step': 22319, 'epoch': 3} {'type': 'loss', 'content': 0.03609633073210716, 'timestamp': '2025-09-10 02:52:05.256935', 'step': 22320, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 144], 'flops': 4271696270016}, 'timestamp': '2025-09-10 02:52:05.288374', 'step': 22320, 'epoch': 3} {'type': 'loss', 'content': 0.10632967948913574, 'timestamp': '2025-09-10 02:52:05.290577', 'step': 22321, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 3322488817984}, 'timestamp': '2025-09-10 02:52:05.322131', 'step': 22321, 'epoch': 3} {'type': 'loss', 'content': 0.08073282241821289, 'timestamp': '2025-09-10 02:52:05.324899', 'step': 22322, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 128], 'flops': 3797092544000}, 'timestamp': '2025-09-10 02:52:05.355900', 'step': 22322, 'epoch': 3} {'type': 'loss', 'content': 0.07145975530147552, 'timestamp': '2025-09-10 02:52:05.358768', 'step': 22323, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 3322488817984}, 'timestamp': '2025-09-10 02:52:05.390073', 'step': 22323, 'epoch': 3} {'type': 'loss', 'content': 0.06093578413128853, 'timestamp': '2025-09-10 02:52:05.413691', 'step': 22324, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 144], 'flops': 4271696270016}, 'timestamp': '2025-09-10 02:52:05.444349', 'step': 22324, 'epoch': 3} {'type': 'loss', 'content': 0.020033644512295723, 'timestamp': '2025-09-10 02:52:05.447479', 'step': 22325, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 2847885091968}, 'timestamp': '2025-09-10 02:52:05.478509', 'step': 22325, 'epoch': 3} {'type': 'loss', 'content': 0.0599246509373188, 'timestamp': '2025-09-10 02:52:05.481426', 'step': 22326, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 3322488817984}, 'timestamp': '2025-09-10 02:52:05.513504', 'step': 22326, 'epoch': 3} {'type': 'loss', 'content': 0.04660126566886902, 'timestamp': '2025-09-10 02:52:05.515929', 'step': 22327, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 3322488817984}, 'timestamp': '2025-09-10 02:52:05.546290', 'step': 22327, 'epoch': 3} {'type': 'loss', 'content': 0.18515600264072418, 'timestamp': '2025-09-10 02:52:05.570145', 'step': 22328, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 2847885091968}, 'timestamp': '2025-09-10 02:52:05.600323', 'step': 22328, 'epoch': 3} {'type': 'loss', 'content': 0.12173821777105331, 'timestamp': '2025-09-10 02:52:05.602738', 'step': 22329, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 3322488817984}, 'timestamp': '2025-09-10 02:52:05.634888', 'step': 22329, 'epoch': 3} {'type': 'loss', 'content': 0.05405547469854355, 'timestamp': '2025-09-10 02:52:05.638768', 'step': 22330, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 3322488817984}, 'timestamp': '2025-09-10 02:52:05.668588', 'step': 22330, 'epoch': 3} {'type': 'loss', 'content': 0.011242343112826347, 'timestamp': '2025-09-10 02:52:05.671470', 'step': 22331, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 2847885091968}, 'timestamp': '2025-09-10 02:52:05.701784', 'step': 22331, 'epoch': 3} {'type': 'loss', 'content': 0.06220277026295662, 'timestamp': '2025-09-10 02:52:05.725846', 'step': 22332, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 80], 'flops': 2373281365952}, 'timestamp': '2025-09-10 02:52:05.756886', 'step': 22332, 'epoch': 3} {'type': 'loss', 'content': 0.046588797122240067, 'timestamp': '2025-09-10 02:52:05.759284', 'step': 22333, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 2847885091968}, 'timestamp': '2025-09-10 02:52:05.789664', 'step': 22333, 'epoch': 3} {'type': 'loss', 'content': 0.016103452071547508, 'timestamp': '2025-09-10 02:52:05.793257', 'step': 22334, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 3322488817984}, 'timestamp': '2025-09-10 02:52:05.823725', 'step': 22334, 'epoch': 3} {'type': 'loss', 'content': 0.06994939595460892, 'timestamp': '2025-09-10 02:52:05.826147', 'step': 22335, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 2847885091968}, 'timestamp': '2025-09-10 02:52:05.856559', 'step': 22335, 'epoch': 3} {'type': 'loss', 'content': 0.021227890625596046, 'timestamp': '2025-09-10 02:52:05.880380', 'step': 22336, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 80], 'flops': 2373281365952}, 'timestamp': '2025-09-10 02:52:05.910651', 'step': 22336, 'epoch': 3} {'type': 'loss', 'content': 0.10718896239995956, 'timestamp': '2025-09-10 02:52:05.914784', 'step': 22337, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 2847885091968}, 'timestamp': '2025-09-10 02:52:05.946826', 'step': 22337, 'epoch': 3} {'type': 'loss', 'content': 0.016316531226038933, 'timestamp': '2025-09-10 02:52:05.949938', 'step': 22338, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 3322488817984}, 'timestamp': '2025-09-10 02:52:05.980744', 'step': 22338, 'epoch': 3} {'type': 'loss', 'content': 0.13085323572158813, 'timestamp': '2025-09-10 02:52:05.983159', 'step': 22339, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 128], 'flops': 3797092544000}, 'timestamp': '2025-09-10 02:52:06.019007', 'step': 22339, 'epoch': 3} {'type': 'loss', 'content': 0.08870222419500351, 'timestamp': '2025-09-10 02:52:06.042924', 'step': 22340, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 144], 'flops': 4271696270016}, 'timestamp': '2025-09-10 02:52:06.073205', 'step': 22340, 'epoch': 3} {'type': 'loss', 'content': 0.019275326281785965, 'timestamp': '2025-09-10 02:52:06.075546', 'step': 22341, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 3322488817984}, 'timestamp': '2025-09-10 02:52:06.106463', 'step': 22341, 'epoch': 3} {'type': 'loss', 'content': 0.08960449695587158, 'timestamp': '2025-09-10 02:52:06.108550', 'step': 22342, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 3322488817984}, 'timestamp': '2025-09-10 02:52:06.138050', 'step': 22342, 'epoch': 3} {'type': 'loss', 'content': 0.041706815361976624, 'timestamp': '2025-09-10 02:52:06.140372', 'step': 22343, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 3322488817984}, 'timestamp': '2025-09-10 02:52:06.172342', 'step': 22343, 'epoch': 3} {'type': 'loss', 'content': 0.028567016124725342, 'timestamp': '2025-09-10 02:52:06.195923', 'step': 22344, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 3322488817984}, 'timestamp': '2025-09-10 02:52:06.225665', 'step': 22344, 'epoch': 3} {'type': 'loss', 'content': 0.095634326338768, 'timestamp': '2025-09-10 02:52:06.228380', 'step': 22345, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 3322488817984}, 'timestamp': '2025-09-10 02:52:06.261012', 'step': 22345, 'epoch': 3} {'type': 'loss', 'content': 0.024471022188663483, 'timestamp': '2025-09-10 02:52:06.263439', 'step': 22346, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 2847885091968}, 'timestamp': '2025-09-10 02:52:06.293274', 'step': 22346, 'epoch': 3} {'type': 'loss', 'content': 0.043256331235170364, 'timestamp': '2025-09-10 02:52:06.295672', 'step': 22347, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 2847885091968}, 'timestamp': '2025-09-10 02:52:06.325183', 'step': 22347, 'epoch': 3} {'type': 'loss', 'content': 0.04608907923102379, 'timestamp': '2025-09-10 02:52:06.348609', 'step': 22348, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 160], 'flops': 4746299996032}, 'timestamp': '2025-09-10 02:52:06.379566', 'step': 22348, 'epoch': 3} {'type': 'loss', 'content': 0.08482982963323593, 'timestamp': '2025-09-10 02:52:06.382272', 'step': 22349, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 3322488817984}, 'timestamp': '2025-09-10 02:52:06.412235', 'step': 22349, 'epoch': 3} {'type': 'loss', 'content': 0.07181572169065475, 'timestamp': '2025-09-10 02:52:06.414524', 'step': 22350, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 128], 'flops': 3797092544000}, 'timestamp': '2025-09-10 02:52:06.445542', 'step': 22350, 'epoch': 3} {'type': 'loss', 'content': 0.10037985444068909, 'timestamp': '2025-09-10 02:52:06.449706', 'step': 22351, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 144], 'flops': 4271696270016}, 'timestamp': '2025-09-10 02:52:06.483070', 'step': 22351, 'epoch': 3} {'type': 'loss', 'content': 0.1386011689901352, 'timestamp': '2025-09-10 02:52:06.506985', 'step': 22352, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 2847885091968}, 'timestamp': '2025-09-10 02:52:06.538135', 'step': 22352, 'epoch': 3} {'type': 'loss', 'content': 0.07126384228467941, 'timestamp': '2025-09-10 02:52:06.540560', 'step': 22353, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 144], 'flops': 4271696270016}, 'timestamp': '2025-09-10 02:52:06.574127', 'step': 22353, 'epoch': 3} {'type': 'loss', 'content': 0.024485735222697258, 'timestamp': '2025-09-10 02:52:06.577239', 'step': 22354, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 3322488817984}, 'timestamp': '2025-09-10 02:52:06.607567', 'step': 22354, 'epoch': 3} {'type': 'loss', 'content': 0.04940000921487808, 'timestamp': '2025-09-10 02:52:06.609497', 'step': 22355, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 3322488817984}, 'timestamp': '2025-09-10 02:52:06.639022', 'step': 22355, 'epoch': 3} {'type': 'loss', 'content': 0.06764112412929535, 'timestamp': '2025-09-10 02:52:06.662889', 'step': 22356, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 3322488817984}, 'timestamp': '2025-09-10 02:52:06.693329', 'step': 22356, 'epoch': 3} {'type': 'loss', 'content': 0.08252250403165817, 'timestamp': '2025-09-10 02:52:06.695720', 'step': 22357, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 2847885091968}, 'timestamp': '2025-09-10 02:52:06.725875', 'step': 22357, 'epoch': 3} {'type': 'loss', 'content': 0.007679885718971491, 'timestamp': '2025-09-10 02:52:06.733299', 'step': 22358, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 128], 'flops': 3797092544000}, 'timestamp': '2025-09-10 02:52:06.770031', 'step': 22358, 'epoch': 3} {'type': 'loss', 'content': 0.06613577902317047, 'timestamp': '2025-09-10 02:52:06.774215', 'step': 22359, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 128], 'flops': 3797092544000}, 'timestamp': '2025-09-10 02:52:06.806383', 'step': 22359, 'epoch': 3} {'type': 'loss', 'content': 0.06848751753568649, 'timestamp': '2025-09-10 02:52:06.829981', 'step': 22360, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 2847885091968}, 'timestamp': '2025-09-10 02:52:06.860248', 'step': 22360, 'epoch': 3} {'type': 'loss', 'content': 0.05278452858328819, 'timestamp': '2025-09-10 02:52:06.862610', 'step': 22361, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 128], 'flops': 3797092544000}, 'timestamp': '2025-09-10 02:52:06.909329', 'step': 22361, 'epoch': 3} {'type': 'loss', 'content': 0.0505240261554718, 'timestamp': '2025-09-10 02:52:06.915251', 'step': 22362, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 192], 'flops': 5695507448064}, 'timestamp': '2025-09-10 02:52:06.948335', 'step': 22362, 'epoch': 3} {'type': 'loss', 'content': 0.0889354720711708, 'timestamp': '2025-09-10 02:52:06.952953', 'step': 22363, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 2847885091968}, 'timestamp': '2025-09-10 02:52:06.986628', 'step': 22363, 'epoch': 3} {'type': 'loss', 'content': 0.02868165634572506, 'timestamp': '2025-09-10 02:52:07.010497', 'step': 22364, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 3322488817984}, 'timestamp': '2025-09-10 02:52:07.046517', 'step': 22364, 'epoch': 3} {'type': 'loss', 'content': 0.0499231293797493, 'timestamp': '2025-09-10 02:52:07.049207', 'step': 22365, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 3322488817984}, 'timestamp': '2025-09-10 02:52:07.080286', 'step': 22365, 'epoch': 3} {'type': 'loss', 'content': 0.02481803670525551, 'timestamp': '2025-09-10 02:52:07.082648', 'step': 22366, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 3322488817984}, 'timestamp': '2025-09-10 02:52:07.116479', 'step': 22366, 'epoch': 3} {'type': 'loss', 'content': 0.11819679290056229, 'timestamp': '2025-09-10 02:52:07.119407', 'step': 22367, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 3322488817984}, 'timestamp': '2025-09-10 02:52:07.150434', 'step': 22367, 'epoch': 3} {'type': 'loss', 'content': 0.12275747954845428, 'timestamp': '2025-09-10 02:52:07.174347', 'step': 22368, 'epoch': 3} {'type': 'flops', 'content': [{'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1582003754624}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1898404492032}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1898404492032}, {'type': 'perplexity', 'in_batch_dim': [8, 144], 'batch_size': 8, 'flops': 2847606704256}, {'type': 'perplexity', 'in_batch_dim': [8, 64], 'batch_size': 8, 'flops': 1265603017216}, {'type': 'perplexity', 'in_batch_dim': [8, 112], 'batch_size': 8, 'flops': 2214805229440}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1898404492032}, {'type': 'perplexity', 'in_batch_dim': [8, 112], 'batch_size': 8, 'flops': 2214805229440}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1898404492032}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1898404492032}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1898404492032}, {'type': 'perplexity', 'in_batch_dim': [8, 112], 'batch_size': 8, 'flops': 2214805229440}, {'type': 'perplexity', 'in_batch_dim': [8, 112], 'batch_size': 8, 'flops': 2214805229440}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1582003754624}, {'type': 'perplexity', 'in_batch_dim': [8, 144], 'batch_size': 8, 'flops': 2847606704256}, {'type': 'perplexity', 'in_batch_dim': [8, 112], 'batch_size': 8, 'flops': 2214805229440}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1582003754624}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1898404492032}, {'type': 'perplexity', 'in_batch_dim': [8, 64], 'batch_size': 8, 'flops': 1265603017216}, {'type': 'perplexity', 'in_batch_dim': [8, 64], 'batch_size': 8, 'flops': 1265603017216}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1898404492032}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1582003754624}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1582003754624}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1582003754624}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1582003754624}, {'type': 'perplexity', 'in_batch_dim': [8, 64], 'batch_size': 8, 'flops': 1265603017216}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1898404492032}, {'type': 'perplexity', 'in_batch_dim': [8, 64], 'batch_size': 8, 'flops': 1265603017216}, {'type': 'perplexity', 'in_batch_dim': [8, 64], 'batch_size': 8, 'flops': 1265603017216}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1582003754624}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1582003754624}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1582003754624}, {'type': 'perplexity', 'in_batch_dim': [8, 112], 'batch_size': 8, 'flops': 2214805229440}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1582003754624}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1582003754624}, {'type': 'perplexity', 'in_batch_dim': [8, 160], 'batch_size': 8, 'flops': 3164007441664}, {'type': 'perplexity', 'in_batch_dim': [8, 64], 'batch_size': 8, 'flops': 1265603017216}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1898404492032}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1898404492032}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1898404492032}, {'type': 'perplexity', 'in_batch_dim': [8, 64], 'batch_size': 8, 'flops': 1265603017216}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1582003754624}, {'type': 'perplexity', 'in_batch_dim': [8, 64], 'batch_size': 8, 'flops': 1265603017216}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1582003754624}, {'type': 'perplexity', 'in_batch_dim': [8, 64], 'batch_size': 8, 'flops': 1265603017216}, {'type': 'perplexity', 'in_batch_dim': [8, 112], 'batch_size': 8, 'flops': 2214805229440}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1582003754624}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1898404492032}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1898404492032}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1898404492032}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1898404492032}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1898404492032}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1898404492032}, {'type': 'perplexity', 'in_batch_dim': [8, 64], 'batch_size': 8, 'flops': 1265603017216}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1898404492032}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1582003754624}, {'type': 'perplexity', 'in_batch_dim': [8, 64], 'batch_size': 8, 'flops': 1265603017216}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1898404492032}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1898404492032}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1898404492032}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1898404492032}, {'type': 'perplexity', 'in_batch_dim': [8, 128], 'batch_size': 8, 'flops': 2531205966848}, {'type': 'perplexity', 'in_batch_dim': [8, 112], 'batch_size': 8, 'flops': 2214805229440}, {'type': 'perplexity', 'in_batch_dim': [8, 64], 'batch_size': 8, 'flops': 1265603017216}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1582003754624}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1898404492032}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1582003754624}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1898404492032}, {'type': 'perplexity', 'in_batch_dim': [8, 64], 'batch_size': 8, 'flops': 1265603017216}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1898404492032}, {'type': 'perplexity', 'in_batch_dim': [8, 112], 'batch_size': 8, 'flops': 2214805229440}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1898404492032}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1582003754624}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1582003754624}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1582003754624}, {'type': 'perplexity', 'in_batch_dim': [8, 64], 'batch_size': 8, 'flops': 1265603017216}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1582003754624}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1898404492032}, {'type': 'perplexity', 'in_batch_dim': [8, 64], 'batch_size': 8, 'flops': 1265603017216}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1582003754624}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1898404492032}, {'type': 'perplexity', 'in_batch_dim': [8, 64], 'batch_size': 8, 'flops': 1265603017216}, {'type': 'perplexity', 'in_batch_dim': [8, 112], 'batch_size': 8, 'flops': 2214805229440}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1582003754624}, {'type': 'perplexity', 'in_batch_dim': [8, 144], 'batch_size': 8, 'flops': 2847606704256}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1582003754624}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1582003754624}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1582003754624}, {'type': 'perplexity', 'in_batch_dim': [8, 64], 'batch_size': 8, 'flops': 1265603017216}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1898404492032}, {'type': 'perplexity', 'in_batch_dim': [8, 112], 'batch_size': 8, 'flops': 2214805229440}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1898404492032}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1582003754624}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1582003754624}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1582003754624}, {'type': 'perplexity', 'in_batch_dim': [8, 112], 'batch_size': 8, 'flops': 2214805229440}, {'type': 'perplexity', 'in_batch_dim': [8, 64], 'batch_size': 8, 'flops': 1265603017216}, {'type': 'perplexity', 'in_batch_dim': [8, 112], 'batch_size': 8, 'flops': 2214805229440}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1582003754624}, {'type': 'perplexity', 'in_batch_dim': [8, 64], 'batch_size': 8, 'flops': 1265603017216}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1582003754624}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1582003754624}, {'type': 'perplexity', 'in_batch_dim': [8, 64], 'batch_size': 8, 'flops': 1265603017216}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1582003754624}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1582003754624}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1582003754624}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1898404492032}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1582003754624}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1582003754624}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1898404492032}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1898404492032}, {'type': 'perplexity', 'in_batch_dim': [8, 112], 'batch_size': 8, 'flops': 2214805229440}, {'type': 'perplexity', 'in_batch_dim': [8, 64], 'batch_size': 8, 'flops': 1265603017216}, {'type': 'perplexity', 'in_batch_dim': [8, 112], 'batch_size': 8, 'flops': 2214805229440}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1898404492032}, {'type': 'perplexity', 'in_batch_dim': [8, 112], 'batch_size': 8, 'flops': 2214805229440}, {'type': 'perplexity', 'in_batch_dim': [8, 128], 'batch_size': 8, 'flops': 2531205966848}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1582003754624}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1898404492032}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1898404492032}, {'type': 'perplexity', 'in_batch_dim': [8, 192], 'batch_size': 8, 'flops': 3796808916480}, {'type': 'perplexity', 'in_batch_dim': [8, 64], 'batch_size': 8, 'flops': 1265603017216}, {'type': 'perplexity', 'in_batch_dim': [8, 144], 'batch_size': 8, 'flops': 2847606704256}, {'type': 'perplexity', 'in_batch_dim': [8, 64], 'batch_size': 8, 'flops': 1265603017216}, {'type': 'perplexity', 'in_batch_dim': [8, 144], 'batch_size': 8, 'flops': 2847606704256}, {'type': 'perplexity', 'in_batch_dim': [8, 64], 'batch_size': 8, 'flops': 1265603017216}, {'type': 'perplexity', 'in_batch_dim': [8, 64], 'batch_size': 8, 'flops': 1265603017216}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1898404492032}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1898404492032}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1582003754624}, {'type': 'perplexity', 'in_batch_dim': [8, 128], 'batch_size': 8, 'flops': 2531205966848}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1898404492032}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1898404492032}, {'type': 'perplexity', 'in_batch_dim': [8, 112], 'batch_size': 8, 'flops': 2214805229440}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1582003754624}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1582003754624}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1898404492032}, {'type': 'perplexity', 'in_batch_dim': [8, 64], 'batch_size': 8, 'flops': 1265603017216}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1898404492032}, {'type': 'perplexity', 'in_batch_dim': [8, 112], 'batch_size': 8, 'flops': 2214805229440}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1898404492032}, {'type': 'perplexity', 'in_batch_dim': [8, 64], 'batch_size': 8, 'flops': 1265603017216}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1582003754624}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1898404492032}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1898404492032}, {'type': 'perplexity', 'in_batch_dim': [8, 64], 'batch_size': 8, 'flops': 1265603017216}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1898404492032}, {'type': 'perplexity', 'in_batch_dim': [8, 64], 'batch_size': 8, 'flops': 1265603017216}, {'type': 'perplexity', 'in_batch_dim': [8, 112], 'batch_size': 8, 'flops': 2214805229440}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1582003754624}, {'type': 'perplexity', 'in_batch_dim': [8, 128], 'batch_size': 8, 'flops': 2531205966848}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1582003754624}, {'type': 'perplexity', 'in_batch_dim': [8, 112], 'batch_size': 8, 'flops': 2214805229440}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1898404492032}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1582003754624}, {'type': 'perplexity', 'in_batch_dim': [8, 112], 'batch_size': 8, 'flops': 2214805229440}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1582003754624}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1898404492032}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1898404492032}, {'type': 'perplexity', 'in_batch_dim': [8, 128], 'batch_size': 8, 'flops': 2531205966848}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1898404492032}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1898404492032}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1582003754624}, {'type': 'perplexity', 'in_batch_dim': [8, 112], 'batch_size': 8, 'flops': 2214805229440}, {'type': 'perplexity', 'in_batch_dim': [8, 128], 'batch_size': 8, 'flops': 2531205966848}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1898404492032}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1582003754624}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1582003754624}, {'type': 'perplexity', 'in_batch_dim': [8, 128], 'batch_size': 8, 'flops': 2531205966848}, {'type': 'perplexity', 'in_batch_dim': [8, 112], 'batch_size': 8, 'flops': 2214805229440}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1582003754624}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1582003754624}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1898404492032}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1582003754624}, {'type': 'perplexity', 'in_batch_dim': [8, 64], 'batch_size': 8, 'flops': 1265603017216}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1582003754624}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1582003754624}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1582003754624}, {'type': 'perplexity', 'in_batch_dim': [8, 112], 'batch_size': 8, 'flops': 2214805229440}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1582003754624}, {'type': 'perplexity', 'in_batch_dim': [8, 64], 'batch_size': 8, 'flops': 1265603017216}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1582003754624}, {'type': 'perplexity', 'in_batch_dim': [8, 128], 'batch_size': 8, 'flops': 2531205966848}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1898404492032}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1898404492032}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1582003754624}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1898404492032}, {'type': 'perplexity', 'in_batch_dim': [8, 64], 'batch_size': 8, 'flops': 1265603017216}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1898404492032}, {'type': 'perplexity', 'in_batch_dim': [8, 112], 'batch_size': 8, 'flops': 2214805229440}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1898404492032}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1582003754624}, {'type': 'perplexity', 'in_batch_dim': [8, 112], 'batch_size': 8, 'flops': 2214805229440}, {'type': 'perplexity', 'in_batch_dim': [8, 112], 'batch_size': 8, 'flops': 2214805229440}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1898404492032}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1898404492032}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1898404492032}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1898404492032}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1582003754624}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1582003754624}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1582003754624}, {'type': 'perplexity', 'in_batch_dim': [8, 112], 'batch_size': 8, 'flops': 2214805229440}, {'type': 'perplexity', 'in_batch_dim': [8, 64], 'batch_size': 8, 'flops': 1265603017216}, {'type': 'perplexity', 'in_batch_dim': [8, 144], 'batch_size': 8, 'flops': 2847606704256}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1898404492032}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1898404492032}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1898404492032}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1898404492032}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1898404492032}, {'type': 'perplexity', 'in_batch_dim': [8, 112], 'batch_size': 8, 'flops': 2214805229440}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1582003754624}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1582003754624}, {'type': 'perplexity', 'in_batch_dim': [8, 64], 'batch_size': 8, 'flops': 1265603017216}, {'type': 'perplexity', 'in_batch_dim': [8, 112], 'batch_size': 8, 'flops': 2214805229440}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1582003754624}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1898404492032}, {'type': 'perplexity', 'in_batch_dim': [8, 64], 'batch_size': 8, 'flops': 1265603017216}, {'type': 'perplexity', 'in_batch_dim': [8, 112], 'batch_size': 8, 'flops': 2214805229440}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1898404492032}, {'type': 'perplexity', 'in_batch_dim': [8, 64], 'batch_size': 8, 'flops': 1265603017216}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1582003754624}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1582003754624}, {'type': 'perplexity', 'in_batch_dim': [8, 64], 'batch_size': 8, 'flops': 1265603017216}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1898404492032}, {'type': 'perplexity', 'in_batch_dim': [8, 64], 'batch_size': 8, 'flops': 1265603017216}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1582003754624}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1898404492032}, {'type': 'perplexity', 'in_batch_dim': [8, 112], 'batch_size': 8, 'flops': 2214805229440}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1582003754624}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1898404492032}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1582003754624}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1898404492032}, {'type': 'perplexity', 'in_batch_dim': [8, 112], 'batch_size': 8, 'flops': 2214805229440}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1898404492032}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1898404492032}, {'type': 'perplexity', 'in_batch_dim': [8, 64], 'batch_size': 8, 'flops': 1265603017216}, {'type': 'perplexity', 'in_batch_dim': [8, 64], 'batch_size': 8, 'flops': 1265603017216}, {'type': 'perplexity', 'in_batch_dim': [8, 128], 'batch_size': 8, 'flops': 2531205966848}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1582003754624}, {'type': 'perplexity', 'in_batch_dim': [8, 64], 'batch_size': 8, 'flops': 1265603017216}, {'type': 'perplexity', 'in_batch_dim': [8, 112], 'batch_size': 8, 'flops': 2214805229440}, {'type': 'perplexity', 'in_batch_dim': [8, 112], 'batch_size': 8, 'flops': 2214805229440}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1898404492032}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1582003754624}, {'type': 'perplexity', 'in_batch_dim': [8, 128], 'batch_size': 8, 'flops': 2531205966848}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1898404492032}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1582003754624}, {'type': 'perplexity', 'in_batch_dim': [8, 112], 'batch_size': 8, 'flops': 2214805229440}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1898404492032}, {'type': 'perplexity', 'in_batch_dim': [8, 64], 'batch_size': 8, 'flops': 1265603017216}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1582003754624}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1898404492032}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1898404492032}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1582003754624}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1582003754624}, {'type': 'perplexity', 'in_batch_dim': [8, 64], 'batch_size': 8, 'flops': 1265603017216}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1898404492032}, {'type': 'perplexity', 'in_batch_dim': [8, 112], 'batch_size': 8, 'flops': 2214805229440}, {'type': 'perplexity', 'in_batch_dim': [8, 64], 'batch_size': 8, 'flops': 1265603017216}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1898404492032}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1582003754624}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1582003754624}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1898404492032}, {'type': 'perplexity', 'in_batch_dim': [8, 112], 'batch_size': 8, 'flops': 2214805229440}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1898404492032}, {'type': 'perplexity', 'in_batch_dim': [8, 112], 'batch_size': 8, 'flops': 2214805229440}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1582003754624}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1582003754624}, {'type': 'perplexity', 'in_batch_dim': [8, 64], 'batch_size': 8, 'flops': 1265603017216}, {'type': 'perplexity', 'in_batch_dim': [8, 112], 'batch_size': 8, 'flops': 2214805229440}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1582003754624}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1898404492032}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1582003754624}, {'type': 'perplexity', 'in_batch_dim': [8, 112], 'batch_size': 8, 'flops': 2214805229440}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1898404492032}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1898404492032}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1582003754624}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1898404492032}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1582003754624}, {'type': 'perplexity', 'in_batch_dim': [8, 112], 'batch_size': 8, 'flops': 2214805229440}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1582003754624}, {'type': 'perplexity', 'in_batch_dim': [8, 64], 'batch_size': 8, 'flops': 1265603017216}, {'type': 'perplexity', 'in_batch_dim': [8, 64], 'batch_size': 8, 'flops': 1265603017216}, {'type': 'perplexity', 'in_batch_dim': [8, 48], 'batch_size': 8, 'flops': 949202279808}, {'type': 'perplexity', 'in_batch_dim': [8, 112], 'batch_size': 8, 'flops': 2214805229440}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1582003754624}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1898404492032}, {'type': 'perplexity', 'in_batch_dim': [8, 128], 'batch_size': 8, 'flops': 2531205966848}, {'type': 'perplexity', 'in_batch_dim': [8, 112], 'batch_size': 8, 'flops': 2214805229440}, {'type': 'perplexity', 'in_batch_dim': [8, 128], 'batch_size': 8, 'flops': 2531205966848}, {'type': 'perplexity', 'in_batch_dim': [8, 112], 'batch_size': 8, 'flops': 2214805229440}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1582003754624}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1898404492032}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1582003754624}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1582003754624}, {'type': 'perplexity', 'in_batch_dim': [8, 112], 'batch_size': 8, 'flops': 2214805229440}, {'type': 'perplexity', 'in_batch_dim': [8, 112], 'batch_size': 8, 'flops': 2214805229440}, {'type': 'perplexity', 'in_batch_dim': [8, 64], 'batch_size': 8, 'flops': 1265603017216}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1898404492032}, {'type': 'perplexity', 'in_batch_dim': [8, 112], 'batch_size': 8, 'flops': 2214805229440}, {'type': 'perplexity', 'in_batch_dim': [8, 64], 'batch_size': 8, 'flops': 1265603017216}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1898404492032}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1582003754624}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1898404492032}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1898404492032}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1898404492032}, {'type': 'perplexity', 'in_batch_dim': [8, 48], 'batch_size': 8, 'flops': 949202279808}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1898404492032}, {'type': 'perplexity', 'in_batch_dim': [8, 112], 'batch_size': 8, 'flops': 2214805229440}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1582003754624}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1898404492032}, {'type': 'perplexity', 'in_batch_dim': [8, 64], 'batch_size': 8, 'flops': 1265603017216}, {'type': 'perplexity', 'in_batch_dim': [8, 112], 'batch_size': 8, 'flops': 2214805229440}, {'type': 'perplexity', 'in_batch_dim': [8, 64], 'batch_size': 8, 'flops': 1265603017216}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1582003754624}, {'type': 'perplexity', 'in_batch_dim': [8, 64], 'batch_size': 8, 'flops': 1265603017216}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1582003754624}, {'type': 'perplexity', 'in_batch_dim': [8, 112], 'batch_size': 8, 'flops': 2214805229440}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1582003754624}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1582003754624}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1582003754624}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1582003754624}, {'type': 'perplexity', 'in_batch_dim': [8, 112], 'batch_size': 8, 'flops': 2214805229440}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1582003754624}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1898404492032}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1898404492032}, {'type': 'perplexity', 'in_batch_dim': [8, 112], 'batch_size': 8, 'flops': 2214805229440}, {'type': 'perplexity', 'in_batch_dim': [8, 144], 'batch_size': 8, 'flops': 2847606704256}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1582003754624}, {'type': 'perplexity', 'in_batch_dim': [8, 64], 'batch_size': 8, 'flops': 1265603017216}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1898404492032}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1898404492032}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1582003754624}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1898404492032}, {'type': 'perplexity', 'in_batch_dim': [8, 128], 'batch_size': 8, 'flops': 2531205966848}, {'type': 'perplexity', 'in_batch_dim': [8, 112], 'batch_size': 8, 'flops': 2214805229440}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1898404492032}, {'type': 'perplexity', 'in_batch_dim': [8, 64], 'batch_size': 8, 'flops': 1265603017216}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1898404492032}, {'type': 'perplexity', 'in_batch_dim': [8, 160], 'batch_size': 8, 'flops': 3164007441664}, {'type': 'perplexity', 'in_batch_dim': [8, 64], 'batch_size': 8, 'flops': 1265603017216}, {'type': 'perplexity', 'in_batch_dim': [8, 112], 'batch_size': 8, 'flops': 2214805229440}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1898404492032}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1898404492032}, {'type': 'perplexity', 'in_batch_dim': [8, 144], 'batch_size': 8, 'flops': 2847606704256}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1582003754624}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1898404492032}, {'type': 'perplexity', 'in_batch_dim': [8, 64], 'batch_size': 8, 'flops': 1265603017216}, {'type': 'perplexity', 'in_batch_dim': [8, 64], 'batch_size': 8, 'flops': 1265603017216}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1898404492032}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1898404492032}, {'type': 'perplexity', 'in_batch_dim': [8, 112], 'batch_size': 8, 'flops': 2214805229440}, {'type': 'perplexity', 'in_batch_dim': [8, 112], 'batch_size': 8, 'flops': 2214805229440}, {'type': 'perplexity', 'in_batch_dim': [8, 128], 'batch_size': 8, 'flops': 2531205966848}, {'type': 'perplexity', 'in_batch_dim': [8, 112], 'batch_size': 8, 'flops': 2214805229440}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1582003754624}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1582003754624}, {'type': 'perplexity', 'in_batch_dim': [8, 112], 'batch_size': 8, 'flops': 2214805229440}, {'type': 'perplexity', 'in_batch_dim': [8, 128], 'batch_size': 8, 'flops': 2531205966848}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1582003754624}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1582003754624}, {'type': 'perplexity', 'in_batch_dim': [8, 64], 'batch_size': 8, 'flops': 1265603017216}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1898404492032}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1582003754624}, {'type': 'perplexity', 'in_batch_dim': [8, 112], 'batch_size': 8, 'flops': 2214805229440}, {'type': 'perplexity', 'in_batch_dim': [8, 64], 'batch_size': 8, 'flops': 1265603017216}, {'type': 'perplexity', 'in_batch_dim': [8, 64], 'batch_size': 8, 'flops': 1265603017216}, {'type': 'perplexity', 'in_batch_dim': [8, 128], 'batch_size': 8, 'flops': 2531205966848}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1898404492032}, {'type': 'perplexity', 'in_batch_dim': [8, 112], 'batch_size': 8, 'flops': 2214805229440}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1582003754624}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1582003754624}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1582003754624}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1582003754624}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1582003754624}, {'type': 'perplexity', 'in_batch_dim': [8, 112], 'batch_size': 8, 'flops': 2214805229440}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1898404492032}, {'type': 'perplexity', 'in_batch_dim': [8, 112], 'batch_size': 8, 'flops': 2214805229440}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1898404492032}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1582003754624}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1582003754624}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1898404492032}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1898404492032}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1582003754624}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1582003754624}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1582003754624}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1582003754624}, {'type': 'perplexity', 'in_batch_dim': [8, 48], 'batch_size': 8, 'flops': 949202279808}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1582003754624}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1898404492032}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1582003754624}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1898404492032}, {'type': 'perplexity', 'in_batch_dim': [8, 112], 'batch_size': 8, 'flops': 2214805229440}, {'type': 'perplexity', 'in_batch_dim': [8, 128], 'batch_size': 8, 'flops': 2531205966848}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1898404492032}, {'type': 'perplexity', 'in_batch_dim': [8, 64], 'batch_size': 8, 'flops': 1265603017216}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1898404492032}, {'type': 'perplexity', 'in_batch_dim': [8, 64], 'batch_size': 8, 'flops': 1265603017216}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1582003754624}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1582003754624}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1582003754624}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1582003754624}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1898404492032}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1582003754624}, {'type': 'perplexity', 'in_batch_dim': [8, 112], 'batch_size': 8, 'flops': 2214805229440}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1898404492032}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1582003754624}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1898404492032}, {'type': 'perplexity', 'in_batch_dim': [8, 64], 'batch_size': 8, 'flops': 1265603017216}, {'type': 'perplexity', 'in_batch_dim': [8, 64], 'batch_size': 8, 'flops': 1265603017216}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1898404492032}, {'type': 'perplexity', 'in_batch_dim': [8, 64], 'batch_size': 8, 'flops': 1265603017216}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1582003754624}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1582003754624}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1582003754624}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1582003754624}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1582003754624}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1582003754624}, {'type': 'perplexity', 'in_batch_dim': [8, 64], 'batch_size': 8, 'flops': 1265603017216}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1582003754624}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1582003754624}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1898404492032}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1898404492032}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1898404492032}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1898404492032}, {'type': 'perplexity', 'in_batch_dim': [8, 128], 'batch_size': 8, 'flops': 2531205966848}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1582003754624}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1898404492032}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1898404492032}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1898404492032}, {'type': 'perplexity', 'in_batch_dim': [8, 64], 'batch_size': 8, 'flops': 1265603017216}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1898404492032}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1898404492032}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1582003754624}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1898404492032}, {'type': 'perplexity', 'in_batch_dim': [8, 112], 'batch_size': 8, 'flops': 2214805229440}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1898404492032}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1582003754624}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1898404492032}, {'type': 'perplexity', 'in_batch_dim': [8, 64], 'batch_size': 8, 'flops': 1265603017216}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1898404492032}, {'type': 'perplexity', 'in_batch_dim': [8, 112], 'batch_size': 8, 'flops': 2214805229440}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1898404492032}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1898404492032}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1582003754624}, {'type': 'perplexity', 'in_batch_dim': [8, 64], 'batch_size': 8, 'flops': 1265603017216}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1582003754624}, {'type': 'perplexity', 'in_batch_dim': [8, 128], 'batch_size': 8, 'flops': 2531205966848}, {'type': 'perplexity', 'in_batch_dim': [8, 64], 'batch_size': 8, 'flops': 1265603017216}, {'type': 'perplexity', 'in_batch_dim': [8, 128], 'batch_size': 8, 'flops': 2531205966848}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1898404492032}, {'type': 'perplexity', 'in_batch_dim': [8, 112], 'batch_size': 8, 'flops': 2214805229440}, {'type': 'perplexity', 'in_batch_dim': [8, 112], 'batch_size': 8, 'flops': 2214805229440}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1898404492032}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1898404492032}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1582003754624}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1582003754624}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1582003754624}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1898404492032}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1582003754624}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1582003754624}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1898404492032}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1582003754624}, {'type': 'perplexity', 'in_batch_dim': [8, 64], 'batch_size': 8, 'flops': 1265603017216}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1898404492032}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1898404492032}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1898404492032}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1898404492032}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1582003754624}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1898404492032}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1582003754624}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1582003754624}, {'type': 'perplexity', 'in_batch_dim': [8, 112], 'batch_size': 8, 'flops': 2214805229440}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1582003754624}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1898404492032}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1582003754624}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1582003754624}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1582003754624}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1898404492032}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1582003754624}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1898404492032}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1582003754624}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1582003754624}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1898404492032}, {'type': 'perplexity', 'in_batch_dim': [8, 112], 'batch_size': 8, 'flops': 2214805229440}, {'type': 'perplexity', 'in_batch_dim': [8, 128], 'batch_size': 8, 'flops': 2531205966848}, {'type': 'perplexity', 'in_batch_dim': [8, 112], 'batch_size': 8, 'flops': 2214805229440}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1582003754624}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1898404492032}, {'type': 'perplexity', 'in_batch_dim': [8, 64], 'batch_size': 8, 'flops': 1265603017216}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1898404492032}, {'type': 'perplexity', 'in_batch_dim': [8, 64], 'batch_size': 8, 'flops': 1265603017216}, {'type': 'perplexity', 'in_batch_dim': [8, 144], 'batch_size': 8, 'flops': 2847606704256}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1582003754624}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1898404492032}, {'type': 'perplexity', 'in_batch_dim': [8, 112], 'batch_size': 8, 'flops': 2214805229440}, {'type': 'perplexity', 'in_batch_dim': [8, 144], 'batch_size': 8, 'flops': 2847606704256}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1582003754624}, {'type': 'perplexity', 'in_batch_dim': [8, 112], 'batch_size': 8, 'flops': 2214805229440}, {'type': 'perplexity', 'in_batch_dim': [8, 64], 'batch_size': 8, 'flops': 1265603017216}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1582003754624}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1898404492032}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1898404492032}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1582003754624}, {'type': 'perplexity', 'in_batch_dim': [8, 112], 'batch_size': 8, 'flops': 2214805229440}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1582003754624}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1582003754624}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1582003754624}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1582003754624}, {'type': 'perplexity', 'in_batch_dim': [8, 112], 'batch_size': 8, 'flops': 2214805229440}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1898404492032}, {'type': 'perplexity', 'in_batch_dim': [8, 128], 'batch_size': 8, 'flops': 2531205966848}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1582003754624}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1582003754624}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1898404492032}, {'type': 'perplexity', 'in_batch_dim': [8, 64], 'batch_size': 8, 'flops': 1265603017216}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1898404492032}, {'type': 'perplexity', 'in_batch_dim': [8, 64], 'batch_size': 8, 'flops': 1265603017216}, {'type': 'perplexity', 'in_batch_dim': [8, 112], 'batch_size': 8, 'flops': 2214805229440}, {'type': 'perplexity', 'in_batch_dim': [8, 112], 'batch_size': 8, 'flops': 2214805229440}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1582003754624}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1898404492032}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1582003754624}, {'type': 'perplexity', 'in_batch_dim': [8, 64], 'batch_size': 8, 'flops': 1265603017216}, {'type': 'perplexity', 'in_batch_dim': [8, 64], 'batch_size': 8, 'flops': 1265603017216}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1898404492032}, {'type': 'perplexity', 'in_batch_dim': [8, 64], 'batch_size': 8, 'flops': 1265603017216}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1898404492032}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1898404492032}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1582003754624}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1898404492032}, {'type': 'perplexity', 'in_batch_dim': [8, 144], 'batch_size': 8, 'flops': 2847606704256}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1898404492032}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1582003754624}, {'type': 'perplexity', 'in_batch_dim': [8, 128], 'batch_size': 8, 'flops': 2531205966848}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1898404492032}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1898404492032}, {'type': 'perplexity', 'in_batch_dim': [8, 112], 'batch_size': 8, 'flops': 2214805229440}, {'type': 'perplexity', 'in_batch_dim': [8, 112], 'batch_size': 8, 'flops': 2214805229440}, {'type': 'perplexity', 'in_batch_dim': [8, 64], 'batch_size': 8, 'flops': 1265603017216}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1582003754624}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1898404492032}, {'type': 'perplexity', 'in_batch_dim': [8, 128], 'batch_size': 8, 'flops': 2531205966848}, {'type': 'perplexity', 'in_batch_dim': [8, 144], 'batch_size': 8, 'flops': 2847606704256}, {'type': 'perplexity', 'in_batch_dim': [8, 112], 'batch_size': 8, 'flops': 2214805229440}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1582003754624}, {'type': 'perplexity', 'in_batch_dim': [8, 64], 'batch_size': 8, 'flops': 1265603017216}, {'type': 'perplexity', 'in_batch_dim': [8, 64], 'batch_size': 8, 'flops': 1265603017216}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1898404492032}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1582003754624}, {'type': 'perplexity', 'in_batch_dim': [8, 64], 'batch_size': 8, 'flops': 1265603017216}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1898404492032}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1582003754624}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1898404492032}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1898404492032}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1582003754624}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1582003754624}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1582003754624}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1582003754624}, {'type': 'perplexity', 'in_batch_dim': [3, 48], 'batch_size': 8, 'flops': 949202279808}], 'timestamp': '2025-09-10 02:52:15.168145', 'step': 22368, 'epoch': 3} {'type': 'pplx', 'content': 7880.964903962803, 'timestamp': '2025-09-10 02:52:15.171386', 'step': 22368, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 160], 'flops': 4746299996032}, 'timestamp': '2025-09-10 02:52:15.200905', 'step': 22368, 'epoch': 3} {'type': 'loss', 'content': 0.07784989476203918, 'timestamp': '2025-09-10 02:52:15.203377', 'step': 22369, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 3322488817984}, 'timestamp': '2025-09-10 02:52:15.234446', 'step': 22369, 'epoch': 3} {'type': 'loss', 'content': 0.06055011972784996, 'timestamp': '2025-09-10 02:52:15.236646', 'step': 22370, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 2847885091968}, 'timestamp': '2025-09-10 02:52:15.267600', 'step': 22370, 'epoch': 3} {'type': 'loss', 'content': 0.04601346328854561, 'timestamp': '2025-09-10 02:52:15.270344', 'step': 22371, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 3322488817984}, 'timestamp': '2025-09-10 02:52:15.300568', 'step': 22371, 'epoch': 3} {'type': 'loss', 'content': 0.008108601905405521, 'timestamp': '2025-09-10 02:52:15.324231', 'step': 22372, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 2847885091968}, 'timestamp': '2025-09-10 02:52:15.353940', 'step': 22372, 'epoch': 3} {'type': 'loss', 'content': 0.040856681764125824, 'timestamp': '2025-09-10 02:52:15.356245', 'step': 22373, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 3322488817984}, 'timestamp': '2025-09-10 02:52:15.386416', 'step': 22373, 'epoch': 3} {'type': 'loss', 'content': 0.023193612694740295, 'timestamp': '2025-09-10 02:52:15.388794', 'step': 22374, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 80], 'flops': 2373281365952}, 'timestamp': '2025-09-10 02:52:15.418760', 'step': 22374, 'epoch': 3} {'type': 'loss', 'content': 0.1325654536485672, 'timestamp': '2025-09-10 02:52:15.420998', 'step': 22375, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 128], 'flops': 3797092544000}, 'timestamp': '2025-09-10 02:52:15.450214', 'step': 22375, 'epoch': 3} {'type': 'loss', 'content': 0.023044990375638008, 'timestamp': '2025-09-10 02:52:15.474087', 'step': 22376, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 3322488817984}, 'timestamp': '2025-09-10 02:52:15.504172', 'step': 22376, 'epoch': 3} {'type': 'loss', 'content': 0.060997702181339264, 'timestamp': '2025-09-10 02:52:15.506532', 'step': 22377, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 3322488817984}, 'timestamp': '2025-09-10 02:52:15.538690', 'step': 22377, 'epoch': 3} {'type': 'loss', 'content': 0.023325394839048386, 'timestamp': '2025-09-10 02:52:15.541259', 'step': 22378, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 2847885091968}, 'timestamp': '2025-09-10 02:52:15.571341', 'step': 22378, 'epoch': 3} {'type': 'loss', 'content': 0.03711457550525665, 'timestamp': '2025-09-10 02:52:15.573940', 'step': 22379, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [1, 208], 'flops': 1542724875376}, 'timestamp': '2025-09-10 02:52:15.603760', 'step': 22379, 'epoch': 3} {'type': 'loss', 'content': 0.000164338547619991, 'timestamp': '2025-09-10 02:52:15.627016', 'step': 22380, 'epoch': 3} {'type': 'flops', 'content': [{'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1582003754624}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1898404492032}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1898404492032}, {'type': 'perplexity', 'in_batch_dim': [8, 144], 'batch_size': 8, 'flops': 2847606704256}, {'type': 'perplexity', 'in_batch_dim': [8, 64], 'batch_size': 8, 'flops': 1265603017216}, {'type': 'perplexity', 'in_batch_dim': [8, 112], 'batch_size': 8, 'flops': 2214805229440}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1898404492032}, {'type': 'perplexity', 'in_batch_dim': [8, 112], 'batch_size': 8, 'flops': 2214805229440}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1898404492032}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1898404492032}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1898404492032}, {'type': 'perplexity', 'in_batch_dim': [8, 112], 'batch_size': 8, 'flops': 2214805229440}, {'type': 'perplexity', 'in_batch_dim': [8, 112], 'batch_size': 8, 'flops': 2214805229440}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1582003754624}, {'type': 'perplexity', 'in_batch_dim': [8, 144], 'batch_size': 8, 'flops': 2847606704256}, {'type': 'perplexity', 'in_batch_dim': [8, 112], 'batch_size': 8, 'flops': 2214805229440}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1582003754624}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1898404492032}, {'type': 'perplexity', 'in_batch_dim': [8, 64], 'batch_size': 8, 'flops': 1265603017216}, {'type': 'perplexity', 'in_batch_dim': [8, 64], 'batch_size': 8, 'flops': 1265603017216}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1898404492032}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1582003754624}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1582003754624}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1582003754624}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1582003754624}, {'type': 'perplexity', 'in_batch_dim': [8, 64], 'batch_size': 8, 'flops': 1265603017216}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1898404492032}, {'type': 'perplexity', 'in_batch_dim': [8, 64], 'batch_size': 8, 'flops': 1265603017216}, {'type': 'perplexity', 'in_batch_dim': [8, 64], 'batch_size': 8, 'flops': 1265603017216}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1582003754624}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1582003754624}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1582003754624}, {'type': 'perplexity', 'in_batch_dim': [8, 112], 'batch_size': 8, 'flops': 2214805229440}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1582003754624}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1582003754624}, {'type': 'perplexity', 'in_batch_dim': [8, 160], 'batch_size': 8, 'flops': 3164007441664}, {'type': 'perplexity', 'in_batch_dim': [8, 64], 'batch_size': 8, 'flops': 1265603017216}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1898404492032}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1898404492032}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1898404492032}, {'type': 'perplexity', 'in_batch_dim': [8, 64], 'batch_size': 8, 'flops': 1265603017216}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1582003754624}, {'type': 'perplexity', 'in_batch_dim': [8, 64], 'batch_size': 8, 'flops': 1265603017216}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1582003754624}, {'type': 'perplexity', 'in_batch_dim': [8, 64], 'batch_size': 8, 'flops': 1265603017216}, {'type': 'perplexity', 'in_batch_dim': [8, 112], 'batch_size': 8, 'flops': 2214805229440}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1582003754624}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1898404492032}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1898404492032}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1898404492032}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1898404492032}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1898404492032}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1898404492032}, {'type': 'perplexity', 'in_batch_dim': [8, 64], 'batch_size': 8, 'flops': 1265603017216}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1898404492032}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1582003754624}, {'type': 'perplexity', 'in_batch_dim': [8, 64], 'batch_size': 8, 'flops': 1265603017216}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1898404492032}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1898404492032}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1898404492032}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1898404492032}, {'type': 'perplexity', 'in_batch_dim': [8, 128], 'batch_size': 8, 'flops': 2531205966848}, {'type': 'perplexity', 'in_batch_dim': [8, 112], 'batch_size': 8, 'flops': 2214805229440}, {'type': 'perplexity', 'in_batch_dim': [8, 64], 'batch_size': 8, 'flops': 1265603017216}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1582003754624}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1898404492032}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1582003754624}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1898404492032}, {'type': 'perplexity', 'in_batch_dim': [8, 64], 'batch_size': 8, 'flops': 1265603017216}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1898404492032}, {'type': 'perplexity', 'in_batch_dim': [8, 112], 'batch_size': 8, 'flops': 2214805229440}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1898404492032}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1582003754624}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1582003754624}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1582003754624}, {'type': 'perplexity', 'in_batch_dim': [8, 64], 'batch_size': 8, 'flops': 1265603017216}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1582003754624}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1898404492032}, {'type': 'perplexity', 'in_batch_dim': [8, 64], 'batch_size': 8, 'flops': 1265603017216}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1582003754624}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1898404492032}, {'type': 'perplexity', 'in_batch_dim': [8, 64], 'batch_size': 8, 'flops': 1265603017216}, {'type': 'perplexity', 'in_batch_dim': [8, 112], 'batch_size': 8, 'flops': 2214805229440}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1582003754624}, {'type': 'perplexity', 'in_batch_dim': [8, 144], 'batch_size': 8, 'flops': 2847606704256}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1582003754624}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1582003754624}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1582003754624}, {'type': 'perplexity', 'in_batch_dim': [8, 64], 'batch_size': 8, 'flops': 1265603017216}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1898404492032}, {'type': 'perplexity', 'in_batch_dim': [8, 112], 'batch_size': 8, 'flops': 2214805229440}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1898404492032}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1582003754624}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1582003754624}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1582003754624}, {'type': 'perplexity', 'in_batch_dim': [8, 112], 'batch_size': 8, 'flops': 2214805229440}, {'type': 'perplexity', 'in_batch_dim': [8, 64], 'batch_size': 8, 'flops': 1265603017216}, {'type': 'perplexity', 'in_batch_dim': [8, 112], 'batch_size': 8, 'flops': 2214805229440}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1582003754624}, {'type': 'perplexity', 'in_batch_dim': [8, 64], 'batch_size': 8, 'flops': 1265603017216}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1582003754624}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1582003754624}, {'type': 'perplexity', 'in_batch_dim': [8, 64], 'batch_size': 8, 'flops': 1265603017216}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1582003754624}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1582003754624}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1582003754624}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1898404492032}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1582003754624}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1582003754624}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1898404492032}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1898404492032}, {'type': 'perplexity', 'in_batch_dim': [8, 112], 'batch_size': 8, 'flops': 2214805229440}, {'type': 'perplexity', 'in_batch_dim': [8, 64], 'batch_size': 8, 'flops': 1265603017216}, {'type': 'perplexity', 'in_batch_dim': [8, 112], 'batch_size': 8, 'flops': 2214805229440}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1898404492032}, {'type': 'perplexity', 'in_batch_dim': [8, 112], 'batch_size': 8, 'flops': 2214805229440}, {'type': 'perplexity', 'in_batch_dim': [8, 128], 'batch_size': 8, 'flops': 2531205966848}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1582003754624}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1898404492032}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1898404492032}, {'type': 'perplexity', 'in_batch_dim': [8, 192], 'batch_size': 8, 'flops': 3796808916480}, {'type': 'perplexity', 'in_batch_dim': [8, 64], 'batch_size': 8, 'flops': 1265603017216}, {'type': 'perplexity', 'in_batch_dim': [8, 144], 'batch_size': 8, 'flops': 2847606704256}, {'type': 'perplexity', 'in_batch_dim': [8, 64], 'batch_size': 8, 'flops': 1265603017216}, {'type': 'perplexity', 'in_batch_dim': [8, 144], 'batch_size': 8, 'flops': 2847606704256}, {'type': 'perplexity', 'in_batch_dim': [8, 64], 'batch_size': 8, 'flops': 1265603017216}, {'type': 'perplexity', 'in_batch_dim': [8, 64], 'batch_size': 8, 'flops': 1265603017216}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1898404492032}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1898404492032}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1582003754624}, {'type': 'perplexity', 'in_batch_dim': [8, 128], 'batch_size': 8, 'flops': 2531205966848}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1898404492032}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1898404492032}, {'type': 'perplexity', 'in_batch_dim': [8, 112], 'batch_size': 8, 'flops': 2214805229440}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1582003754624}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1582003754624}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1898404492032}, {'type': 'perplexity', 'in_batch_dim': [8, 64], 'batch_size': 8, 'flops': 1265603017216}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1898404492032}, {'type': 'perplexity', 'in_batch_dim': [8, 112], 'batch_size': 8, 'flops': 2214805229440}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1898404492032}, {'type': 'perplexity', 'in_batch_dim': [8, 64], 'batch_size': 8, 'flops': 1265603017216}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1582003754624}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1898404492032}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1898404492032}, {'type': 'perplexity', 'in_batch_dim': [8, 64], 'batch_size': 8, 'flops': 1265603017216}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1898404492032}, {'type': 'perplexity', 'in_batch_dim': [8, 64], 'batch_size': 8, 'flops': 1265603017216}, {'type': 'perplexity', 'in_batch_dim': [8, 112], 'batch_size': 8, 'flops': 2214805229440}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1582003754624}, {'type': 'perplexity', 'in_batch_dim': [8, 128], 'batch_size': 8, 'flops': 2531205966848}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1582003754624}, {'type': 'perplexity', 'in_batch_dim': [8, 112], 'batch_size': 8, 'flops': 2214805229440}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1898404492032}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1582003754624}, {'type': 'perplexity', 'in_batch_dim': [8, 112], 'batch_size': 8, 'flops': 2214805229440}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1582003754624}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1898404492032}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1898404492032}, {'type': 'perplexity', 'in_batch_dim': [8, 128], 'batch_size': 8, 'flops': 2531205966848}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1898404492032}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1898404492032}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1582003754624}, {'type': 'perplexity', 'in_batch_dim': [8, 112], 'batch_size': 8, 'flops': 2214805229440}, {'type': 'perplexity', 'in_batch_dim': [8, 128], 'batch_size': 8, 'flops': 2531205966848}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1898404492032}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1582003754624}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1582003754624}, {'type': 'perplexity', 'in_batch_dim': [8, 128], 'batch_size': 8, 'flops': 2531205966848}, {'type': 'perplexity', 'in_batch_dim': [8, 112], 'batch_size': 8, 'flops': 2214805229440}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1582003754624}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1582003754624}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1898404492032}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1582003754624}, {'type': 'perplexity', 'in_batch_dim': [8, 64], 'batch_size': 8, 'flops': 1265603017216}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1582003754624}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1582003754624}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1582003754624}, {'type': 'perplexity', 'in_batch_dim': [8, 112], 'batch_size': 8, 'flops': 2214805229440}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1582003754624}, {'type': 'perplexity', 'in_batch_dim': [8, 64], 'batch_size': 8, 'flops': 1265603017216}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1582003754624}, {'type': 'perplexity', 'in_batch_dim': [8, 128], 'batch_size': 8, 'flops': 2531205966848}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1898404492032}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1898404492032}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1582003754624}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1898404492032}, {'type': 'perplexity', 'in_batch_dim': [8, 64], 'batch_size': 8, 'flops': 1265603017216}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1898404492032}, {'type': 'perplexity', 'in_batch_dim': [8, 112], 'batch_size': 8, 'flops': 2214805229440}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1898404492032}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1582003754624}, {'type': 'perplexity', 'in_batch_dim': [8, 112], 'batch_size': 8, 'flops': 2214805229440}, {'type': 'perplexity', 'in_batch_dim': [8, 112], 'batch_size': 8, 'flops': 2214805229440}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1898404492032}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1898404492032}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1898404492032}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1898404492032}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1582003754624}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1582003754624}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1582003754624}, {'type': 'perplexity', 'in_batch_dim': [8, 112], 'batch_size': 8, 'flops': 2214805229440}, {'type': 'perplexity', 'in_batch_dim': [8, 64], 'batch_size': 8, 'flops': 1265603017216}, {'type': 'perplexity', 'in_batch_dim': [8, 144], 'batch_size': 8, 'flops': 2847606704256}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1898404492032}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1898404492032}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1898404492032}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1898404492032}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1898404492032}, {'type': 'perplexity', 'in_batch_dim': [8, 112], 'batch_size': 8, 'flops': 2214805229440}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1582003754624}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1582003754624}, {'type': 'perplexity', 'in_batch_dim': [8, 64], 'batch_size': 8, 'flops': 1265603017216}, {'type': 'perplexity', 'in_batch_dim': [8, 112], 'batch_size': 8, 'flops': 2214805229440}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1582003754624}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1898404492032}, {'type': 'perplexity', 'in_batch_dim': [8, 64], 'batch_size': 8, 'flops': 1265603017216}, {'type': 'perplexity', 'in_batch_dim': [8, 112], 'batch_size': 8, 'flops': 2214805229440}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1898404492032}, {'type': 'perplexity', 'in_batch_dim': [8, 64], 'batch_size': 8, 'flops': 1265603017216}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1582003754624}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1582003754624}, {'type': 'perplexity', 'in_batch_dim': [8, 64], 'batch_size': 8, 'flops': 1265603017216}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1898404492032}, {'type': 'perplexity', 'in_batch_dim': [8, 64], 'batch_size': 8, 'flops': 1265603017216}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1582003754624}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1898404492032}, {'type': 'perplexity', 'in_batch_dim': [8, 112], 'batch_size': 8, 'flops': 2214805229440}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1582003754624}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1898404492032}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1582003754624}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1898404492032}, {'type': 'perplexity', 'in_batch_dim': [8, 112], 'batch_size': 8, 'flops': 2214805229440}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1898404492032}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1898404492032}, {'type': 'perplexity', 'in_batch_dim': [8, 64], 'batch_size': 8, 'flops': 1265603017216}, {'type': 'perplexity', 'in_batch_dim': [8, 64], 'batch_size': 8, 'flops': 1265603017216}, {'type': 'perplexity', 'in_batch_dim': [8, 128], 'batch_size': 8, 'flops': 2531205966848}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1582003754624}, {'type': 'perplexity', 'in_batch_dim': [8, 64], 'batch_size': 8, 'flops': 1265603017216}, {'type': 'perplexity', 'in_batch_dim': [8, 112], 'batch_size': 8, 'flops': 2214805229440}, {'type': 'perplexity', 'in_batch_dim': [8, 112], 'batch_size': 8, 'flops': 2214805229440}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1898404492032}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1582003754624}, {'type': 'perplexity', 'in_batch_dim': [8, 128], 'batch_size': 8, 'flops': 2531205966848}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1898404492032}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1582003754624}, {'type': 'perplexity', 'in_batch_dim': [8, 112], 'batch_size': 8, 'flops': 2214805229440}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1898404492032}, {'type': 'perplexity', 'in_batch_dim': [8, 64], 'batch_size': 8, 'flops': 1265603017216}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1582003754624}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1898404492032}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1898404492032}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1582003754624}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1582003754624}, {'type': 'perplexity', 'in_batch_dim': [8, 64], 'batch_size': 8, 'flops': 1265603017216}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1898404492032}, {'type': 'perplexity', 'in_batch_dim': [8, 112], 'batch_size': 8, 'flops': 2214805229440}, {'type': 'perplexity', 'in_batch_dim': [8, 64], 'batch_size': 8, 'flops': 1265603017216}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1898404492032}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1582003754624}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1582003754624}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1898404492032}, {'type': 'perplexity', 'in_batch_dim': [8, 112], 'batch_size': 8, 'flops': 2214805229440}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1898404492032}, {'type': 'perplexity', 'in_batch_dim': [8, 112], 'batch_size': 8, 'flops': 2214805229440}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1582003754624}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1582003754624}, {'type': 'perplexity', 'in_batch_dim': [8, 64], 'batch_size': 8, 'flops': 1265603017216}, {'type': 'perplexity', 'in_batch_dim': [8, 112], 'batch_size': 8, 'flops': 2214805229440}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1582003754624}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1898404492032}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1582003754624}, {'type': 'perplexity', 'in_batch_dim': [8, 112], 'batch_size': 8, 'flops': 2214805229440}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1898404492032}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1898404492032}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1582003754624}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1898404492032}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1582003754624}, {'type': 'perplexity', 'in_batch_dim': [8, 112], 'batch_size': 8, 'flops': 2214805229440}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1582003754624}, {'type': 'perplexity', 'in_batch_dim': [8, 64], 'batch_size': 8, 'flops': 1265603017216}, {'type': 'perplexity', 'in_batch_dim': [8, 64], 'batch_size': 8, 'flops': 1265603017216}, {'type': 'perplexity', 'in_batch_dim': [8, 48], 'batch_size': 8, 'flops': 949202279808}, {'type': 'perplexity', 'in_batch_dim': [8, 112], 'batch_size': 8, 'flops': 2214805229440}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1582003754624}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1898404492032}, {'type': 'perplexity', 'in_batch_dim': [8, 128], 'batch_size': 8, 'flops': 2531205966848}, {'type': 'perplexity', 'in_batch_dim': [8, 112], 'batch_size': 8, 'flops': 2214805229440}, {'type': 'perplexity', 'in_batch_dim': [8, 128], 'batch_size': 8, 'flops': 2531205966848}, {'type': 'perplexity', 'in_batch_dim': [8, 112], 'batch_size': 8, 'flops': 2214805229440}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1582003754624}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1898404492032}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1582003754624}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1582003754624}, {'type': 'perplexity', 'in_batch_dim': [8, 112], 'batch_size': 8, 'flops': 2214805229440}, {'type': 'perplexity', 'in_batch_dim': [8, 112], 'batch_size': 8, 'flops': 2214805229440}, {'type': 'perplexity', 'in_batch_dim': [8, 64], 'batch_size': 8, 'flops': 1265603017216}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1898404492032}, {'type': 'perplexity', 'in_batch_dim': [8, 112], 'batch_size': 8, 'flops': 2214805229440}, {'type': 'perplexity', 'in_batch_dim': [8, 64], 'batch_size': 8, 'flops': 1265603017216}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1898404492032}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1582003754624}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1898404492032}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1898404492032}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1898404492032}, {'type': 'perplexity', 'in_batch_dim': [8, 48], 'batch_size': 8, 'flops': 949202279808}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1898404492032}, {'type': 'perplexity', 'in_batch_dim': [8, 112], 'batch_size': 8, 'flops': 2214805229440}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1582003754624}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1898404492032}, {'type': 'perplexity', 'in_batch_dim': [8, 64], 'batch_size': 8, 'flops': 1265603017216}, {'type': 'perplexity', 'in_batch_dim': [8, 112], 'batch_size': 8, 'flops': 2214805229440}, {'type': 'perplexity', 'in_batch_dim': [8, 64], 'batch_size': 8, 'flops': 1265603017216}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1582003754624}, {'type': 'perplexity', 'in_batch_dim': [8, 64], 'batch_size': 8, 'flops': 1265603017216}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1582003754624}, {'type': 'perplexity', 'in_batch_dim': [8, 112], 'batch_size': 8, 'flops': 2214805229440}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1582003754624}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1582003754624}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1582003754624}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1582003754624}, {'type': 'perplexity', 'in_batch_dim': [8, 112], 'batch_size': 8, 'flops': 2214805229440}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1582003754624}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1898404492032}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1898404492032}, {'type': 'perplexity', 'in_batch_dim': [8, 112], 'batch_size': 8, 'flops': 2214805229440}, {'type': 'perplexity', 'in_batch_dim': [8, 144], 'batch_size': 8, 'flops': 2847606704256}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1582003754624}, {'type': 'perplexity', 'in_batch_dim': [8, 64], 'batch_size': 8, 'flops': 1265603017216}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1898404492032}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1898404492032}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1582003754624}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1898404492032}, {'type': 'perplexity', 'in_batch_dim': [8, 128], 'batch_size': 8, 'flops': 2531205966848}, {'type': 'perplexity', 'in_batch_dim': [8, 112], 'batch_size': 8, 'flops': 2214805229440}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1898404492032}, {'type': 'perplexity', 'in_batch_dim': [8, 64], 'batch_size': 8, 'flops': 1265603017216}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1898404492032}, {'type': 'perplexity', 'in_batch_dim': [8, 160], 'batch_size': 8, 'flops': 3164007441664}, {'type': 'perplexity', 'in_batch_dim': [8, 64], 'batch_size': 8, 'flops': 1265603017216}, {'type': 'perplexity', 'in_batch_dim': [8, 112], 'batch_size': 8, 'flops': 2214805229440}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1898404492032}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1898404492032}, {'type': 'perplexity', 'in_batch_dim': [8, 144], 'batch_size': 8, 'flops': 2847606704256}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1582003754624}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1898404492032}, {'type': 'perplexity', 'in_batch_dim': [8, 64], 'batch_size': 8, 'flops': 1265603017216}, {'type': 'perplexity', 'in_batch_dim': [8, 64], 'batch_size': 8, 'flops': 1265603017216}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1898404492032}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1898404492032}, {'type': 'perplexity', 'in_batch_dim': [8, 112], 'batch_size': 8, 'flops': 2214805229440}, {'type': 'perplexity', 'in_batch_dim': [8, 112], 'batch_size': 8, 'flops': 2214805229440}, {'type': 'perplexity', 'in_batch_dim': [8, 128], 'batch_size': 8, 'flops': 2531205966848}, {'type': 'perplexity', 'in_batch_dim': [8, 112], 'batch_size': 8, 'flops': 2214805229440}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1582003754624}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1582003754624}, {'type': 'perplexity', 'in_batch_dim': [8, 112], 'batch_size': 8, 'flops': 2214805229440}, {'type': 'perplexity', 'in_batch_dim': [8, 128], 'batch_size': 8, 'flops': 2531205966848}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1582003754624}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1582003754624}, {'type': 'perplexity', 'in_batch_dim': [8, 64], 'batch_size': 8, 'flops': 1265603017216}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1898404492032}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1582003754624}, {'type': 'perplexity', 'in_batch_dim': [8, 112], 'batch_size': 8, 'flops': 2214805229440}, {'type': 'perplexity', 'in_batch_dim': [8, 64], 'batch_size': 8, 'flops': 1265603017216}, {'type': 'perplexity', 'in_batch_dim': [8, 64], 'batch_size': 8, 'flops': 1265603017216}, {'type': 'perplexity', 'in_batch_dim': [8, 128], 'batch_size': 8, 'flops': 2531205966848}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1898404492032}, {'type': 'perplexity', 'in_batch_dim': [8, 112], 'batch_size': 8, 'flops': 2214805229440}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1582003754624}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1582003754624}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1582003754624}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1582003754624}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1582003754624}, {'type': 'perplexity', 'in_batch_dim': [8, 112], 'batch_size': 8, 'flops': 2214805229440}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1898404492032}, {'type': 'perplexity', 'in_batch_dim': [8, 112], 'batch_size': 8, 'flops': 2214805229440}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1898404492032}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1582003754624}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1582003754624}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1898404492032}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1898404492032}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1582003754624}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1582003754624}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1582003754624}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1582003754624}, {'type': 'perplexity', 'in_batch_dim': [8, 48], 'batch_size': 8, 'flops': 949202279808}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1582003754624}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1898404492032}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1582003754624}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1898404492032}, {'type': 'perplexity', 'in_batch_dim': [8, 112], 'batch_size': 8, 'flops': 2214805229440}, {'type': 'perplexity', 'in_batch_dim': [8, 128], 'batch_size': 8, 'flops': 2531205966848}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1898404492032}, {'type': 'perplexity', 'in_batch_dim': [8, 64], 'batch_size': 8, 'flops': 1265603017216}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1898404492032}, {'type': 'perplexity', 'in_batch_dim': [8, 64], 'batch_size': 8, 'flops': 1265603017216}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1582003754624}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1582003754624}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1582003754624}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1582003754624}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1898404492032}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1582003754624}, {'type': 'perplexity', 'in_batch_dim': [8, 112], 'batch_size': 8, 'flops': 2214805229440}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1898404492032}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1582003754624}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1898404492032}, {'type': 'perplexity', 'in_batch_dim': [8, 64], 'batch_size': 8, 'flops': 1265603017216}, {'type': 'perplexity', 'in_batch_dim': [8, 64], 'batch_size': 8, 'flops': 1265603017216}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1898404492032}, {'type': 'perplexity', 'in_batch_dim': [8, 64], 'batch_size': 8, 'flops': 1265603017216}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1582003754624}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1582003754624}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1582003754624}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1582003754624}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1582003754624}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1582003754624}, {'type': 'perplexity', 'in_batch_dim': [8, 64], 'batch_size': 8, 'flops': 1265603017216}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1582003754624}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1582003754624}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1898404492032}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1898404492032}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1898404492032}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1898404492032}, {'type': 'perplexity', 'in_batch_dim': [8, 128], 'batch_size': 8, 'flops': 2531205966848}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1582003754624}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1898404492032}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1898404492032}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1898404492032}, {'type': 'perplexity', 'in_batch_dim': [8, 64], 'batch_size': 8, 'flops': 1265603017216}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1898404492032}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1898404492032}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1582003754624}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1898404492032}, {'type': 'perplexity', 'in_batch_dim': [8, 112], 'batch_size': 8, 'flops': 2214805229440}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1898404492032}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1582003754624}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1898404492032}, {'type': 'perplexity', 'in_batch_dim': [8, 64], 'batch_size': 8, 'flops': 1265603017216}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1898404492032}, {'type': 'perplexity', 'in_batch_dim': [8, 112], 'batch_size': 8, 'flops': 2214805229440}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1898404492032}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1898404492032}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1582003754624}, {'type': 'perplexity', 'in_batch_dim': [8, 64], 'batch_size': 8, 'flops': 1265603017216}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1582003754624}, {'type': 'perplexity', 'in_batch_dim': [8, 128], 'batch_size': 8, 'flops': 2531205966848}, {'type': 'perplexity', 'in_batch_dim': [8, 64], 'batch_size': 8, 'flops': 1265603017216}, {'type': 'perplexity', 'in_batch_dim': [8, 128], 'batch_size': 8, 'flops': 2531205966848}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1898404492032}, {'type': 'perplexity', 'in_batch_dim': [8, 112], 'batch_size': 8, 'flops': 2214805229440}, {'type': 'perplexity', 'in_batch_dim': [8, 112], 'batch_size': 8, 'flops': 2214805229440}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1898404492032}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1898404492032}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1582003754624}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1582003754624}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1582003754624}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1898404492032}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1582003754624}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1582003754624}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1898404492032}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1582003754624}, {'type': 'perplexity', 'in_batch_dim': [8, 64], 'batch_size': 8, 'flops': 1265603017216}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1898404492032}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1898404492032}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1898404492032}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1898404492032}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1582003754624}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1898404492032}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1582003754624}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1582003754624}, {'type': 'perplexity', 'in_batch_dim': [8, 112], 'batch_size': 8, 'flops': 2214805229440}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1582003754624}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1898404492032}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1582003754624}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1582003754624}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1582003754624}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1898404492032}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1582003754624}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1898404492032}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1582003754624}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1582003754624}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1898404492032}, {'type': 'perplexity', 'in_batch_dim': [8, 112], 'batch_size': 8, 'flops': 2214805229440}, {'type': 'perplexity', 'in_batch_dim': [8, 128], 'batch_size': 8, 'flops': 2531205966848}, {'type': 'perplexity', 'in_batch_dim': [8, 112], 'batch_size': 8, 'flops': 2214805229440}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1582003754624}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1898404492032}, {'type': 'perplexity', 'in_batch_dim': [8, 64], 'batch_size': 8, 'flops': 1265603017216}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1898404492032}, {'type': 'perplexity', 'in_batch_dim': [8, 64], 'batch_size': 8, 'flops': 1265603017216}, {'type': 'perplexity', 'in_batch_dim': [8, 144], 'batch_size': 8, 'flops': 2847606704256}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1582003754624}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1898404492032}, {'type': 'perplexity', 'in_batch_dim': [8, 112], 'batch_size': 8, 'flops': 2214805229440}, {'type': 'perplexity', 'in_batch_dim': [8, 144], 'batch_size': 8, 'flops': 2847606704256}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1582003754624}, {'type': 'perplexity', 'in_batch_dim': [8, 112], 'batch_size': 8, 'flops': 2214805229440}, {'type': 'perplexity', 'in_batch_dim': [8, 64], 'batch_size': 8, 'flops': 1265603017216}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1582003754624}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1898404492032}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1898404492032}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1582003754624}, {'type': 'perplexity', 'in_batch_dim': [8, 112], 'batch_size': 8, 'flops': 2214805229440}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1582003754624}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1582003754624}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1582003754624}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1582003754624}, {'type': 'perplexity', 'in_batch_dim': [8, 112], 'batch_size': 8, 'flops': 2214805229440}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1898404492032}, {'type': 'perplexity', 'in_batch_dim': [8, 128], 'batch_size': 8, 'flops': 2531205966848}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1582003754624}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1582003754624}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1898404492032}, {'type': 'perplexity', 'in_batch_dim': [8, 64], 'batch_size': 8, 'flops': 1265603017216}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1898404492032}, {'type': 'perplexity', 'in_batch_dim': [8, 64], 'batch_size': 8, 'flops': 1265603017216}, {'type': 'perplexity', 'in_batch_dim': [8, 112], 'batch_size': 8, 'flops': 2214805229440}, {'type': 'perplexity', 'in_batch_dim': [8, 112], 'batch_size': 8, 'flops': 2214805229440}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1582003754624}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1898404492032}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1582003754624}, {'type': 'perplexity', 'in_batch_dim': [8, 64], 'batch_size': 8, 'flops': 1265603017216}, {'type': 'perplexity', 'in_batch_dim': [8, 64], 'batch_size': 8, 'flops': 1265603017216}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1898404492032}, {'type': 'perplexity', 'in_batch_dim': [8, 64], 'batch_size': 8, 'flops': 1265603017216}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1898404492032}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1898404492032}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1582003754624}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1898404492032}, {'type': 'perplexity', 'in_batch_dim': [8, 144], 'batch_size': 8, 'flops': 2847606704256}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1898404492032}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1582003754624}, {'type': 'perplexity', 'in_batch_dim': [8, 128], 'batch_size': 8, 'flops': 2531205966848}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1898404492032}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1898404492032}, {'type': 'perplexity', 'in_batch_dim': [8, 112], 'batch_size': 8, 'flops': 2214805229440}, {'type': 'perplexity', 'in_batch_dim': [8, 112], 'batch_size': 8, 'flops': 2214805229440}, {'type': 'perplexity', 'in_batch_dim': [8, 64], 'batch_size': 8, 'flops': 1265603017216}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1582003754624}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1898404492032}, {'type': 'perplexity', 'in_batch_dim': [8, 128], 'batch_size': 8, 'flops': 2531205966848}, {'type': 'perplexity', 'in_batch_dim': [8, 144], 'batch_size': 8, 'flops': 2847606704256}, {'type': 'perplexity', 'in_batch_dim': [8, 112], 'batch_size': 8, 'flops': 2214805229440}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1582003754624}, {'type': 'perplexity', 'in_batch_dim': [8, 64], 'batch_size': 8, 'flops': 1265603017216}, {'type': 'perplexity', 'in_batch_dim': [8, 64], 'batch_size': 8, 'flops': 1265603017216}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1898404492032}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1582003754624}, {'type': 'perplexity', 'in_batch_dim': [8, 64], 'batch_size': 8, 'flops': 1265603017216}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1898404492032}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1582003754624}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1898404492032}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1898404492032}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1582003754624}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1582003754624}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1582003754624}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1582003754624}, {'type': 'perplexity', 'in_batch_dim': [3, 48], 'batch_size': 8, 'flops': 949202279808}], 'timestamp': '2025-09-10 02:52:23.347079', 'step': 22380, 'epoch': 3} {'type': 'pplx', 'content': 8240.228920267442, 'timestamp': '2025-09-10 02:52:23.350230', 'step': 22380, 'epoch': 3} {'type': 'best_pplx', 'content': 7597.507864978297, 'timestamp': '2025-09-10 02:52:23.352081', 'step': 22380, 'epoch': 3} {'type': 'best_step', 'content': 932, 'timestamp': '2025-09-10 02:52:23.353560', 'step': 22380, 'epoch': 3} {'type': 'total_pplx_flops', 'content': 50323539177094400, 'timestamp': '2025-09-10 02:52:23.355211', 'step': 22380, 'epoch': 3} {'type': 'total_train_flops', 'content': 76563139214162640, 'timestamp': '2025-09-10 02:52:23.357192', 'step': 22380, 'epoch': 3}